diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 28c15cf473..648b1d1b7e 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -4,70 +4,69 @@ on:
   merge_group:
 
 jobs:
-  style:
-    name: Check Style
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - name: Install Rust
-      run: rustup update nightly --no-self-update && rustup default nightly
-    - run: ci/style.sh
-
-  docs:
-    name: Build Documentation
-    needs: [style]
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - name: Install Rust
-      run: rustup update nightly --no-self-update && rustup default nightly
-    - run: ci/dox.sh
-      env:
-        CI: 1
-
-  verify:
-    name: Automatic intrinsic verification
-    needs: [style]
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - name: Install Rust
-      run: rustup update nightly --no-self-update && rustup default nightly
-    - run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml
+  # style:
+  #   name: Check Style
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #   - uses: actions/checkout@v6
+  #   - name: Install Rust
+  #     run: rustup update nightly --no-self-update && rustup default nightly
+  #   - run: ci/style.sh
+  #
+  # docs:
+  #   name: Build Documentation
+  #   needs: [style]
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #   - uses: actions/checkout@v6
+  #   - name: Install Rust
+  #     run: rustup update nightly --no-self-update && rustup default nightly
+  #   - run: ci/dox.sh
+  #     env:
+  #       CI: 1
+  #
+  # verify:
+  #   name: Automatic intrinsic verification
+  #   needs: [style]
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #   - uses: actions/checkout@v6
+  #   - name: Install Rust
+  #     run: rustup update nightly --no-self-update && rustup default nightly
+  #   - run: cargo test --manifest-path crates/stdarch-verify/Cargo.toml
 
   test:
-    needs: [style]
     name: Test
     runs-on: ${{ matrix.target.os }}
     strategy:
       matrix:
         profile:
-        - dev
+        #- dev
         - release
         target:
         # Dockers that are run through docker on linux
-        - tuple: i686-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: x86_64-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: arm-unknown-linux-gnueabihf
-          os: ubuntu-latest
-        - tuple: armv7-unknown-linux-gnueabihf
-          os: ubuntu-latest
-        - tuple: aarch64-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: aarch64_be-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: riscv32gc-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: riscv64gc-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: powerpc-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: powerpc64-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: powerpc64le-unknown-linux-gnu
-          os: ubuntu-latest
+        # tuple: i686-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: x86_64-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: arm-unknown-linux-gnueabihf
+        # os: ubuntu-latest
+        # tuple: armv7-unknown-linux-gnueabihf
+        # os: ubuntu-latest
+        # tuple: aarch64-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: aarch64_be-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: riscv32gc-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: riscv64gc-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: powerpc-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: powerpc64-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: powerpc64le-unknown-linux-gnu
+        # os: ubuntu-latest
         # MIPS targets disabled since they are dropped to tier 3.
         # See https://github.com/rust-lang/compiler-team/issues/648
         #- tuple: mips-unknown-linux-gnu
@@ -78,103 +77,108 @@ jobs:
         #  os: ubuntu-latest
         #- tuple: mipsel-unknown-linux-musl
         #  os: ubuntu-latest
-        - tuple: s390x-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: i586-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: nvptx64-nvidia-cuda
-          os: ubuntu-latest
-        - tuple: thumbv6m-none-eabi
-          os: ubuntu-latest
-        - tuple: thumbv7m-none-eabi
-          os: ubuntu-latest
-        - tuple: thumbv7em-none-eabi
-          os: ubuntu-latest
-        - tuple: thumbv7em-none-eabihf
-          os: ubuntu-latest
-        - tuple: loongarch64-unknown-linux-gnu
-          os: ubuntu-latest
-        - tuple: wasm32-wasip1
-          os: ubuntu-latest
+        # tuple: s390x-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: i586-unknown-linux-gnu
+        # os: ubuntu-latest
+        # tuple: nvptx64-nvidia-cuda
+        # os: ubuntu-latest
+        # tuple: amdgcn-amd-amdhsa
+        # os: ubuntu-latest
+        # tuple: thumbv6m-none-eabi
+        # os: ubuntu-latest
+        # tuple: thumbv7m-none-eabi
+        # os: ubuntu-latest
+        # tuple: thumbv7em-none-eabi
+        # os: ubuntu-latest
+        # tuple: thumbv7em-none-eabihf
+        # os: ubuntu-latest
+        # tuple: loongarch64-unknown-linux-gnu
+        # os: ubuntu-latest
+        # hexagon doesn't build at the moment due to a libc issue.
+        # - tuple: hexagon-unknown-linux-musl
+        #   os: ubuntu-latest
+        # tuple: wasm32-wasip1
+        # os: ubuntu-latest
 
         # macOS targets
-        - tuple: x86_64-apple-darwin
-          os: macos-15-large
-        - tuple: x86_64-apple-ios-macabi
-          os: macos-15-large
-        - tuple: aarch64-apple-darwin
-          os: macos-15
-        - tuple: aarch64-apple-ios-macabi
-          os: macos-15
+        # tuple: x86_64-apple-darwin
+        # os: macos-15-intel
+        # tuple: x86_64-apple-ios-macabi
+        # os: macos-15-intel
+        # tuple: aarch64-apple-darwin
+        # os: macos-15
+        # tuple: aarch64-apple-ios-macabi
+        # os: macos-15
         # FIXME: gh-actions build environment doesn't have linker support
         # - tuple: i686-apple-darwin
         #   os: macos-13
 
         # Windows targets
-        - tuple: x86_64-pc-windows-msvc
-          os: windows-2025
-        - tuple: i686-pc-windows-msvc
-          os: windows-2025
+        # tuple: x86_64-pc-windows-msvc
+        # os: windows-2025
+        # tuple: i686-pc-windows-msvc
+        # os: windows-2025
         - tuple: aarch64-pc-windows-msvc
           os: windows-11-arm
-        - tuple: arm64ec-pc-windows-msvc
-          os: windows-11-arm
-        - tuple: x86_64-pc-windows-gnu
-          os: windows-2025
+        # tuple: arm64ec-pc-windows-msvc
+        # os: windows-11-arm
+        # tuple: x86_64-pc-windows-gnu
+        # os: windows-2025
         # - tuple: i686-pc-windows-gnu
         #   os: windows-latest
 
         # Add additional variables to the matrix variations generated above using `include`:
-        include:
+        #nclude:
         # `TEST_EVERYTHING` setups - there should be at least 1 for each architecture
-        - target:
-            tuple: aarch64-unknown-linux-gnu
-            os: ubuntu-latest
-          test_everything: true
-        - target:
-            tuple: aarch64_be-unknown-linux-gnu
-            os: ubuntu-latest
-          test_everything: true
-          build_std: true
-        - target:
-            tuple: armv7-unknown-linux-gnueabihf
-            os: ubuntu-latest
-          test_everything: true
-        - target:
-            tuple: loongarch64-unknown-linux-gnu
-            os: ubuntu-latest
-          test_everything: true
-        - target:
-            tuple: powerpc-unknown-linux-gnu
-            os: ubuntu-latest
-          disable_assert_instr: true
-          test_everything: true
-        - target:
-            tuple: powerpc64-unknown-linux-gnu
-            os: ubuntu-latest
-          disable_assert_instr: true
-          test_everything: true
-        - target:
-            tuple: powerpc64le-unknown-linux-gnu
-            os: ubuntu-latest
-          test_everything: true
-        - target:
-            tuple: riscv32gc-unknown-linux-gnu
-            os: ubuntu-latest
-          test_everything: true
-          build_std: true
-        - target:
-            tuple: riscv64gc-unknown-linux-gnu
-            os: ubuntu-latest
-          test_everything: true
-        - target:
-            tuple: s390x-unknown-linux-gnu
-            os: ubuntu-latest
-          test_everything: true
-        - target:
-            tuple: x86_64-unknown-linux-gnu
-            os: ubuntu-latest
-          test_everything: true
+        # target:
+        #   tuple: aarch64-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # test_everything: true
+        # target:
+        #   tuple: aarch64_be-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # test_everything: true
+        # build_std: true
+        # target:
+        #   tuple: armv7-unknown-linux-gnueabihf
+        #   os: ubuntu-latest
+        # test_everything: true
+        # target:
+        #   tuple: loongarch64-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # test_everything: true
+        # target:
+        #   tuple: powerpc-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # disable_assert_instr: true
+        # test_everything: true
+        # target:
+        #   tuple: powerpc64-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # disable_assert_instr: true
+        # test_everything: true
+        # target:
+        #   tuple: powerpc64le-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # test_everything: true
+        # target:
+        #   tuple: riscv32gc-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # test_everything: true
+        # build_std: true
+        # target:
+        #   tuple: riscv64gc-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # test_everything: true
+        # target:
+        #   tuple: s390x-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # test_everything: true
+        # target:
+        #   tuple: x86_64-unknown-linux-gnu
+        #   os: ubuntu-latest
+        # test_everything: true
         # MIPS targets disabled since they are dropped to tier 3.
         # See https://github.com/rust-lang/compiler-team/issues/648
         #- target:
@@ -193,17 +197,27 @@ jobs:
         #    tuple: mipsel-unknown-linux-musl
         #    os: ubuntu-latest
         #  norun: true
-        - target:
-            tuple: aarch64-apple-darwin
-            os: macos-15
-          norun: true # https://github.com/rust-lang/stdarch/issues/1206
-        - target:
-            tuple: aarch64-apple-ios-macabi
-            os: macos-15
-          norun: true # https://github.com/rust-lang/stdarch/issues/1206
+        # target:
+        #   tuple: aarch64-apple-darwin
+        #   os: macos-15
+        # norun: true # https://github.com/rust-lang/stdarch/issues/1206
+        # target:
+        #   tuple: aarch64-apple-ios-macabi
+        #   os: macos-15
+        # norun: true # https://github.com/rust-lang/stdarch/issues/1206
+        # target:
+        #   tuple: amdgcn-amd-amdhsa
+        #   os: ubuntu-latest
+        # norun: true
+        # hexagon doesn't build at the moment due to a libc issue.
+        # - target:
+        #     tuple: hexagon-unknown-linux-musl
+        #     os: ubuntu-latest
+        #   norun: true
+        #   build_std: true
 
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
     - name: Install Rust
       run: |
         rustup update nightly --no-self-update
@@ -212,15 +226,20 @@ jobs:
 
     - run: rustup target add ${{ matrix.target.tuple }}
       shell: bash
-      if: matrix.build_std == ''
+      if: matrix.build_std == '' && matrix.target.tuple != 'amdgcn-amd-amdhsa'
     - run: |
         rustup component add rust-src
         echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV
       shell: bash
       if: matrix.build_std != ''
+    - run: |
+        rustup component add rust-src
+        echo "CARGO_UNSTABLE_BUILD_STD=core,alloc" >> $GITHUB_ENV
+      shell: bash
+      if: matrix.target.tuple == 'amdgcn-amd-amdhsa'
 
     # Configure some env vars based on matrix configuration
-    - run: echo "PROFILE=--profile=${{matrix.profile}}" >> $GITHUB_ENV
+    - run: echo "PROFILE=${{matrix.profile}}" >> $GITHUB_ENV
       shell: bash
     - run: echo "NORUN=1" >> $GITHUB_ENV
       shell: bash
@@ -233,7 +252,7 @@ jobs:
       if: matrix.disable_assert_instr != ''
     - run: echo "NOSTD=1" >> $GITHUB_ENV
       shell: bash
-      if: startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda'
+      if: startsWith(matrix.target.tuple, 'thumb') || matrix.target.tuple == 'nvptx64-nvidia-cuda' || matrix.target.tuple == 'amdgcn-amd-amdhsa'
 
     # Windows & OSX go straight to `run.sh` ...
     - run: ./ci/run.sh
@@ -249,72 +268,94 @@ jobs:
       env:
         TARGET: ${{ matrix.target.tuple }}
 
-  intrinsic-test:
-    needs: [style]
-    name: Intrinsic Test
-    runs-on: ubuntu-latest 
-    strategy:
-      matrix:
-        target:
-          - aarch64-unknown-linux-gnu
-          - aarch64_be-unknown-linux-gnu
-          - armv7-unknown-linux-gnueabihf
-          - arm-unknown-linux-gnueabihf
-          - x86_64-unknown-linux-gnu
-        profile: [dev, release]
-        include:
-          - target: aarch64_be-unknown-linux-gnu
-            build_std: true
-
-    steps:
-    - uses: actions/checkout@v4
-    - name: Install Rust
-      run: |
-        rustup update nightly --no-self-update
-        rustup default nightly
-    - run: rustup target add ${{ matrix.target }}
-      if: ${{ (matrix.build_std || false) == false }}
-    - run: |
-        rustup component add rust-src
-        echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV
-      if: ${{ matrix.build_std }}
-
-    # Configure some env vars based on matrix configuration
-    - run: echo "PROFILE=--profile=${{ matrix.profile }}" >> $GITHUB_ENV
-    - run: ./ci/intrinsic-test-docker.sh ${{ matrix.target }}
-      if: ${{ !startsWith(matrix.target, 'thumb') }}
-      env:
-        TARGET: ${{ matrix.target }}
+  # intrinsic-test:
+  #   needs: [style]
+  #   name: Intrinsic Test
+  #   runs-on: ubuntu-latest
+  #   strategy:
+  #     matrix:
+  #       target:
+  #         - aarch64-unknown-linux-gnu
+  #         - aarch64_be-unknown-linux-gnu
+  #         - armv7-unknown-linux-gnueabihf
+  #         - arm-unknown-linux-gnueabihf
+  #         - x86_64-unknown-linux-gnu
+  #       profile: [dev, release]
+  #       include:
+  #         - target: aarch64_be-unknown-linux-gnu
+  #           build_std: true
+  #
+  #   steps:
+  #   - uses: actions/checkout@v6
+  #   - name: Install Rust
+  #     run: |
+  #       rustup update nightly --no-self-update
+  #       rustup default nightly
+  #   - run: rustup target add ${{ matrix.target }}
+  #     if: ${{ (matrix.build_std || false) == false }}
+  #   - run: |
+  #       rustup component add rust-src
+  #       echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV
+  #     if: ${{ matrix.build_std }}
+  #
+  #   # Configure some env vars based on matrix configuration
+  #   - run: echo "PROFILE=${{ matrix.profile }}" >> $GITHUB_ENV
+  #   - run: ./ci/intrinsic-test-docker.sh ${{ matrix.target }}
+  #     if: ${{ !startsWith(matrix.target, 'thumb') }}
+  #     env:
+  #       TARGET: ${{ matrix.target }}
 
   # Check that the generated files agree with the checked-in versions.
-  check-stdarch-gen:
-    needs: [style]
-    name: Check stdarch-gen-{arm, loongarch} output
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v4
-    - name: Install Rust
-      run: rustup update nightly && rustup default nightly && rustup component add rustfmt
-    - name: Check arm spec
-      run: |
-        cargo run --bin=stdarch-gen-arm --release -- crates/stdarch-gen-arm/spec
-        git diff --exit-code
-    - name: Check lsx.spec
-      run: |
-        cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lsx.spec
-        git diff --exit-code
-    - name: Check lasx.spec
-      run: |
-        cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lasx.spec
-        git diff --exit-code
+  # check-stdarch-gen:
+  #   needs: [style]
+  #   name: Check stdarch-gen-{arm, loongarch, hexagon} output
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #   - uses: actions/checkout@v6
+  #   - name: Install Rust
+  #     run: rustup update nightly && rustup default nightly && rustup component add rustfmt
+  #   - name: Check arm spec
+  #     run: |
+  #       cargo run --bin=stdarch-gen-arm --release -- crates/stdarch-gen-arm/spec
+  #       git diff --exit-code
+  #   - name: Check lsx.spec
+  #     run: |
+  #       cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lsx.spec
+  #       git diff --exit-code
+  #   - name: Check lasx.spec
+  #     run: |
+  #       cargo run --bin=stdarch-gen-loongarch --release -- crates/stdarch-gen-loongarch/lasx.spec
+  #       git diff --exit-code
+  #   - name: Check hexagon
+  #     run: |
+  #       cargo run -p stdarch-gen-hexagon --release
+  #       git diff --exit-code
+  #
+  # Run some tests with Miri. Most stdarch functions use platform-specific intrinsics
+  # that Miri does not support. Also Miri is reltively slow.
+  #
+  # Below we run some tests where Miri might catch UB, for instance on intrinsics that read from
+  # or write to pointers.
+  # miri:
+  #   needs: [style]
+  #   name: Run some tests with miri
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #   - uses: actions/checkout@v6
+  #   - name: Install Rust
+  #     run: rustup update nightly && rustup default nightly && rustup component add miri
+  #   - name: Run miri tests
+  #     env:
+  #       TARGET: "aarch64-unknown-linux-gnu"
+  #       RUSTFLAGS: "-Ctarget-cpu=neoverse-v3"
+  #     run: |
+  #       # read filters and join them with a space.
+  #       FILTERS=$(cat aarch64-miri-tests.txt | tr '\n' ' ')
+  #       cargo miri test -p core_arch --target aarch64-unknown-linux-gnu -- $FILTERS
 
   conclusion:
     needs:
-      - docs
-      - verify
       - test
-      - intrinsic-test
-      - check-stdarch-gen
     runs-on: ubuntu-latest
     # We need to ensure this job does *not* get skipped if its dependencies fail,
     # because a skipped job is considered a success by GitHub. So we have to
diff --git a/.github/workflows/rustc-pull.yml b/.github/workflows/rustc-pull.yml
index 1379bd06b0..d2feb1add6 100644
--- a/.github/workflows/rustc-pull.yml
+++ b/.github/workflows/rustc-pull.yml
@@ -13,10 +13,11 @@ jobs:
     uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main
     with:
       github-app-id: ${{ vars.APP_CLIENT_ID }}
+      pr-author: "workflows-stdarch[bot]"
       # https://rust-lang.zulipchat.com/#narrow/channel/208962-t-libs.2Fstdarch/topic/Subtree.20sync.20automation/with/528461782
       zulip-stream-id: 208962
       zulip-bot-email:  "stdarch-ci-bot@rust-lang.zulipchat.com"
-      pr-base-branch: master
+      pr-base-branch: main
       branch-name: rustc-pull
     secrets:
       zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}
diff --git a/Cargo.lock b/Cargo.lock
index 70f09adf2c..a1c31fa9f0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,18 +4,18 @@ version = 4
 
 [[package]]
 name = "aho-corasick"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
 dependencies = [
  "memchr",
 ]
 
 [[package]]
 name = "anstream"
-version = "0.6.20"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192"
+checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -28,44 +28,44 @@ dependencies = [
 
 [[package]]
 name = "anstyle"
-version = "1.0.11"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
+checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
 
 [[package]]
 name = "anstyle-parse"
-version = "0.2.7"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
+checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
 dependencies = [
  "utf8parse",
 ]
 
 [[package]]
 name = "anstyle-query"
-version = "1.1.4"
+version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2"
+checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
 dependencies = [
- "windows-sys 0.60.2",
+ "windows-sys",
 ]
 
 [[package]]
 name = "anstyle-wincon"
-version = "3.0.10"
+version = "3.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a"
+checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
 dependencies = [
  "anstyle",
  "once_cell_polyfill",
- "windows-sys 0.60.2",
+ "windows-sys",
 ]
 
 [[package]]
 name = "anyhow"
-version = "1.0.99"
+version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
 
 [[package]]
 name = "assert-instr-macro"
@@ -84,15 +84,15 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
 [[package]]
 name = "bitflags"
-version = "2.9.4"
+version = "2.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394"
+checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
 
 [[package]]
 name = "cc"
-version = "1.2.36"
+version = "1.2.59"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5252b3d2648e5eedbc1a6f501e3c795e07025c1e93bbf8bbdd6eef7f447a6d54"
+checksum = "b7a4d3ec6524d28a329fc53654bbadc9bdd7b0431f5d65f1a56ffb28a1ee5283"
 dependencies = [
  "find-msvc-tools",
  "shlex",
@@ -100,15 +100,15 @@ dependencies = [
 
 [[package]]
 name = "cfg-if"
-version = "1.0.3"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
 
 [[package]]
 name = "clap"
-version = "4.5.47"
+version = "4.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931"
+checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -116,9 +116,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.47"
+version = "4.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
 dependencies = [
  "anstream",
  "anstyle",
@@ -128,9 +128,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.47"
+version = "4.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c"
+checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a"
 dependencies = [
  "heck",
  "proc-macro2",
@@ -140,15 +140,15 @@ dependencies = [
 
 [[package]]
 name = "clap_lex"
-version = "0.7.5"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
+checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
 
 [[package]]
 name = "colorchoice"
-version = "1.0.4"
+version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
 
 [[package]]
 name = "core_arch"
@@ -185,9 +185,9 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
 
 [[package]]
 name = "darling"
-version = "0.20.11"
+version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
+checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d"
 dependencies = [
  "darling_core",
  "darling_macro",
@@ -195,11 +195,10 @@ dependencies = [
 
 [[package]]
 name = "darling_core"
-version = "0.20.11"
+version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
+checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0"
 dependencies = [
- "fnv",
  "ident_case",
  "proc-macro2",
  "quote",
@@ -209,9 +208,9 @@ dependencies = [
 
 [[package]]
 name = "darling_macro"
-version = "0.20.11"
+version = "0.23.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
+checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d"
 dependencies = [
  "darling_core",
  "quote",
@@ -231,10 +230,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
 
 [[package]]
-name = "env_logger"
-version = "0.8.4"
+name = "env_filter"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3"
+checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef"
 dependencies = [
  "log",
  "regex",
@@ -253,6 +252,16 @@ dependencies = [
  "termcolor",
 ]
 
+[[package]]
+name = "env_logger"
+version = "0.11.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a"
+dependencies = [
+ "env_filter",
+ "log",
+]
+
 [[package]]
 name = "equivalent"
 version = "1.0.2"
@@ -261,27 +270,41 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 
 [[package]]
 name = "find-msvc-tools"
-version = "0.1.1"
+version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
 
 [[package]]
-name = "fnv"
-version = "1.0.7"
+name = "foldhash"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
 [[package]]
 name = "getrandom"
-version = "0.2.16"
+version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
 dependencies = [
  "cfg-if",
  "libc",
  "wasi",
 ]
 
+[[package]]
+name = "getrandom"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "rand_core 0.10.0",
+ "wasip2",
+ "wasip3",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.12.3"
@@ -293,6 +316,15 @@ name = "hashbrown"
 version = "0.15.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+dependencies = [
+ "foldhash",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
 
 [[package]]
 name = "heck"
@@ -308,9 +340,15 @@ checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
 
 [[package]]
 name = "humantime"
-version = "2.2.0"
+version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f"
+checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424"
+
+[[package]]
+name = "id-arena"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
 
 [[package]]
 name = "ident_case"
@@ -330,12 +368,14 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "2.11.0"
+version = "2.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9"
+checksum = "45a8a2b9cb3e0b0c1803dbb0758ffac5de2f425b23c28f518faabd9d805342ff"
 dependencies = [
  "equivalent",
- "hashbrown 0.15.5",
+ "hashbrown 0.16.1",
+ "serde",
+ "serde_core",
 ]
 
 [[package]]
@@ -357,20 +397,20 @@ dependencies = [
 
 [[package]]
 name = "is-terminal"
-version = "0.4.16"
+version = "0.4.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
+checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
 dependencies = [
  "hermit-abi",
  "libc",
- "windows-sys 0.59.0",
+ "windows-sys",
 ]
 
 [[package]]
 name = "is_terminal_polyfill"
-version = "1.70.1"
+version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
+checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
 
 [[package]]
 name = "itertools"
@@ -383,15 +423,21 @@ dependencies = [
 
 [[package]]
 name = "itoa"
-version = "1.0.15"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
+[[package]]
+name = "leb128fmt"
+version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
 
 [[package]]
 name = "libc"
-version = "0.2.175"
+version = "0.2.184"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
+checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af"
 
 [[package]]
 name = "linked-hash-map"
@@ -401,21 +447,21 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
 
 [[package]]
 name = "log"
-version = "0.4.28"
+version = "0.4.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
 
 [[package]]
 name = "memchr"
-version = "2.7.6"
+version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
 
 [[package]]
 name = "once_cell_polyfill"
-version = "1.70.1"
+version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
+checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
 
 [[package]]
 name = "ppv-lite86"
@@ -436,11 +482,21 @@ dependencies = [
  "log",
 ]
 
+[[package]]
+name = "prettyplease"
+version = "0.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
+dependencies = [
+ "proc-macro2",
+ "syn",
+]
+
 [[package]]
 name = "proc-macro2"
-version = "1.0.101"
+version = "1.0.106"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
 dependencies = [
  "unicode-ident",
 ]
@@ -467,24 +523,30 @@ dependencies = [
 
 [[package]]
 name = "quickcheck"
-version = "1.0.3"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
+checksum = "95c589f335db0f6aaa168a7cd27b1fc6920f5e1470c804f814d9cd6e62a0f70b"
 dependencies = [
- "env_logger 0.8.4",
+ "env_logger 0.11.10",
  "log",
- "rand",
+ "rand 0.10.0",
 ]
 
 [[package]]
 name = "quote"
-version = "1.0.40"
+version = "1.0.45"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
 dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "r-efi"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
+
 [[package]]
 name = "rand"
 version = "0.8.5"
@@ -493,7 +555,17 @@ checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
 dependencies = [
  "libc",
  "rand_chacha",
- "rand_core",
+ "rand_core 0.6.4",
+]
+
+[[package]]
+name = "rand"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8"
+dependencies = [
+ "getrandom 0.4.2",
+ "rand_core 0.10.0",
 ]
 
 [[package]]
@@ -503,7 +575,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
 dependencies = [
  "ppv-lite86",
- "rand_core",
+ "rand_core 0.6.4",
 ]
 
 [[package]]
@@ -512,9 +584,15 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.17",
 ]
 
+[[package]]
+name = "rand_core"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c8d0fd677905edcbeedbf2edb6494d676f0e98d54d5cf9bda0b061cb8fb8aba"
+
 [[package]]
 name = "rayon"
 version = "1.11.0"
@@ -537,9 +615,9 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.11.2"
+version = "1.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -549,9 +627,9 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.4.10"
+version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
 dependencies = [
  "aho-corasick",
  "memchr",
@@ -560,21 +638,21 @@ dependencies = [
 
 [[package]]
 name = "regex-syntax"
-version = "0.8.6"
+version = "0.8.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
 
 [[package]]
 name = "rustc-demangle"
-version = "0.1.26"
+version = "0.1.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
+checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
 
 [[package]]
 name = "ryu"
-version = "1.0.20"
+version = "1.0.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
 
 [[package]]
 name = "same-file"
@@ -587,36 +665,46 @@ dependencies = [
 
 [[package]]
 name = "semver"
-version = "1.0.26"
+version = "1.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
+checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
 
 [[package]]
 name = "serde"
-version = "1.0.219"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
 dependencies = [
+ "serde_core",
  "serde_derive",
 ]
 
 [[package]]
 name = "serde-xml-rs"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53630160a98edebde0123eb4dfd0fce6adff091b2305db3154a9e920206eb510"
+checksum = "cc2215ce3e6a77550b80a1c37251b7d294febaf42e36e21b7b411e0bf54d540d"
 dependencies = [
  "log",
  "serde",
  "thiserror",
- "xml-rs",
+ "xml",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.219"
+version = "1.0.228"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -625,32 +713,32 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.143"
+version = "1.0.149"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
 dependencies = [
  "itoa",
  "memchr",
- "ryu",
  "serde",
+ "serde_core",
+ "zmij",
 ]
 
 [[package]]
 name = "serde_with"
-version = "3.14.0"
+version = "3.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5"
+checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f"
 dependencies = [
- "serde",
- "serde_derive",
+ "serde_core",
  "serde_with_macros",
 ]
 
 [[package]]
 name = "serde_with_macros"
-version = "3.14.0"
+version = "3.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
+checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65"
 dependencies = [
  "darling",
  "proc-macro2",
@@ -699,11 +787,25 @@ dependencies = [
  "walkdir",
 ]
 
+[[package]]
+name = "stdarch-gen-hexagon"
+version = "0.1.0"
+dependencies = [
+ "regex",
+]
+
+[[package]]
+name = "stdarch-gen-hexagon-scalar"
+version = "0.1.0"
+dependencies = [
+ "regex",
+]
+
 [[package]]
 name = "stdarch-gen-loongarch"
 version = "0.1.0"
 dependencies = [
- "rand",
+ "rand 0.8.5",
 ]
 
 [[package]]
@@ -736,7 +838,7 @@ version = "0.0.0"
 dependencies = [
  "core_arch",
  "quickcheck",
- "rand",
+ "rand 0.8.5",
 ]
 
 [[package]]
@@ -747,9 +849,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
 [[package]]
 name = "syn"
-version = "2.0.106"
+version = "2.0.117"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -773,18 +875,18 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.69"
+version = "2.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.69"
+version = "2.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -793,9 +895,15 @@ dependencies = [
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.18"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
+checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
 
 [[package]]
 name = "utf8parse"
@@ -820,194 +928,196 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
 
 [[package]]
-name = "wasmparser"
-version = "0.235.0"
+name = "wasip2"
+version = "1.0.2+wasi-0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "161296c618fa2d63f6ed5fffd1112937e803cb9ec71b32b01a76321555660917"
+checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5"
 dependencies = [
- "bitflags",
- "indexmap 2.11.0",
- "semver",
+ "wit-bindgen",
 ]
 
 [[package]]
-name = "wasmprinter"
-version = "0.235.0"
+name = "wasip3"
+version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75aa8e9076de6b9544e6dab4badada518cca0bf4966d35b131bbd057aed8fa0a"
+checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
 dependencies = [
- "anyhow",
- "termcolor",
- "wasmparser",
+ "wit-bindgen",
 ]
 
 [[package]]
-name = "winapi-util"
-version = "0.1.10"
+name = "wasm-encoder"
+version = "0.244.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22"
+checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
 dependencies = [
- "windows-sys 0.60.2",
+ "leb128fmt",
+ "wasmparser 0.244.0",
 ]
 
 [[package]]
-name = "windows-link"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
-
-[[package]]
-name = "windows-sys"
-version = "0.59.0"
+name = "wasm-metadata"
+version = "0.244.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
 dependencies = [
- "windows-targets 0.52.6",
+ "anyhow",
+ "indexmap 2.13.1",
+ "wasm-encoder",
+ "wasmparser 0.244.0",
 ]
 
 [[package]]
-name = "windows-sys"
-version = "0.60.2"
+name = "wasmparser"
+version = "0.235.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+checksum = "161296c618fa2d63f6ed5fffd1112937e803cb9ec71b32b01a76321555660917"
 dependencies = [
- "windows-targets 0.53.3",
+ "bitflags",
+ "indexmap 2.13.1",
+ "semver",
 ]
 
 [[package]]
-name = "windows-targets"
-version = "0.52.6"
+name = "wasmparser"
+version = "0.244.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
 dependencies = [
- "windows_aarch64_gnullvm 0.52.6",
- "windows_aarch64_msvc 0.52.6",
- "windows_i686_gnu 0.52.6",
- "windows_i686_gnullvm 0.52.6",
- "windows_i686_msvc 0.52.6",
- "windows_x86_64_gnu 0.52.6",
- "windows_x86_64_gnullvm 0.52.6",
- "windows_x86_64_msvc 0.52.6",
+ "bitflags",
+ "hashbrown 0.15.5",
+ "indexmap 2.13.1",
+ "semver",
 ]
 
 [[package]]
-name = "windows-targets"
-version = "0.53.3"
+name = "wasmprinter"
+version = "0.235.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91"
+checksum = "75aa8e9076de6b9544e6dab4badada518cca0bf4966d35b131bbd057aed8fa0a"
 dependencies = [
- "windows-link",
- "windows_aarch64_gnullvm 0.53.0",
- "windows_aarch64_msvc 0.53.0",
- "windows_i686_gnu 0.53.0",
- "windows_i686_gnullvm 0.53.0",
- "windows_i686_msvc 0.53.0",
- "windows_x86_64_gnu 0.53.0",
- "windows_x86_64_gnullvm 0.53.0",
- "windows_x86_64_msvc 0.53.0",
+ "anyhow",
+ "termcolor",
+ "wasmparser 0.235.0",
 ]
 
 [[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
-
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764"
-
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
-
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c"
-
-[[package]]
-name = "windows_i686_gnu"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
-
-[[package]]
-name = "windows_i686_gnu"
-version = "0.53.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3"
-
-[[package]]
-name = "windows_i686_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
-
-[[package]]
-name = "windows_i686_gnullvm"
-version = "0.53.0"
+name = "winapi-util"
+version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys",
+]
 
 [[package]]
-name = "windows_i686_msvc"
-version = "0.52.6"
+name = "windows-link"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 
 [[package]]
-name = "windows_i686_msvc"
-version = "0.53.0"
+name = "windows-sys"
+version = "0.61.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
 
 [[package]]
-name = "windows_x86_64_gnu"
-version = "0.52.6"
+name = "wit-bindgen"
+version = "0.51.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
+dependencies = [
+ "wit-bindgen-rust-macro",
+]
 
 [[package]]
-name = "windows_x86_64_gnu"
-version = "0.53.0"
+name = "wit-bindgen-core"
+version = "0.51.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba"
+checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
+dependencies = [
+ "anyhow",
+ "heck",
+ "wit-parser",
+]
 
 [[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.52.6"
+name = "wit-bindgen-rust"
+version = "0.51.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
+dependencies = [
+ "anyhow",
+ "heck",
+ "indexmap 2.13.1",
+ "prettyplease",
+ "syn",
+ "wasm-metadata",
+ "wit-bindgen-core",
+ "wit-component",
+]
 
 [[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.53.0"
+name = "wit-bindgen-rust-macro"
+version = "0.51.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57"
+checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
+dependencies = [
+ "anyhow",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wit-bindgen-core",
+ "wit-bindgen-rust",
+]
 
 [[package]]
-name = "windows_x86_64_msvc"
-version = "0.52.6"
+name = "wit-component"
+version = "0.244.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
+dependencies = [
+ "anyhow",
+ "bitflags",
+ "indexmap 2.13.1",
+ "log",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "wasm-encoder",
+ "wasm-metadata",
+ "wasmparser 0.244.0",
+ "wit-parser",
+]
 
 [[package]]
-name = "windows_x86_64_msvc"
-version = "0.53.0"
+name = "wit-parser"
+version = "0.244.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486"
+checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
+dependencies = [
+ "anyhow",
+ "id-arena",
+ "indexmap 2.13.1",
+ "log",
+ "semver",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "unicode-xid",
+ "wasmparser 0.244.0",
+]
 
 [[package]]
-name = "xml-rs"
-version = "0.8.27"
+name = "xml"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fd8403733700263c6eb89f192880191f1b83e332f7a20371ddcf421c4a337c7"
+checksum = "b8aa498d22c9bbaf482329839bc5620c46be275a19a812e9a22a2b07529a642a"
 
 [[package]]
 name = "yaml-rust"
@@ -1020,20 +1130,26 @@ dependencies = [
 
 [[package]]
 name = "zerocopy"
-version = "0.8.27"
+version = "0.8.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
+checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
 dependencies = [
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.8.27"
+version = "0.8.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
+checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
 dependencies = [
  "proc-macro2",
  "quote",
  "syn",
 ]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
diff --git a/Cargo.toml b/Cargo.toml
index 5979096439..e3963a6987 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-resolver = "1"
+resolver = "3"
 members = [
   "crates/*",
   "examples",
diff --git a/aarch64-miri-tests.txt b/aarch64-miri-tests.txt
new file mode 100644
index 0000000000..2c0dbb8297
--- /dev/null
+++ b/aarch64-miri-tests.txt
@@ -0,0 +1,4 @@
+test_vld3
+test_vld4
+neon::load_tests
+neon::store_tests
diff --git a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index 70c0650975..8435dd3ded 100644
--- a/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -10,10 +10,15 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   qemu-user \
   make \
   file \
-  clang \
-  lld
+  xz-utils \
+  wget
+
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
 
 ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
     CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
-    OBJDUMP=aarch64-linux-gnu-objdump \
-    STDARCH_TEST_SKIP_FEATURE=tme
+    OBJDUMP=aarch64-linux-gnu-objdump
diff --git a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
index 56ddbd990b..0e8efc64bb 100644
--- a/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile
@@ -9,10 +9,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   qemu-user \
   make \
   file \
-  clang \
   curl \
   xz-utils \
-  lld
+  wget
 
 ENV TOOLCHAIN="arm-gnu-toolchain-14.3.rel1-x86_64-aarch64_be-none-linux-gnu"
 
@@ -21,10 +20,15 @@ RUN curl -L "https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/bin
 RUN tar -xvf "${TOOLCHAIN}.tar.xz"
 RUN mkdir /toolchains && mv "./${TOOLCHAIN}" /toolchains
 
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
+
 ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}"
 ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
 
 ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc"
 ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}"
 ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump"
-ENV STDARCH_TEST_SKIP_FEATURE=tme
diff --git a/ci/docker/amdgcn-amd-amdhsa/Dockerfile b/ci/docker/amdgcn-amd-amdhsa/Dockerfile
new file mode 100644
index 0000000000..65cf281b14
--- /dev/null
+++ b/ci/docker/amdgcn-amd-amdhsa/Dockerfile
@@ -0,0 +1,5 @@
+FROM ubuntu:25.10
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  libc6-dev \
+  ca-certificates
diff --git a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
index 602249c0ec..c0a4ed3e70 100644
--- a/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@@ -10,8 +10,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   qemu-user \
   make \
   file \
-  clang \
-  lld
+  wget
+
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
+
 ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \
     CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \
     OBJDUMP=arm-linux-gnueabihf-objdump
diff --git a/ci/docker/hexagon-unknown-linux-musl/Dockerfile b/ci/docker/hexagon-unknown-linux-musl/Dockerfile
new file mode 100644
index 0000000000..f6c0efd946
--- /dev/null
+++ b/ci/docker/hexagon-unknown-linux-musl/Dockerfile
@@ -0,0 +1,46 @@
+FROM ubuntu:25.10
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  libc6-dev \
+  ca-certificates \
+  curl \
+  zstd \
+  file \
+  make \
+  libc++1 \
+  libglib2.0-0t64 \
+  libunwind-20 \
+  liburing2 \
+  llvm
+
+# The Hexagon toolchain requires libc++ and libunwind at runtime - create symlinks from versioned files
+RUN cd /usr/lib/x86_64-linux-gnu && \
+    for f in libc++.so.1.0.*; do ln -sf "$f" libc++.so.1; done && \
+    for f in libc++abi.so.1.0.*; do ln -sf "$f" libc++abi.so.1; done && \
+    for f in libunwind.so.1.0.*; do ln -sf "$f" libunwind.so.1; done
+
+# Download and install the Hexagon cross toolchain from
+# https://github.com/quic/toolchain_for_hexagon/releases/tag/v21.1.8
+# Includes clang cross-compiler, musl sysroot, and qemu-hexagon.
+#
+# The tarball contains directories with restrictive (0700) permissions.
+# In rootless Podman, chmod fails on tar-extracted files within the same
+# layer due to overlayfs limitations in user namespaces. Splitting into
+# two RUN steps lets chmod work via overlayfs copy-up from the lower layer.
+RUN curl -L -o /tmp/hexagon-toolchain.tar.zst \
+    https://artifacts.codelinaro.org/artifactory/codelinaro-toolchain-for-hexagon/21.1.8/clang+llvm-21.1.8-cross-hexagon-unknown-linux-musl.tar.zst && \
+    mkdir -p /opt/hexagon-toolchain && \
+    cd /opt/hexagon-toolchain && \
+    (unzstd -c /tmp/hexagon-toolchain.tar.zst | tar -xf - --strip-components=2 --no-same-permissions || true) && \
+    rm /tmp/hexagon-toolchain.tar.zst
+RUN find /opt/hexagon-toolchain -type d -exec chmod a+rx {} + 2>/dev/null; \
+    find /opt/hexagon-toolchain -type f -exec chmod a+r {} + 2>/dev/null; \
+    find /opt/hexagon-toolchain -type f -perm /111 -exec chmod a+rx {} + 2>/dev/null; \
+    /opt/hexagon-toolchain/bin/hexagon-unknown-linux-musl-clang --version
+
+ENV PATH="/opt/hexagon-toolchain/bin:${PATH}" \
+    CARGO_TARGET_HEXAGON_UNKNOWN_LINUX_MUSL_LINKER=hexagon-unknown-linux-musl-clang \
+    CARGO_TARGET_HEXAGON_UNKNOWN_LINUX_MUSL_RUNNER="qemu-hexagon -L /opt/hexagon-toolchain/target/hexagon-unknown-linux-musl" \
+    CARGO_UNSTABLE_BUILD_STD_FEATURES=llvm-libunwind \
+    OBJDUMP=llvm-objdump
diff --git a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
index a8b352881e..8bcd640945 100644
--- a/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
+++ b/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:25.10
+FROM ubuntu:25.04 # gcc-mips64-linux-gnuabi64 not available in 25.10
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         gcc libc6-dev qemu-user ca-certificates \
diff --git a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
index 147a3df614..9aa0ce0578 100644
--- a/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
+++ b/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:25.10
+FROM ubuntu:25.04 # gcc-mips64el-linux-gnuabi64 not available in 25.10
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
         gcc libc6-dev qemu-user ca-certificates \
diff --git a/ci/docker/wasm32-wasip1/Dockerfile b/ci/docker/wasm32-wasip1/Dockerfile
index 0527c0df17..cb4a9b2948 100644
--- a/ci/docker/wasm32-wasip1/Dockerfile
+++ b/ci/docker/wasm32-wasip1/Dockerfile
@@ -11,5 +11,3 @@ ENV VERSION=v38.0.3
 
 RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/${VERSION}/wasmtime-${VERSION}-x86_64-linux.tar.xz | tar xJf -
 ENV PATH=$PATH:/wasmtime-${VERSION}-x86_64-linux
-
-ENV CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime -Wexceptions --dir /checkout/target/wasm32-wasip1/release/deps::."
diff --git a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
index 2743896375..ca6192a38d 100644
--- a/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -12,9 +12,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
   build-essential \
   lld
 
-RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.58.0-2025-06-16-lin.tar.xz -O sde.tar.xz
+RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz
 RUN mkdir intel-sde
 RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
+
+RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64.tar.gz -O llvm.tar.xz
+RUN mkdir llvm
+RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm
+
+ENV PATH="/llvm/bin:$PATH"
+
 ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
             -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \
             -rtm-mode full -tsx --"
diff --git a/ci/docker/x86_64-unknown-linux-gnu/cpuid.def b/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
index 342f7d83a6..3bd657873e 100644
--- a/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
+++ b/ci/docker/x86_64-unknown-linux-gnu/cpuid.def
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2025 Intel Corporation.
+# Copyright (C) 2017-2026 Intel Corporation.
 # 
 # This software and the related documents are Intel copyrighted materials, and your
 # use of them is governed by the express license under which they were provided to
@@ -12,7 +12,7 @@
 # CPUID_VERSION = 1.0
 #      Input      =>               Output
 # EAX      ECX    =>   EAX      EBX      ECX      EDX
-00000000 ******** => 00000024 756e6547 6c65746e 49656e69
+00000000 ******** => 00000029 756e6547 6c65746e 49656e69
 00000001 ******** => 00400f10 00100800 7ffaf3ff bfebfbff
 00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000
 00000003 ******** => 00000000 00000000 00000000 00000000
@@ -23,8 +23,9 @@
 00000004 00000004 => 00000000 00000000 00000000 00000000
 00000005 ******** => 00000040 00000040 00000003 00042120 #MONITOR/MWAIT
 00000006 ******** => 00000077 00000002 00000001 00000000 #Thermal and Power
-00000007 00000000 => 00000001 f3bfbfbf bac05ffe 03d54130 #Extended Features
+00000007 00000000 => 00000002 f3bfbfbf bac05ffe 03d54130 #Extended Features
 00000007 00000001 => 98ee00bf 00000002 00000020 1d29cd3e
+00000007 00000002 => 00000000 00000000 00000000 00000010
 00000008 ******** => 00000000 00000000 00000000 00000000
 00000009 ******** => 00000000 00000000 00000000 00000000 #Direct Cache
 0000000a ******** => 07300403 00000000 00000000 00000603
@@ -48,6 +49,7 @@
 0000001e 00000001 => 000001ff 00000000 00000000 00000000
 00000024 00000000 => 00000001 00070002 00000000 00000000 #AVX10
 00000024 00000001 => 00000000 00000000 00000004 00000000
+00000029 ******** => 00000000 00000001 00000000 00000000
 80000000 ******** => 80000008 00000000 00000000 00000000
 80000001 ******** => 00000000 00000000 00000121 2c100000
 80000002 ******** => 00000000 00000000 00000000 00000000
diff --git a/ci/dox.sh b/ci/dox.sh
index 94d76d4304..9803f7e371 100755
--- a/ci/dox.sh
+++ b/ci/dox.sh
@@ -15,6 +15,15 @@ dox() {
 
   cargo clean --target "${1}"
 
+  if [ "${1}" == "amdgcn-amd-amdhsa" ]; then
+    if [ "$CI" != "" ]; then
+      rustup component add rust-src
+    fi
+    export CARGO_UNSTABLE_BUILD_STD=core
+    # amdgpu needs a target-cpu, any is fine
+    export RUSTFLAGS="${RUSTFLAGS} -Ctarget-cpu=gfx900"
+  fi
+
   cargo build --verbose --target "${1}" --manifest-path crates/core_arch/Cargo.toml
   cargo doc --verbose --target "${1}" --manifest-path crates/core_arch/Cargo.toml
 }
@@ -33,6 +42,7 @@ if [ -z "$1" ]; then
   #dox mips64-unknown-linux-gnuabi64
   dox wasm32-unknown-unknown
   dox nvptx64-nvidia-cuda
+  dox amdgcn-amd-amdhsa
 else
   dox "${1}"
 fi
diff --git a/ci/intrinsic-test-docker.sh b/ci/intrinsic-test-docker.sh
index 038fc4678e..beeff42c76 100755
--- a/ci/intrinsic-test-docker.sh
+++ b/ci/intrinsic-test-docker.sh
@@ -30,12 +30,14 @@ run() {
       --env CARGO_HOME=/cargo \
       --env CARGO_TARGET_DIR=/checkout/target \
       --env TARGET="${1}" \
+      --env PROFILE \
       --env "${HOST_LINKER}"="cc" \
       --env STDARCH_DISABLE_ASSERT_INSTR \
       --env NOSTD \
       --env NORUN \
       --env RUSTFLAGS \
       --env CARGO_UNSTABLE_BUILD_STD \
+      --env TEST_SAMPLE_INTRINSICS_PERCENTAGE \
       --volume "${HOME}/.cargo":/cargo \
       --volume "$(rustc --print sysroot)":/rust:ro \
       --volume "$(pwd)":/checkout:ro \
diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh
index e14a824b2a..89104e2672 100755
--- a/ci/intrinsic-test.sh
+++ b/ci/intrinsic-test.sh
@@ -6,7 +6,7 @@ set -ex
 
 export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir"
 export HOST_RUSTFLAGS="${RUSTFLAGS}"
-export PROFILE="${PROFILE:="--profile=release"}"
+export PROFILE="${PROFILE:="release"}"
 
 case ${TARGET} in
     # On 32-bit use a static relocation model which avoids some extra
@@ -51,13 +51,15 @@ case ${TARGET} in
         TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
         TEST_CXX_COMPILER="clang++"
         TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
+        : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
         ;;
 
     aarch64_be-unknown-linux-gnu*)
         TEST_CPPFLAGS="-fuse-ld=lld"
-        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
+        TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt
         TEST_CXX_COMPILER="clang++"
         TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
+        : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
         ;;
 
     armv7-unknown-linux-gnueabihf*)
@@ -65,6 +67,7 @@ case ${TARGET} in
         TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
         TEST_CXX_COMPILER="clang++"
         TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
+        : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
         ;;
 
     x86_64-unknown-linux-gnu*)
@@ -72,7 +75,7 @@ case ${TARGET} in
         TEST_CXX_COMPILER="clang++"
         TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}"
         TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt
-        TEST_SAMPLE_INTRINSICS_PERCENTAGE=5
+        : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=20}"
         ;;
     *)
         ;;
@@ -83,24 +86,28 @@ esac
 case "${TARGET}" in
     aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*)
         CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
-            cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
+            cargo run "${INTRINSIC_TEST}" --release  \
             --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
             --runner "${TEST_RUNNER}" \
             --cppcompiler "${TEST_CXX_COMPILER}" \
             --skip "${TEST_SKIP_INTRINSICS}" \
-            --target "${TARGET}"
+            --target "${TARGET}" \
+            --profile "${PROFILE}" \
+            --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
         ;;
 
     aarch64_be-unknown-linux-gnu*)
         CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
-            cargo run "${INTRINSIC_TEST}" "${PROFILE}"  \
+            cargo run "${INTRINSIC_TEST}" --release  \
             --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
             --runner "${TEST_RUNNER}" \
             --cppcompiler "${TEST_CXX_COMPILER}" \
             --skip "${TEST_SKIP_INTRINSICS}" \
             --target "${TARGET}" \
+            --profile "${PROFILE}" \
             --linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
-            --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}"
+            --cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}" \
+            --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
         ;;
 
     x86_64-unknown-linux-gnu*)
@@ -110,12 +117,13 @@ case "${TARGET}" in
         env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \
             CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \
             RUST_LOG=warn RUST_BACKTRACE=1 \
-            cargo run "${INTRINSIC_TEST}" "${PROFILE}"  \
+            cargo run "${INTRINSIC_TEST}" --release \
             --bin intrinsic-test -- intrinsics_data/x86-intel.xml \
             --runner "${TEST_RUNNER}" \
             --skip "${TEST_SKIP_INTRINSICS}" \
             --cppcompiler "${TEST_CXX_COMPILER}" \
             --target "${TARGET}" \
+            --profile "${PROFILE}" \
             --sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
         ;;
      *)
diff --git a/ci/run-docker.sh b/ci/run-docker.sh
index d7aa50a8c9..28dfd5a24a 100755
--- a/ci/run-docker.sh
+++ b/ci/run-docker.sh
@@ -37,6 +37,7 @@ run() {
       --env NORUN \
       --env RUSTFLAGS \
       --env CARGO_UNSTABLE_BUILD_STD \
+      --env PROFILE \
       --volume "${HOME}/.cargo":/cargo \
       --volume "$(rustc --print sysroot)":/rust:ro \
       --volume "$(pwd)":/checkout:ro \
diff --git a/ci/run.sh b/ci/run.sh
index 2bb77bae25..7939fa06c3 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -12,7 +12,7 @@ set -ex
 
 export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir"
 export HOST_RUSTFLAGS="${RUSTFLAGS}"
-export PROFILE="${PROFILE:="--profile=release"}"
+export PROFILE="${PROFILE:="release"}"
 
 case ${TARGET} in
     # On Windows the linker performs identical COMDAT folding (ICF) by default
@@ -40,13 +40,19 @@ case ${TARGET} in
 	export RUSTFLAGS="${RUSTFLAGS} -C llvm-args=-fast-isel=false"
 	;;
     armv7-*eabihf | thumbv7-*eabihf)
-        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon"
+        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+neon,+fp16"
+        ;;
+    amdgcn-*)
+        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-cpu=gfx1200"
         ;;
     # Some of our test dependencies use the deprecated `gcc` crates which
     # doesn't detect RISC-V compilers automatically, so do it manually here.
     riscv*)
         export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+zk,+zks,+zbb,+zbc"
         ;;
+    hexagon*)
+        export RUSTFLAGS="${RUSTFLAGS} -Ctarget-feature=+hvxv60,+hvx-length128b"
+        ;;
 esac
 
 echo "RUSTFLAGS=${RUSTFLAGS}"
@@ -63,7 +69,7 @@ cargo_test() {
     if [ "$NORUN" = "1" ]; then
         export subcmd="build"
     fi
-    cmd="$cmd ${subcmd} --target=$TARGET $1"
+    cmd="$cmd ${subcmd} core_arch::aarch64::sve::ld_st_tests --target=$TARGET --profile=$PROFILE $1"
     cmd="$cmd -- $2"
 
     case ${TARGET} in
@@ -71,6 +77,12 @@ cargo_test() {
         # harness isn't trying to capture output, otherwise we won't get any useful
         # output.
         wasm32*)
+            if [ "$PROFILE" = "release" ]; then
+              dir="release"
+            else
+              dir="debug"
+            fi
+            export CARGO_TARGET_WASM32_WASIP1_RUNNER="wasmtime -Wexceptions --dir /checkout/target/wasm32-wasip1/$dir/deps::."
             cmd="$cmd --nocapture"
             ;;
     esac
@@ -80,54 +92,47 @@ cargo_test() {
 CORE_ARCH="--manifest-path=crates/core_arch/Cargo.toml"
 STDARCH_EXAMPLES="--manifest-path=examples/Cargo.toml"
 
-cargo_test "${CORE_ARCH} ${PROFILE}"
+for i in {1..20}; do
+  cargo_test "${CORE_ARCH}"
+done
 
 if [ "$NOSTD" != "1" ]; then
-    cargo_test "${STDARCH_EXAMPLES} ${PROFILE}"
+    cargo_test "${STDARCH_EXAMPLES}"
 fi
 
 
 # Test targets compiled with extra features.
 case ${TARGET} in
-    x86_64-unknown-linux-gnu)
-        export STDARCH_DISABLE_ASSERT_INSTR=1
-
-        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
-        cargo_test "${PROFILE}"
-
-        export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx512f"
-        cargo_test "${PROFILE}"
-        ;;
     x86_64* | i686*)
         export STDARCH_DISABLE_ASSERT_INSTR=1
 
         export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+avx"
-        cargo_test "${PROFILE}"
+        cargo_test 
         ;;
     # FIXME: don't build anymore
     #mips-*gnu* | mipsel-*gnu*)
     #    export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa,+fp64,+mips32r5"
-    #    cargo_test "${PROFILE}"
+    #    cargo_test 
 	  #    ;;
     mips64*)
         export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+msa"
-        cargo_test "${PROFILE}"
+        cargo_test 
 	      ;;
     s390x*)
         export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vector-enhancements-1"
-        cargo_test "${PROFILE}"
+        cargo_test 
 	      ;;
     powerpc64*)
         export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec"
-        cargo_test "${PROFILE}"
+        cargo_test 
 
         export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+vsx"
-        cargo_test "${PROFILE}"
+        cargo_test 
         ;;
     powerpc*)
         # qemu has a bug in PPC32 which leads to a crash when compiled with `vsx`
         export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+altivec"
-        cargo_test "${PROFILE}"
+        cargo_test 
         ;;
     *)
         ;;
@@ -138,7 +143,7 @@ if [ "$NORUN" != "1" ] && [ "$NOSTD" != 1 ]; then
     # Test examples
     (
         cd examples
-        cargo test --target "$TARGET" "${PROFILE}"
-        echo test | cargo run --target "$TARGET" "${PROFILE}" hex
+        cargo test --target "${TARGET}" --profile "${PROFILE}"
+        echo test | cargo run --target "${TARGET}" --profile "${PROFILE}" hex
     )
 fi
diff --git a/crates/assert-instr-macro/src/lib.rs b/crates/assert-instr-macro/src/lib.rs
index 13c3c3851b..839aae67cb 100644
--- a/crates/assert-instr-macro/src/lib.rs
+++ b/crates/assert-instr-macro/src/lib.rs
@@ -14,6 +14,7 @@ extern crate quote;
 
 use proc_macro2::TokenStream;
 use quote::ToTokens;
+use syn::spanned::Spanned;
 
 #[proc_macro_attribute]
 pub fn assert_instr(
@@ -67,21 +68,21 @@ pub fn assert_instr(
     );
     let mut inputs = Vec::new();
     let mut input_vals = Vec::new();
-    let mut const_vals = Vec::new();
+    let mut param_vals = Vec::new();
     let ret = &func.sig.output;
     for arg in func.sig.inputs.iter() {
         let capture = match *arg {
-            syn::FnArg::Typed(ref c) => c,
+            syn::FnArg::Typed(ref c) => c.to_owned(),
             ref v => panic!(
                 "arguments must not have patterns: `{:?}`",
                 v.clone().into_token_stream()
             ),
         };
-        let ident = match *capture.pat {
-            syn::Pat::Ident(ref i) => &i.ident,
+        let ident = match capture.pat.as_ref() {
+            syn::Pat::Ident(i) => &i.ident.to_owned(),
             _ => panic!("must have bare arguments"),
         };
-        if let Some((_, tokens)) = invoc.args.iter().find(|a| *ident == a.0) {
+        if let Some(&(_, ref tokens)) = invoc.args.iter().find(|a| *ident == a.0) {
             input_vals.push(quote! { #tokens });
         } else {
             inputs.push(capture);
@@ -89,18 +90,48 @@ pub fn assert_instr(
         }
     }
     for arg in func.sig.generics.params.iter() {
-        let c = match *arg {
-            syn::GenericParam::Const(ref c) => c,
+        match *arg {
+            syn::GenericParam::Const(ref c) => {
+                if let Some((_, tokens)) = invoc.args.iter().find(|a| c.ident == a.0) {
+                    param_vals.push(quote! { #tokens });
+                } else {
+                    panic!("const generics must have a value for tests");
+                }
+            }
+            syn::GenericParam::Type(ref t) => {
+                if let Some((_, tokens)) = invoc.args.iter().find(|a| t.ident == a.0)
+                    && let syn::Expr::Path(syn::ExprPath { qself, path, .. }) = tokens
+                {
+                    param_vals.push(syn::Token![_](tokens.span()).to_token_stream());
+
+                    let generic_ty_value = syn::TypePath {
+                        qself: qself.clone(),
+                        path: path.clone(),
+                    };
+
+                    // Replace any function arguments that use generic parameters with the
+                    // instantiation provided in the macro invocation.
+                    inputs.iter_mut().for_each(|arg| {
+                        update_type_path(arg.ty.as_mut(), |type_path: &mut syn::TypePath| {
+                            if let Some(syn::PathSegment {
+                                ident: last_ident, ..
+                            }) = type_path.path.segments.last_mut()
+                            {
+                                if *last_ident == t.ident {
+                                    *type_path = generic_ty_value.to_owned()
+                                }
+                            }
+                        })
+                    });
+                } else {
+                    panic!("type generics must have a type for tests");
+                }
+            }
             ref v => panic!(
-                "only const generics are allowed: `{:?}`",
+                "only type and const generics are allowed: `{:?}`",
                 v.clone().into_token_stream()
             ),
         };
-        if let Some((_, tokens)) = invoc.args.iter().find(|a| c.ident == a.0) {
-            const_vals.push(quote! { #tokens });
-        } else {
-            panic!("const generics must have a value for tests");
-        }
     }
 
     let attrs = func
@@ -138,7 +169,7 @@ pub fn assert_instr(
         #[unsafe(no_mangle)]
         #[inline(never)]
         pub unsafe extern #abi fn #shim_name(#(#inputs),*) #ret {
-            #name::<#(#const_vals),*>(#(#input_vals),*)
+            #name::<#(#param_vals),*>(#(#input_vals),*)
         }
     };
 
@@ -222,3 +253,23 @@ where
         }
     }
 }
+
+/// Calls `update` on type paths so that type generics can be replaced with the instantiation from
+/// the attribute.
+fn update_type_path<F>(ty: &mut syn::Type, update: F)
+where
+    F: Fn(&mut syn::TypePath),
+{
+    use syn::Type::*;
+    match ty {
+        Array(syn::TypeArray { elem, .. })
+        | Group(syn::TypeGroup { elem, .. })
+        | Paren(syn::TypeParen { elem, .. })
+        | Ptr(syn::TypePtr { elem, .. })
+        | Reference(syn::TypeReference { elem, .. })
+        | Slice(syn::TypeSlice { elem, .. }) => update_type_path(elem.as_mut(), update),
+        Path(path @ syn::TypePath { .. }) => update(path),
+        Tuple(..) => panic!("tuples and generic types together are not yet supported"),
+        _ => {}
+    }
+}
diff --git a/crates/core_arch/README.md b/crates/core_arch/README.md
index fc18a5759d..d341365b98 100644
--- a/crates/core_arch/README.md
+++ b/crates/core_arch/README.md
@@ -3,7 +3,7 @@
 
 The `core::arch` module implements architecture-dependent intrinsics (e.g. SIMD).
 
-# Usage 
+# Usage
 
 `core::arch` is available as part of `libcore` and it is re-exported by
 `libstd`. Prefer using it via `core::arch` or `std::arch` than via this crate.
@@ -17,7 +17,7 @@ are:
   you need to re-compile it for a non-standard target, please prefer using
   `xargo` and re-compiling `libcore`/`libstd` as appropriate instead of using
   this crate.
-  
+
 * using some features that might not be available even behind unstable Rust
   features. We try to keep these to a minimum. If you need to use some of these
   features, please open an issue so that we can expose them in nightly Rust and
@@ -34,7 +34,7 @@ are:
 * [How to get started][contrib]
 * [How to help implement intrinsics][help-implement]
 
-[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md
+[contrib]: https://github.com/rust-lang/stdarch/blob/HEAD/CONTRIBUTING.md
 [help-implement]: https://github.com/rust-lang/stdarch/issues/40
 [i686]: https://rust-lang.github.io/stdarch/i686/core_arch/
 [x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/
diff --git a/crates/core_arch/missing-x86.md b/crates/core_arch/missing-x86.md
index 640ec7d0fe..e9f68eb9e6 100644
--- a/crates/core_arch/missing-x86.md
+++ b/crates/core_arch/missing-x86.md
@@ -44,22 +44,6 @@
 </p></details>
 
 
-<details><summary>["AVX512_VP2INTERSECT", "AVX512F"]</summary><p>
-
-  * [ ] [`_mm512_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32)
-  * [ ] [`_mm512_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi64)
-</p></details>
-
-
-<details><summary>["AVX512_VP2INTERSECT", "AVX512VL"]</summary><p>
-
-  * [ ] [`_mm256_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi32)
-  * [ ] [`_mm256_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi64)
-  * [ ] [`_mm_2intersect_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi32)
-  * [ ] [`_mm_2intersect_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi64)
-</p></details>
-
-
 <details><summary>["CET_SS"]</summary><p>
 
   * [ ] [`_clrssbsy`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_clrssbsy)
diff --git a/crates/core_arch/rustfmt.toml b/crates/core_arch/rustfmt.toml
index 4ae742ba8d..e69de29bb2 100644
--- a/crates/core_arch/rustfmt.toml
+++ b/crates/core_arch/rustfmt.toml
@@ -1,3 +0,0 @@
-ignore = [
-    "src/simd.rs",
-]
diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs
index f4b9b1c302..0292be2e0d 100644
--- a/crates/core_arch/src/aarch64/mod.rs
+++ b/crates/core_arch/src/aarch64/mod.rs
@@ -17,13 +17,27 @@ mod mte;
 #[unstable(feature = "stdarch_aarch64_mte", issue = "129010")]
 pub use self::mte::*;
 
+mod rand;
+#[unstable(feature = "stdarch_aarch64_rand", issue = "153514")]
+pub use self::rand::*;
+
 mod neon;
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub use self::neon::*;
 
-mod tme;
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub use self::tme::*;
+// The rest of `core_arch::aarch64` is available on `arm64ec` but SVE is not supported on `arm64ec`.
+#[cfg(any(target_arch = "aarch64", doc))]
+mod sve;
+#[cfg(any(target_arch = "aarch64", doc))]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub use self::sve::*;
+
+// The rest of `core_arch::aarch64` is available on `arm64ec` but SVE is not supported on `arm64ec`.
+#[cfg(any(target_arch = "aarch64", doc))]
+mod sve2;
+#[cfg(any(target_arch = "aarch64", doc))]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub use self::sve2::*;
 
 mod prefetch;
 #[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
diff --git a/crates/core_arch/src/aarch64/mte.rs b/crates/core_arch/src/aarch64/mte.rs
index c400f774bc..a5031a45c1 100644
--- a/crates/core_arch/src/aarch64/mte.rs
+++ b/crates/core_arch/src/aarch64/mte.rs
@@ -3,35 +3,17 @@
 //! [ACLE documentation](https://arm-software.github.io/acle/main/acle.html#markdown-toc-mte-intrinsics)
 
 unsafe extern "unadjusted" {
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.irg"
-    )]
+    #[link_name = "llvm.aarch64.irg"]
     fn irg_(ptr: *const (), exclude: i64) -> *const ();
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.gmi"
-    )]
+    #[link_name = "llvm.aarch64.gmi"]
     fn gmi_(ptr: *const (), exclude: i64) -> i64;
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.ldg"
-    )]
+    #[link_name = "llvm.aarch64.ldg"]
     fn ldg_(ptr: *const (), tag_ptr: *const ()) -> *const ();
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.stg"
-    )]
+    #[link_name = "llvm.aarch64.stg"]
     fn stg_(tagged_ptr: *const (), addr_to_tag: *const ());
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.addg"
-    )]
+    #[link_name = "llvm.aarch64.addg"]
     fn addg_(ptr: *const (), value: i64) -> *const ();
-    #[cfg_attr(
-        any(target_arch = "aarch64", target_arch = "arm64ec"),
-        link_name = "llvm.aarch64.subp"
-    )]
+    #[link_name = "llvm.aarch64.subp"]
     fn subp_(ptr_a: *const (), ptr_b: *const ()) -> i64;
 }
 
@@ -127,42 +109,46 @@ mod test {
     use super::*;
     use stdarch_test::assert_instr;
 
-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(irg))] // FIXME: MSVC  `dumpbin` doesn't support MTE
+    // Instruction tests are separate because the functions use generics.
+    //
+    // FIXME: As of 2026 MSVC  `dumpbin` doesn't support MTE.
+
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(irg))]
     #[allow(dead_code)]
     #[target_feature(enable = "mte")]
     unsafe fn test_arm_mte_create_random_tag(src: *const (), mask: u64) -> *const () {
         __arm_mte_create_random_tag(src, mask)
     }
 
-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(addg))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(addg))]
     #[allow(dead_code)]
     #[target_feature(enable = "mte")]
     unsafe fn test_arm_mte_increment_tag(src: *const ()) -> *const () {
         __arm_mte_increment_tag::<1, _>(src)
     }
 
-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(gmi))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(gmi))]
     #[allow(dead_code)]
     #[target_feature(enable = "mte")]
     unsafe fn test_arm_mte_exclude_tag(src: *const (), excluded: u64) -> u64 {
         __arm_mte_exclude_tag(src, excluded)
     }
 
-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stg))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(stg))]
     #[allow(dead_code)]
     #[target_feature(enable = "mte")]
     unsafe fn test_arm_mte_set_tag(src: *const ()) {
         __arm_mte_set_tag(src)
     }
 
-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldg))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(ldg))]
     #[allow(dead_code)]
     #[target_feature(enable = "mte")]
     unsafe fn test_arm_mte_get_tag(src: *const ()) -> *const () {
         __arm_mte_get_tag(src)
     }
 
-    #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(subp))]
+    #[cfg_attr(not(target_env = "msvc"), assert_instr(subp))]
     #[allow(dead_code)]
     #[target_feature(enable = "mte")]
     unsafe fn test_arm_mte_ptrdiff(a: *const (), b: *const ()) -> i64 {
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
index 09cf381804..3241583cf0 100644
--- a/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -49,7 +49,7 @@ pub fn __crc32d(crc: u32, data: u64) -> u32 {
 #[inline]
 #[target_feature(enable = "jsconv")]
 #[cfg_attr(test, assert_instr(fjcvtzs))]
-#[unstable(feature = "stdarch_aarch64_jscvt", issue = "147555")]
+#[stable(feature = "stdarch_aarch64_jscvt", since = "1.95.0")]
 pub fn __jcvt(a: f64) -> i32 {
     unsafe extern "unadjusted" {
         #[cfg_attr(
@@ -65,12 +65,15 @@ pub fn __jcvt(a: f64) -> i32 {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(sabal2)
+)]
 pub fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
+    let d = vget_high_s8(b);
+    let e = vget_high_s8(c);
+    let f = vabd_s8(d, e);
     unsafe {
-        let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let e: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let f: int8x8_t = vabd_s8(d, e);
         let f: uint8x8_t = simd_cast(f);
         simd_add(a, simd_cast(f))
     }
@@ -80,12 +83,15 @@ pub fn vabal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(sabal2)
+)]
 pub fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
+    let d = vget_high_s16(b);
+    let e = vget_high_s16(c);
+    let f = vabd_s16(d, e);
     unsafe {
-        let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let e: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        let f: int16x4_t = vabd_s16(d, e);
         let f: uint16x4_t = simd_cast(f);
         simd_add(a, simd_cast(f))
     }
@@ -95,12 +101,15 @@ pub fn vabal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(sabal2)
+)]
 pub fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
+    let d = vget_high_s32(b);
+    let e = vget_high_s32(c);
+    let f = vabd_s32(d, e);
     unsafe {
-        let d: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let e: int32x2_t = simd_shuffle!(c, c, [2, 3]);
-        let f: int32x2_t = vabd_s32(d, e);
         let f: uint32x2_t = simd_cast(f);
         simd_add(a, simd_cast(f))
     }
@@ -110,42 +119,45 @@ pub fn vabal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uabal2)
+)]
 pub fn vabal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let e: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let f: uint8x8_t = vabd_u8(d, e);
-        simd_add(a, simd_cast(f))
-    }
+    let d = vget_high_u8(b);
+    let e = vget_high_u8(c);
+    let f = vabd_u8(d, e);
+    unsafe { simd_add(a, simd_cast(f)) }
 }
 #[doc = "Unsigned Absolute difference and Accumulate Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uabal2)
+)]
 pub fn vabal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let d: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let e: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        let f: uint16x4_t = vabd_u16(d, e);
-        simd_add(a, simd_cast(f))
-    }
+    let d = vget_high_u16(b);
+    let e = vget_high_u16(c);
+    let f = vabd_u16(d, e);
+    unsafe { simd_add(a, simd_cast(f)) }
 }
 #[doc = "Unsigned Absolute difference and Accumulate Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabal_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uabal2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uabal2)
+)]
 pub fn vabal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let d: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let e: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
-        let f: uint32x2_t = vabd_u32(d, e);
-        simd_add(a, simd_cast(f))
-    }
+    let d = vget_high_u32(b);
+    let e = vget_high_u32(c);
+    let f = vabd_u32(d, e);
+    unsafe { simd_add(a, simd_cast(f)) }
 }
 #[doc = "Absolute difference between the arguments of Floating"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabd_f64)"]
@@ -186,7 +198,7 @@ pub fn vabdq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fabd))]
 pub fn vabdd_f64(a: f64, b: f64) -> f64 {
-    unsafe { simd_extract!(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_f64::<0>(vabd_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point absolute difference"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabds_f32)"]
@@ -195,7 +207,7 @@ pub fn vabdd_f64(a: f64, b: f64) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fabd))]
 pub fn vabds_f32(a: f32, b: f32) -> f32 {
-    unsafe { simd_extract!(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_f32::<0>(vabd_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Floating-point absolute difference"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdh_f16)"]
@@ -205,47 +217,47 @@ pub fn vabds_f32(a: f32, b: f32) -> f32 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fabd))]
 pub fn vabdh_f16(a: f16, b: f16) -> f16 {
-    unsafe { simd_extract!(vabd_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_f16::<0>(vabd_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Signed Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sabdl2))]
-pub fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))]
+pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+    let c = vget_high_s8(a);
+    let d = vget_high_s8(b);
     unsafe {
-        let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let d: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let e: uint16x4_t = simd_cast(vabd_s16(c, d));
+        let e: uint8x8_t = simd_cast(vabd_s8(c, d));
         simd_cast(e)
     }
 }
 #[doc = "Signed Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sabdl2))]
-pub fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))]
+pub fn vabdl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let c = vget_high_s16(a);
+    let d = vget_high_s16(b);
     unsafe {
-        let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let d: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let e: uint32x2_t = simd_cast(vabd_s32(c, d));
+        let e: uint16x4_t = simd_cast(vabd_s16(c, d));
         simd_cast(e)
     }
 }
 #[doc = "Signed Absolute difference Long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sabdl2))]
-pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sabdl2))]
+pub fn vabdl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let c = vget_high_s32(a);
+    let d = vget_high_s32(b);
     unsafe {
-        let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let d: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let e: uint8x8_t = simd_cast(vabd_s8(c, d));
+        let e: uint32x2_t = simd_cast(vabd_s32(c, d));
         simd_cast(e)
     }
 }
@@ -253,40 +265,34 @@ pub fn vabdl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uabdl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
 pub fn vabdl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let d: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        simd_cast(vabd_u8(c, d))
-    }
+    let c = vget_high_u8(a);
+    let d = vget_high_u8(b);
+    unsafe { simd_cast(vabd_u8(c, d)) }
 }
 #[doc = "Unsigned Absolute difference Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uabdl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
 pub fn vabdl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let d: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        simd_cast(vabd_u16(c, d))
-    }
+    let c = vget_high_u16(a);
+    let d = vget_high_u16(b);
+    unsafe { simd_cast(vabd_u16(c, d)) }
 }
 #[doc = "Unsigned Absolute difference Long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabdl_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uabdl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uabdl2))]
 pub fn vabdl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let d: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        simd_cast(vabd_u32(c, d))
-    }
+    let c = vget_high_u32(a);
+    let d = vget_high_u32(b);
+    unsafe { simd_cast(vabd_u32(c, d)) }
 }
 #[doc = "Floating-point absolute value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vabs_f64)"]
@@ -613,7 +619,7 @@ pub fn vaddvq_f64(a: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddv_s32(a: int32x2_t) -> i32 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s8)"]
@@ -622,7 +628,7 @@ pub fn vaddv_s32(a: int32x2_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_s8(a: int8x8_t) -> i8 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s8)"]
@@ -631,7 +637,7 @@ pub fn vaddv_s8(a: int8x8_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_s8(a: int8x16_t) -> i8 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_s16)"]
@@ -640,7 +646,7 @@ pub fn vaddvq_s8(a: int8x16_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_s16(a: int16x4_t) -> i16 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s16)"]
@@ -649,7 +655,7 @@ pub fn vaddv_s16(a: int16x4_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_s16(a: int16x8_t) -> i16 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s32)"]
@@ -658,7 +664,7 @@ pub fn vaddvq_s16(a: int16x8_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_s32(a: int32x4_t) -> i32 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u32)"]
@@ -667,7 +673,7 @@ pub fn vaddvq_s32(a: int32x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddv_u32(a: uint32x2_t) -> u32 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u8)"]
@@ -676,7 +682,7 @@ pub fn vaddv_u32(a: uint32x2_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_u8(a: uint8x8_t) -> u8 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u8)"]
@@ -685,7 +691,7 @@ pub fn vaddv_u8(a: uint8x8_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddv_u16)"]
@@ -694,7 +700,7 @@ pub fn vaddvq_u8(a: uint8x16_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddv_u16(a: uint16x4_t) -> u16 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u16)"]
@@ -703,7 +709,7 @@ pub fn vaddv_u16(a: uint16x4_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u32)"]
@@ -712,7 +718,7 @@ pub fn vaddvq_u16(a: uint16x8_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addv))]
 pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_s64)"]
@@ -721,7 +727,7 @@ pub fn vaddvq_u32(a: uint32x4_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddvq_s64(a: int64x2_t) -> i64 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaddvq_u64)"]
@@ -730,13 +736,45 @@ pub fn vaddvq_s64(a: int64x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vaddvq_u64(a: uint64x2_t) -> u64 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
+}
+#[doc = "Multi-vector floating-point absolute maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamax_f16)"]
+#[inline]
+#[target_feature(enable = "neon,faminmax")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famax))]
+#[unstable(feature = "faminmax", issue = "137933")]
+pub fn vamax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.famax.v4f16"
+        )]
+        fn _vamax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vamax_f16(a, b) }
+}
+#[doc = "Multi-vector floating-point absolute maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamaxq_f16)"]
+#[inline]
+#[target_feature(enable = "neon,faminmax")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famax))]
+#[unstable(feature = "faminmax", issue = "137933")]
+pub fn vamaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.famax.v8f16"
+        )]
+        fn _vamaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vamaxq_f16(a, b) }
 }
 #[doc = "Multi-vector floating-point absolute maximum"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamax_f32)"]
 #[inline]
 #[target_feature(enable = "neon,faminmax")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famax))]
 #[unstable(feature = "faminmax", issue = "137933")]
 pub fn vamax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
@@ -752,7 +790,7 @@ pub fn vamax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamaxq_f32)"]
 #[inline]
 #[target_feature(enable = "neon,faminmax")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famax))]
 #[unstable(feature = "faminmax", issue = "137933")]
 pub fn vamaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
@@ -768,7 +806,7 @@ pub fn vamaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamaxq_f64)"]
 #[inline]
 #[target_feature(enable = "neon,faminmax")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famax))]
 #[unstable(feature = "faminmax", issue = "137933")]
 pub fn vamaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
@@ -781,10 +819,42 @@ pub fn vamaxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { _vamaxq_f64(a, b) }
 }
 #[doc = "Multi-vector floating-point absolute minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamin_f16)"]
+#[inline]
+#[target_feature(enable = "neon,faminmax")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famin))]
+#[unstable(feature = "faminmax", issue = "137933")]
+pub fn vamin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.famin.v4f16"
+        )]
+        fn _vamin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vamin_f16(a, b) }
+}
+#[doc = "Multi-vector floating-point absolute minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaminq_f16)"]
+#[inline]
+#[target_feature(enable = "neon,faminmax")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famin))]
+#[unstable(feature = "faminmax", issue = "137933")]
+pub fn vaminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.famin.v8f16"
+        )]
+        fn _vaminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vaminq_f16(a, b) }
+}
+#[doc = "Multi-vector floating-point absolute minimum"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vamin_f32)"]
 #[inline]
 #[target_feature(enable = "neon,faminmax")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famin))]
 #[unstable(feature = "faminmax", issue = "137933")]
 pub fn vamin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
@@ -800,7 +870,7 @@ pub fn vamin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaminq_f32)"]
 #[inline]
 #[target_feature(enable = "neon,faminmax")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famin))]
 #[unstable(feature = "faminmax", issue = "137933")]
 pub fn vaminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
@@ -816,7 +886,7 @@ pub fn vaminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vaminq_f64)"]
 #[inline]
 #[target_feature(enable = "neon,faminmax")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(famin))]
 #[unstable(feature = "faminmax", issue = "137933")]
 pub fn vaminq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe extern "unadjusted" {
@@ -961,7 +1031,7 @@ pub fn vbcaxq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcadd))]
 pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
@@ -979,7 +1049,7 @@ pub fn vcadd_rot270_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcadd))]
 pub fn vcaddq_rot270_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
@@ -1045,7 +1115,7 @@ pub fn vcaddq_rot270_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcadd))]
 pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
@@ -1063,7 +1133,7 @@ pub fn vcadd_rot90_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fcma"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcadd))]
 pub fn vcaddq_rot90_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
@@ -1457,7 +1527,7 @@ pub fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vceqd_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vceq_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqs_f32)"]
@@ -1466,7 +1536,7 @@ pub fn vceqd_f64(a: f64, b: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vceqs_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vceq_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Compare bitwise equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqd_s64)"]
@@ -1494,14 +1564,14 @@ pub fn vceqd_u64(a: u64, b: u64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqh_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vceq_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vceq_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare bitwise equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqz_f16)"]
 #[inline]
 #[cfg_attr(test, assert_instr(fcmeq))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqz_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
@@ -1512,7 +1582,7 @@ pub fn vceqz_f16(a: float16x4_t) -> uint16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcmeq))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqzq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
@@ -1784,7 +1854,7 @@ pub fn vceqzd_u64(a: u64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqzh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vceqz_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vceqz_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point compare bitwise equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzs_f32)"]
@@ -1793,7 +1863,7 @@ pub fn vceqzh_f16(a: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vceqzs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vceqz_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vceqz_f32(vdup_n_f32(a)))
 }
 #[doc = "Floating-point compare bitwise equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vceqzd_f64)"]
@@ -1802,7 +1872,7 @@ pub fn vceqzs_f32(a: f32) -> u32 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vceqzd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vceqz_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vceqz_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare greater than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcge_f64)"]
@@ -1865,7 +1935,7 @@ pub fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcged_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vcge_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare greater than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcges_f32)"]
@@ -1874,7 +1944,7 @@ pub fn vcged_f64(a: f64, b: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcges_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vcge_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Compare greater than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcged_s64)"]
@@ -1902,7 +1972,7 @@ pub fn vcged_u64(a: u64, b: u64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgeh_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vcge_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vcge_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare greater than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgez_f32)"]
@@ -2031,7 +2101,7 @@ pub fn vcgezq_s64(a: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgezd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vcgez_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vcgez_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare greater than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezs_f32)"]
@@ -2040,7 +2110,7 @@ pub fn vcgezd_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgezs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vcgez_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vcgez_f32(vdup_n_f32(a)))
 }
 #[doc = "Compare signed greater than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgezd_s64)"]
@@ -2059,7 +2129,7 @@ pub fn vcgezd_s64(a: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgezh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vcgez_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vcgez_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point compare greater than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgt_f64)"]
@@ -2122,7 +2192,7 @@ pub fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgtd_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vcgt_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare greater than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgts_f32)"]
@@ -2131,7 +2201,7 @@ pub fn vcgtd_f64(a: f64, b: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgts_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vcgt_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Compare greater than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtd_s64)"]
@@ -2159,7 +2229,7 @@ pub fn vcgtd_u64(a: u64, b: u64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgth_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vcgt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vcgt_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare greater than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtz_f32)"]
@@ -2288,7 +2358,7 @@ pub fn vcgtzq_s64(a: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgtzd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vcgtz_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vcgtz_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare greater than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzs_f32)"]
@@ -2297,7 +2367,7 @@ pub fn vcgtzd_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcgtzs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vcgtz_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vcgtz_f32(vdup_n_f32(a)))
 }
 #[doc = "Compare signed greater than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcgtzd_s64)"]
@@ -2316,7 +2386,7 @@ pub fn vcgtzd_s64(a: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgtzh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vcgtz_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vcgtz_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point compare less than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcle_f64)"]
@@ -2379,7 +2449,7 @@ pub fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcled_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vcle_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare less than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcles_f32)"]
@@ -2388,7 +2458,7 @@ pub fn vcled_f64(a: f64, b: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcles_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vcle_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Compare less than or equal"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcled_u64)"]
@@ -2416,7 +2486,7 @@ pub fn vcled_s64(a: i64, b: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcleh_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vcle_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vcle_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare less than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclez_f32)"]
@@ -2545,7 +2615,7 @@ pub fn vclezq_s64(a: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vclezd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vclez_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vclez_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare less than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezs_f32)"]
@@ -2554,7 +2624,7 @@ pub fn vclezd_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vclezs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vclez_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vclez_f32(vdup_n_f32(a)))
 }
 #[doc = "Compare less than or equal to zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclezd_s64)"]
@@ -2573,7 +2643,7 @@ pub fn vclezd_s64(a: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vclezh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vclez_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vclez_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point compare less than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclt_f64)"]
@@ -2655,7 +2725,7 @@ pub fn vcltd_s64(a: i64, b: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vclth_f16(a: f16, b: f16) -> u16 {
-    unsafe { simd_extract!(vclt_f16(vdup_n_f16(a), vdup_n_f16(b)), 0) }
+    vget_lane_u16::<0>(vclt_f16(vdup_n_f16(a), vdup_n_f16(b)))
 }
 #[doc = "Floating-point compare less than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclts_f32)"]
@@ -2664,7 +2734,7 @@ pub fn vclth_f16(a: f16, b: f16) -> u16 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vclts_f32(a: f32, b: f32) -> u32 {
-    unsafe { simd_extract!(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)), 0) }
+    vget_lane_u32::<0>(vclt_f32(vdup_n_f32(a), vdup_n_f32(b)))
 }
 #[doc = "Floating-point compare less than"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltd_f64)"]
@@ -2673,7 +2743,7 @@ pub fn vclts_f32(a: f32, b: f32) -> u32 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcltd_f64(a: f64, b: f64) -> u64 {
-    unsafe { simd_extract!(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)), 0) }
+    vget_lane_u64::<0>(vclt_f64(vdup_n_f64(a), vdup_n_f64(b)))
 }
 #[doc = "Floating-point compare less than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltz_f32)"]
@@ -2802,7 +2872,7 @@ pub fn vcltzq_s64(a: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcltzd_f64(a: f64) -> u64 {
-    unsafe { simd_extract!(vcltz_f64(vdup_n_f64(a)), 0) }
+    vget_lane_u64::<0>(vcltz_f64(vdup_n_f64(a)))
 }
 #[doc = "Floating-point compare less than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzs_f32)"]
@@ -2811,7 +2881,7 @@ pub fn vcltzd_f64(a: f64) -> u64 {
 #[cfg_attr(test, assert_instr(fcmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcltzs_f32(a: f32) -> u32 {
-    unsafe { simd_extract!(vcltz_f32(vdup_n_f32(a)), 0) }
+    vget_lane_u32::<0>(vcltz_f32(vdup_n_f32(a)))
 }
 #[doc = "Compare less than zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcltzd_s64)"]
@@ -2830,14 +2900,14 @@ pub fn vcltzd_s64(a: i64) -> u64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcltzh_f16(a: f16) -> u16 {
-    unsafe { simd_extract!(vcltz_f16(vdup_n_f16(a)), 0) }
+    vget_lane_u16::<0>(vcltz_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
@@ -2855,7 +2925,7 @@ pub fn vcmla_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t
 #[inline]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmlaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
@@ -2923,7 +2993,7 @@ pub fn vcmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_lane_f16<const LANE: i32>(
     a: float16x4_t,
@@ -2931,19 +3001,10 @@ pub fn vcmla_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f16)"]
@@ -2952,7 +3013,7 @@ pub fn vcmla_lane_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_lane_f16<const LANE: i32>(
     a: float16x8_t,
@@ -2960,23 +3021,10 @@ pub fn vcmlaq_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_lane_f32)"]
@@ -2991,10 +3039,10 @@ pub fn vcmla_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdup_lane_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_lane_f32)"]
@@ -3009,19 +3057,10 @@ pub fn vcmlaq_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdupq_lane_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f16)"]
@@ -3030,7 +3069,7 @@ pub fn vcmlaq_lane_f32<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_laneq_f16<const LANE: i32>(
     a: float16x4_t,
@@ -3038,19 +3077,10 @@ pub fn vcmla_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f16)"]
@@ -3059,7 +3089,7 @@ pub fn vcmla_laneq_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_laneq_f16<const LANE: i32>(
     a: float16x8_t,
@@ -3067,23 +3097,10 @@ pub fn vcmlaq_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_laneq_f32)"]
@@ -3098,10 +3115,10 @@ pub fn vcmla_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdup_laneq_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_laneq_f32)"]
@@ -3116,26 +3133,17 @@ pub fn vcmlaq_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdupq_laneq_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
@@ -3153,7 +3161,7 @@ pub fn vcmla_rot180_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float
 #[inline]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmlaq_rot180_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
@@ -3221,7 +3229,7 @@ pub fn vcmlaq_rot180_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot180_lane_f16<const LANE: i32>(
     a: float16x4_t,
@@ -3229,19 +3237,10 @@ pub fn vcmla_rot180_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot180_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot180_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f16)"]
@@ -3250,7 +3249,7 @@ pub fn vcmla_rot180_lane_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot180_lane_f16<const LANE: i32>(
     a: float16x8_t,
@@ -3258,23 +3257,10 @@ pub fn vcmlaq_rot180_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot180_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot180_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_lane_f32)"]
@@ -3289,10 +3275,10 @@ pub fn vcmla_rot180_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot180_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdup_lane_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot180_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_lane_f32)"]
@@ -3307,19 +3293,10 @@ pub fn vcmlaq_rot180_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot180_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdupq_lane_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot180_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f16)"]
@@ -3328,7 +3305,7 @@ pub fn vcmlaq_rot180_lane_f32<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot180_laneq_f16<const LANE: i32>(
     a: float16x4_t,
@@ -3336,19 +3313,10 @@ pub fn vcmla_rot180_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot180_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot180_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f16)"]
@@ -3357,7 +3325,7 @@ pub fn vcmla_rot180_laneq_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot180_laneq_f16<const LANE: i32>(
     a: float16x8_t,
@@ -3365,23 +3333,10 @@ pub fn vcmlaq_rot180_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot180_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot180_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot180_laneq_f32)"]
@@ -3396,10 +3351,10 @@ pub fn vcmla_rot180_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot180_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdup_laneq_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot180_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot180_laneq_f32)"]
@@ -3414,26 +3369,17 @@ pub fn vcmlaq_rot180_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot180_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdupq_laneq_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot180_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
@@ -3451,7 +3397,7 @@ pub fn vcmla_rot270_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float
 #[inline]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmlaq_rot270_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
@@ -3519,7 +3465,7 @@ pub fn vcmlaq_rot270_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> floa
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot270_lane_f16<const LANE: i32>(
     a: float16x4_t,
@@ -3527,19 +3473,10 @@ pub fn vcmla_rot270_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot270_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot270_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f16)"]
@@ -3548,7 +3485,7 @@ pub fn vcmla_rot270_lane_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot270_lane_f16<const LANE: i32>(
     a: float16x8_t,
@@ -3556,23 +3493,10 @@ pub fn vcmlaq_rot270_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot270_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot270_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_lane_f32)"]
@@ -3587,10 +3511,10 @@ pub fn vcmla_rot270_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot270_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdup_lane_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot270_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_lane_f32)"]
@@ -3605,19 +3529,10 @@ pub fn vcmlaq_rot270_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot270_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdupq_lane_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot270_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f16)"]
@@ -3626,7 +3541,7 @@ pub fn vcmlaq_rot270_lane_f32<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot270_laneq_f16<const LANE: i32>(
     a: float16x4_t,
@@ -3634,19 +3549,10 @@ pub fn vcmla_rot270_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot270_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot270_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f16)"]
@@ -3655,7 +3561,7 @@ pub fn vcmla_rot270_laneq_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot270_laneq_f16<const LANE: i32>(
     a: float16x8_t,
@@ -3663,23 +3569,10 @@ pub fn vcmlaq_rot270_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot270_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot270_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot270_laneq_f32)"]
@@ -3694,10 +3587,10 @@ pub fn vcmla_rot270_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot270_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdup_laneq_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot270_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot270_laneq_f32)"]
@@ -3712,26 +3605,17 @@ pub fn vcmlaq_rot270_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot270_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdupq_laneq_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot270_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
@@ -3749,7 +3633,7 @@ pub fn vcmla_rot90_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float1
 #[inline]
 #[target_feature(enable = "neon,fcma")]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fcmla))]
 pub fn vcmlaq_rot90_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
@@ -3817,7 +3701,7 @@ pub fn vcmlaq_rot90_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot90_lane_f16<const LANE: i32>(
     a: float16x4_t,
@@ -3825,19 +3709,10 @@ pub fn vcmla_rot90_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot90_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot90_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f16)"]
@@ -3846,7 +3721,7 @@ pub fn vcmla_rot90_lane_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot90_lane_f16<const LANE: i32>(
     a: float16x8_t,
@@ -3854,23 +3729,10 @@ pub fn vcmlaq_rot90_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot90_f16(a, b, c)
-    }
+    let c = vreinterpret_u32_f16(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot90_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_lane_f32)"]
@@ -3885,10 +3747,10 @@ pub fn vcmla_rot90_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot90_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdup_lane_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot90_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_lane_f32)"]
@@ -3903,19 +3765,10 @@ pub fn vcmlaq_rot90_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot90_f32(a, b, c)
-    }
+    let c = vreinterpret_u64_f32(c);
+    let c = vdupq_lane_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot90_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f16)"]
@@ -3924,7 +3777,7 @@ pub fn vcmlaq_rot90_lane_f32<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmla_rot90_laneq_f16<const LANE: i32>(
     a: float16x4_t,
@@ -3932,19 +3785,10 @@ pub fn vcmla_rot90_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmla_rot90_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    let c = vreinterpret_f16_u32(c);
+    vcmla_rot90_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f16)"]
@@ -3953,7 +3797,7 @@ pub fn vcmla_rot90_laneq_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fcmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_neon_fcma", issue = "117222")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcmlaq_rot90_laneq_f16<const LANE: i32>(
     a: float16x8_t,
@@ -3961,23 +3805,10 @@ pub fn vcmlaq_rot90_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: float16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot90_f16(a, b, c)
-    }
+    let c = vreinterpretq_u32_f16(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    let c = vreinterpretq_f16_u32(c);
+    vcmlaq_rot90_f16(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmla_rot90_laneq_f32)"]
@@ -3992,10 +3823,10 @@ pub fn vcmla_rot90_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x2_t = simd_shuffle!(c, c, [2 * LANE as u32, 2 * LANE as u32 + 1]);
-        vcmla_rot90_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdup_laneq_u64::<LANE>(c);
+    let c = vreinterpret_f32_u64(c);
+    vcmla_rot90_f32(a, b, c)
 }
 #[doc = "Floating-point complex multiply accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcmlaq_rot90_laneq_f32)"]
@@ -4010,25 +3841,28 @@ pub fn vcmlaq_rot90_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: float32x4_t = simd_shuffle!(
-            c,
-            c,
-            [
-                2 * LANE as u32,
-                2 * LANE as u32 + 1,
-                2 * LANE as u32,
-                2 * LANE as u32 + 1
-            ]
-        );
-        vcmlaq_rot90_f32(a, b, c)
-    }
+    let c = vreinterpretq_u64_f32(c);
+    let c = vdupq_laneq_u64::<LANE>(c);
+    let c = vreinterpretq_f32_u64(c);
+    vcmlaq_rot90_f32(a, b, c)
+}
+#[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn vcombine_f64(a: float64x1_t, b: float64x1_t) -> float64x2_t {
+    unsafe { simd_shuffle!(a, b, [0, 1]) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_f32<const LANE1: i32, const LANE2: i32>(
@@ -4037,105 +3871,76 @@ pub fn vcopy_lane_f32<const LANE1: i32, const LANE2: i32>(
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_f32::<LANE1>(vget_lane_f32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s8::<LANE1>(vget_lane_s8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_s16<const LANE1: i32, const LANE2: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s16::<LANE1>(vget_lane_s16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_s32<const LANE1: i32, const LANE2: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s32::<LANE1>(vget_lane_s32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_u8<const LANE1: i32, const LANE2: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u8::<LANE1>(vget_lane_u8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_u16<const LANE1: i32, const LANE2: i32>(
@@ -4144,21 +3949,16 @@ pub fn vcopy_lane_u16<const LANE1: i32, const LANE2: i32>(
 ) -> uint16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u16::<LANE1>(vget_lane_u16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(
@@ -4167,43 +3967,31 @@ pub fn vcopy_lane_u32<const LANE1: i32, const LANE2: i32>(
 ) -> uint32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u32::<LANE1>(vget_lane_u32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_p8<const LANE1: i32, const LANE2: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_p8::<LANE1>(vget_lane_p8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(
@@ -4212,21 +4000,76 @@ pub fn vcopy_lane_p16<const LANE1: i32, const LANE2: i32>(
 ) -> poly16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_p16::<LANE1>(vget_lane_p16::<LANE2>(b), a)
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_lane_f64<const LANE1: i32, const LANE2: i32>(
+    _a: float64x1_t,
+    b: float64x1_t,
+) -> float64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert!(LANE2 == 0);
+    b
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_lane_s64<const LANE1: i32, const LANE2: i32>(
+    _a: int64x1_t,
+    b: int64x1_t,
+) -> int64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert!(LANE2 == 0);
+    b
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_lane_u64<const LANE1: i32, const LANE2: i32>(
+    _a: uint64x1_t,
+    b: uint64x1_t,
+) -> uint64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert!(LANE2 == 0);
+    b
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_lane_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 0))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_lane_p64<const LANE1: i32, const LANE2: i32>(
+    _a: poly64x1_t,
+    b: poly64x1_t,
+) -> poly64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert!(LANE2 == 0);
+    b
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(
@@ -4235,46 +4078,31 @@ pub fn vcopy_laneq_f32<const LANE1: i32, const LANE2: i32>(
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 2);
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) };
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_f32::<LANE1>(vgetq_lane_f32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_s8<const LANE1: i32, const LANE2: i32>(a: int8x8_t, b: int8x16_t) -> int8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 4);
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s8::<LANE1>(vgetq_lane_s8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(
@@ -4283,22 +4111,16 @@ pub fn vcopy_laneq_s16<const LANE1: i32, const LANE2: i32>(
 ) -> int16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 3);
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) };
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s16::<LANE1>(vgetq_lane_s16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(
@@ -4307,20 +4129,16 @@ pub fn vcopy_laneq_s32<const LANE1: i32, const LANE2: i32>(
 ) -> int32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 2);
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) };
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_s32::<LANE1>(vgetq_lane_s32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(
@@ -4329,27 +4147,16 @@ pub fn vcopy_laneq_u8<const LANE1: i32, const LANE2: i32>(
 ) -> uint8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 4);
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u8::<LANE1>(vgetq_lane_u8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(
@@ -4358,22 +4165,16 @@ pub fn vcopy_laneq_u16<const LANE1: i32, const LANE2: i32>(
 ) -> uint16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 3);
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) };
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u16::<LANE1>(vgetq_lane_u16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_u32<const LANE1: i32, const LANE2: i32>(
@@ -4382,20 +4183,16 @@ pub fn vcopy_laneq_u32<const LANE1: i32, const LANE2: i32>(
 ) -> uint32x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 2);
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) };
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_u32::<LANE1>(vgetq_lane_u32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_p8<const LANE1: i32, const LANE2: i32>(
@@ -4404,27 +4201,16 @@ pub fn vcopy_laneq_p8<const LANE1: i32, const LANE2: i32>(
 ) -> poly8x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 4);
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_p8::<LANE1>(vgetq_lane_p8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopy_laneq_p16<const LANE1: i32, const LANE2: i32>(
@@ -4433,16 +4219,67 @@ pub fn vcopy_laneq_p16<const LANE1: i32, const LANE2: i32>(
 ) -> poly16x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 3);
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) };
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vset_lane_p16::<LANE1>(vgetq_lane_p16::<LANE2>(b), a)
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
+    _a: float64x1_t,
+    b: float64x2_t,
+) -> float64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert_uimm_bits!(LANE2, 1);
+    unsafe { transmute(vgetq_lane_f64::<LANE2>(b)) }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
+    _a: int64x1_t,
+    b: int64x2_t,
+) -> int64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert_uimm_bits!(LANE2, 1);
+    unsafe { transmute(vgetq_lane_s64::<LANE2>(b)) }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_u64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
+    _a: uint64x1_t,
+    b: uint64x2_t,
+) -> uint64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert_uimm_bits!(LANE2, 1);
+    unsafe { transmute(vgetq_lane_u64::<LANE2>(b)) }
+}
+#[doc = "Insert vector element from another vector element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopy_laneq_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
+#[rustc_legacy_const_generics(1, 3)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
+    _a: poly64x1_t,
+    b: poly64x2_t,
+) -> poly64x1_t {
+    static_assert!(LANE1 == 0);
+    static_assert_uimm_bits!(LANE2, 1);
+    unsafe { transmute(vgetq_lane_p64::<LANE2>(b)) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f32)"]
@@ -4457,22 +4294,16 @@ pub fn vcopyq_lane_f32<const LANE1: i32, const LANE2: i32>(
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 1);
-    let b: float32x4_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3]) };
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_f32::<LANE1>(vget_lane_f32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 1, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(
@@ -4481,20 +4312,17 @@ pub fn vcopyq_lane_f64<const LANE1: i32, const LANE2: i32>(
 ) -> float64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert!(LANE2 == 0);
-    let b: float64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: float64x2_t = vcombine_f64(b, b);
+    vsetq_lane_f64::<LANE1>(vgetq_lane_f64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 1, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(
@@ -4503,20 +4331,17 @@ pub fn vcopyq_lane_s64<const LANE1: i32, const LANE2: i32>(
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert!(LANE2 == 0);
-    let b: int64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: int64x2_t = vcombine_s64(b, b);
+    vsetq_lane_s64::<LANE1>(vgetq_lane_s64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 1, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(
@@ -4525,20 +4350,17 @@ pub fn vcopyq_lane_u64<const LANE1: i32, const LANE2: i32>(
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert!(LANE2 == 0);
-    let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: uint64x2_t = vcombine_u64(b, b);
+    vsetq_lane_u64::<LANE1>(vgetq_lane_u64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 1, LANE2 = 0))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 1, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
@@ -4547,390 +4369,33 @@ pub fn vcopyq_lane_p64<const LANE1: i32, const LANE2: i32>(
 ) -> poly64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert!(LANE2 == 0);
-    let b: poly64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: poly64x2_t = vcombine_p64(b, b);
+    unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_s8<const LANE1: i32, const LANE2: i32>(a: int8x16_t, b: int8x8_t) -> int8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 3);
-    let b: int8x16_t =
-        unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };
-    unsafe {
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: int8x16_t = vcombine_s8(b, b);
+    vsetq_lane_s8::<LANE1>(vgetq_lane_s8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(
@@ -4939,26 +4404,17 @@ pub fn vcopyq_lane_s16<const LANE1: i32, const LANE2: i32>(
 ) -> int16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 2);
-    let b: int16x8_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]) };
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: int16x8_t = vcombine_s16(b, b);
+    vsetq_lane_s16::<LANE1>(vgetq_lane_s16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_s32<const LANE1: i32, const LANE2: i32>(
@@ -4967,22 +4423,17 @@ pub fn vcopyq_lane_s32<const LANE1: i32, const LANE2: i32>(
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 1);
-    let b: int32x4_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3]) };
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: int32x4_t = vcombine_s32(b, b);
+    vsetq_lane_s32::<LANE1>(vgetq_lane_s32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_u8<const LANE1: i32, const LANE2: i32>(
@@ -4991,371 +4442,17 @@ pub fn vcopyq_lane_u8<const LANE1: i32, const LANE2: i32>(
 ) -> uint8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 3);
-    let b: uint8x16_t =
-        unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };
-    unsafe {
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: uint8x16_t = vcombine_u8(b, b);
+    vsetq_lane_u8::<LANE1>(vgetq_lane_u8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_u16<const LANE1: i32, const LANE2: i32>(
@@ -5364,26 +4461,17 @@ pub fn vcopyq_lane_u16<const LANE1: i32, const LANE2: i32>(
 ) -> uint16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 2);
-    let b: uint16x8_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]) };
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: uint16x8_t = vcombine_u16(b, b);
+    vsetq_lane_u16::<LANE1>(vgetq_lane_u16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_u32<const LANE1: i32, const LANE2: i32>(
@@ -5392,22 +4480,17 @@ pub fn vcopyq_lane_u32<const LANE1: i32, const LANE2: i32>(
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 1);
-    let b: uint32x4_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3]) };
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: uint32x4_t = vcombine_u32(b, b);
+    vsetq_lane_u32::<LANE1>(vgetq_lane_u32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_p8<const LANE1: i32, const LANE2: i32>(
@@ -5416,371 +4499,17 @@ pub fn vcopyq_lane_p8<const LANE1: i32, const LANE2: i32>(
 ) -> poly8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 3);
-    let b: poly8x16_t =
-        unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };
-    unsafe {
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: poly8x16_t = vcombine_p8(b, b);
+    vsetq_lane_p8::<LANE1>(vgetq_lane_p8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_lane_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(
@@ -5789,26 +4518,17 @@ pub fn vcopyq_lane_p16<const LANE1: i32, const LANE2: i32>(
 ) -> poly16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 2);
-    let b: poly16x8_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]) };
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    let b: poly16x8_t = vcombine_p16(b, b);
+    vsetq_lane_p16::<LANE1>(vgetq_lane_p16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(
@@ -5817,21 +4537,16 @@ pub fn vcopyq_laneq_f32<const LANE1: i32, const LANE2: i32>(
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_f32::<LANE1>(vgetq_lane_f32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(
@@ -5840,19 +4555,16 @@ pub fn vcopyq_laneq_f64<const LANE1: i32, const LANE2: i32>(
 ) -> float64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_f64::<LANE1>(vgetq_lane_f64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_s8<const LANE1: i32, const LANE2: i32>(
@@ -5861,369 +4573,16 @@ pub fn vcopyq_laneq_s8<const LANE1: i32, const LANE2: i32>(
 ) -> int8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 4);
-    unsafe {
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_s8::<LANE1>(vgetq_lane_s8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_s16<const LANE1: i32, const LANE2: i32>(
@@ -6232,25 +4591,16 @@ pub fn vcopyq_laneq_s16<const LANE1: i32, const LANE2: i32>(
 ) -> int16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_s16::<LANE1>(vgetq_lane_s16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_s32<const LANE1: i32, const LANE2: i32>(
@@ -6259,21 +4609,16 @@ pub fn vcopyq_laneq_s32<const LANE1: i32, const LANE2: i32>(
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_s32::<LANE1>(vgetq_lane_s32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_s64<const LANE1: i32, const LANE2: i32>(
@@ -6282,19 +4627,16 @@ pub fn vcopyq_laneq_s64<const LANE1: i32, const LANE2: i32>(
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_s64::<LANE1>(vgetq_lane_s64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_u8<const LANE1: i32, const LANE2: i32>(
@@ -6303,369 +4645,16 @@ pub fn vcopyq_laneq_u8<const LANE1: i32, const LANE2: i32>(
 ) -> uint8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 4);
-    unsafe {
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_u8::<LANE1>(vgetq_lane_u8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_u16<const LANE1: i32, const LANE2: i32>(
@@ -6674,25 +4663,16 @@ pub fn vcopyq_laneq_u16<const LANE1: i32, const LANE2: i32>(
 ) -> uint16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_u16::<LANE1>(vgetq_lane_u16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(
@@ -6701,21 +4681,16 @@ pub fn vcopyq_laneq_u32<const LANE1: i32, const LANE2: i32>(
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE1, 2);
     static_assert_uimm_bits!(LANE2, 2);
-    unsafe {
-        match LANE1 & 0b11 {
-            0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]),
-            2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_u32::<LANE1>(vgetq_lane_u32::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(
@@ -6724,19 +4699,16 @@ pub fn vcopyq_laneq_u64<const LANE1: i32, const LANE2: i32>(
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_u64::<LANE1>(vgetq_lane_u64::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(
@@ -6745,369 +4717,16 @@ pub fn vcopyq_laneq_p8<const LANE1: i32, const LANE2: i32>(
 ) -> poly8x16_t {
     static_assert_uimm_bits!(LANE1, 4);
     static_assert_uimm_bits!(LANE2, 4);
-    unsafe {
-        match LANE1 & 0b1111 {
-            0 => simd_shuffle!(
-                a,
-                b,
-                [
-                    16 + LANE2 as u32,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    16 + LANE2 as u32,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    16 + LANE2 as u32,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    16 + LANE2 as u32,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    16 + LANE2 as u32,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    16 + LANE2 as u32,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    16 + LANE2 as u32,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    16 + LANE2 as u32,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    16 + LANE2 as u32,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    16 + LANE2 as u32,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    16 + LANE2 as u32,
-                    11,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    16 + LANE2 as u32,
-                    12,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    16 + LANE2 as u32,
-                    13,
-                    14,
-                    15
-                ]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    16 + LANE2 as u32,
-                    14,
-                    15
-                ]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    16 + LANE2 as u32,
-                    15
-                ]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [
-                    0,
-                    1,
-                    2,
-                    3,
-                    4,
-                    5,
-                    6,
-                    7,
-                    8,
-                    9,
-                    10,
-                    11,
-                    12,
-                    13,
-                    14,
-                    16 + LANE2 as u32
-                ]
-            ),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_p8::<LANE1>(vgetq_lane_p8::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(
@@ -7116,25 +4735,16 @@ pub fn vcopyq_laneq_p16<const LANE1: i32, const LANE2: i32>(
 ) -> poly16x8_t {
     static_assert_uimm_bits!(LANE1, 3);
     static_assert_uimm_bits!(LANE2, 3);
-    unsafe {
-        match LANE1 & 0b111 {
-            0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]),
-            2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]),
-            3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]),
-            4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]),
-            5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]),
-            6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]),
-            7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    vsetq_lane_p16::<LANE1>(vgetq_lane_p16::<LANE2>(b), a)
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcopyq_laneq_p64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov, LANE1 = 0, LANE2 = 1))]
+#[cfg_attr(
+    all(test, target_endian = "little"),
+    assert_instr(mov, LANE1 = 0, LANE2 = 0)
+)]
 #[rustc_legacy_const_generics(1, 3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(
@@ -7143,13 +4753,7 @@ pub fn vcopyq_laneq_p64<const LANE1: i32, const LANE2: i32>(
 ) -> poly64x2_t {
     static_assert_uimm_bits!(LANE1, 1);
     static_assert_uimm_bits!(LANE2, 1);
-    unsafe {
-        match LANE1 & 0b1 {
-            0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]),
-            1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_insert!(a, LANE1 as u32, simd_extract!(b, LANE2 as u32, p64)) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f64)"]
@@ -7164,7 +4768,7 @@ pub fn vcreate_f64(a: u64) -> float64x1_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtn))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t {
     unsafe { simd_cast(a) }
@@ -7217,9 +4821,9 @@ pub fn vcvtq_f64_u64(a: uint64x2_t) -> float64x2_t {
 #[doc = "Floating-point convert to lower precision"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f16_f32)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fcvtn2))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn2))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t {
     vcombine_f16(a, vcvt_f16_f32(b))
@@ -7227,9 +4831,9 @@ pub fn vcvt_high_f16_f32(a: float16x4_t, b: float32x4_t) -> float16x8_t {
 #[doc = "Floating-point convert to higher precision"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f16)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fcvtl2))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtl2))]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t {
     vcvt_f32_f16(vget_high_f16(a))
@@ -7238,22 +4842,19 @@ pub fn vcvt_high_f32_f16(a: float16x8_t) -> float32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f32_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, simd_cast(b), [0, 1, 2, 3]) }
+    vcombine_f32(a, vcvt_f32_f64(b))
 }
 #[doc = "Floating-point convert to higher precision long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_high_f64_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t {
-    unsafe {
-        let b: float32x2_t = simd_shuffle!(a, a, [2, 3]);
-        simd_cast(b)
-    }
+    unsafe { simd_cast(vget_high_f32(a)) }
 }
 #[doc = "Fixed-point convert to floating-point"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_n_f64_s64)"]
@@ -7468,7 +5069,7 @@ pub fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvta_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
@@ -7485,7 +5086,7 @@ pub fn vcvta_s16_f16(a: float16x4_t) -> int16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtas))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtaq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
@@ -7566,7 +5167,7 @@ pub fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtau))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvta_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
@@ -7583,7 +5184,7 @@ pub fn vcvta_u16_f16(a: float16x4_t) -> uint16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtau))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtaq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
@@ -7667,7 +5268,14 @@ pub fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_s16_f16(a: f16) -> i16 {
-    vcvtah_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtas.i16.f16"
+        )]
+        fn _vcvtah_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtah_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_s32_f16)"]
@@ -7711,7 +5319,14 @@ pub fn vcvtah_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtah_u16_f16(a: f16) -> u16 {
-    vcvtah_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtau.i16.f16"
+        )]
+        fn _vcvtah_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtah_u16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to nearest with ties to away"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtah_u32_f16)"]
@@ -8154,7 +5769,7 @@ pub fn vcvth_u64_f16(a: f16) -> u64 {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtm_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
@@ -8171,7 +5786,7 @@ pub fn vcvtm_s16_f16(a: float16x4_t) -> int16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtms))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
@@ -8252,7 +5867,7 @@ pub fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtmu))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtm_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
@@ -8269,7 +5884,7 @@ pub fn vcvtm_u16_f16(a: float16x4_t) -> uint16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtmu))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
@@ -8353,7 +5968,14 @@ pub fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_s16_f16(a: f16) -> i16 {
-    vcvtmh_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtms.i16.f16"
+        )]
+        fn _vcvtmh_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtmh_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding towards minus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_s32_f16)"]
@@ -8389,7 +6011,7 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
     }
     unsafe { _vcvtmh_s64_f16(a) }
 }
-#[doc = "Floating-point convert to integer, rounding towards minus infinity"]
+#[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u16_f16)"]
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtmu))]
@@ -8397,7 +6019,14 @@ pub fn vcvtmh_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtmh_u16_f16(a: f16) -> u16 {
-    vcvtmh_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtmu.i16.f16"
+        )]
+        fn _vcvtmh_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtmh_u16_f16(a) }
 }
 #[doc = "Floating-point convert to unsigned integer, rounding towards minus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtmh_u32_f16)"]
@@ -8502,7 +6131,7 @@ pub fn vcvtmd_u64_f64(a: f64) -> u64 {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtns))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtn_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
@@ -8519,7 +6148,7 @@ pub fn vcvtn_s16_f16(a: float16x4_t) -> int16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtns))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
@@ -8600,7 +6229,7 @@ pub fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtnu))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtn_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
@@ -8617,7 +6246,7 @@ pub fn vcvtn_u16_f16(a: float16x4_t) -> uint16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtnu))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
@@ -8701,7 +6330,14 @@ pub fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_s16_f16(a: f16) -> i16 {
-    vcvtnh_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtns.i16.f16"
+        )]
+        fn _vcvtnh_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtnh_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to nearest with ties to even"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_s32_f16)"]
@@ -8745,7 +6381,14 @@ pub fn vcvtnh_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtnh_u16_f16(a: f16) -> u16 {
-    vcvtnh_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtnu.i16.f16"
+        )]
+        fn _vcvtnh_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtnh_u16_f16(a) }
 }
 #[doc = "Floating-point convert to unsigned integer, rounding to nearest with ties to even"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtnh_u32_f16)"]
@@ -8850,7 +6493,7 @@ pub fn vcvtnd_u64_f64(a: f64) -> u64 {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtps))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtp_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
@@ -8867,7 +6510,7 @@ pub fn vcvtp_s16_f16(a: float16x4_t) -> int16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtps))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtpq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
@@ -8948,7 +6591,7 @@ pub fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtpu))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtp_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
@@ -8965,7 +6608,7 @@ pub fn vcvtp_u16_f16(a: float16x4_t) -> uint16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fcvtpu))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtpq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
@@ -9049,7 +6692,14 @@ pub fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_s16_f16(a: f16) -> i16 {
-    vcvtph_s32_f16(a) as i16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtps.i16.f16"
+        )]
+        fn _vcvtph_s16_f16(a: f16) -> i16;
+    }
+    unsafe { _vcvtph_s16_f16(a) }
 }
 #[doc = "Floating-point convert to integer, rounding to plus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_s32_f16)"]
@@ -9093,7 +6743,14 @@ pub fn vcvtph_s64_f16(a: f16) -> i64 {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtph_u16_f16(a: f16) -> u16 {
-    vcvtph_u32_f16(a) as u16
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fcvtpu.i16.f16"
+        )]
+        fn _vcvtph_u16_f16(a: f16) -> u16;
+    }
+    unsafe { _vcvtph_u16_f16(a) }
 }
 #[doc = "Floating-point convert to unsigned integer, rounding to plus infinity"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtph_u32_f16)"]
@@ -9395,7 +7052,7 @@ pub fn vcvtd_u64_f64(a: f64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_f32_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtxn))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtxn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
@@ -9411,10 +7068,10 @@ pub fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtx_high_f32_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtxn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fcvtxn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
-    unsafe { simd_shuffle!(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) }
+    vcombine_f32(a, vcvtx_f32_f64(b))
 }
 #[doc = "Floating-point convert to lower precision narrow, rounding to odd"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvtxd_f32_f64)"]
@@ -9423,13 +7080,13 @@ pub fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t {
 #[cfg_attr(test, assert_instr(fcvtxn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vcvtxd_f32_f64(a: f64) -> f32 {
-    unsafe { simd_extract!(vcvtx_f32_f64(vdupq_n_f64(a)), 0) }
+    vget_lane_f32::<0>(vcvtx_f32_f64(vdupq_n_f64(a)))
 }
 #[doc = "Divide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdiv_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fdiv))]
 pub fn vdiv_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
@@ -9439,7 +7096,7 @@ pub fn vdiv_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdivq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fdiv))]
 pub fn vdivq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
@@ -9487,72 +7144,10 @@ pub fn vdivq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(fdiv))]
 pub fn vdivh_f16(a: f16, b: f16) -> f16 {
     a / b
 }
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"]
-#[inline]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t = transmute(c);
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"]
-#[inline]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(test, assert_instr(sdot, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t = transmute(c);
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"]
-#[inline]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(test, assert_instr(udot, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: uint32x4_t = transmute(c);
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_u32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product arithmetic (indexed)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"]
-#[inline]
-#[target_feature(enable = "neon,dotprod")]
-#[cfg_attr(test, assert_instr(udot, LANE = 0))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
-pub fn vdotq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: uint32x4_t = transmute(c);
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_u32(a, b, transmute(c))
-    }
-}
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f64)"]
 #[inline]
@@ -9584,7 +7179,7 @@ pub fn vdup_lane_p64<const N: i32>(a: poly64x1_t) -> poly64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { transmute::<f64, _>(simd_extract!(a, N as u32)) }
+    unsafe { transmute(vgetq_lane_f64::<N>(a)) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p64)"]
@@ -9595,7 +7190,7 @@ pub fn vdup_laneq_f64<const N: i32>(a: float64x2_t) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(a, N as u32)) }
+    unsafe { transmute(vgetq_lane_p64::<N>(a)) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_s8)"]
@@ -9606,7 +7201,7 @@ pub fn vdup_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_s8::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_s16)"]
@@ -9617,7 +7212,7 @@ pub fn vdupb_lane_s8<const N: i32>(a: int8x8_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_s16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_u8)"]
@@ -9628,7 +7223,7 @@ pub fn vduph_laneq_s16<const N: i32>(a: int16x8_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_u8::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_u16)"]
@@ -9639,7 +7234,7 @@ pub fn vdupb_lane_u8<const N: i32>(a: uint8x8_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_laneq_u16<const N: i32>(a: uint16x8_t) -> u16 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_u16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_lane_p8)"]
@@ -9650,7 +7245,7 @@ pub fn vduph_laneq_u16<const N: i32>(a: uint16x8_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_lane_p8<const N: i32>(a: poly8x8_t) -> p8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_p8::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_p16)"]
@@ -9661,7 +7256,7 @@ pub fn vdupb_lane_p8<const N: i32>(a: poly8x8_t) -> p8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_laneq_p16<const N: i32>(a: poly16x8_t) -> p16 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_p16::<N>(a)
 }
 #[doc = "Extract an element from a vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_s8)"]
@@ -9672,7 +7267,7 @@ pub fn vduph_laneq_p16<const N: i32>(a: poly16x8_t) -> p16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_s8::<N>(a)
 }
 #[doc = "Extract an element from a vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_u8)"]
@@ -9683,7 +7278,7 @@ pub fn vdupb_laneq_s8<const N: i32>(a: int8x16_t) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_laneq_u8<const N: i32>(a: uint8x16_t) -> u8 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_u8::<N>(a)
 }
 #[doc = "Extract an element from a vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupb_laneq_p8)"]
@@ -9694,7 +7289,7 @@ pub fn vdupb_laneq_u8<const N: i32>(a: uint8x16_t) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_p8::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_f64)"]
@@ -9705,7 +7300,7 @@ pub fn vdupb_laneq_p8<const N: i32>(a: poly8x16_t) -> p8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_lane_f64<const N: i32>(a: float64x1_t) -> f64 {
     static_assert!(N == 0);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_f64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_s64)"]
@@ -9716,7 +7311,7 @@ pub fn vdupd_lane_f64<const N: i32>(a: float64x1_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 {
     static_assert!(N == 0);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_s64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_lane_u64)"]
@@ -9727,7 +7322,7 @@ pub fn vdupd_lane_s64<const N: i32>(a: int64x1_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 {
     static_assert!(N == 0);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_u64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_f16)"]
@@ -9739,7 +7334,7 @@ pub fn vdupd_lane_u64<const N: i32>(a: uint64x1_t) -> u64 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vduph_lane_f16<const N: i32>(a: float16x4_t) -> f16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_f16::<N>(a)
 }
 #[doc = "Extract an element from a vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_laneq_f16)"]
@@ -9751,7 +7346,7 @@ pub fn vduph_lane_f16<const N: i32>(a: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vduph_laneq_f16<const N: i32>(a: float16x8_t) -> f16 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_f16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f64)"]
@@ -9806,7 +7401,7 @@ pub fn vdupq_laneq_p64<const N: i32>(a: poly64x2_t) -> poly64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_lane_f32<const N: i32>(a: float32x2_t) -> f32 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_f32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_f64)"]
@@ -9817,7 +7412,7 @@ pub fn vdups_lane_f32<const N: i32>(a: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_laneq_f64<const N: i32>(a: float64x2_t) -> f64 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_f64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_s32)"]
@@ -9828,7 +7423,7 @@ pub fn vdupd_laneq_f64<const N: i32>(a: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_lane_s32<const N: i32>(a: int32x2_t) -> i32 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_s32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_s64)"]
@@ -9839,7 +7434,7 @@ pub fn vdups_lane_s32<const N: i32>(a: int32x2_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_laneq_s64<const N: i32>(a: int64x2_t) -> i64 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_s64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_lane_u32)"]
@@ -9850,7 +7445,7 @@ pub fn vdupd_laneq_s64<const N: i32>(a: int64x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_lane_u32<const N: i32>(a: uint32x2_t) -> u32 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_u32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupd_laneq_u64)"]
@@ -9861,7 +7456,7 @@ pub fn vdups_lane_u32<const N: i32>(a: uint32x2_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 {
     static_assert_uimm_bits!(N, 1);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_u64::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_f32)"]
@@ -9872,7 +7467,7 @@ pub fn vdupd_laneq_u64<const N: i32>(a: uint64x2_t) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_f32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_s16)"]
@@ -9883,7 +7478,7 @@ pub fn vdups_laneq_f32<const N: i32>(a: float32x4_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_s16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_s32)"]
@@ -9894,7 +7489,7 @@ pub fn vduph_lane_s16<const N: i32>(a: int16x4_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_s32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_u16)"]
@@ -9905,7 +7500,7 @@ pub fn vdups_laneq_s32<const N: i32>(a: int32x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_lane_u16<const N: i32>(a: uint16x4_t) -> u16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_u16::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdups_laneq_u32)"]
@@ -9916,7 +7511,7 @@ pub fn vduph_lane_u16<const N: i32>(a: uint16x4_t) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vdups_laneq_u32<const N: i32>(a: uint32x4_t) -> u32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vgetq_lane_u32::<N>(a)
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vduph_lane_p16)"]
@@ -9927,7 +7522,7 @@ pub fn vdups_laneq_u32<const N: i32>(a: uint32x4_t) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vduph_lane_p16<const N: i32>(a: poly16x4_t) -> p16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_extract!(a, N as u32) }
+    vget_lane_p16::<N>(a)
 }
 #[doc = "Three-way exclusive OR"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/veor3q_s8)"]
@@ -10066,13 +7661,7 @@ pub fn veor3q_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_p64)"]
@@ -10083,13 +7672,7 @@ pub fn vextq_f64<const N: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vextq_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_f64)"]
@@ -10106,7 +7689,7 @@ pub fn vfma_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
 #[cfg_attr(test, assert_instr(fmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfma_lane_f16<const LANE: i32>(
     a: float16x4_t,
@@ -10114,7 +7697,7 @@ pub fn vfma_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) }
+    vfma_f16(a, b, vdup_n_f16(vget_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f16)"]
@@ -10122,7 +7705,7 @@ pub fn vfma_lane_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfma_laneq_f16<const LANE: i32>(
     a: float16x4_t,
@@ -10130,7 +7713,7 @@ pub fn vfma_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfma_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) }
+    vfma_f16(a, b, vdup_n_f16(vgetq_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f16)"]
@@ -10138,7 +7721,7 @@ pub fn vfma_laneq_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmaq_lane_f16<const LANE: i32>(
     a: float16x8_t,
@@ -10146,7 +7729,7 @@ pub fn vfmaq_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) }
+    vfmaq_f16(a, b, vdupq_n_f16(vget_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f16)"]
@@ -10154,7 +7737,7 @@ pub fn vfmaq_lane_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fmla, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmaq_laneq_f16<const LANE: i32>(
     a: float16x8_t,
@@ -10162,7 +7745,7 @@ pub fn vfmaq_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmaq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) }
+    vfmaq_f16(a, b, vdupq_n_f16(vgetq_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f32)"]
@@ -10177,7 +7760,7 @@ pub fn vfma_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
+    vfma_f32(a, b, vdup_n_f32(vget_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f32)"]
@@ -10192,7 +7775,7 @@ pub fn vfma_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfma_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
+    vfma_f32(a, b, vdup_n_f32(vgetq_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_lane_f32)"]
@@ -10207,7 +7790,7 @@ pub fn vfmaq_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
+    vfmaq_f32(a, b, vdupq_n_f32(vget_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f32)"]
@@ -10222,7 +7805,7 @@ pub fn vfmaq_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmaq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
+    vfmaq_f32(a, b, vdupq_n_f32(vgetq_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_laneq_f64)"]
@@ -10237,7 +7820,7 @@ pub fn vfmaq_laneq_f64<const LANE: i32>(
     c: float64x2_t,
 ) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
+    vfmaq_f64(a, b, vdupq_n_f64(vgetq_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_lane_f64)"]
@@ -10252,13 +7835,13 @@ pub fn vfma_lane_f64<const LANE: i32>(
     c: float64x1_t,
 ) -> float64x1_t {
     static_assert!(LANE == 0);
-    unsafe { vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
+    vfma_f64(a, b, vdup_n_f64(vget_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfma_laneq_f64<const LANE: i32>(
@@ -10267,7 +7850,7 @@ pub fn vfma_laneq_f64<const LANE: i32>(
     c: float64x2_t,
 ) -> float64x1_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfma_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
+    vfma_f64(a, b, vdup_n_f64(vgetq_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused Multiply-Subtract from accumulator."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfma_n_f16)"]
@@ -10307,10 +7890,8 @@ pub fn vfma_n_f64(a: float64x1_t, b: float64x1_t, c: f64) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmad_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
     static_assert!(LANE == 0);
-    unsafe {
-        let c: f64 = simd_extract!(c, LANE as u32);
-        fmaf64(b, c, a)
-    }
+    let c: f64 = vget_lane_f64::<LANE>(c);
+    fmaf64(b, c, a)
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_f16)"]
@@ -10325,32 +7906,28 @@ pub fn vfmah_f16(a: f16, b: f16, c: f16) -> f16 {
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_lane_f16)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmah_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: f16 = simd_extract!(v, LANE as u32);
-        vfmah_f16(a, b, c)
-    }
+    let c: f16 = vget_lane_f16::<LANE>(v);
+    vfmah_f16(a, b, c)
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmah_laneq_f16)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmah_laneq_f16<const LANE: i32>(a: f16, b: f16, v: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: f16 = simd_extract!(v, LANE as u32);
-        vfmah_f16(a, b, c)
-    }
+    let c: f16 = vgetq_lane_f16::<LANE>(v);
+    vfmah_f16(a, b, c)
 }
 #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_f64)"]
@@ -10374,7 +7951,7 @@ pub fn vfmaq_lane_f64<const LANE: i32>(
     c: float64x1_t,
 ) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { vfmaq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
+    vfmaq_f64(a, b, vdupq_n_f64(vget_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused Multiply-Add to accumulator(vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmaq_n_f64)"]
@@ -10389,50 +7966,44 @@ pub fn vfmaq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmas_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: f32 = simd_extract!(c, LANE as u32);
-        fmaf32(b, c, a)
-    }
+    let c: f32 = vget_lane_f32::<LANE>(c);
+    fmaf32(b, c, a)
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmas_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmas_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: f32 = simd_extract!(c, LANE as u32);
-        fmaf32(b, c, a)
-    }
+    let c: f32 = vgetq_lane_f32::<LANE>(c);
+    fmaf32(b, c, a)
 }
 #[doc = "Floating-point fused multiply-add to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmad_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmadd, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmadd, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmad_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: f64 = simd_extract!(c, LANE as u32);
-        fmaf64(b, c, a)
-    }
+    let c: f64 = vgetq_lane_f64::<LANE>(c);
+    fmaf64(b, c, a)
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_high_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlal2))]
 pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
@@ -10450,7 +8021,7 @@ pub fn vfmlal_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlal2))]
 pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
@@ -10470,7 +8041,7 @@ pub fn vfmlalq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlal_lane_high_f16<const LANE: i32>(
     r: float32x2_t,
@@ -10478,7 +8049,7 @@ pub fn vfmlal_lane_high_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlal_high_f16(r, a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_high_f16)"]
@@ -10487,7 +8058,7 @@ pub fn vfmlal_lane_high_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlal_laneq_high_f16<const LANE: i32>(
     r: float32x2_t,
@@ -10495,7 +8066,7 @@ pub fn vfmlal_laneq_high_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlal_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlal_high_f16(r, a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_high_f16)"]
@@ -10504,7 +8075,7 @@ pub fn vfmlal_laneq_high_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlalq_lane_high_f16<const LANE: i32>(
     r: float32x4_t,
@@ -10512,7 +8083,7 @@ pub fn vfmlalq_lane_high_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlalq_high_f16(r, a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_high_f16)"]
@@ -10521,7 +8092,7 @@ pub fn vfmlalq_lane_high_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlalq_laneq_high_f16<const LANE: i32>(
     r: float32x4_t,
@@ -10529,7 +8100,7 @@ pub fn vfmlalq_laneq_high_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlalq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlalq_high_f16(r, a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_lane_low_f16)"]
@@ -10538,7 +8109,7 @@ pub fn vfmlalq_laneq_high_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlal_lane_low_f16<const LANE: i32>(
     r: float32x2_t,
@@ -10546,7 +8117,7 @@ pub fn vfmlal_lane_low_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlal_low_f16(r, a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_laneq_low_f16)"]
@@ -10555,7 +8126,7 @@ pub fn vfmlal_lane_low_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlal_laneq_low_f16<const LANE: i32>(
     r: float32x2_t,
@@ -10563,7 +8134,7 @@ pub fn vfmlal_laneq_low_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlal_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlal_low_f16(r, a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_lane_low_f16)"]
@@ -10572,7 +8143,7 @@ pub fn vfmlal_laneq_low_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlalq_lane_low_f16<const LANE: i32>(
     r: float32x4_t,
@@ -10580,7 +8151,7 @@ pub fn vfmlalq_lane_low_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlalq_low_f16(r, a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlalq_laneq_low_f16)"]
@@ -10589,7 +8160,7 @@ pub fn vfmlalq_lane_low_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlalq_laneq_low_f16<const LANE: i32>(
     r: float32x4_t,
@@ -10597,14 +8168,14 @@ pub fn vfmlalq_laneq_low_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlalq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlalq_low_f16(r, a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Add Long to accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlal_low_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlal))]
 pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
@@ -10622,7 +8193,7 @@ pub fn vfmlal_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlal))]
 pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
@@ -10640,7 +8211,7 @@ pub fn vfmlalq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float3
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlsl2))]
 pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
@@ -10658,7 +8229,7 @@ pub fn vfmlsl_high_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float3
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlsl2))]
 pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
@@ -10678,7 +8249,7 @@ pub fn vfmlslq_high_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlsl_lane_high_f16<const LANE: i32>(
     r: float32x2_t,
@@ -10686,7 +8257,7 @@ pub fn vfmlsl_lane_high_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlsl_high_f16(r, a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_high_f16)"]
@@ -10695,7 +8266,7 @@ pub fn vfmlsl_lane_high_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlsl_laneq_high_f16<const LANE: i32>(
     r: float32x2_t,
@@ -10703,7 +8274,7 @@ pub fn vfmlsl_laneq_high_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlsl_high_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlsl_high_f16(r, a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_high_f16)"]
@@ -10712,7 +8283,7 @@ pub fn vfmlsl_laneq_high_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlslq_lane_high_f16<const LANE: i32>(
     r: float32x4_t,
@@ -10720,7 +8291,7 @@ pub fn vfmlslq_lane_high_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlslq_high_f16(r, a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_high_f16)"]
@@ -10729,7 +8300,7 @@ pub fn vfmlslq_lane_high_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlslq_laneq_high_f16<const LANE: i32>(
     r: float32x4_t,
@@ -10737,7 +8308,7 @@ pub fn vfmlslq_laneq_high_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlslq_high_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlslq_high_f16(r, a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_lane_low_f16)"]
@@ -10746,7 +8317,7 @@ pub fn vfmlslq_laneq_high_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlsl_lane_low_f16<const LANE: i32>(
     r: float32x2_t,
@@ -10754,7 +8325,7 @@ pub fn vfmlsl_lane_low_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlsl_low_f16(r, a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_laneq_low_f16)"]
@@ -10763,7 +8334,7 @@ pub fn vfmlsl_lane_low_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlsl_laneq_low_f16<const LANE: i32>(
     r: float32x2_t,
@@ -10771,7 +8342,7 @@ pub fn vfmlsl_laneq_low_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlsl_low_f16(r, a, vdup_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlsl_low_f16(r, a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_lane_low_f16)"]
@@ -10780,7 +8351,7 @@ pub fn vfmlsl_laneq_low_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlslq_lane_low_f16<const LANE: i32>(
     r: float32x4_t,
@@ -10788,7 +8359,7 @@ pub fn vfmlslq_lane_low_f16<const LANE: i32>(
     b: float16x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlslq_low_f16(r, a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (by element)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlslq_laneq_low_f16)"]
@@ -10797,7 +8368,7 @@ pub fn vfmlslq_lane_low_f16<const LANE: i32>(
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmlslq_laneq_low_f16<const LANE: i32>(
     r: float32x4_t,
@@ -10805,14 +8376,14 @@ pub fn vfmlslq_laneq_low_f16<const LANE: i32>(
     b: float16x8_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmlslq_low_f16(r, a, vdupq_n_f16(simd_extract!(b, LANE as u32))) }
+    vfmlslq_low_f16(r, a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point fused Multiply-Subtract Long from accumulator (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmlsl_low_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlsl))]
 pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32x2_t {
@@ -10830,7 +8401,7 @@ pub fn vfmlsl_low_f16(r: float32x2_t, a: float16x4_t, b: float16x4_t) -> float32
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(not(target_arch = "arm"), target_feature(enable = "fhm"))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmlsl))]
 pub fn vfmlslq_low_f16(r: float32x4_t, a: float16x8_t, b: float16x8_t) -> float32x4_t {
@@ -10861,7 +8432,7 @@ pub fn vfms_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t) -> float64x1_t {
 #[cfg_attr(test, assert_instr(fmls, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfms_lane_f16<const LANE: i32>(
     a: float16x4_t,
@@ -10869,7 +8440,7 @@ pub fn vfms_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfms_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) }
+    vfms_f16(a, b, vdup_n_f16(vget_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f16)"]
@@ -10877,7 +8448,7 @@ pub fn vfms_lane_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fmls, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfms_laneq_f16<const LANE: i32>(
     a: float16x4_t,
@@ -10885,7 +8456,7 @@ pub fn vfms_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfms_f16(a, b, vdup_n_f16(simd_extract!(c, LANE as u32))) }
+    vfms_f16(a, b, vdup_n_f16(vgetq_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f16)"]
@@ -10893,7 +8464,7 @@ pub fn vfms_laneq_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fmls, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsq_lane_f16<const LANE: i32>(
     a: float16x8_t,
@@ -10901,7 +8472,7 @@ pub fn vfmsq_lane_f16<const LANE: i32>(
     c: float16x4_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) }
+    vfmsq_f16(a, b, vdupq_n_f16(vget_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f16)"]
@@ -10909,7 +8480,7 @@ pub fn vfmsq_lane_f16<const LANE: i32>(
 #[cfg_attr(test, assert_instr(fmls, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsq_laneq_f16<const LANE: i32>(
     a: float16x8_t,
@@ -10917,7 +8488,7 @@ pub fn vfmsq_laneq_f16<const LANE: i32>(
     c: float16x8_t,
 ) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vfmsq_f16(a, b, vdupq_n_f16(simd_extract!(c, LANE as u32))) }
+    vfmsq_f16(a, b, vdupq_n_f16(vgetq_lane_f16::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f32)"]
@@ -10932,7 +8503,7 @@ pub fn vfms_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
+    vfms_f32(a, b, vdup_n_f32(vget_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f32)"]
@@ -10947,7 +8518,7 @@ pub fn vfms_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfms_f32(a, b, vdup_n_f32(simd_extract!(c, LANE as u32))) }
+    vfms_f32(a, b, vdup_n_f32(vgetq_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_lane_f32)"]
@@ -10962,7 +8533,7 @@ pub fn vfmsq_lane_f32<const LANE: i32>(
     c: float32x2_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
+    vfmsq_f32(a, b, vdupq_n_f32(vget_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f32)"]
@@ -10977,7 +8548,7 @@ pub fn vfmsq_laneq_f32<const LANE: i32>(
     c: float32x4_t,
 ) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vfmsq_f32(a, b, vdupq_n_f32(simd_extract!(c, LANE as u32))) }
+    vfmsq_f32(a, b, vdupq_n_f32(vgetq_lane_f32::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_laneq_f64)"]
@@ -10992,7 +8563,7 @@ pub fn vfmsq_laneq_f64<const LANE: i32>(
     c: float64x2_t,
 ) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
+    vfmsq_f64(a, b, vdupq_n_f64(vgetq_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_lane_f64)"]
@@ -11007,13 +8578,13 @@ pub fn vfms_lane_f64<const LANE: i32>(
     c: float64x1_t,
 ) -> float64x1_t {
     static_assert!(LANE == 0);
-    unsafe { vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
+    vfms_f64(a, b, vdup_n_f64(vget_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused multiply-subtract to accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfms_laneq_f64<const LANE: i32>(
@@ -11022,7 +8593,7 @@ pub fn vfms_laneq_f64<const LANE: i32>(
     c: float64x2_t,
 ) -> float64x1_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vfms_f64(a, b, vdup_n_f64(simd_extract!(c, LANE as u32))) }
+    vfms_f64(a, b, vdup_n_f64(vgetq_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused Multiply-Subtract from accumulator."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfms_n_f16)"]
@@ -11066,32 +8637,28 @@ pub fn vfmsh_f16(a: f16, b: f16, c: f16) -> f16 {
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_lane_f16)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsh_lane_f16<const LANE: i32>(a: f16, b: f16, v: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: f16 = simd_extract!(v, LANE as u32);
-        vfmsh_f16(a, b, c)
-    }
+    let c: f16 = vget_lane_f16::<LANE>(v);
+    vfmsh_f16(a, b, c)
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsh_laneq_f16)"]
 #[inline]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsh_laneq_f16<const LANE: i32>(a: f16, b: f16, v: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: f16 = simd_extract!(v, LANE as u32);
-        vfmsh_f16(a, b, c)
-    }
+    let c: f16 = vgetq_lane_f16::<LANE>(v);
+    vfmsh_f16(a, b, c)
 }
 #[doc = "Floating-point fused multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_f64)"]
@@ -11118,7 +8685,7 @@ pub fn vfmsq_lane_f64<const LANE: i32>(
     c: float64x1_t,
 ) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { vfmsq_f64(a, b, vdupq_n_f64(simd_extract!(c, LANE as u32))) }
+    vfmsq_f64(a, b, vdupq_n_f64(vget_lane_f64::<LANE>(c)))
 }
 #[doc = "Floating-point fused Multiply-subtract to accumulator(vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsq_n_f64)"]
@@ -11133,7 +8700,7 @@ pub fn vfmsq_n_f64(a: float64x2_t, b: float64x2_t, c: f64) -> float64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmss_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
@@ -11143,7 +8710,7 @@ pub fn vfmss_lane_f32<const LANE: i32>(a: f32, b: f32, c: float32x2_t) -> f32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmss_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmss_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
@@ -11153,7 +8720,7 @@ pub fn vfmss_laneq_f32<const LANE: i32>(a: f32, b: f32, c: float32x4_t) -> f32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_lane_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmsd_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
@@ -11163,16 +8730,45 @@ pub fn vfmsd_lane_f64<const LANE: i32>(a: f64, b: f64, c: float64x1_t) -> f64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vfmsd_laneq_f64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fmsub, LANE = 0))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(fmsub, LANE = 0))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vfmsd_laneq_f64<const LANE: i32>(a: f64, b: f64, c: float64x2_t) -> f64 {
     vfmad_laneq_f64::<LANE>(a, -b, c)
 }
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(fmov))]
+pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
+    unsafe { float64x1_t([simd_extract!(a, 1)]) }
+}
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
+    unsafe { float64x1_t([simd_extract!(a, 0)]) }
+}
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+pub fn vgetq_lane_f64<const IMM5: i32>(a: float64x2_t) -> f64 {
+    static_assert_uimm_bits!(IMM5, 1);
+    unsafe { simd_extract!(a, IMM5 as u32) }
+}
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11184,7 +8780,7 @@ pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11196,7 +8792,7 @@ pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11207,7 +8803,7 @@ pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11218,7 +8814,7 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11229,7 +8825,7 @@ pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11240,7 +8836,7 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11251,7 +8847,7 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11262,7 +8858,7 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11273,7 +8869,7 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11284,7 +8880,7 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11295,7 +8891,7 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11306,7 +8902,7 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11317,7 +8913,7 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11328,7 +8924,7 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11339,7 +8935,7 @@ pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11350,7 +8946,7 @@ pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11361,7 +8957,7 @@ pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11372,7 +8968,7 @@ pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11383,7 +8979,7 @@ pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11394,7 +8990,7 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11405,7 +9001,7 @@ pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11416,7 +9012,7 @@ pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11427,7 +9023,7 @@ pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11438,7 +9034,7 @@ pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11449,7 +9045,7 @@ pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11460,7 +9056,7 @@ pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11471,7 +9067,7 @@ pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ldr))]
@@ -11482,115 +9078,73 @@ pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0"
-        )]
-        fn _vld1_f64_x2(a: *const f64) -> float64x1x2_t;
-    }
-    _vld1_f64_x2(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1_f64_x2(ptr: *const f64) -> float64x1x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0"
-        )]
-        fn _vld1_f64_x3(a: *const f64) -> float64x1x3_t;
-    }
-    _vld1_f64_x3(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1_f64_x3(ptr: *const f64) -> float64x1x3_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0"
-        )]
-        fn _vld1_f64_x4(a: *const f64) -> float64x1x4_t;
-    }
-    _vld1_f64_x4(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1_f64_x4(ptr: *const f64) -> float64x1x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0"
-        )]
-        fn _vld1q_f64_x2(a: *const f64) -> float64x2x2_t;
-    }
-    _vld1q_f64_x2(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1q_f64_x2(ptr: *const f64) -> float64x2x2_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0"
-        )]
-        fn _vld1q_f64_x3(a: *const f64) -> float64x2x3_t;
-    }
-    _vld1q_f64_x3(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1q_f64_x3(ptr: *const f64) -> float64x2x3_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld1))]
-pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0"
-        )]
-        fn _vld1q_f64_x4(a: *const f64) -> float64x2x4_t;
-    }
-    _vld1q_f64_x4(a)
+#[cfg_attr(test, assert_instr(ld))]
+pub unsafe fn vld1q_f64_x4(ptr: *const f64) -> float64x2x4_t {
+    crate::ptr::read_unaligned(ptr.cast())
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -11608,7 +9162,7 @@ pub unsafe fn vld2_dup_f64(a: *const f64) -> float64x1x2_t {
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -11626,7 +9180,7 @@ pub unsafe fn vld2q_dup_f64(a: *const f64) -> float64x2x2_t {
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -11644,25 +9198,18 @@ pub unsafe fn vld2q_dup_s64(a: *const i64) -> int64x2x2_t {
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vld2_f64(a: *const f64) -> float64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v1f64.p0"
-        )]
-        fn _vld2_f64(ptr: *const float64x1_t) -> float64x1x2_t;
-    }
-    _vld2_f64(a as _)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11682,7 +9229,7 @@ pub unsafe fn vld2_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x2_t) ->
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11702,7 +9249,7 @@ pub unsafe fn vld2_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x2_t) -> i
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11715,7 +9262,7 @@ pub unsafe fn vld2_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x2_t) ->
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11728,9 +9275,8 @@ pub unsafe fn vld2_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x2_t) ->
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld2r))]
@@ -11738,51 +9284,20 @@ pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t {
     transmute(vld2q_dup_s64(transmute(a)))
 }
 #[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_p64(a: *const p64) -> poly64x2x2_t {
-    let mut ret_val: poly64x2x2_t = transmute(vld2q_dup_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld2r))]
 pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t {
     transmute(vld2q_dup_s64(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_u64(a: *const u64) -> uint64x2x2_t {
-    let mut ret_val: uint64x2x2_t = transmute(vld2q_dup_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val
-}
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -11800,7 +9315,7 @@ pub unsafe fn vld2q_f64(a: *const f64) -> float64x2x2_t {
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -11818,7 +9333,7 @@ pub unsafe fn vld2q_s64(a: *const i64) -> int64x2x2_t {
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11839,7 +9354,7 @@ pub unsafe fn vld2q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x2_t) -
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11859,7 +9374,7 @@ pub unsafe fn vld2q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x2_t) -> in
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11879,7 +9394,7 @@ pub unsafe fn vld2q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x2_t) ->
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11892,7 +9407,7 @@ pub unsafe fn vld2q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x2_t) ->
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11905,7 +9420,7 @@ pub unsafe fn vld2q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x2_t) -> u
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11918,7 +9433,7 @@ pub unsafe fn vld2q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x2_t) ->
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld2, LANE = 0))]
@@ -11931,9 +9446,8 @@ pub unsafe fn vld2q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x2_t) -> p
 #[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld2))]
@@ -11941,51 +9455,20 @@ pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t {
     transmute(vld2q_s64(transmute(a)))
 }
 #[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_p64(a: *const p64) -> poly64x2x2_t {
-    let mut ret_val: poly64x2x2_t = transmute(vld2q_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val
-}
-#[doc = "Load multiple 2-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld2))]
 pub unsafe fn vld2q_u64(a: *const u64) -> uint64x2x2_t {
     transmute(vld2q_s64(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_u64(a: *const u64) -> uint64x2x2_t {
-    let mut ret_val: uint64x2x2_t = transmute(vld2q_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val
-}
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -12003,7 +9486,7 @@ pub unsafe fn vld3_dup_f64(a: *const f64) -> float64x1x3_t {
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -12021,7 +9504,7 @@ pub unsafe fn vld3q_dup_f64(a: *const f64) -> float64x2x3_t {
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -12039,25 +9522,18 @@ pub unsafe fn vld3q_dup_s64(a: *const i64) -> int64x2x3_t {
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vld3_f64(a: *const f64) -> float64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v1f64.p0"
-        )]
-        fn _vld3_f64(ptr: *const float64x1_t) -> float64x1x3_t;
-    }
-    _vld3_f64(a as _)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12083,7 +9559,7 @@ pub unsafe fn vld3_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x3_t) ->
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12096,7 +9572,7 @@ pub unsafe fn vld3_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x3_t) ->
 #[doc = "Load multiple 3-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12122,7 +9598,7 @@ pub unsafe fn vld3_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x3_t) -> i
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12135,9 +9611,8 @@ pub unsafe fn vld3_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x3_t) ->
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld3r))]
@@ -12145,89 +9620,42 @@ pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t {
     transmute(vld3q_dup_s64(transmute(a)))
 }
 #[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_p64(a: *const p64) -> poly64x2x3_t {
-    let mut ret_val: poly64x2x3_t = transmute(vld3q_dup_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld3r))]
 pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t {
     transmute(vld3q_dup_s64(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_u64(a: *const u64) -> uint64x2x3_t {
-    let mut ret_val: uint64x2x3_t = transmute(vld3q_dup_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val
-}
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld3))]
 pub unsafe fn vld3q_f64(a: *const f64) -> float64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v2f64.p0"
-        )]
-        fn _vld3q_f64(ptr: *const float64x2_t) -> float64x2x3_t;
-    }
-    _vld3q_f64(a as _)
+    crate::core_arch::macros::deinterleaving_load!(f64, 2, 3, a)
 }
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld3))]
 pub unsafe fn vld3q_s64(a: *const i64) -> int64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v2i64.p0"
-        )]
-        fn _vld3q_s64(ptr: *const int64x2_t) -> int64x2x3_t;
-    }
-    _vld3q_s64(a as _)
+    crate::core_arch::macros::deinterleaving_load!(i64, 2, 3, a)
 }
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12253,7 +9681,7 @@ pub unsafe fn vld3q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x3_t) -
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12266,14 +9694,14 @@ pub unsafe fn vld3q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x3_t) ->
 #[doc = "Load multiple 3-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vld3q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x3_t) -> int8x16x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+    static_assert_uimm_bits!(LANE, 4);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
@@ -12292,7 +9720,7 @@ pub unsafe fn vld3q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x3_t) -> in
 #[doc = "Load multiple 3-element structures to two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12318,7 +9746,7 @@ pub unsafe fn vld3q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x3_t) ->
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12331,7 +9759,7 @@ pub unsafe fn vld3q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x3_t) -> u
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12344,7 +9772,7 @@ pub unsafe fn vld3q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x3_t) ->
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld3, LANE = 0))]
@@ -12357,9 +9785,8 @@ pub unsafe fn vld3q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x3_t) -> p
 #[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld3))]
@@ -12367,53 +9794,20 @@ pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t {
     transmute(vld3q_s64(transmute(a)))
 }
 #[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_p64(a: *const p64) -> poly64x2x3_t {
-    let mut ret_val: poly64x2x3_t = transmute(vld3q_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val
-}
-#[doc = "Load multiple 3-element structures to three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld3))]
 pub unsafe fn vld3q_u64(a: *const u64) -> uint64x2x3_t {
     transmute(vld3q_s64(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_u64(a: *const u64) -> uint64x2x3_t {
-    let mut ret_val: uint64x2x3_t = transmute(vld3q_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val
-}
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4r))]
@@ -12431,7 +9825,7 @@ pub unsafe fn vld4_dup_f64(a: *const f64) -> float64x1x4_t {
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4r))]
@@ -12449,7 +9843,7 @@ pub unsafe fn vld4q_dup_f64(a: *const f64) -> float64x2x4_t {
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4r))]
@@ -12467,25 +9861,18 @@ pub unsafe fn vld4q_dup_s64(a: *const i64) -> int64x2x4_t {
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vld4_f64(a: *const f64) -> float64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v1f64.p0"
-        )]
-        fn _vld4_f64(ptr: *const float64x1_t) -> float64x1x4_t;
-    }
-    _vld4_f64(a as _)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12512,7 +9899,7 @@ pub unsafe fn vld4_lane_f64<const LANE: i32>(a: *const f64, b: float64x1x4_t) ->
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12539,7 +9926,7 @@ pub unsafe fn vld4_lane_s64<const LANE: i32>(a: *const i64, b: int64x1x4_t) -> i
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12552,7 +9939,7 @@ pub unsafe fn vld4_lane_p64<const LANE: i32>(a: *const p64, b: poly64x1x4_t) ->
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12565,9 +9952,8 @@ pub unsafe fn vld4_lane_u64<const LANE: i32>(a: *const u64, b: uint64x1x4_t) ->
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -12575,91 +9961,42 @@ pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t {
     transmute(vld4q_dup_s64(transmute(a)))
 }
 #[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_p64(a: *const p64) -> poly64x2x4_t {
-    let mut ret_val: poly64x2x4_t = transmute(vld4q_dup_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t {
     transmute(vld4q_dup_s64(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u64)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_u64(a: *const u64) -> uint64x2x4_t {
-    let mut ret_val: uint64x2x4_t = transmute(vld4q_dup_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
-    ret_val
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t {
+    crate::core_arch::macros::deinterleaving_load!(f64, 2, 4, a)
 }
 #[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_f64(a: *const f64) -> float64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v2f64.p0"
-        )]
-        fn _vld4q_f64(ptr: *const float64x2_t) -> float64x2x4_t;
-    }
-    _vld4q_f64(a as _)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld4))]
 pub unsafe fn vld4q_s64(a: *const i64) -> int64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v2i64.p0"
-        )]
-        fn _vld4q_s64(ptr: *const int64x2_t) -> int64x2x4_t;
-    }
-    _vld4q_s64(a as _)
+    crate::core_arch::macros::deinterleaving_load!(i64, 2, 4, a)
 }
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12686,14 +10023,14 @@ pub unsafe fn vld4q_lane_f64<const LANE: i32>(a: *const f64, b: float64x2x4_t) -
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub unsafe fn vld4q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x4_t) -> int8x16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+    static_assert_uimm_bits!(LANE, 4);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
@@ -12713,7 +10050,7 @@ pub unsafe fn vld4q_lane_s8<const LANE: i32>(a: *const i8, b: int8x16x4_t) -> in
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12740,7 +10077,7 @@ pub unsafe fn vld4q_lane_s64<const LANE: i32>(a: *const i64, b: int64x2x4_t) ->
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12753,7 +10090,7 @@ pub unsafe fn vld4q_lane_p64<const LANE: i32>(a: *const p64, b: poly64x2x4_t) ->
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12766,7 +10103,7 @@ pub unsafe fn vld4q_lane_u8<const LANE: i32>(a: *const u8, b: uint8x16x4_t) -> u
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12779,7 +10116,7 @@ pub unsafe fn vld4q_lane_u64<const LANE: i32>(a: *const u64, b: uint64x2x4_t) ->
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(ld4, LANE = 0))]
@@ -12792,9 +10129,8 @@ pub unsafe fn vld4q_lane_p8<const LANE: i32>(a: *const p8, b: poly8x16x4_t) -> p
 #[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(ld4))]
@@ -12802,55 +10138,258 @@ pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t {
     transmute(vld4q_s64(transmute(a)))
 }
 #[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_p64(a: *const p64) -> poly64x2x4_t {
-    let mut ret_val: poly64x2x4_t = transmute(vld4q_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
-    ret_val
-}
-#[doc = "Load multiple 4-element structures to four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(ld4))]
 pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t {
     transmute(vld4q_s64(transmute(a)))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u64)"]
+#[doc = "Load-acquire RCpc one single-element structure to one lane of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldap1_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t {
-    let mut ret_val: uint64x2x4_t = transmute(vld4q_s64(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
-    ret_val
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldap1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vldap1_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x1_t) -> int64x1_t {
+    static_assert!(LANE == 0);
+    let atomic_src = crate::sync::atomic::AtomicI64::from_ptr(ptr as *mut i64);
+    simd_insert!(
+        src,
+        LANE as u32,
+        atomic_src.load(crate::sync::atomic::Ordering::Acquire)
+    )
+}
+#[doc = "Load-acquire RCpc one single-element structure to one lane of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldap1q_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldap1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vldap1q_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let atomic_src = crate::sync::atomic::AtomicI64::from_ptr(ptr as *mut i64);
+    simd_insert!(
+        src,
+        LANE as u32,
+        atomic_src.load(crate::sync::atomic::Ordering::Acquire)
+    )
+}
+#[doc = "Load-acquire RCpc one single-element structure to one lane of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldap1q_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldap1, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vldap1q_lane_f64<const LANE: i32>(ptr: *const f64, src: float64x2_t) -> float64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vldap1q_lane_s64::<LANE>(ptr as *mut i64, transmute(src)))
+}
+#[doc = "Load-acquire RCpc one single-element structure to one lane of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldap1_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldap1, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vldap1_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x1_t) -> uint64x1_t {
+    static_assert!(LANE == 0);
+    transmute(vldap1_lane_s64::<LANE>(ptr as *mut i64, transmute(src)))
+}
+#[doc = "Load-acquire RCpc one single-element structure to one lane of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldap1q_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldap1, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vldap1q_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vldap1q_lane_s64::<LANE>(ptr as *mut i64, transmute(src)))
+}
+#[doc = "Load-acquire RCpc one single-element structure to one lane of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldap1_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldap1, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vldap1_lane_p64<const LANE: i32>(ptr: *const p64, src: poly64x1_t) -> poly64x1_t {
+    static_assert!(LANE == 0);
+    transmute(vldap1_lane_s64::<LANE>(ptr as *mut i64, transmute(src)))
+}
+#[doc = "Load-acquire RCpc one single-element structure to one lane of one register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldap1q_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ldap1, LANE = 0))]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vldap1q_lane_p64<const LANE: i32>(ptr: *const p64, src: poly64x2_t) -> poly64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vldap1q_lane_s64::<LANE>(ptr as *mut i64, transmute(src)))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_f16<const INDEX: i32>(a: float16x4_t, b: uint8x8_t) -> float16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2_lane_s16::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_f16<const INDEX: i32>(a: float16x8_t, b: uint8x8_t) -> float16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2q_lane_s16::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_u8<const INDEX: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 1);
+    transmute(vluti2_lane_s8::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_u8<const INDEX: i32>(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 1);
+    transmute(vluti2q_lane_s8::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_u16<const INDEX: i32>(a: uint16x4_t, b: uint8x8_t) -> uint16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2_lane_s16::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_u16<const INDEX: i32>(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2q_lane_s16::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_p8<const INDEX: i32>(a: poly8x8_t, b: uint8x8_t) -> poly8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 1);
+    transmute(vluti2_lane_s8::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_p8<const INDEX: i32>(a: poly8x16_t, b: uint8x8_t) -> poly8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 1);
+    transmute(vluti2q_lane_s8::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_lane_p16<const INDEX: i32>(a: poly16x4_t, b: uint8x8_t) -> poly16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2_lane_s16::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_lane_p16<const INDEX: i32>(a: poly16x8_t, b: uint8x8_t) -> poly16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2q_lane_s16::<INDEX>(transmute(a), b))
 }
 #[doc = "Lookup table read with 2-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 1))]
@@ -12870,7 +10409,7 @@ pub unsafe fn vluti2_lane_s8<const LANE: i32>(a: int8x8_t, b: uint8x8_t) -> int8
 #[doc = "Lookup table read with 2-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 1))]
@@ -12890,7 +10429,7 @@ pub unsafe fn vluti2q_lane_s8<const LANE: i32>(a: int8x16_t, b: uint8x8_t) -> in
 #[doc = "Lookup table read with 2-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 1))]
@@ -12910,7 +10449,7 @@ pub unsafe fn vluti2_lane_s16<const LANE: i32>(a: int16x4_t, b: uint8x8_t) -> in
 #[doc = "Lookup table read with 2-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 1))]
@@ -12928,113 +10467,219 @@ pub unsafe fn vluti2q_lane_s16<const LANE: i32>(a: int16x8_t, b: uint8x8_t) -> i
     _vluti2q_lane_s16(a, b, LANE)
 }
 #[doc = "Lookup table read with 2-bit indices"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_laneq_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
-#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
 #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vluti2_lane_u8<const LANE: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t {
-    static_assert!(LANE >= 0 && LANE <= 1);
-    transmute(vluti2_lane_s8::<LANE>(transmute(a), b))
+pub unsafe fn vluti2_laneq_f16<const INDEX: i32>(a: float16x4_t, b: uint8x16_t) -> float16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 7);
+    transmute(vluti2_laneq_s16::<INDEX>(transmute(a), b))
 }
 #[doc = "Lookup table read with 2-bit indices"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_laneq_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
-#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
 #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vluti2q_lane_u8<const LANE: i32>(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t {
-    static_assert!(LANE >= 0 && LANE <= 1);
-    transmute(vluti2q_lane_s8::<LANE>(transmute(a), b))
+pub unsafe fn vluti2q_laneq_f16<const INDEX: i32>(a: float16x8_t, b: uint8x16_t) -> float16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 7);
+    transmute(vluti2q_laneq_s16::<INDEX>(transmute(a), b))
 }
 #[doc = "Lookup table read with 2-bit indices"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_laneq_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
-#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
 #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vluti2_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint8x8_t) -> uint16x8_t {
-    static_assert!(LANE >= 0 && LANE <= 3);
-    transmute(vluti2_lane_s16::<LANE>(transmute(a), b))
+pub unsafe fn vluti2_laneq_u8<const INDEX: i32>(a: uint8x8_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2_laneq_s8::<INDEX>(transmute(a), b))
 }
 #[doc = "Lookup table read with 2-bit indices"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_laneq_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
-#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
 #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vluti2q_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t {
-    static_assert!(LANE >= 0 && LANE <= 3);
-    transmute(vluti2q_lane_s16::<LANE>(transmute(a), b))
+pub unsafe fn vluti2q_laneq_u8<const INDEX: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2q_laneq_s8::<INDEX>(transmute(a), b))
 }
 #[doc = "Lookup table read with 2-bit indices"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_laneq_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
-#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
 #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vluti2_lane_p8<const LANE: i32>(a: poly8x8_t, b: uint8x8_t) -> poly8x16_t {
-    static_assert!(LANE >= 0 && LANE <= 1);
-    transmute(vluti2_lane_s8::<LANE>(transmute(a), b))
+pub unsafe fn vluti2_laneq_u16<const INDEX: i32>(a: uint16x4_t, b: uint8x16_t) -> uint16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 7);
+    transmute(vluti2_laneq_s16::<INDEX>(transmute(a), b))
 }
 #[doc = "Lookup table read with 2-bit indices"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_laneq_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
-#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
 #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vluti2q_lane_p8<const LANE: i32>(a: poly8x16_t, b: uint8x8_t) -> poly8x16_t {
-    static_assert!(LANE >= 0 && LANE <= 1);
-    transmute(vluti2q_lane_s8::<LANE>(transmute(a), b))
+pub unsafe fn vluti2q_laneq_u16<const INDEX: i32>(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 7);
+    transmute(vluti2q_laneq_s16::<INDEX>(transmute(a), b))
 }
 #[doc = "Lookup table read with 2-bit indices"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_laneq_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
-#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
 #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vluti2_lane_p16<const LANE: i32>(a: poly16x4_t, b: uint8x8_t) -> poly16x8_t {
-    static_assert!(LANE >= 0 && LANE <= 3);
-    transmute(vluti2_lane_s16::<LANE>(transmute(a), b))
+pub unsafe fn vluti2_laneq_p8<const INDEX: i32>(a: poly8x8_t, b: uint8x16_t) -> poly8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2_laneq_s8::<INDEX>(transmute(a), b))
 }
 #[doc = "Lookup table read with 2-bit indices"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_laneq_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
-#[cfg_attr(test, assert_instr(nop, LANE = 1))]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
 #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
 #[rustc_legacy_const_generics(2)]
-pub unsafe fn vluti2q_lane_p16<const LANE: i32>(a: poly16x8_t, b: uint8x8_t) -> poly16x8_t {
-    static_assert!(LANE >= 0 && LANE <= 3);
-    transmute(vluti2q_lane_s16::<LANE>(transmute(a), b))
+pub unsafe fn vluti2q_laneq_p8<const INDEX: i32>(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    transmute(vluti2q_laneq_s8::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_laneq_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_laneq_p16<const INDEX: i32>(a: poly16x4_t, b: uint8x16_t) -> poly16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 7);
+    transmute(vluti2_laneq_s16::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_laneq_p16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_laneq_p16<const INDEX: i32>(a: poly16x8_t, b: uint8x16_t) -> poly16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 7);
+    transmute(vluti2q_laneq_s16::<INDEX>(transmute(a), b))
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_laneq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_laneq_s8<const INDEX: i32>(a: int8x8_t, b: uint8x16_t) -> int8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8"
+        )]
+        fn _vluti2_laneq_s8(a: int8x8_t, b: uint8x16_t, n: i32) -> int8x16_t;
+    }
+    _vluti2_laneq_s8(a, b, INDEX)
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_laneq_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_laneq_s8<const INDEX: i32>(a: int8x16_t, b: uint8x16_t) -> int8x16_t {
+    static_assert!(INDEX >= 0 && INDEX <= 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8"
+        )]
+        fn _vluti2q_laneq_s8(a: int8x16_t, b: uint8x16_t, n: i32) -> int8x16_t;
+    }
+    _vluti2q_laneq_s8(a, b, INDEX)
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2_laneq_s16<const INDEX: i32>(a: int16x4_t, b: uint8x16_t) -> int16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16"
+        )]
+        fn _vluti2_laneq_s16(a: int16x4_t, b: uint8x16_t, n: i32) -> int16x8_t;
+    }
+    _vluti2_laneq_s16(a, b, INDEX)
+}
+#[doc = "Lookup table read with 2-bit indices"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_laneq_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,lut")]
+#[cfg_attr(test, assert_instr(nop, INDEX = 1))]
+#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vluti2q_laneq_s16<const INDEX: i32>(a: int16x8_t, b: uint8x16_t) -> int16x8_t {
+    static_assert!(INDEX >= 0 && INDEX <= 7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16"
+        )]
+        fn _vluti2q_laneq_s16(a: int16x8_t, b: uint8x16_t, n: i32) -> int16x8_t;
+    }
+    _vluti2q_laneq_s16(a, b, INDEX)
 }
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_f16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut,fp16")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13047,7 +10692,7 @@ pub unsafe fn vluti4q_lane_f16_x2<const LANE: i32>(a: float16x8x2_t, b: uint8x8_
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_u16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13060,7 +10705,7 @@ pub unsafe fn vluti4q_lane_u16_x2<const LANE: i32>(a: uint16x8x2_t, b: uint8x8_t
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_p16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13073,7 +10718,7 @@ pub unsafe fn vluti4q_lane_p16_x2<const LANE: i32>(a: poly16x8x2_t, b: uint8x8_t
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_s16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13093,7 +10738,7 @@ pub unsafe fn vluti4q_lane_s16_x2<const LANE: i32>(a: int16x8x2_t, b: uint8x8_t)
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13113,7 +10758,7 @@ pub unsafe fn vluti4q_lane_s8<const LANE: i32>(a: int8x16_t, b: uint8x8_t) -> in
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13126,7 +10771,7 @@ pub unsafe fn vluti4q_lane_u8<const LANE: i32>(a: uint8x16_t, b: uint8x8_t) -> u
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13139,7 +10784,7 @@ pub unsafe fn vluti4q_lane_p8<const LANE: i32>(a: poly8x16_t, b: uint8x8_t) -> p
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_f16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut,fp16")]
 #[cfg_attr(test, assert_instr(nop, LANE = 3))]
@@ -13155,7 +10800,7 @@ pub unsafe fn vluti4q_laneq_f16_x2<const LANE: i32>(
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_u16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 3))]
@@ -13168,7 +10813,7 @@ pub unsafe fn vluti4q_laneq_u16_x2<const LANE: i32>(a: uint16x8x2_t, b: uint8x16
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_p16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 3))]
@@ -13181,7 +10826,7 @@ pub unsafe fn vluti4q_laneq_p16_x2<const LANE: i32>(a: poly16x8x2_t, b: uint8x16
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_s16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 3))]
@@ -13201,7 +10846,7 @@ pub unsafe fn vluti4q_laneq_s16_x2<const LANE: i32>(a: int16x8x2_t, b: uint8x16_
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13221,7 +10866,7 @@ pub unsafe fn vluti4q_laneq_s8<const LANE: i32>(a: int8x16_t, b: uint8x16_t) ->
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13234,7 +10879,7 @@ pub unsafe fn vluti4q_laneq_u8<const LANE: i32>(a: uint8x16_t, b: uint8x16_t) ->
 #[doc = "Lookup table read with 4-bit indices"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,lut")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -13300,7 +10945,14 @@ pub fn vmaxh_f16(a: f16, b: f16) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnm))]
 pub fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe { simd_fmax(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v1f64"
+        )]
+        fn _vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    unsafe { _vmaxnm_f64(a, b) }
 }
 #[doc = "Floating-point Maximum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f64)"]
@@ -13309,7 +10961,14 @@ pub fn vmaxnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnm))]
 pub fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_fmax(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.v2f64"
+        )]
+        fn _vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe { _vmaxnmq_f64(a, b) }
 }
 #[doc = "Floating-point Maximum Number"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmh_f16)"]
@@ -13319,7 +10978,14 @@ pub fn vmaxnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnm))]
 pub fn vmaxnmh_f16(a: f16, b: f16) -> f16 {
-    f16::max(a, b)
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnm.f16"
+        )]
+        fn _vmaxnmh_f16(a: f16, b: f16) -> f16;
+    }
+    unsafe { _vmaxnmh_f16(a, b) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f16)"]
@@ -13329,7 +10995,14 @@ pub fn vmaxnmh_f16(a: f16, b: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmv))]
 pub fn vmaxnmv_f16(a: float16x4_t) -> f16 {
-    unsafe { simd_reduce_max(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f16.v4f16"
+        )]
+        fn _vmaxnmv_f16(a: float16x4_t) -> f16;
+    }
+    unsafe { _vmaxnmv_f16(a) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f16)"]
@@ -13339,16 +11012,30 @@ pub fn vmaxnmv_f16(a: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmv))]
 pub fn vmaxnmvq_f16(a: float16x8_t) -> f16 {
-    unsafe { simd_reduce_max(a) }
-}
-#[doc = "Floating-point maximum number across vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f32)"]
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f16.v8f16"
+        )]
+        fn _vmaxnmvq_f16(a: float16x8_t) -> f16;
+    }
+    unsafe { _vmaxnmvq_f16(a) }
+}
+#[doc = "Floating-point maximum number across vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmv_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub fn vmaxnmv_f32(a: float32x2_t) -> f32 {
-    unsafe { simd_reduce_max(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v2f32"
+        )]
+        fn _vmaxnmv_f32(a: float32x2_t) -> f32;
+    }
+    unsafe { _vmaxnmv_f32(a) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f64)"]
@@ -13357,7 +11044,14 @@ pub fn vmaxnmv_f32(a: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub fn vmaxnmvq_f64(a: float64x2_t) -> f64 {
-    unsafe { simd_reduce_max(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f64.v2f64"
+        )]
+        fn _vmaxnmvq_f64(a: float64x2_t) -> f64;
+    }
+    unsafe { _vmaxnmvq_f64(a) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmvq_f32)"]
@@ -13366,7 +11060,14 @@ pub fn vmaxnmvq_f64(a: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fmaxnmv))]
 pub fn vmaxnmvq_f32(a: float32x4_t) -> f32 {
-    unsafe { simd_reduce_max(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxnmv.f32.v4f32"
+        )]
+        fn _vmaxnmvq_f32(a: float32x4_t) -> f32;
+    }
+    unsafe { _vmaxnmvq_f32(a) }
 }
 #[doc = "Floating-point maximum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxv_f16)"]
@@ -13614,7 +11315,14 @@ pub fn vminh_f16(a: f16, b: f16) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fminnm))]
 pub fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
-    unsafe { simd_fmin(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v1f64"
+        )]
+        fn _vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t;
+    }
+    unsafe { _vminnm_f64(a, b) }
 }
 #[doc = "Floating-point Minimum Number (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f64)"]
@@ -13623,7 +11331,14 @@ pub fn vminnm_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(fminnm))]
 pub fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe { simd_fmin(a, b) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v2f64"
+        )]
+        fn _vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    }
+    unsafe { _vminnmq_f64(a, b) }
 }
 #[doc = "Floating-point Minimum Number"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmh_f16)"]
@@ -13633,7 +11348,14 @@ pub fn vminnmq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnm))]
 pub fn vminnmh_f16(a: f16, b: f16) -> f16 {
-    f16::min(a, b)
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.f16"
+        )]
+        fn _vminnmh_f16(a: f16, b: f16) -> f16;
+    }
+    unsafe { _vminnmh_f16(a, b) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f16)"]
@@ -13643,7 +11365,14 @@ pub fn vminnmh_f16(a: f16, b: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmv))]
 pub fn vminnmv_f16(a: float16x4_t) -> f16 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f16.v4f16"
+        )]
+        fn _vminnmv_f16(a: float16x4_t) -> f16;
+    }
+    unsafe { _vminnmv_f16(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f16)"]
@@ -13653,7 +11382,14 @@ pub fn vminnmv_f16(a: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmv))]
 pub fn vminnmvq_f16(a: float16x8_t) -> f16 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f16.v8f16"
+        )]
+        fn _vminnmvq_f16(a: float16x8_t) -> f16;
+    }
+    unsafe { _vminnmvq_f16(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmv_f32)"]
@@ -13662,7 +11398,14 @@ pub fn vminnmvq_f16(a: float16x8_t) -> f16 {
 #[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vminnmv_f32(a: float32x2_t) -> f32 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f32.v2f32"
+        )]
+        fn _vminnmv_f32(a: float32x2_t) -> f32;
+    }
+    unsafe { _vminnmv_f32(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f64)"]
@@ -13671,7 +11414,14 @@ pub fn vminnmv_f32(a: float32x2_t) -> f32 {
 #[cfg_attr(test, assert_instr(fminnmp))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vminnmvq_f64(a: float64x2_t) -> f64 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f64.v2f64"
+        )]
+        fn _vminnmvq_f64(a: float64x2_t) -> f64;
+    }
+    unsafe { _vminnmvq_f64(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmvq_f32)"]
@@ -13680,7 +11430,14 @@ pub fn vminnmvq_f64(a: float64x2_t) -> f64 {
 #[cfg_attr(test, assert_instr(fminnmv))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vminnmvq_f32(a: float32x4_t) -> f32 {
-    unsafe { simd_reduce_min(a) }
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnmv.f32.v4f32"
+        )]
+        fn _vminnmvq_f32(a: float32x4_t) -> f32;
+    }
+    unsafe { _vminnmvq_f32(a) }
 }
 #[doc = "Floating-point minimum number across vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminv_f16)"]
@@ -13894,37 +11651,18 @@ pub fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_high_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlal_high_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_laneq_s16<const LANE: i32>(
@@ -13933,49 +11671,24 @@ pub fn vmlal_high_laneq_s16<const LANE: i32>(
     c: int16x8_t,
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlal_high_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlal_high_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlal_high_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_high_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_laneq_s32<const LANE: i32>(
@@ -13984,19 +11697,13 @@ pub fn vmlal_high_laneq_s32<const LANE: i32>(
     c: int32x4_t,
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_high_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_high_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_u16<const LANE: i32>(
@@ -14005,32 +11712,13 @@ pub fn vmlal_high_lane_u16<const LANE: i32>(
     c: uint16x4_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_high_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlal_high_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_laneq_u16<const LANE: i32>(
@@ -14039,32 +11727,13 @@ pub fn vmlal_high_laneq_u16<const LANE: i32>(
     c: uint16x8_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlal_high_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlal_high_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_lane_u32<const LANE: i32>(
@@ -14073,19 +11742,13 @@ pub fn vmlal_high_lane_u32<const LANE: i32>(
     c: uint32x2_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlal_high_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_high_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_laneq_u32<const LANE: i32>(
@@ -14094,19 +11757,13 @@ pub fn vmlal_high_laneq_u32<const LANE: i32>(
     c: uint32x4_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_high_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_high_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
 #[doc = "Multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
     vmlal_high_s16(a, b, vdupq_n_s16(c))
@@ -14115,7 +11772,7 @@ pub fn vmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
     vmlal_high_s32(a, b, vdupq_n_s32(c))
@@ -14124,7 +11781,7 @@ pub fn vmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
     vmlal_high_u16(a, b, vdupq_n_u16(c))
@@ -14133,7 +11790,7 @@ pub fn vmlal_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
     vmlal_high_u32(a, b, vdupq_n_u32(c))
@@ -14142,79 +11799,67 @@ pub fn vmlal_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
-    unsafe {
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmlal_s8(a, b, c)
-    }
+    let b = vget_high_s8(b);
+    let c = vget_high_s8(c);
+    vmlal_s8(a, b, c)
 }
 #[doc = "Signed multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        vmlal_s16(a, b, c)
-    }
+    let b = vget_high_s16(b);
+    let c = vget_high_s16(c);
+    vmlal_s16(a, b, c)
 }
 #[doc = "Signed multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let c: int32x2_t = simd_shuffle!(c, c, [2, 3]);
-        vmlal_s32(a, b, c)
-    }
+    let b = vget_high_s32(b);
+    let c = vget_high_s32(c);
+    vmlal_s32(a, b, c)
 }
 #[doc = "Unsigned multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmlal_u8(a, b, c)
-    }
+    let b = vget_high_u8(b);
+    let c = vget_high_u8(c);
+    vmlal_u8(a, b, c)
 }
 #[doc = "Unsigned multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        vmlal_u16(a, b, c)
-    }
+    let b = vget_high_u16(b);
+    let c = vget_high_u16(c);
+    vmlal_u16(a, b, c)
 }
 #[doc = "Unsigned multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
-        vmlal_u32(a, b, c)
-    }
+    let b = vget_high_u32(b);
+    let c = vget_high_u32(c);
+    vmlal_u32(a, b, c)
 }
 #[doc = "Floating-point multiply-subtract from accumulator"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f64)"]
@@ -14238,37 +11883,18 @@ pub fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_high_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlsl_high_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_laneq_s16<const LANE: i32>(
@@ -14277,49 +11903,24 @@ pub fn vmlsl_high_laneq_s16<const LANE: i32>(
     c: int16x8_t,
 ) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsl_high_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlsl_high_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsl_high_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_high_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_laneq_s32<const LANE: i32>(
@@ -14328,19 +11929,13 @@ pub fn vmlsl_high_laneq_s32<const LANE: i32>(
     c: int32x4_t,
 ) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_high_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_high_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_u16<const LANE: i32>(
@@ -14349,32 +11944,13 @@ pub fn vmlsl_high_lane_u16<const LANE: i32>(
     c: uint16x4_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_high_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlsl_high_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_laneq_u16<const LANE: i32>(
@@ -14383,32 +11959,13 @@ pub fn vmlsl_high_laneq_u16<const LANE: i32>(
     c: uint16x8_t,
 ) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsl_high_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmlsl_high_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_lane_u32<const LANE: i32>(
@@ -14417,19 +11974,13 @@ pub fn vmlsl_high_lane_u32<const LANE: i32>(
     c: uint32x2_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsl_high_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_high_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2, LANE = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_laneq_u32<const LANE: i32>(
@@ -14438,19 +11989,13 @@ pub fn vmlsl_high_laneq_u32<const LANE: i32>(
     c: uint32x4_t,
 ) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_high_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_high_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
 #[doc = "Multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
     vmlsl_high_s16(a, b, vdupq_n_s16(c))
@@ -14459,7 +12004,7 @@ pub fn vmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
     vmlsl_high_s32(a, b, vdupq_n_s32(c))
@@ -14468,7 +12013,7 @@ pub fn vmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
     vmlsl_high_u16(a, b, vdupq_n_u16(c))
@@ -14477,7 +12022,7 @@ pub fn vmlsl_high_n_u16(a: uint32x4_t, b: uint16x8_t, c: u16) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
     vmlsl_high_u32(a, b, vdupq_n_u32(c))
@@ -14486,223 +12031,181 @@ pub fn vmlsl_high_n_u32(a: uint64x2_t, b: uint32x4_t, c: u32) -> uint64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
-    unsafe {
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let c: int8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmlsl_s8(a, b, c)
-    }
+    let b = vget_high_s8(b);
+    let c = vget_high_s8(c);
+    vmlsl_s8(a, b, c)
 }
 #[doc = "Signed multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let c: int16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        vmlsl_s16(a, b, c)
-    }
+    let b = vget_high_s16(b);
+    let c = vget_high_s16(c);
+    vmlsl_s16(a, b, c)
 }
 #[doc = "Signed multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let c: int32x2_t = simd_shuffle!(c, c, [2, 3]);
-        vmlsl_s32(a, b, c)
-    }
+    let b = vget_high_s32(b);
+    let c = vget_high_s32(c);
+    vmlsl_s32(a, b, c)
 }
 #[doc = "Unsigned multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let c: uint8x8_t = simd_shuffle!(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmlsl_u8(a, b, c)
-    }
+    let b = vget_high_u8(b);
+    let c = vget_high_u8(c);
+    vmlsl_u8(a, b, c)
 }
 #[doc = "Unsigned multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let c: uint16x4_t = simd_shuffle!(c, c, [4, 5, 6, 7]);
-        vmlsl_u16(a, b, c)
-    }
+    let b = vget_high_u16(b);
+    let c = vget_high_u16(c);
+    vmlsl_u16(a, b, c)
 }
 #[doc = "Unsigned multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let c: uint32x2_t = simd_shuffle!(c, c, [2, 3]);
-        vmlsl_u32(a, b, c)
-    }
+    let b = vget_high_u32(b);
+    let c = vget_high_u32(c);
+    vmlsl_u32(a, b, c)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))]
 pub fn vmovl_high_s8(a: int8x16_t) -> int16x8_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmovl_s8(a)
-    }
+    let a = vget_high_s8(a);
+    vmovl_s8(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))]
 pub fn vmovl_high_s16(a: int16x8_t) -> int32x4_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        vmovl_s16(a)
-    }
+    let a = vget_high_s16(a);
+    vmovl_s16(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(sxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sxtl2))]
 pub fn vmovl_high_s32(a: int32x4_t) -> int64x2_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        vmovl_s32(a)
-    }
+    let a = vget_high_s32(a);
+    vmovl_s32(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))]
 pub fn vmovl_high_u8(a: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmovl_u8(a)
-    }
+    let a = vget_high_u8(a);
+    vmovl_u8(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))]
 pub fn vmovl_high_u16(a: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        vmovl_u16(a)
-    }
+    let a = vget_high_u16(a);
+    vmovl_u16(a)
 }
 #[doc = "Vector move"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(uxtl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uxtl2))]
 pub fn vmovl_high_u32(a: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        vmovl_u32(a)
-    }
+    let a = vget_high_u32(a);
+    vmovl_u32(a)
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    unsafe {
-        let c: int8x8_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
-    }
+    unsafe { vcombine_s8(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    unsafe {
-        let c: int16x4_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
-    }
+    unsafe { vcombine_s16(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    unsafe {
-        let c: int32x2_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3])
-    }
+    unsafe { vcombine_s32(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    unsafe {
-        let c: uint8x8_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
-    }
+    unsafe { vcombine_u8(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    unsafe {
-        let c: uint16x4_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3, 4, 5, 6, 7])
-    }
+    unsafe { vcombine_u16(a, simd_cast(b)) }
 }
 #[doc = "Extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_high_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(xtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(xtn2))]
 pub fn vmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    unsafe {
-        let c: uint32x2_t = simd_cast(b);
-        simd_shuffle!(a, c, [0, 1, 2, 3])
-    }
+    unsafe { vcombine_u32(a, simd_cast(b)) }
 }
 #[doc = "Multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f64)"]
@@ -14731,7 +12234,7 @@ pub fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     static_assert!(LANE == 0);
-    unsafe { simd_mul(a, transmute::<f64, _>(simd_extract!(b, LANE as u32))) }
+    unsafe { simd_mul(a, transmute::<f64, _>(vget_lane_f64::<LANE>(b))) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f16)"]
@@ -14739,16 +12242,11 @@ pub fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64
 #[cfg_attr(test, assert_instr(fmul, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    unsafe { simd_mul(a, vdup_laneq_f16::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f16)"]
@@ -14756,29 +12254,11 @@ pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float1
 #[cfg_attr(test, assert_instr(fmul, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    unsafe { simd_mul(a, vdupq_laneq_f16::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"]
@@ -14789,7 +12269,7 @@ pub fn vmulq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmul_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, transmute::<f64, _>(simd_extract!(b, LANE as u32))) }
+    unsafe { simd_mul(a, transmute::<f64, _>(vgetq_lane_f64::<LANE>(b))) }
 }
 #[doc = "Vector multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f64)"]
@@ -14818,10 +12298,8 @@ pub fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
     static_assert!(LANE == 0);
-    unsafe {
-        let b: f64 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f64 = vget_lane_f64::<LANE>(b);
+    a * b
 }
 #[doc = "Add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_f16)"]
@@ -14829,7 +12307,7 @@ pub fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(fmul))]
 pub fn vmulh_f16(a: f16, b: f16) -> f16 {
     a * b
 }
@@ -14843,10 +12321,8 @@ pub fn vmulh_f16(a: f16, b: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: f16 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f16 = vget_lane_f16::<LANE>(b);
+    a * b
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulh_laneq_f16)"]
@@ -14858,196 +12334,102 @@ pub fn vmulh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let b: f16 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f16 = vgetq_lane_f16::<LANE>(b);
+    a * b
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_high_s16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmull_high_s16(a, vdupq_lane_s16::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmull_high_s16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmull_high_s16(a, vdupq_laneq_s16::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmull_high_s32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_high_s32(a, vdupq_lane_s32::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_high_s32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_high_s32(a, vdupq_laneq_s32::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_high_u16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmull_high_u16(a, vdupq_lane_u16::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmull_high_u16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmull_high_u16(a, vdupq_laneq_u16::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmull_high_u32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_high_u32(a, vdupq_lane_u32::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2, LANE = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_high_u32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_high_u32(a, vdupq_laneq_u32::<LANE>(b))
 }
 #[doc = "Multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
     vmull_high_s16(a, vdupq_n_s16(b))
@@ -15056,7 +12438,7 @@ pub fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
     vmull_high_s32(a, vdupq_n_s32(b))
@@ -15065,7 +12447,7 @@ pub fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t {
     vmull_high_u16(a, vdupq_n_u16(b))
@@ -15074,7 +12456,7 @@ pub fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
     vmull_high_u32(a, vdupq_n_u32(b))
@@ -15084,100 +12466,86 @@ pub fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(pmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(pmull2))]
 pub fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
-    unsafe { vmull_p64(simd_extract!(a, 1), simd_extract!(b, 1)) }
+    vmull_p64(vgetq_lane_p64::<1>(a), vgetq_lane_p64::<1>(b))
 }
 #[doc = "Polynomial multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(pmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(pmull2))]
 pub fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
-    unsafe {
-        let a: poly8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: poly8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmull_p8(a, b)
-    }
+    let a = vget_high_p8(a);
+    let b = vget_high_p8(b);
+    vmull_p8(a, b)
 }
 #[doc = "Signed multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 pub fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
-    unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmull_s8(a, b)
-    }
+    let a = vget_high_s8(a);
+    let b = vget_high_s8(b);
+    vmull_s8(a, b)
 }
 #[doc = "Signed multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 pub fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        vmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vget_high_s16(b);
+    vmull_s16(a, b)
 }
 #[doc = "Signed multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(smull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(smull2))]
 pub fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        vmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vget_high_s32(b);
+    vmull_s32(a, b)
 }
 #[doc = "Unsigned multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 pub fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vmull_u8(a, b)
-    }
+    let a = vget_high_u8(a);
+    let b = vget_high_u8(b);
+    vmull_u8(a, b)
 }
 #[doc = "Unsigned multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 pub fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        vmull_u16(a, b)
-    }
+    let a = vget_high_u16(a);
+    let b = vget_high_u16(b);
+    vmull_u16(a, b)
 }
 #[doc = "Unsigned multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(umull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(umull2))]
 pub fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        vmull_u32(a, b)
-    }
+    let a = vget_high_u32(a);
+    let b = vget_high_u32(b);
+    vmull_u32(a, b)
 }
 #[doc = "Polynomial multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p64)"]
@@ -15204,7 +12572,7 @@ pub fn vmull_p64(a: p64, b: p64) -> p128 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, vdupq_lane_f64::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64)"]
@@ -15215,7 +12583,7 @@ pub fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float6
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    unsafe { simd_mul(a, vdupq_laneq_f64::<LANE>(b)) }
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32)"]
@@ -15226,10 +12594,8 @@ pub fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmuls_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let b: f32 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f32 = vget_lane_f32::<LANE>(b);
+    a * b
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_laneq_f32)"]
@@ -15240,10 +12606,8 @@ pub fn vmuls_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmuls_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: f32 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f32 = vgetq_lane_f32::<LANE>(b);
+    a * b
 }
 #[doc = "Floating-point multiply"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuld_laneq_f64)"]
@@ -15254,16 +12618,14 @@ pub fn vmuls_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let b: f64 = simd_extract!(b, LANE as u32);
-        a * b
-    }
+    let b: f64 = vgetq_lane_f64::<LANE>(b);
+    a * b
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmulx))]
 pub fn vmulx_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
@@ -15280,7 +12642,7 @@ pub fn vmulx_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmulx))]
 pub fn vmulxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
@@ -15363,16 +12725,11 @@ pub fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulx_lane_f16<const LANE: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmulx_f16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmulx_f16(a, vdup_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f16)"]
@@ -15380,16 +12737,11 @@ pub fn vmulx_lane_f16<const LANE: i32>(a: float16x4_t, b: float16x4_t) -> float1
 #[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulx_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmulx_f16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmulx_f16(a, vdup_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f16)"]
@@ -15397,29 +12749,11 @@ pub fn vmulx_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float
 #[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxq_lane_f16<const LANE: i32>(a: float16x8_t, b: float16x4_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmulxq_f16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmulxq_f16(a, vdupq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f16)"]
@@ -15427,29 +12761,11 @@ pub fn vmulxq_lane_f16<const LANE: i32>(a: float16x8_t, b: float16x4_t) -> float
 #[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmulxq_f16(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+    vmulxq_f16(a, vdupq_laneq_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"]
@@ -15460,7 +12776,7 @@ pub fn vmulxq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> floa
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    vmulx_f32(a, vdup_lane_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f32)"]
@@ -15471,7 +12787,7 @@ pub fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float3
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    vmulx_f32(a, vdup_laneq_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f32)"]
@@ -15482,12 +12798,7 @@ pub fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmulxq_f32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmulxq_f32(a, vdupq_lane_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f32)"]
@@ -15498,12 +12809,7 @@ pub fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmulxq_f32(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmulxq_f32(a, vdupq_laneq_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f64)"]
@@ -15514,7 +12820,7 @@ pub fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> floa
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    vmulxq_f64(a, vdupq_laneq_f64::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f64)"]
@@ -15525,7 +12831,7 @@ pub fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> floa
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
     static_assert!(LANE == 0);
-    unsafe { vmulx_f64(a, transmute::<f64, _>(simd_extract!(b, LANE as u32))) }
+    unsafe { vmulx_f64(a, transmute(vget_lane_f64::<LANE>(b))) }
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f64)"]
@@ -15536,7 +12842,7 @@ pub fn vmulx_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float6
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulx_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulx_f64(a, transmute::<f64, _>(simd_extract!(b, LANE as u32))) }
+    unsafe { vmulx_f64(a, transmute(vgetq_lane_f64::<LANE>(b))) }
 }
 #[doc = "Vector multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_n_f16)"]
@@ -15599,7 +12905,7 @@ pub fn vmulxs_f32(a: f32, b: f32) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
     static_assert!(LANE == 0);
-    unsafe { vmulxd_f64(a, simd_extract!(b, LANE as u32)) }
+    vmulxd_f64(a, vget_lane_f64::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxd_laneq_f64)"]
@@ -15610,7 +12916,7 @@ pub fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulxd_f64(a, simd_extract!(b, LANE as u32)) }
+    vmulxd_f64(a, vgetq_lane_f64::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_lane_f32)"]
@@ -15621,7 +12927,7 @@ pub fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmulxs_f32(a, simd_extract!(b, LANE as u32)) }
+    vmulxs_f32(a, vget_lane_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxs_laneq_f32)"]
@@ -15632,7 +12938,7 @@ pub fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxs_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulxs_f32(a, simd_extract!(b, LANE as u32)) }
+    vmulxs_f32(a, vgetq_lane_f32::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_f16)"]
@@ -15661,7 +12967,7 @@ pub fn vmulxh_f16(a: f16, b: f16) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmulxh_f16(a, simd_extract!(b, LANE as u32)) }
+    vmulxh_f16(a, vget_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxh_laneq_f16)"]
@@ -15673,7 +12979,7 @@ pub fn vmulxh_lane_f16<const LANE: i32>(a: f16, b: float16x4_t) -> f16 {
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vmulxh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vmulxh_f16(a, simd_extract!(b, LANE as u32)) }
+    vmulxh_f16(a, vgetq_lane_f16::<LANE>(b))
 }
 #[doc = "Floating-point multiply extended"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f64)"]
@@ -15684,7 +12990,7 @@ pub fn vmulxh_laneq_f16<const LANE: i32>(a: f16, b: float16x8_t) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vmulxq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
     static_assert!(LANE == 0);
-    unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    vmulxq_f64(a, vdupq_lane_f64::<LANE>(b))
 }
 #[doc = "Negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f64)"]
@@ -15748,11 +13054,9 @@ pub fn vnegh_f16(a: f16) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vpaddd_f64(a: float64x2_t) -> f64 {
-    unsafe {
-        let a1: f64 = simd_extract!(a, 0);
-        let a2: f64 = simd_extract!(a, 1);
-        a1 + a2
-    }
+    let a1: f64 = vgetq_lane_f64::<0>(a);
+    let a2: f64 = vgetq_lane_f64::<1>(a);
+    a1 + a2
 }
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadds_f32)"]
@@ -15761,11 +13065,9 @@ pub fn vpaddd_f64(a: float64x2_t) -> f64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vpadds_f32(a: float32x2_t) -> f32 {
-    unsafe {
-        let a1: f32 = simd_extract!(a, 0);
-        let a2: f32 = simd_extract!(a, 1);
-        a1 + a2
-    }
+    let a1: f32 = vget_lane_f32::<0>(a);
+    let a2: f32 = vget_lane_f32::<1>(a);
+    a1 + a2
 }
 #[doc = "Add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_s64)"]
@@ -15774,7 +13076,7 @@ pub fn vpadds_f32(a: float32x2_t) -> f32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddd_s64(a: int64x2_t) -> i64 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddd_u64)"]
@@ -15783,24 +13085,21 @@ pub fn vpaddd_s64(a: int64x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddd_u64(a: uint64x2_t) -> u64 {
-    unsafe { simd_reduce_add_unordered(a) }
+    unsafe { simd_reduce_add_ordered(a, 0) }
 }
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(faddp))]
 pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v8f16"
-        )]
-        fn _vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    unsafe {
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>());
+        simd_add(even, odd)
     }
-    unsafe { _vpaddq_f16(a, b) }
 }
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f32)"]
@@ -15809,14 +13108,11 @@ pub fn vpaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(faddp))]
 pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v4f32"
-        )]
-        fn _vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    unsafe {
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>());
+        simd_add(even, odd)
     }
-    unsafe { _vpaddq_f32(a, b) }
 }
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_f64)"]
@@ -15825,14 +13121,11 @@ pub fn vpaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(faddp))]
 pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v2f64"
-        )]
-        fn _vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t;
+    unsafe {
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
+        simd_add(even, odd)
     }
-    unsafe { _vpaddq_f64(a, b) }
 }
 #[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s8)"]
@@ -15841,14 +13134,11 @@ pub fn vpaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v16i8"
-        )]
-        fn _vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    unsafe {
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>());
+        simd_add(even, odd)
     }
-    unsafe { _vpaddq_s8(a, b) }
 }
 #[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s16)"]
@@ -15857,14 +13147,11 @@ pub fn vpaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v8i16"
-        )]
-        fn _vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    unsafe {
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>());
+        simd_add(even, odd)
     }
-    unsafe { _vpaddq_s16(a, b) }
 }
 #[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s32)"]
@@ -15873,14 +13160,11 @@ pub fn vpaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v4i32"
-        )]
-        fn _vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    unsafe {
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>());
+        simd_add(even, odd)
     }
-    unsafe { _vpaddq_s32(a, b) }
 }
 #[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_s64)"]
@@ -15889,126 +13173,69 @@ pub fn vpaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v2i64"
-        )]
-        fn _vpaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    unsafe {
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
+        simd_add(even, odd)
     }
-    unsafe { _vpaddq_s64(a, b) }
-}
-#[doc = "Add Pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(addp))]
-pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { transmute(vpaddq_s8(transmute(a), transmute(b))) }
 }
 #[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x16_t =
-        unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
     unsafe {
-        let ret_val: uint8x16_t = transmute(vpaddq_s8(transmute(a), transmute(b)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<16>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<16>());
+        simd_add(even, odd)
     }
 }
 #[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(addp))]
-pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { transmute(vpaddq_s16(transmute(a), transmute(b))) }
-}
-#[doc = "Add Pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint16x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
     unsafe {
-        let ret_val: uint16x8_t = transmute(vpaddq_s16(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<8>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<8>());
+        simd_add(even, odd)
     }
 }
 #[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(addp))]
-pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { transmute(vpaddq_s32(transmute(a), transmute(b))) }
-}
-#[doc = "Add Pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    let b: uint32x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) };
     unsafe {
-        let ret_val: uint32x4_t = transmute(vpaddq_s32(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<4>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<4>());
+        simd_add(even, odd)
     }
 }
 #[doc = "Add Pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(addp))]
-pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { transmute(vpaddq_s64(transmute(a), transmute(b))) }
-}
-#[doc = "Add Pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddq_u64)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(addp))]
 pub fn vpaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) };
     unsafe {
-        let ret_val: uint64x2_t = transmute(vpaddq_s64(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let even = simd_shuffle!(a, b, crate::core_arch::macros::even::<2>());
+        let odd = simd_shuffle!(a, b, crate::core_arch::macros::odd::<2>());
+        simd_add(even, odd)
     }
 }
 #[doc = "Floating-point add pairwise"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxp))]
 pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
@@ -16025,7 +13252,7 @@ pub fn vpmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxp))]
 pub fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
@@ -16042,7 +13269,7 @@ pub fn vpmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnm_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
@@ -16059,7 +13286,7 @@ pub fn vpmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxnmq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fmaxnmp))]
 pub fn vpmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
@@ -16316,7 +13543,7 @@ pub fn vpmaxs_f32(a: float32x2_t) -> f32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminp))]
 pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
@@ -16333,7 +13560,7 @@ pub fn vpmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminp))]
 pub fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
@@ -16350,7 +13577,7 @@ pub fn vpminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnm_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmp))]
 pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
@@ -16367,7 +13594,7 @@ pub fn vpminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpminnmq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(fminnmp))]
 pub fn vpminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
@@ -16659,7 +13886,7 @@ pub fn vqabsq_s64(a: int64x2_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
 pub fn vqabsb_s8(a: i8) -> i8 {
-    unsafe { simd_extract!(vqabs_s8(vdup_n_s8(a)), 0) }
+    vget_lane_s8::<0>(vqabs_s8(vdup_n_s8(a)))
 }
 #[doc = "Signed saturating absolute value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsh_s16)"]
@@ -16668,7 +13895,7 @@ pub fn vqabsb_s8(a: i8) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sqabs))]
 pub fn vqabsh_s16(a: i16) -> i16 {
-    unsafe { simd_extract!(vqabs_s16(vdup_n_s16(a)), 0) }
+    vget_lane_s16::<0>(vqabs_s16(vdup_n_s16(a)))
 }
 #[doc = "Signed saturating absolute value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabss_s32)"]
@@ -16711,7 +13938,7 @@ pub fn vqabsd_s64(a: i64) -> i64 {
 pub fn vqaddb_s8(a: i8, b: i8) -> i8 {
     let a: int8x8_t = vdup_n_s8(a);
     let b: int8x8_t = vdup_n_s8(b);
-    unsafe { simd_extract!(vqadd_s8(a, b), 0) }
+    vget_lane_s8::<0>(vqadd_s8(a, b))
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_s16)"]
@@ -16722,7 +13949,7 @@ pub fn vqaddb_s8(a: i8, b: i8) -> i8 {
 pub fn vqaddh_s16(a: i16, b: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqadd_s16(a, b), 0) }
+    vget_lane_s16::<0>(vqadd_s16(a, b))
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddb_u8)"]
@@ -16733,7 +13960,7 @@ pub fn vqaddh_s16(a: i16, b: i16) -> i16 {
 pub fn vqaddb_u8(a: u8, b: u8) -> u8 {
     let a: uint8x8_t = vdup_n_u8(a);
     let b: uint8x8_t = vdup_n_u8(b);
-    unsafe { simd_extract!(vqadd_u8(a, b), 0) }
+    vget_lane_u8::<0>(vqadd_u8(a, b))
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddh_u16)"]
@@ -16744,7 +13971,7 @@ pub fn vqaddb_u8(a: u8, b: u8) -> u8 {
 pub fn vqaddh_u16(a: u16, b: u16) -> u16 {
     let a: uint16x4_t = vdup_n_u16(a);
     let b: uint16x4_t = vdup_n_u16(b);
-    unsafe { simd_extract!(vqadd_u16(a, b), 0) }
+    vget_lane_u16::<0>(vqadd_u16(a, b))
 }
 #[doc = "Saturating add"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadds_s32)"]
@@ -16814,7 +14041,7 @@ pub fn vqaddd_u64(a: u64, b: u64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
@@ -16825,7 +14052,7 @@ pub fn vqdmlal_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
@@ -16836,7 +14063,7 @@ pub fn vqdmlal_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
@@ -16847,7 +14074,7 @@ pub fn vqdmlal_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
@@ -16858,7 +14085,7 @@ pub fn vqdmlal_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
     vqaddq_s32(a, vqdmull_high_n_s16(b, c))
@@ -16867,7 +14094,7 @@ pub fn vqdmlal_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
     vqaddq_s32(a, vqdmull_high_s16(b, c))
@@ -16876,7 +14103,7 @@ pub fn vqdmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
     vqaddq_s64(a, vqdmull_high_n_s32(b, c))
@@ -16885,7 +14112,7 @@ pub fn vqdmlal_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
     vqaddq_s64(a, vqdmull_high_s32(b, c))
@@ -16894,7 +14121,7 @@ pub fn vqdmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, N = 2))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
@@ -16905,7 +14132,7 @@ pub fn vqdmlal_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t)
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlal, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlal, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlal_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
@@ -16921,7 +14148,7 @@ pub fn vqdmlal_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t)
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlalh_s16(a, b, vget_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_laneq_s16)"]
@@ -16932,7 +14159,7 @@ pub fn vqdmlalh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqdmlalh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlalh_s16(a, b, vgetq_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_lane_s32)"]
@@ -16943,7 +14170,7 @@ pub fn vqdmlalh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlals_s32(a, b, vget_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_laneq_s32)"]
@@ -16954,7 +14181,7 @@ pub fn vqdmlals_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmlals_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlals_s32(a, b, vgetq_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlalh_s16)"]
@@ -16964,7 +14191,7 @@ pub fn vqdmlals_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlalh_s16(a: i32, b: i16, c: i16) -> i32 {
     let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
-    unsafe { vqadds_s32(a, simd_extract!(x, 0)) }
+    vqadds_s32(a, vgetq_lane_s32::<0>(x))
 }
 #[doc = "Signed saturating doubling multiply-add long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlals_s32)"]
@@ -16980,7 +14207,7 @@ pub fn vqdmlals_s32(a: i64, b: i32, c: i32) -> i64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t {
@@ -16991,7 +14218,7 @@ pub fn vqdmlsl_high_lane_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
@@ -17002,7 +14229,7 @@ pub fn vqdmlsl_high_laneq_s16<const N: i32>(a: int32x4_t, b: int16x8_t, c: int16
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t {
@@ -17013,7 +14240,7 @@ pub fn vqdmlsl_high_lane_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
@@ -17024,7 +14251,7 @@ pub fn vqdmlsl_high_laneq_s32<const N: i32>(a: int64x2_t, b: int32x4_t, c: int32
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
     vqsubq_s32(a, vqdmull_high_n_s16(b, c))
@@ -17033,7 +14260,7 @@ pub fn vqdmlsl_high_n_s16(a: int32x4_t, b: int16x8_t, c: i16) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
     vqsubq_s32(a, vqdmull_high_s16(b, c))
@@ -17042,7 +14269,7 @@ pub fn vqdmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
     vqsubq_s64(a, vqdmull_high_n_s32(b, c))
@@ -17051,7 +14278,7 @@ pub fn vqdmlsl_high_n_s32(a: int64x2_t, b: int32x4_t, c: i32) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
     vqsubq_s64(a, vqdmull_high_s32(b, c))
@@ -17060,7 +14287,7 @@ pub fn vqdmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, N = 2))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
@@ -17071,7 +14298,7 @@ pub fn vqdmlsl_laneq_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t)
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmlsl, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmlsl, N = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsl_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
@@ -17087,7 +14314,7 @@ pub fn vqdmlsl_laneq_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t)
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlslh_s16(a, b, vget_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_laneq_s16)"]
@@ -17098,7 +14325,7 @@ pub fn vqdmlslh_lane_s16<const LANE: i32>(a: i32, b: i16, c: int16x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqdmlslh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlslh_s16(a, b, vgetq_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_lane_s32)"]
@@ -17109,7 +14336,7 @@ pub fn vqdmlslh_laneq_s16<const LANE: i32>(a: i32, b: i16, c: int16x8_t) -> i32
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlsls_s32(a, b, vget_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_laneq_s32)"]
@@ -17120,7 +14347,7 @@ pub fn vqdmlsls_lane_s32<const LANE: i32>(a: i64, b: i32, c: int32x2_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmlsls_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqdmlsls_s32(a, b, vgetq_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlslh_s16)"]
@@ -17130,7 +14357,7 @@ pub fn vqdmlsls_laneq_s32<const LANE: i32>(a: i64, b: i32, c: int32x4_t) -> i64
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmlslh_s16(a: i32, b: i16, c: i16) -> i32 {
     let x: int32x4_t = vqdmull_s16(vdup_n_s16(b), vdup_n_s16(c));
-    unsafe { vqsubs_s32(a, simd_extract!(x, 0)) }
+    vqsubs_s32(a, vgetq_lane_s32::<0>(x))
 }
 #[doc = "Signed saturating doubling multiply-subtract long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsls_s32)"]
@@ -17151,7 +14378,7 @@ pub fn vqdmlsls_s32(a: i64, b: i32, c: i32) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) }
+    vqdmulh_s16(a, vdup_n_s16(vget_lane_s16::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s16)"]
@@ -17162,7 +14389,7 @@ pub fn vqdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) }
+    vqdmulhq_s16(a, vdupq_n_s16(vget_lane_s16::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_lane_s32)"]
@@ -17173,7 +14400,7 @@ pub fn vqdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) }
+    vqdmulh_s32(a, vdup_n_s32(vget_lane_s32::<LANE>(b)))
 }
 #[doc = "Vector saturating doubling multiply high by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_lane_s32)"]
@@ -17184,7 +14411,7 @@ pub fn vqdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) }
+    vqdmulhq_s32(a, vdupq_n_s32(vget_lane_s32::<LANE>(b)))
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_lane_s16)"]
@@ -17195,10 +14422,8 @@ pub fn vqdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: i16 = simd_extract!(b, N as u32);
-        vqdmulhh_s16(a, b)
-    }
+    let b: i16 = vget_lane_s16::<N>(b);
+    vqdmulhh_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_laneq_s16)"]
@@ -17209,10 +14434,8 @@ pub fn vqdmulhh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        let b: i16 = simd_extract!(b, N as u32);
-        vqdmulhh_s16(a, b)
-    }
+    let b: i16 = vgetq_lane_s16::<N>(b);
+    vqdmulhh_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhh_s16)"]
@@ -17223,7 +14446,7 @@ pub fn vqdmulhh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i16 {
 pub fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqdmulh_s16(a, b), 0) }
+    vget_lane_s16::<0>(vqdmulh_s16(a, b))
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_s32)"]
@@ -17234,7 +14457,7 @@ pub fn vqdmulhh_s16(a: i16, b: i16) -> i16 {
 pub fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
     let a: int32x2_t = vdup_n_s32(a);
     let b: int32x2_t = vdup_n_s32(b);
-    unsafe { simd_extract!(vqdmulh_s32(a, b), 0) }
+    vget_lane_s32::<0>(vqdmulh_s32(a, b))
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_lane_s32)"]
@@ -17245,10 +14468,8 @@ pub fn vqdmulhs_s32(a: i32, b: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        let b: i32 = simd_extract!(b, N as u32);
-        vqdmulhs_s32(a, b)
-    }
+    let b: i32 = vget_lane_s32::<N>(b);
+    vqdmulhs_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhs_laneq_s32)"]
@@ -17259,122 +14480,104 @@ pub fn vqdmulhs_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: i32 = simd_extract!(b, N as u32);
-        vqdmulhs_s32(a, b)
-    }
+    let b: i32 = vgetq_lane_s32::<N>(b);
+    vqdmulhs_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-        vqdmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vdup_lane_s16::<N>(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-        vqdmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vdup_laneq_s32::<N>(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 1))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-        vqdmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vdup_lane_s32::<N>(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2, N = 4))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2, N = 4))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-        vqdmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vdup_laneq_s16::<N>(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = vdup_n_s16(b);
-        vqdmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vdup_n_s16(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = vdup_n_s32(b);
-        vqdmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vdup_n_s32(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
-    unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        vqdmull_s16(a, b)
-    }
+    let a = vget_high_s16(a);
+    let b = vget_high_s16(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqdmull2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
-    unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        vqdmull_s32(a, b)
-    }
+    let a = vget_high_s32(a);
+    let b = vget_high_s32(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Vector saturating doubling long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s16)"]
@@ -17385,10 +14588,8 @@ pub fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-        vqdmull_s16(a, b)
-    }
+    let b = vdup_laneq_s16::<N>(b);
+    vqdmull_s16(a, b)
 }
 #[doc = "Vector saturating doubling long multiply by scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s32)"]
@@ -17399,10 +14600,8 @@ pub fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-        vqdmull_s32(a, b)
-    }
+    let b = vdup_laneq_s32::<N>(b);
+    vqdmull_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_lane_s16)"]
@@ -17413,10 +14612,8 @@ pub fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: i16 = simd_extract!(b, N as u32);
-        vqdmullh_s16(a, b)
-    }
+    let b: i16 = vget_lane_s16::<N>(b);
+    vqdmullh_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_laneq_s32)"]
@@ -17427,10 +14624,8 @@ pub fn vqdmullh_lane_s16<const N: i32>(a: i16, b: int16x4_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: i32 = simd_extract!(b, N as u32);
-        vqdmulls_s32(a, b)
-    }
+    let b: i32 = vgetq_lane_s32::<N>(b);
+    vqdmulls_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_laneq_s16)"]
@@ -17441,10 +14636,8 @@ pub fn vqdmulls_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        let b: i16 = simd_extract!(b, N as u32);
-        vqdmullh_s16(a, b)
-    }
+    let b: i16 = vgetq_lane_s16::<N>(b);
+    vqdmullh_s16(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_s16)"]
@@ -17455,7 +14648,7 @@ pub fn vqdmullh_laneq_s16<const N: i32>(a: i16, b: int16x8_t) -> i32 {
 pub fn vqdmullh_s16(a: i16, b: i16) -> i32 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqdmull_s16(a, b), 0) }
+    vgetq_lane_s32::<0>(vqdmull_s16(a, b))
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_lane_s32)"]
@@ -17466,10 +14659,8 @@ pub fn vqdmullh_s16(a: i16, b: i16) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqdmulls_lane_s32<const N: i32>(a: i32, b: int32x2_t) -> i64 {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        let b: i32 = simd_extract!(b, N as u32);
-        vqdmulls_s32(a, b)
-    }
+    let b: i32 = vget_lane_s32::<N>(b);
+    vqdmulls_s32(a, b)
 }
 #[doc = "Signed saturating doubling multiply long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulls_s32)"]
@@ -17491,67 +14682,55 @@ pub fn vqdmulls_s32(a: i32, b: i32) -> i64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t {
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqmovn_s16(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vqmovn_s16(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t {
-    unsafe { simd_shuffle!(a, vqmovn_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vqmovn_s32(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t {
-    unsafe { simd_shuffle!(a, vqmovn_s64(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vqmovn_s64(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqmovn_u16(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqmovn_u16(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, vqmovn_u32(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqmovn_u32(b))
 }
 #[doc = "Signed saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_high_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqxtn2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqxtn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovn_high_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, vqmovn_u64(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqmovn_u64(b))
 }
 #[doc = "Saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnd_s64)"]
@@ -17592,7 +14771,7 @@ pub fn vqmovnd_u64(a: u64) -> u32 {
 #[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovnh_s16(a: i16) -> i8 {
-    unsafe { simd_extract!(vqmovn_s16(vdupq_n_s16(a)), 0) }
+    vget_lane_s8::<0>(vqmovn_s16(vdupq_n_s16(a)))
 }
 #[doc = "Saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_s32)"]
@@ -17601,7 +14780,7 @@ pub fn vqmovnh_s16(a: i16) -> i8 {
 #[cfg_attr(test, assert_instr(sqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovns_s32(a: i32) -> i16 {
-    unsafe { simd_extract!(vqmovn_s32(vdupq_n_s32(a)), 0) }
+    vget_lane_s16::<0>(vqmovn_s32(vdupq_n_s32(a)))
 }
 #[doc = "Saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovnh_u16)"]
@@ -17610,7 +14789,7 @@ pub fn vqmovns_s32(a: i32) -> i16 {
 #[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovnh_u16(a: u16) -> u8 {
-    unsafe { simd_extract!(vqmovn_u16(vdupq_n_u16(a)), 0) }
+    vget_lane_u8::<0>(vqmovn_u16(vdupq_n_u16(a)))
 }
 #[doc = "Saturating extract narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovns_u32)"]
@@ -17619,40 +14798,34 @@ pub fn vqmovnh_u16(a: u16) -> u8 {
 #[cfg_attr(test, assert_instr(uqxtn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovns_u32(a: u32) -> u16 {
-    unsafe { simd_extract!(vqmovn_u32(vdupq_n_u32(a)), 0) }
+    vget_lane_u16::<0>(vqmovn_u32(vdupq_n_u32(a)))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovun_high_s16(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqmovun_s16(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqmovun_s16(b))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovun_high_s32(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
-    unsafe { simd_shuffle!(a, vqmovun_s32(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqmovun_s32(b))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqxtun2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqxtun2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
-    unsafe { simd_shuffle!(a, vqmovun_s64(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqmovun_s64(b))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovunh_s16)"]
@@ -17661,7 +14834,7 @@ pub fn vqmovun_high_s64(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
 #[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovunh_s16(a: i16) -> u8 {
-    unsafe { simd_extract!(vqmovun_s16(vdupq_n_s16(a)), 0) }
+    vget_lane_u8::<0>(vqmovun_s16(vdupq_n_s16(a)))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovuns_s32)"]
@@ -17670,7 +14843,7 @@ pub fn vqmovunh_s16(a: i16) -> u8 {
 #[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovuns_s32(a: i32) -> u16 {
-    unsafe { simd_extract!(vqmovun_s32(vdupq_n_s32(a)), 0) }
+    vget_lane_u16::<0>(vqmovun_s32(vdupq_n_s32(a)))
 }
 #[doc = "Signed saturating extract unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovund_s64)"]
@@ -17679,7 +14852,7 @@ pub fn vqmovuns_s32(a: i32) -> u16 {
 #[cfg_attr(test, assert_instr(sqxtun))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqmovund_s64(a: i64) -> u32 {
-    unsafe { simd_extract!(vqmovun_s64(vdupq_n_s64(a)), 0) }
+    vget_lane_u32::<0>(vqmovun_s64(vdupq_n_s64(a)))
 }
 #[doc = "Signed saturating negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s64)"]
@@ -17720,7 +14893,7 @@ pub fn vqnegq_s64(a: int64x2_t) -> int64x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sqneg))]
 pub fn vqnegb_s8(a: i8) -> i8 {
-    unsafe { simd_extract!(vqneg_s8(vdup_n_s8(a)), 0) }
+    vget_lane_s8::<0>(vqneg_s8(vdup_n_s8(a)))
 }
 #[doc = "Signed saturating negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegh_s16)"]
@@ -17729,7 +14902,7 @@ pub fn vqnegb_s8(a: i8) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sqneg))]
 pub fn vqnegh_s16(a: i16) -> i16 {
-    unsafe { simd_extract!(vqneg_s16(vdup_n_s16(a)), 0) }
+    vget_lane_s16::<0>(vqneg_s16(vdup_n_s16(a)))
 }
 #[doc = "Signed saturating negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegs_s32)"]
@@ -17738,7 +14911,7 @@ pub fn vqnegh_s16(a: i16) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sqneg))]
 pub fn vqnegs_s32(a: i32) -> i32 {
-    unsafe { simd_extract!(vqneg_s32(vdup_n_s32(a)), 0) }
+    vget_lane_s32::<0>(vqneg_s32(vdup_n_s32(a)))
 }
 #[doc = "Signed saturating negate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegd_s64)"]
@@ -17747,7 +14920,7 @@ pub fn vqnegs_s32(a: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(sqneg))]
 pub fn vqnegd_s64(a: i64) -> i64 {
-    unsafe { simd_extract!(vqneg_s64(vdup_n_s64(a)), 0) }
+    vget_lane_s64::<0>(vqneg_s64(vdup_n_s64(a)))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s16)"]
@@ -17758,11 +14931,8 @@ pub fn vqnegd_s64(a: i64) -> i64 {
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int16x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmlah_s16(a, b, c)
-    }
+    let c = vdup_lane_s16::<LANE>(c);
+    vqrdmlah_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_lane_s32)"]
@@ -17773,10 +14943,8 @@ pub fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vqrdmlah_s32(a, b, c)
-    }
+    let c = vdup_lane_s32::<LANE>(c);
+    vqrdmlah_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s16)"]
@@ -17787,11 +14955,8 @@ pub fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: int16x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmlah_s16(a, b, c)
-    }
+    let c = vdup_laneq_s16::<LANE>(c);
+    vqrdmlah_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_laneq_s32)"]
@@ -17802,10 +14967,8 @@ pub fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vqrdmlah_s32(a, b, c)
-    }
+    let c = vdup_laneq_s32::<LANE>(c);
+    vqrdmlah_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s16)"]
@@ -17816,23 +14979,8 @@ pub fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
-        vqrdmlahq_s16(a, b, c)
-    }
+    let c = vdupq_lane_s16::<LANE>(c);
+    vqrdmlahq_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_lane_s32)"]
@@ -17843,11 +14991,8 @@ pub fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmlahq_s32(a, b, c)
-    }
+    let c = vdupq_lane_s32::<LANE>(c);
+    vqrdmlahq_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s16)"]
@@ -17858,23 +15003,8 @@ pub fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: int16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
-        vqrdmlahq_s16(a, b, c)
-    }
+    let c = vdupq_laneq_s16::<LANE>(c);
+    vqrdmlahq_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahq_laneq_s32)"]
@@ -17885,11 +15015,8 @@ pub fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmlahq_s32(a, b, c)
-    }
+    let c = vdupq_laneq_s32::<LANE>(c);
+    vqrdmlahq_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlah_s16)"]
@@ -17964,7 +15091,7 @@ pub fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlahh_s16(a, b, vget_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_laneq_s16)"]
@@ -17975,7 +15102,7 @@ pub fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqrdmlahh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlahh_s16(a, b, vgetq_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_lane_s32)"]
@@ -17986,7 +15113,7 @@ pub fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlahs_s32(a, b, vget_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_laneq_s32)"]
@@ -17997,7 +15124,7 @@ pub fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmlahs_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlahs_s32(a, b, vgetq_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahh_s16)"]
@@ -18009,7 +15136,7 @@ pub fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
     let c: int16x4_t = vdup_n_s16(c);
-    unsafe { simd_extract!(vqrdmlah_s16(a, b, c), 0) }
+    vget_lane_s16::<0>(vqrdmlah_s16(a, b, c))
 }
 #[doc = "Signed saturating rounding doubling multiply accumulate returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlahs_s32)"]
@@ -18021,7 +15148,7 @@ pub fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
     let a: int32x2_t = vdup_n_s32(a);
     let b: int32x2_t = vdup_n_s32(b);
     let c: int32x2_t = vdup_n_s32(c);
-    unsafe { simd_extract!(vqrdmlah_s32(a, b, c), 0) }
+    vget_lane_s32::<0>(vqrdmlah_s32(a, b, c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s16)"]
@@ -18032,11 +15159,8 @@ pub fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int16x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmlsh_s16(a, b, c)
-    }
+    let c = vdup_lane_s16::<LANE>(c);
+    vqrdmlsh_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_lane_s32)"]
@@ -18047,10 +15171,8 @@ pub fn vqrdmlsh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vqrdmlsh_s32(a, b, c)
-    }
+    let c = vdup_lane_s32::<LANE>(c);
+    vqrdmlsh_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s16)"]
@@ -18061,11 +15183,8 @@ pub fn vqrdmlsh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: int16x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmlsh_s16(a, b, c)
-    }
+    let c = vdup_laneq_s16::<LANE>(c);
+    vqrdmlsh_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_laneq_s32)"]
@@ -18076,10 +15195,8 @@ pub fn vqrdmlsh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vqrdmlsh_s32(a, b, c)
-    }
+    let c = vdup_laneq_s32::<LANE>(c);
+    vqrdmlsh_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s16)"]
@@ -18090,23 +15207,8 @@ pub fn vqrdmlsh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
-        vqrdmlshq_s16(a, b, c)
-    }
+    let c = vdupq_lane_s16::<LANE>(c);
+    vqrdmlshq_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_lane_s32)"]
@@ -18117,11 +15219,8 @@ pub fn vqrdmlshq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmlshq_s32(a, b, c)
-    }
+    let c = vdupq_lane_s32::<LANE>(c);
+    vqrdmlshq_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s16)"]
@@ -18132,23 +15231,8 @@ pub fn vqrdmlshq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let c: int16x8_t = simd_shuffle!(
-            c,
-            c,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
-        vqrdmlshq_s16(a, b, c)
-    }
+    let c = vdupq_laneq_s16::<LANE>(c);
+    vqrdmlshq_s16(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshq_laneq_s32)"]
@@ -18159,11 +15243,8 @@ pub fn vqrdmlshq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmlshq_s32(a, b, c)
-    }
+    let c = vdupq_laneq_s32::<LANE>(c);
+    vqrdmlshq_s32(a, b, c)
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlsh_s16)"]
@@ -18238,7 +15319,7 @@ pub fn vqrdmlshq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlshh_s16(a, b, vget_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_laneq_s16)"]
@@ -18249,7 +15330,7 @@ pub fn vqrdmlshh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqrdmlshh_s16(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlshh_s16(a, b, vgetq_lane_s16::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_lane_s32)"]
@@ -18260,7 +15341,7 @@ pub fn vqrdmlshh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlshs_s32(a, b, vget_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_laneq_s32)"]
@@ -18271,7 +15352,7 @@ pub fn vqrdmlshs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32
 #[stable(feature = "rdm_intrinsics", since = "1.62.0")]
 pub fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmlshs_s32(a, b, simd_extract!(c, LANE as u32)) }
+    vqrdmlshs_s32(a, b, vgetq_lane_s32::<LANE>(c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshh_s16)"]
@@ -18283,7 +15364,7 @@ pub fn vqrdmlshh_s16(a: i16, b: i16, c: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
     let c: int16x4_t = vdup_n_s16(c);
-    unsafe { simd_extract!(vqrdmlsh_s16(a, b, c), 0) }
+    vget_lane_s16::<0>(vqrdmlsh_s16(a, b, c))
 }
 #[doc = "Signed saturating rounding doubling multiply subtract returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmlshs_s32)"]
@@ -18295,7 +15376,7 @@ pub fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 {
     let a: int32x2_t = vdup_n_s32(a);
     let b: int32x2_t = vdup_n_s32(b);
     let c: int32x2_t = vdup_n_s32(c);
-    unsafe { simd_extract!(vqrdmlsh_s32(a, b, c), 0) }
+    vget_lane_s32::<0>(vqrdmlsh_s32(a, b, c))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_lane_s16)"]
@@ -18306,7 +15387,7 @@ pub fn vqrdmlshs_s32(a: i32, b: i32, c: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) }
+    vqrdmulhh_s16(a, vget_lane_s16::<LANE>(b))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_laneq_s16)"]
@@ -18317,7 +15398,7 @@ pub fn vqrdmulhh_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
     static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqrdmulhh_s16(a, simd_extract!(b, LANE as u32)) }
+    vqrdmulhh_s16(a, vgetq_lane_s16::<LANE>(b))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_lane_s32)"]
@@ -18328,7 +15409,7 @@ pub fn vqrdmulhh_laneq_s16<const LANE: i32>(a: i16, b: int16x8_t) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) }
+    vqrdmulhs_s32(a, vget_lane_s32::<LANE>(b))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_laneq_s32)"]
@@ -18339,7 +15420,7 @@ pub fn vqrdmulhs_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqrdmulhs_s32(a, simd_extract!(b, LANE as u32)) }
+    vqrdmulhs_s32(a, vgetq_lane_s32::<LANE>(b))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhh_s16)"]
@@ -18348,7 +15429,7 @@ pub fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32 {
 #[cfg_attr(test, assert_instr(sqrdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
-    unsafe { simd_extract!(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)), 0) }
+    vget_lane_s16::<0>(vqrdmulh_s16(vdup_n_s16(a), vdup_n_s16(b)))
 }
 #[doc = "Signed saturating rounding doubling multiply returning high half"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhs_s32)"]
@@ -18357,7 +15438,7 @@ pub fn vqrdmulhh_s16(a: i16, b: i16) -> i16 {
 #[cfg_attr(test, assert_instr(sqrdmulh))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
-    unsafe { simd_extract!(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)), 0) }
+    vget_lane_s32::<0>(vqrdmulh_s32(vdup_n_s32(a), vdup_n_s32(b)))
 }
 #[doc = "Signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_s8)"]
@@ -18368,7 +15449,7 @@ pub fn vqrdmulhs_s32(a: i32, b: i32) -> i32 {
 pub fn vqrshlb_s8(a: i8, b: i8) -> i8 {
     let a: int8x8_t = vdup_n_s8(a);
     let b: int8x8_t = vdup_n_s8(b);
-    unsafe { simd_extract!(vqrshl_s8(a, b), 0) }
+    vget_lane_s8::<0>(vqrshl_s8(a, b))
 }
 #[doc = "Signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_s16)"]
@@ -18379,7 +15460,7 @@ pub fn vqrshlb_s8(a: i8, b: i8) -> i8 {
 pub fn vqrshlh_s16(a: i16, b: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqrshl_s16(a, b), 0) }
+    vget_lane_s16::<0>(vqrshl_s16(a, b))
 }
 #[doc = "Unsigned signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlb_u8)"]
@@ -18390,7 +15471,7 @@ pub fn vqrshlh_s16(a: i16, b: i16) -> i16 {
 pub fn vqrshlb_u8(a: u8, b: i8) -> u8 {
     let a: uint8x8_t = vdup_n_u8(a);
     let b: int8x8_t = vdup_n_s8(b);
-    unsafe { simd_extract!(vqrshl_u8(a, b), 0) }
+    vget_lane_u8::<0>(vqrshl_u8(a, b))
 }
 #[doc = "Unsigned signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlh_u16)"]
@@ -18401,7 +15482,7 @@ pub fn vqrshlb_u8(a: u8, b: i8) -> u8 {
 pub fn vqrshlh_u16(a: u16, b: i16) -> u16 {
     let a: uint16x4_t = vdup_n_u16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqrshl_u16(a, b), 0) }
+    vget_lane_u16::<0>(vqrshl_u16(a, b))
 }
 #[doc = "Signed saturating rounding shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshld_s64)"]
@@ -18471,79 +15552,67 @@ pub fn vqrshld_u64(a: u64, b: i64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqrshrn_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vqrshrn_n_s16::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vqrshrn_n_s32::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vqrshrn_n_s64::<N>(b))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqrshrn_n_u16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqrshrn_n_u16::<N>(b))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqrshrn_n_u32::<N>(b))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_high_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqrshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqrshrn_n_u64::<N>(b))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_u64)"]
@@ -18555,7 +15624,7 @@ pub fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x
 pub fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
     static_assert!(N >= 1 && N <= 32);
     let a: uint64x2_t = vdupq_n_u64(a);
-    unsafe { simd_extract!(vqrshrn_n_u64::<N>(a), 0) }
+    vget_lane_u32::<0>(vqrshrn_n_u64::<N>(a))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_u16)"]
@@ -18567,7 +15636,7 @@ pub fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
 pub fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
     static_assert!(N >= 1 && N <= 8);
     let a: uint16x8_t = vdupq_n_u16(a);
-    unsafe { simd_extract!(vqrshrn_n_u16::<N>(a), 0) }
+    vget_lane_u8::<0>(vqrshrn_n_u16::<N>(a))
 }
 #[doc = "Unsigned saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_u32)"]
@@ -18579,7 +15648,7 @@ pub fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
 pub fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
     static_assert!(N >= 1 && N <= 16);
     let a: uint32x4_t = vdupq_n_u32(a);
-    unsafe { simd_extract!(vqrshrn_n_u32::<N>(a), 0) }
+    vget_lane_u16::<0>(vqrshrn_n_u32::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnh_n_s16)"]
@@ -18591,7 +15660,7 @@ pub fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
 pub fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
     static_assert!(N >= 1 && N <= 8);
     let a: int16x8_t = vdupq_n_s16(a);
-    unsafe { simd_extract!(vqrshrn_n_s16::<N>(a), 0) }
+    vget_lane_s8::<0>(vqrshrn_n_s16::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrns_n_s32)"]
@@ -18603,7 +15672,7 @@ pub fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
 pub fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
     static_assert!(N >= 1 && N <= 16);
     let a: int32x4_t = vdupq_n_s32(a);
-    unsafe { simd_extract!(vqrshrn_n_s32::<N>(a), 0) }
+    vget_lane_s16::<0>(vqrshrn_n_s32::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrnd_n_s64)"]
@@ -18615,46 +15684,40 @@ pub fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
 pub fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
     static_assert!(N >= 1 && N <= 32);
     let a: int64x2_t = vdupq_n_s64(a);
-    unsafe { simd_extract!(vqrshrn_n_s64::<N>(a), 0) }
+    vget_lane_s32::<0>(vqrshrn_n_s64::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqrshrun_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqrshrun_n_s16::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqrshrun_n_s32::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqrshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqrshrun_n_s64::<N>(b))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrund_n_s64)"]
@@ -18666,7 +15729,7 @@ pub fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x
 pub fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
     static_assert!(N >= 1 && N <= 32);
     let a: int64x2_t = vdupq_n_s64(a);
-    unsafe { simd_extract!(vqrshrun_n_s64::<N>(a), 0) }
+    vget_lane_u32::<0>(vqrshrun_n_s64::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrunh_n_s16)"]
@@ -18678,7 +15741,7 @@ pub fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
 pub fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
     static_assert!(N >= 1 && N <= 8);
     let a: int16x8_t = vdupq_n_s16(a);
-    unsafe { simd_extract!(vqrshrun_n_s16::<N>(a), 0) }
+    vget_lane_u8::<0>(vqrshrun_n_s16::<N>(a))
 }
 #[doc = "Signed saturating rounded shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshruns_n_s32)"]
@@ -18690,7 +15753,7 @@ pub fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
 pub fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
     static_assert!(N >= 1 && N <= 16);
     let a: int32x4_t = vdupq_n_s32(a);
-    unsafe { simd_extract!(vqrshrun_n_s32::<N>(a), 0) }
+    vget_lane_u16::<0>(vqrshrun_n_s32::<N>(a))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_s8)"]
@@ -18701,7 +15764,7 @@ pub fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(vqshl_n_s8::<N>(vdup_n_s8(a)), 0) }
+    vget_lane_s8::<0>(vqshl_n_s8::<N>(vdup_n_s8(a)))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_s64)"]
@@ -18712,7 +15775,7 @@ pub fn vqshlb_n_s8<const N: i32>(a: i8) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
     static_assert_uimm_bits!(N, 6);
-    unsafe { simd_extract!(vqshl_n_s64::<N>(vdup_n_s64(a)), 0) }
+    vget_lane_s64::<0>(vqshl_n_s64::<N>(vdup_n_s64(a)))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_s16)"]
@@ -18723,7 +15786,7 @@ pub fn vqshld_n_s64<const N: i32>(a: i64) -> i64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(vqshl_n_s16::<N>(vdup_n_s16(a)), 0) }
+    vget_lane_s16::<0>(vqshl_n_s16::<N>(vdup_n_s16(a)))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_s32)"]
@@ -18734,7 +15797,7 @@ pub fn vqshlh_n_s16<const N: i32>(a: i16) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
     static_assert_uimm_bits!(N, 5);
-    unsafe { simd_extract!(vqshl_n_s32::<N>(vdup_n_s32(a)), 0) }
+    vget_lane_s32::<0>(vqshl_n_s32::<N>(vdup_n_s32(a)))
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_n_u8)"]
@@ -18745,7 +15808,7 @@ pub fn vqshls_n_s32<const N: i32>(a: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(vqshl_n_u8::<N>(vdup_n_u8(a)), 0) }
+    vget_lane_u8::<0>(vqshl_n_u8::<N>(vdup_n_u8(a)))
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_n_u64)"]
@@ -18756,7 +15819,7 @@ pub fn vqshlb_n_u8<const N: i32>(a: u8) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
     static_assert_uimm_bits!(N, 6);
-    unsafe { simd_extract!(vqshl_n_u64::<N>(vdup_n_u64(a)), 0) }
+    vget_lane_u64::<0>(vqshl_n_u64::<N>(vdup_n_u64(a)))
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_n_u16)"]
@@ -18767,7 +15830,7 @@ pub fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(vqshl_n_u16::<N>(vdup_n_u16(a)), 0) }
+    vget_lane_u16::<0>(vqshl_n_u16::<N>(vdup_n_u16(a)))
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_n_u32)"]
@@ -18778,7 +15841,7 @@ pub fn vqshlh_n_u16<const N: i32>(a: u16) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
     static_assert_uimm_bits!(N, 5);
-    unsafe { simd_extract!(vqshl_n_u32::<N>(vdup_n_u32(a)), 0) }
+    vget_lane_u32::<0>(vqshl_n_u32::<N>(vdup_n_u32(a)))
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_s8)"]
@@ -18788,7 +15851,7 @@ pub fn vqshls_n_u32<const N: i32>(a: u32) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlb_s8(a: i8, b: i8) -> i8 {
     let c: int8x8_t = vqshl_s8(vdup_n_s8(a), vdup_n_s8(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_s8::<0>(c)
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_s16)"]
@@ -18798,7 +15861,7 @@ pub fn vqshlb_s8(a: i8, b: i8) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlh_s16(a: i16, b: i16) -> i16 {
     let c: int16x4_t = vqshl_s16(vdup_n_s16(a), vdup_n_s16(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_s16::<0>(c)
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_s32)"]
@@ -18808,7 +15871,7 @@ pub fn vqshlh_s16(a: i16, b: i16) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshls_s32(a: i32, b: i32) -> i32 {
     let c: int32x2_t = vqshl_s32(vdup_n_s32(a), vdup_n_s32(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_s32::<0>(c)
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlb_u8)"]
@@ -18818,7 +15881,7 @@ pub fn vqshls_s32(a: i32, b: i32) -> i32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlb_u8(a: u8, b: i8) -> u8 {
     let c: uint8x8_t = vqshl_u8(vdup_n_u8(a), vdup_n_s8(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_u8::<0>(c)
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlh_u16)"]
@@ -18828,7 +15891,7 @@ pub fn vqshlb_u8(a: u8, b: i8) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlh_u16(a: u16, b: i16) -> u16 {
     let c: uint16x4_t = vqshl_u16(vdup_n_u16(a), vdup_n_s16(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_u16::<0>(c)
 }
 #[doc = "Unsigned saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshls_u32)"]
@@ -18838,7 +15901,7 @@ pub fn vqshlh_u16(a: u16, b: i16) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshls_u32(a: u32, b: i32) -> u32 {
     let c: uint32x2_t = vqshl_u32(vdup_n_u32(a), vdup_n_s32(b));
-    unsafe { simd_extract!(c, 0) }
+    vget_lane_u32::<0>(c)
 }
 #[doc = "Signed saturating shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshld_s64)"]
@@ -18881,7 +15944,7 @@ pub fn vqshld_u64(a: u64, b: i64) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_extract!(vqshlu_n_s8::<N>(vdup_n_s8(a)), 0) }
+    vget_lane_u8::<0>(vqshlu_n_s8::<N>(vdup_n_s8(a)))
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlud_n_s64)"]
@@ -18892,7 +15955,7 @@ pub fn vqshlub_n_s8<const N: i32>(a: i8) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 {
     static_assert_uimm_bits!(N, 6);
-    unsafe { simd_extract!(vqshlu_n_s64::<N>(vdup_n_s64(a)), 0) }
+    vget_lane_u64::<0>(vqshlu_n_s64::<N>(vdup_n_s64(a)))
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluh_n_s16)"]
@@ -18903,7 +15966,7 @@ pub fn vqshlud_n_s64<const N: i32>(a: i64) -> u64 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 {
     static_assert_uimm_bits!(N, 4);
-    unsafe { simd_extract!(vqshlu_n_s16::<N>(vdup_n_s16(a)), 0) }
+    vget_lane_u16::<0>(vqshlu_n_s16::<N>(vdup_n_s16(a)))
 }
 #[doc = "Signed saturating shift left unsigned"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlus_n_s32)"]
@@ -18914,85 +15977,73 @@ pub fn vqshluh_n_s16<const N: i32>(a: i16) -> u16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshlus_n_s32<const N: i32>(a: i32) -> u32 {
     static_assert_uimm_bits!(N, 5);
-    unsafe { simd_extract!(vqshlu_n_s32::<N>(vdup_n_s32(a)), 0) }
+    vget_lane_u32::<0>(vqshlu_n_s32::<N>(vdup_n_s32(a)))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqshrn_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vqshrn_n_s16::<N>(b))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vqshrn_n_s32::<N>(b))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vqshrn_n_s64::<N>(b))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqshrn_n_u16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqshrn_n_u16::<N>(b))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqshrn_n_u32::<N>(b))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_high_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(uqshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(uqshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqshrn_n_u64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqshrn_n_u64::<N>(b))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnd_n_s64)"]
@@ -19039,7 +16090,7 @@ pub fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
     static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_extract!(vqshrn_n_s16::<N>(vdupq_n_s16(a)), 0) }
+    vget_lane_s8::<0>(vqshrn_n_s16::<N>(vdupq_n_s16(a)))
 }
 #[doc = "Signed saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_s32)"]
@@ -19050,7 +16101,7 @@ pub fn vqshrnh_n_s16<const N: i32>(a: i16) -> i8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_extract!(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0) }
+    vget_lane_s16::<0>(vqshrn_n_s32::<N>(vdupq_n_s32(a)))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrnh_n_u16)"]
@@ -19061,7 +16112,7 @@ pub fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
     static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_extract!(vqshrn_n_u16::<N>(vdupq_n_u16(a)), 0) }
+    vget_lane_u8::<0>(vqshrn_n_u16::<N>(vdupq_n_u16(a)))
 }
 #[doc = "Unsigned saturating shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrns_n_u32)"]
@@ -19072,46 +16123,40 @@ pub fn vqshrnh_n_u16<const N: i32>(a: u16) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_extract!(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0) }
+    vget_lane_u16::<0>(vqshrn_n_u32::<N>(vdupq_n_u32(a)))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vqshrun_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vqshrun_n_s16::<N>(b))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vqshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vqshrun_n_s32::<N>(b))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sqshrun2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqshrun2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vqshrun_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vqshrun_n_s64::<N>(b))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrund_n_s64)"]
@@ -19122,7 +16167,7 @@ pub fn vqshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_extract!(vqshrun_n_s64::<N>(vdupq_n_s64(a)), 0) }
+    vget_lane_u32::<0>(vqshrun_n_s64::<N>(vdupq_n_s64(a)))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrunh_n_s16)"]
@@ -19133,7 +16178,7 @@ pub fn vqshrund_n_s64<const N: i32>(a: i64) -> u32 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
     static_assert!(N >= 1 && N <= 8);
-    unsafe { simd_extract!(vqshrun_n_s16::<N>(vdupq_n_s16(a)), 0) }
+    vget_lane_u8::<0>(vqshrun_n_s16::<N>(vdupq_n_s16(a)))
 }
 #[doc = "Signed saturating shift right unsigned narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshruns_n_s32)"]
@@ -19144,7 +16189,7 @@ pub fn vqshrunh_n_s16<const N: i32>(a: i16) -> u8 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_extract!(vqshrun_n_s32::<N>(vdupq_n_s32(a)), 0) }
+    vget_lane_u16::<0>(vqshrun_n_s32::<N>(vdupq_n_s32(a)))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_s8)"]
@@ -19155,7 +16200,7 @@ pub fn vqshruns_n_s32<const N: i32>(a: i32) -> u16 {
 pub fn vqsubb_s8(a: i8, b: i8) -> i8 {
     let a: int8x8_t = vdup_n_s8(a);
     let b: int8x8_t = vdup_n_s8(b);
-    unsafe { simd_extract!(vqsub_s8(a, b), 0) }
+    vget_lane_s8::<0>(vqsub_s8(a, b))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_s16)"]
@@ -19166,7 +16211,7 @@ pub fn vqsubb_s8(a: i8, b: i8) -> i8 {
 pub fn vqsubh_s16(a: i16, b: i16) -> i16 {
     let a: int16x4_t = vdup_n_s16(a);
     let b: int16x4_t = vdup_n_s16(b);
-    unsafe { simd_extract!(vqsub_s16(a, b), 0) }
+    vget_lane_s16::<0>(vqsub_s16(a, b))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubb_u8)"]
@@ -19177,7 +16222,7 @@ pub fn vqsubh_s16(a: i16, b: i16) -> i16 {
 pub fn vqsubb_u8(a: u8, b: u8) -> u8 {
     let a: uint8x8_t = vdup_n_u8(a);
     let b: uint8x8_t = vdup_n_u8(b);
-    unsafe { simd_extract!(vqsub_u8(a, b), 0) }
+    vget_lane_u8::<0>(vqsub_u8(a, b))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubh_u16)"]
@@ -19188,7 +16233,7 @@ pub fn vqsubb_u8(a: u8, b: u8) -> u8 {
 pub fn vqsubh_u16(a: u16, b: u16) -> u16 {
     let a: uint16x4_t = vdup_n_u16(a);
     let b: uint16x4_t = vdup_n_u16(b);
-    unsafe { simd_extract!(vqsub_u16(a, b), 0) }
+    vget_lane_u16::<0>(vqsub_u16(a, b))
 }
 #[doc = "Saturating subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubs_s32)"]
@@ -19393,7 +16438,6 @@ pub fn vqtbl2q_s8(a: int8x16x2_t, b: uint8x16_t) -> int8x16_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -19401,38 +16445,8 @@ pub fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vqtbl2(transmute(a.0), transmute(a.1), b)) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl2_u8(a: uint8x16x2_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x16x2_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -19440,43 +16454,8 @@ pub fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl2q_u8(a: uint8x16x2_t, b: uint8x16_t) -> uint8x16_t {
-    let mut a: uint8x16x2_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x16_t =
-        unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -19484,38 +16463,8 @@ pub fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vqtbl2(transmute(a.0), transmute(a.1), b)) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl2_p8(a: poly8x16x2_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x16x2_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vqtbl2(transmute(a.0), transmute(a.1), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -19523,40 +16472,6 @@ pub fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t {
     unsafe { transmute(vqtbl2q(transmute(a.0), transmute(a.1), b)) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl2q_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl2q_p8(a: poly8x16x2_t, b: uint8x16_t) -> poly8x16_t {
-    let mut a: poly8x16x2_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x16_t =
-        unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(vqtbl2q(transmute(a.0), transmute(a.1), b));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -19609,7 +16524,6 @@ pub fn vqtbl3q_s8(a: int8x16x3_t, b: uint8x16_t) -> int8x16_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -19617,46 +16531,8 @@ pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl3_u8(a: uint8x16x3_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x16x3_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.2 = unsafe {
-        simd_shuffle!(
-            a.2,
-            a.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t =
-            transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -19664,51 +16540,8 @@ pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl3q_u8(a: uint8x16x3_t, b: uint8x16_t) -> uint8x16_t {
-    let mut a: uint8x16x3_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.2 = unsafe {
-        simd_shuffle!(
-            a.2,
-            a.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x16_t =
-        unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t =
-            transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -19716,46 +16549,8 @@ pub fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b)) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl3_p8(a: poly8x16x3_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x16x3_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.2 = unsafe {
-        simd_shuffle!(
-            a.2,
-            a.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t =
-            transmute(vqtbl3(transmute(a.0), transmute(a.1), transmute(a.2), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -19763,48 +16558,6 @@ pub fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t {
     unsafe { transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b)) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl3q_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl3q_p8(a: poly8x16x3_t, b: uint8x16_t) -> poly8x16_t {
-    let mut a: poly8x16x3_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.2 = unsafe {
-        simd_shuffle!(
-            a.2,
-            a.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x16_t =
-        unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t =
-            transmute(vqtbl3q(transmute(a.0), transmute(a.1), transmute(a.2), b));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -19869,7 +16622,6 @@ pub fn vqtbl4q_s8(a: int8x16x4_t, b: uint8x16_t) -> int8x16_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -19885,64 +16637,31 @@ pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl4_u8(a: uint8x16x4_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x16x4_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.2 = unsafe {
-        simd_shuffle!(
-            a.2,
-            a.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.3 = unsafe {
-        simd_shuffle!(
-            a.3,
-            a.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(vqtbl4(
+        transmute(vqtbl4q(
             transmute(a.0),
             transmute(a.1),
             transmute(a.2),
             transmute(a.3),
             b,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        ))
     }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t {
+pub fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t {
     unsafe {
-        transmute(vqtbl4q(
+        transmute(vqtbl4(
             transmute(a.0),
             transmute(a.1),
             transmute(a.2),
@@ -19952,130 +16671,8 @@ pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t {
     }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl4q_u8(a: uint8x16x4_t, b: uint8x16_t) -> uint8x16_t {
-    let mut a: uint8x16x4_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.2 = unsafe {
-        simd_shuffle!(
-            a.2,
-            a.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.3 = unsafe {
-        simd_shuffle!(
-            a.3,
-            a.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x16_t =
-        unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(vqtbl4q(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            b,
-        ));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t {
-    unsafe {
-        transmute(vqtbl4(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            b,
-        ))
-    }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl4_p8(a: poly8x16x4_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x16x4_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.2 = unsafe {
-        simd_shuffle!(
-            a.2,
-            a.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.3 = unsafe {
-        simd_shuffle!(
-            a.3,
-            a.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vqtbl4(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            b,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20090,60 +16687,6 @@ pub fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t {
         ))
     }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbl4q_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbl4q_p8(a: poly8x16x4_t, b: uint8x16_t) -> poly8x16_t {
-    let mut a: poly8x16x4_t = a;
-    a.0 = unsafe {
-        simd_shuffle!(
-            a.0,
-            a.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.1 = unsafe {
-        simd_shuffle!(
-            a.1,
-            a.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.2 = unsafe {
-        simd_shuffle!(
-            a.2,
-            a.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    a.3 = unsafe {
-        simd_shuffle!(
-            a.3,
-            a.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let b: uint8x16_t =
-        unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(vqtbl4q(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            b,
-        ));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx1)"]
 #[inline]
@@ -20283,7 +16826,6 @@ pub fn vqtbx2q_s8(a: int8x16_t, b: int8x16x2_t, c: uint8x16_t) -> int8x16_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20291,39 +16833,8 @@ pub fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)) }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx2_u8(a: uint8x8_t, b: uint8x16x2_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x16x2_t = b;
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20331,46 +16842,8 @@ pub fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx2q_u8(a: uint8x16_t, b: uint8x16x2_t, c: uint8x16_t) -> uint8x16_t {
-    let mut b: uint8x16x2_t = b;
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x16_t =
-        unsafe { simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t =
-            transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20378,39 +16851,8 @@ pub fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c)) }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx2_p8(a: poly8x8_t, b: poly8x16x2_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x16x2_t = b;
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vqtbx2(transmute(a), transmute(b.0), transmute(b.1), c));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20418,43 +16860,6 @@ pub fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t {
     unsafe { transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c)) }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx2q_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx2q_p8(a: poly8x16_t, b: poly8x16x2_t, c: uint8x16_t) -> poly8x16_t {
-    let mut b: poly8x16x2_t = b;
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x16_t =
-        unsafe { simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t =
-            transmute(vqtbx2q(transmute(a), transmute(b.0), transmute(b.1), c));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -20514,7 +16919,6 @@ pub fn vqtbx3q_s8(a: int8x16_t, b: int8x16x3_t, c: uint8x16_t) -> int8x16_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20530,52 +16934,8 @@ pub fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx3_u8(a: uint8x8_t, b: uint8x16x3_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x16x3_t = b;
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.2 = unsafe {
-        simd_shuffle!(
-            b.2,
-            b.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vqtbx3(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            c,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20591,58 +16951,8 @@ pub fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx3q_u8(a: uint8x16_t, b: uint8x16x3_t, c: uint8x16_t) -> uint8x16_t {
-    let mut b: uint8x16x3_t = b;
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.2 = unsafe {
-        simd_shuffle!(
-            b.2,
-            b.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x16_t =
-        unsafe { simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(vqtbx3q(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            c,
-        ));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20658,52 +16968,8 @@ pub fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx3_p8(a: poly8x8_t, b: poly8x16x3_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x16x3_t = b;
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.2 = unsafe {
-        simd_shuffle!(
-            b.2,
-            b.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vqtbx3(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            c,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20719,55 +16985,6 @@ pub fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx3q_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx3q_p8(a: poly8x16_t, b: poly8x16x3_t, c: uint8x16_t) -> poly8x16_t {
-    let mut b: poly8x16x3_t = b;
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.2 = unsafe {
-        simd_shuffle!(
-            b.2,
-            b.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x16_t =
-        unsafe { simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(vqtbx3q(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            c,
-        ));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -20848,7 +17065,6 @@ pub fn vqtbx4q_s8(a: int8x16_t, b: int8x16x4_t, c: uint8x16_t) -> int8x16_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20865,60 +17081,8 @@ pub fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx4_u8(a: uint8x8_t, b: uint8x16x4_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x16x4_t = b;
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.2 = unsafe {
-        simd_shuffle!(
-            b.2,
-            b.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.3 = unsafe {
-        simd_shuffle!(
-            b.3,
-            b.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vqtbx4(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            c,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -20935,66 +17099,8 @@ pub fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx4q_u8(a: uint8x16_t, b: uint8x16x4_t, c: uint8x16_t) -> uint8x16_t {
-    let mut b: uint8x16x4_t = b;
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.2 = unsafe {
-        simd_shuffle!(
-            b.2,
-            b.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.3 = unsafe {
-        simd_shuffle!(
-            b.3,
-            b.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x16_t =
-        unsafe { simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(vqtbx4q(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            c,
-        ));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -21011,60 +17117,8 @@ pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx4_p8(a: poly8x8_t, b: poly8x16x4_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x16x4_t = b;
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.2 = unsafe {
-        simd_shuffle!(
-            b.2,
-            b.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.3 = unsafe {
-        simd_shuffle!(
-            b.3,
-            b.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vqtbx4(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            c,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -21080,63 +17134,6 @@ pub fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t {
         ))
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqtbx4q_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqtbx4q_p8(a: poly8x16_t, b: poly8x16x4_t, c: uint8x16_t) -> poly8x16_t {
-    let mut b: poly8x16x4_t = b;
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe {
-        simd_shuffle!(
-            b.0,
-            b.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.1 = unsafe {
-        simd_shuffle!(
-            b.1,
-            b.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.2 = unsafe {
-        simd_shuffle!(
-            b.2,
-            b.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    b.3 = unsafe {
-        simd_shuffle!(
-            b.3,
-            b.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    let c: uint8x16_t =
-        unsafe { simd_shuffle!(c, c, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(vqtbx4q(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            c,
-        ));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
 #[doc = "Rotate and exclusive OR"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrax1q_u64)"]
 #[inline]
@@ -21174,108 +17171,38 @@ pub fn vrbitq_s8(a: int8x16_t) -> int8x16_t {
 #[doc = "Reverse bit order"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vrbit_s8(transmute(a))) }
-}
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vrbit_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
-    unsafe { transmute(vrbitq_s8(transmute(a))) }
-}
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(vrbitq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
+pub fn vrbit_u8(a: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vrbit_s8(transmute(a))) }
 }
 #[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vrbit_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vrbitq_u8(a: uint8x16_t) -> uint8x16_t {
+    unsafe { transmute(vrbitq_s8(transmute(a))) }
 }
 #[doc = "Reverse bit order"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbit_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(rbit))]
-pub fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
-    unsafe { transmute(vrbitq_s8(transmute(a))) }
+pub fn vrbit_p8(a: poly8x8_t) -> poly8x8_t {
+    unsafe { transmute(vrbit_s8(transmute(a))) }
 }
 #[doc = "Reverse bit order"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrbitq_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(rbit))]
 pub fn vrbitq_p8(a: poly8x16_t) -> poly8x16_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(vrbitq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+    unsafe { transmute(vrbitq_s8(transmute(a))) }
 }
 #[doc = "Reciprocal estimate."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f64)"]
@@ -21492,8 +17419,8 @@ pub fn vrecpxh_f16(a: f16) -> f16 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t {
@@ -21503,20 +17430,22 @@ pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpret_f64_f16(a: float16x4_t) -> float64x1_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+    unsafe {
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
@@ -21526,13 +17455,13 @@ pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
     unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: float64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
@@ -21541,8 +17470,8 @@ pub fn vreinterpretq_f64_f16(a: float16x8_t) -> float64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t {
@@ -21552,8 +17481,8 @@ pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t {
@@ -21566,8 +17495,8 @@ pub fn vreinterpret_f16_f64(a: float64x1_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t {
@@ -21577,1324 +17506,1196 @@ pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vreinterpretq_f16_f64(a: float64x2_t) -> float16x8_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: float16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
+pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
+pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
+pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
+pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
+pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
+pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
+pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s64_f64(a: float64x1_t) -> int64x1_t {
+pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
+pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
+pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
+pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u64_f64(a: float64x1_t) -> uint64x1_t {
+pub fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
+pub fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
+pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_f64(a: float64x1_t) -> poly64x1_t {
+pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
+pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p128)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_f64_p128(a: p128) -> float64x2_t {
+    unsafe {
+        let ret_val: float64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_f64_f32(a: float32x2_t) -> float64x1_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
+pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_p64_f32(a: float32x2_t) -> poly64x1_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_f64_f32(a: float32x4_t) -> float64x2_t {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
+pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_p64_f32(a: float32x4_t) -> poly64x2_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
+pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_f64(a: float64x2_t) -> int64x2_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_f32_f64(a: float64x1_t) -> float32x2_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
+        let ret_val: float32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
+pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s8_f64(a: float64x1_t) -> int8x8_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
+pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s16_f64(a: float64x1_t) -> int16x4_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
+pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s32_f64(a: float64x1_t) -> int32x2_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
+pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_u8_f64(a: float64x1_t) -> uint8x8_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
+pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_u16_f64(a: float64x1_t) -> uint16x4_t {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
+pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_u32_f64(a: float64x1_t) -> uint32x2_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
+pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_f64(a: float64x2_t) -> poly64x2_t {
-    let a: float64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_p8_f64(a: float64x1_t) -> poly8x8_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
+pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_p16_f64(a: float64x1_t) -> poly16x4_t {
+    unsafe {
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
+pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p128_f64(a: float64x2_t) -> p128 {
     unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
+pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
+pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s8_f64(a: float64x2_t) -> int8x16_t {
     unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
+pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s16_f64(a: float64x2_t) -> int16x8_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
+pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_s32_f64(a: float64x2_t) -> int32x4_t {
     unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_s64(a: int64x1_t) -> float64x1_t {
+pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f64)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_s64(a: int64x1_t) -> poly64x1_t {
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u8_f64(a: float64x2_t) -> uint8x16_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
+pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_s64(a: int64x2_t) -> float64x2_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u16_f64(a: float64x2_t) -> uint16x8_t {
     unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
+pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_s64(a: int64x2_t) -> poly64x2_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u32_f64(a: float64x2_t) -> uint32x4_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
+pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p8_f64(a: float64x2_t) -> poly8x16_t {
+    unsafe {
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
+pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p16_f64(a: float64x2_t) -> poly16x8_t {
     unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
+pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_f64_s8(a: int8x8_t) -> float64x1_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
+pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_f64_s8(a: int8x16_t) -> float64x2_t {
     unsafe {
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: float64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
+pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_f64_s16(a: int16x4_t) -> float64x1_t {
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
+pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_f64_s16(a: int16x8_t) -> float64x2_t {
     unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: float64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_u64(a: uint64x1_t) -> float64x1_t {
+pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_s32)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_p64_u64(a: uint64x1_t) -> poly64x1_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_f64_s32(a: int32x2_t) -> float64x1_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
+pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_u64(a: uint64x2_t) -> float64x2_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_f64_s32(a: int32x4_t) -> float64x2_t {
     unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: float64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
+pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_p64_u64(a: uint64x2_t) -> poly64x2_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_f64_u8(a: uint8x8_t) -> float64x1_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
+pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_f64_u8(a: uint8x16_t) -> float64x2_t {
+    unsafe {
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
+pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_f64_u16(a: uint16x4_t) -> float64x1_t {
     unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
+pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_f64_u16(a: uint16x8_t) -> float64x2_t {
+    unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
+pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_f64_u32(a: uint32x2_t) -> float64x1_t {
     unsafe {
-        let ret_val: float64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
+pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
+pub fn vreinterpretq_f64_u32(a: uint32x4_t) -> float64x2_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_f64_p64(a: poly64x1_t) -> float64x1_t {
+pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p8)"]
 #[inline]
+#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_s64_p64(a: poly64x1_t) -> int64x1_t {
-    unsafe { transmute(a) }
+pub fn vreinterpret_f64_p8(a: poly8x8_t) -> float64x1_t {
+    unsafe {
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpret_u64_p64(a: poly64x1_t) -> uint64x1_t {
+pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p8)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub fn vreinterpretq_f64_p8(a: poly8x16_t) -> float64x2_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
+pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_f64_p16(a: poly16x4_t) -> float64x1_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
+pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_f64_p64(a: poly64x2_t) -> float64x2_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_f64_p16(a: poly16x8_t) -> float64x2_t {
     unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: float64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
+pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_s64_p64(a: poly64x2_t) -> int64x2_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_f32_p64(a: poly64x1_t) -> float32x2_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
+        let ret_val: float32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
+pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
-pub fn vreinterpretq_u64_p64(a: poly64x2_t) -> uint64x2_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_f32_p64(a: poly64x2_t) -> float32x4_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Floating-point round to 32-bit integer, using current rounding mode"]
@@ -22959,7 +18760,7 @@ pub fn vrnd32x_f64(a: float64x1_t) -> float64x1_t {
         )]
         fn _vrnd32x_f64(a: f64) -> f64;
     }
-    unsafe { transmute(_vrnd32x_f64(simd_extract!(a, 0))) }
+    unsafe { transmute(_vrnd32x_f64(vget_lane_f64::<0>(a))) }
 }
 #[doc = "Floating-point round to 32-bit integer toward zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)"]
@@ -23023,7 +18824,7 @@ pub fn vrnd32z_f64(a: float64x1_t) -> float64x1_t {
         )]
         fn _vrnd32z_f64(a: f64) -> f64;
     }
-    unsafe { transmute(_vrnd32z_f64(simd_extract!(a, 0))) }
+    unsafe { transmute(_vrnd32z_f64(vget_lane_f64::<0>(a))) }
 }
 #[doc = "Floating-point round to 64-bit integer, using current rounding mode"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)"]
@@ -23087,7 +18888,7 @@ pub fn vrnd64x_f64(a: float64x1_t) -> float64x1_t {
         )]
         fn _vrnd64x_f64(a: f64) -> f64;
     }
-    unsafe { transmute(_vrnd64x_f64(simd_extract!(a, 0))) }
+    unsafe { transmute(_vrnd64x_f64(vget_lane_f64::<0>(a))) }
 }
 #[doc = "Floating-point round to 64-bit integer toward zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)"]
@@ -23151,13 +18952,13 @@ pub fn vrnd64z_f64(a: float64x1_t) -> float64x1_t {
         )]
         fn _vrnd64z_f64(a: f64) -> f64;
     }
-    unsafe { transmute(_vrnd64z_f64(simd_extract!(a, 0))) }
+    unsafe { transmute(_vrnd64z_f64(vget_lane_f64::<0>(a))) }
 }
 #[doc = "Floating-point round to integral, toward zero"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintz))]
 pub fn vrnd_f16(a: float16x4_t) -> float16x4_t {
@@ -23167,7 +18968,7 @@ pub fn vrnd_f16(a: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintz))]
 pub fn vrndq_f16(a: float16x8_t) -> float16x8_t {
@@ -23213,7 +19014,7 @@ pub fn vrndq_f64(a: float64x2_t) -> float64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnda_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinta))]
 pub fn vrnda_f16(a: float16x4_t) -> float16x4_t {
@@ -23223,7 +19024,7 @@ pub fn vrnda_f16(a: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndaq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinta))]
 pub fn vrndaq_f16(a: float16x8_t) -> float16x8_t {
@@ -23289,7 +19090,7 @@ pub fn vrndh_f16(a: f16) -> f16 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndi_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinti))]
 pub fn vrndi_f16(a: float16x4_t) -> float16x4_t {
@@ -23306,7 +19107,7 @@ pub fn vrndi_f16(a: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndiq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frinti))]
 pub fn vrndiq_f16(a: float16x8_t) -> float16x8_t {
@@ -23404,7 +19205,7 @@ pub fn vrndih_f16(a: f16) -> f16 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndm_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintm))]
 pub fn vrndm_f16(a: float16x4_t) -> float16x4_t {
@@ -23414,7 +19215,7 @@ pub fn vrndm_f16(a: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndmq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintm))]
 pub fn vrndmq_f16(a: float16x8_t) -> float16x8_t {
@@ -23535,7 +19336,7 @@ pub fn vrndns_f32(a: f32) -> f32 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndp_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintp))]
 pub fn vrndp_f16(a: float16x4_t) -> float16x4_t {
@@ -23545,7 +19346,7 @@ pub fn vrndp_f16(a: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndpq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintp))]
 pub fn vrndpq_f16(a: float16x8_t) -> float16x8_t {
@@ -23601,7 +19402,7 @@ pub fn vrndph_f16(a: f16) -> f16 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndx_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
@@ -23611,7 +19412,7 @@ pub fn vrndx_f16(a: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrndxq_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(frintx))]
 pub fn vrndxq_f16(a: float16x8_t) -> float16x8_t {
@@ -23721,79 +19522,67 @@ pub fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vrshrn_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vrshrn_n_s16::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vrshrn_n_s32::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vrshrn_n_s64::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vrshrn_n_u16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vrshrn_n_u16::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vrshrn_n_u32::<N>(b))
 }
 #[doc = "Rounding shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrshrn_high_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(rshrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vrshrn_n_u64::<N>(b))
 }
 #[doc = "Reciprocal square-root estimate."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsqrte_f64)"]
@@ -23989,8 +19778,7 @@ pub fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let x: int8x8_t = vrsubhn_s16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    vcombine_s8(a, vrsubhn_s16(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
@@ -24000,8 +19788,7 @@ pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let x: int16x4_t = vrsubhn_s32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vrsubhn_s32(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
@@ -24011,8 +19798,7 @@ pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let x: int32x2_t = vrsubhn_s64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
+    vcombine_s32(a, vrsubhn_s64(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
@@ -24022,8 +19808,7 @@ pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    let x: uint8x8_t = vrsubhn_u16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    vcombine_u8(a, vrsubhn_u16(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
@@ -24033,8 +19818,7 @@ pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    let x: uint16x4_t = vrsubhn_u32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vrsubhn_u32(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
@@ -24044,8 +19828,7 @@ pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8
 #[cfg_attr(test, assert_instr(rsubhn2))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    let x: uint32x2_t = vrsubhn_u64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
+    vcombine_u32(a, vrsubhn_u64(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s16)"]
@@ -24055,8 +19838,7 @@ pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let x: int8x8_t = vrsubhn_s16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    vcombine_s8(a, vrsubhn_s16(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s32)"]
@@ -24066,8 +19848,7 @@ pub fn vrsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let x: int16x4_t = vrsubhn_s32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vrsubhn_s32(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_s64)"]
@@ -24077,8 +19858,7 @@ pub fn vrsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let x: int32x2_t = vrsubhn_s64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
+    vcombine_s32(a, vrsubhn_s64(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u16)"]
@@ -24088,8 +19868,7 @@ pub fn vrsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    let x: uint8x8_t = vrsubhn_u16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    vcombine_u8(a, vrsubhn_u16(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u32)"]
@@ -24099,8 +19878,7 @@ pub fn vrsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    let x: uint16x4_t = vrsubhn_u32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vrsubhn_u32(b, c))
 }
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_high_u64)"]
@@ -24110,8 +19888,87 @@ pub fn vrsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8
 #[cfg_attr(test, assert_instr(rsubhn))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vrsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    let x: uint32x2_t = vrsubhn_u64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
+    vcombine_u32(a, vrsubhn_u64(b, c))
+}
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f16)"]
+#[inline]
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fp8.fscale.v4f16"
+        )]
+        fn _vscale_f16(vn: float16x4_t, vm: int16x4_t) -> float16x4_t;
+    }
+    unsafe { _vscale_f16(vn, vm) }
+}
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f16)"]
+#[inline]
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fp8.fscale.v8f16"
+        )]
+        fn _vscaleq_f16(vn: float16x8_t, vm: int16x8_t) -> float16x8_t;
+    }
+    unsafe { _vscaleq_f16(vn, vm) }
+}
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscale_f32)"]
+#[inline]
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fp8.fscale.v2f32"
+        )]
+        fn _vscale_f32(vn: float32x2_t, vm: int32x2_t) -> float32x2_t;
+    }
+    unsafe { _vscale_f32(vn, vm) }
+}
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f32)"]
+#[inline]
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fp8.fscale.v4f32"
+        )]
+        fn _vscaleq_f32(vn: float32x4_t, vm: int32x4_t) -> float32x4_t;
+    }
+    unsafe { _vscaleq_f32(vn, vm) }
+}
+#[doc = "Multi-vector floating-point adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vscaleq_f64)"]
+#[inline]
+#[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+#[target_feature(enable = "neon,fp8")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(fscale))]
+pub fn vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fp8.fscale.v2f64"
+        )]
+        fn _vscaleq_f64(vn: float64x2_t, vm: int64x2_t) -> float64x2_t;
+    }
+    unsafe { _vscaleq_f64(vn, vm) }
 }
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f64)"]
@@ -24221,163 +20078,139 @@ pub fn vshld_u64(a: u64, b: i64) -> u64 {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_s8<const N: i32>(a: int8x16_t) -> int16x8_t {
     static_assert!(N >= 0 && N <= 8);
-    unsafe {
-        let b: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vshll_n_s8::<N>(b)
-    }
+    let b = vget_high_s8(a);
+    vshll_n_s8::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_s16<const N: i32>(a: int16x8_t) -> int32x4_t {
     static_assert!(N >= 0 && N <= 16);
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        vshll_n_s16::<N>(b)
-    }
+    let b = vget_high_s16(a);
+    vshll_n_s16::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(sshll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(sshll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_s32<const N: i32>(a: int32x4_t) -> int64x2_t {
     static_assert!(N >= 0 && N <= 32);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        vshll_n_s32::<N>(b)
-    }
+    let b = vget_high_s32(a);
+    vshll_n_s32::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_u8<const N: i32>(a: uint8x16_t) -> uint16x8_t {
     static_assert!(N >= 0 && N <= 8);
-    unsafe {
-        let b: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        vshll_n_u8::<N>(b)
-    }
+    let b: uint8x8_t = vget_high_u8(a);
+    vshll_n_u8::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_u16<const N: i32>(a: uint16x8_t) -> uint32x4_t {
     static_assert!(N >= 0 && N <= 16);
-    unsafe {
-        let b: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        vshll_n_u16::<N>(b)
-    }
+    let b: uint16x4_t = vget_high_u16(a);
+    vshll_n_u16::<N>(b)
 }
 #[doc = "Signed shift left long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshll_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ushll2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ushll2, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshll_high_n_u32<const N: i32>(a: uint32x4_t) -> uint64x2_t {
     static_assert!(N >= 0 && N <= 32);
-    unsafe {
-        let b: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        vshll_n_u32::<N>(b)
-    }
+    let b: uint32x2_t = vget_high_u32(a);
+    vshll_n_u32::<N>(b)
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vshrn_n_s16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_s8(a, vshrn_n_s16::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_s16(a, vshrn_n_s32::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vshrn_n_s64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_s32(a, vshrn_n_s64::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe {
-        simd_shuffle!(
-            a,
-            vshrn_n_u16::<N>(b),
-            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-        )
-    }
+    vcombine_u8(a, vshrn_n_u16::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe { simd_shuffle!(a, vshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7]) }
+    vcombine_u16(a, vshrn_n_u32::<N>(b))
 }
 #[doc = "Shift right narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshrn_high_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(shrn2, N = 2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(shrn2, N = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe { simd_shuffle!(a, vshrn_n_u64::<N>(b), [0, 1, 2, 3]) }
+    vcombine_u32(a, vshrn_n_u64::<N>(b))
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s8)"]
@@ -24986,7 +20819,7 @@ pub fn vsqaddq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
 #[cfg_attr(test, assert_instr(usqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsqaddb_u8(a: u8, b: i8) -> u8 {
-    unsafe { simd_extract!(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)), 0) }
+    vget_lane_u8::<0>(vsqadd_u8(vdup_n_u8(a), vdup_n_s8(b)))
 }
 #[doc = "Unsigned saturating accumulate of signed value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddh_u16)"]
@@ -24995,7 +20828,7 @@ pub fn vsqaddb_u8(a: u8, b: i8) -> u8 {
 #[cfg_attr(test, assert_instr(usqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsqaddh_u16(a: u16, b: i16) -> u16 {
-    unsafe { simd_extract!(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)), 0) }
+    vget_lane_u16::<0>(vsqadd_u16(vdup_n_u16(a), vdup_n_s16(b)))
 }
 #[doc = "Unsigned saturating accumulate of signed value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsqaddd_u64)"]
@@ -25034,7 +20867,7 @@ pub fn vsqadds_u32(a: u32, b: i32) -> u32 {
 #[inline]
 #[cfg_attr(test, assert_instr(fsqrt))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vsqrt_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_fsqrt(a) }
@@ -25044,7 +20877,7 @@ pub fn vsqrt_f16(a: float16x4_t) -> float16x4_t {
 #[inline]
 #[cfg_attr(test, assert_instr(fsqrt))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vsqrtq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_fsqrt(a) }
@@ -25104,14 +20937,7 @@ pub fn vsqrth_f16(a: f16) -> f16 {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsri.v8i8"
-        )]
-        fn _vsri_n_s8(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
-    }
-    unsafe { _vsri_n_s8(a, b, N) }
+    unsafe { super::shift_right_and_insert!(u8, 8, N, a, b) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"]
@@ -25122,14 +20948,7 @@ pub fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsri.v16i8"
-        )]
-        fn _vsriq_n_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t;
-    }
-    unsafe { _vsriq_n_s8(a, b, N) }
+    unsafe { super::shift_right_and_insert!(u8, 16, N, a, b) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"]
@@ -25140,14 +20959,7 @@ pub fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsri.v4i16"
-        )]
-        fn _vsri_n_s16(a: int16x4_t, b: int16x4_t, n: i32) -> int16x4_t;
-    }
-    unsafe { _vsri_n_s16(a, b, N) }
+    unsafe { super::shift_right_and_insert!(u16, 4, N, a, b) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"]
@@ -25158,14 +20970,7 @@ pub fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsri.v8i16"
-        )]
-        fn _vsriq_n_s16(a: int16x8_t, b: int16x8_t, n: i32) -> int16x8_t;
-    }
-    unsafe { _vsriq_n_s16(a, b, N) }
+    unsafe { super::shift_right_and_insert!(u16, 8, N, a, b) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"]
@@ -25176,14 +20981,7 @@ pub fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsri.v2i32"
-        )]
-        fn _vsri_n_s32(a: int32x2_t, b: int32x2_t, n: i32) -> int32x2_t;
-    }
-    unsafe { _vsri_n_s32(a, b, N) }
+    unsafe { super::shift_right_and_insert!(u32, 2, N, a, b) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"]
@@ -25194,14 +20992,7 @@ pub fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsri.v4i32"
-        )]
-        fn _vsriq_n_s32(a: int32x4_t, b: int32x4_t, n: i32) -> int32x4_t;
-    }
-    unsafe { _vsriq_n_s32(a, b, N) }
+    unsafe { super::shift_right_and_insert!(u32, 4, N, a, b) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"]
@@ -25212,14 +21003,7 @@ pub fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsri.v1i64"
-        )]
-        fn _vsri_n_s64(a: int64x1_t, b: int64x1_t, n: i32) -> int64x1_t;
-    }
-    unsafe { _vsri_n_s64(a, b, N) }
+    unsafe { super::shift_right_and_insert!(u64, 1, N, a, b) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"]
@@ -25230,14 +21014,7 @@ pub fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     static_assert!(N >= 1 && N <= 64);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.vsri.v2i64"
-        )]
-        fn _vsriq_n_s64(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
-    }
-    unsafe { _vsriq_n_s64(a, b, N) }
+    unsafe { super::shift_right_and_insert!(u64, 2, N, a, b) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"]
@@ -25399,7 +21176,7 @@ pub fn vsriq_n_p64<const N: i32>(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))]
 pub fn vsrid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
     static_assert!(N >= 1 && N <= 64);
     unsafe { transmute(vsri_n_s64::<N>(transmute(a), transmute(b))) }
@@ -25410,7 +21187,7 @@ pub fn vsrid_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[rustc_legacy_const_generics(2)]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(sri, N = 2))]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(bfxil, N = 2))]
 pub fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
     static_assert!(N >= 1 && N <= 64);
     unsafe { transmute(vsri_n_u64::<N>(transmute(a), transmute(b))) }
@@ -25418,7 +21195,7 @@ pub fn vsrid_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25431,7 +21208,7 @@ pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25444,7 +21221,7 @@ pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25456,7 +21233,7 @@ pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25468,7 +21245,7 @@ pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25480,7 +21257,7 @@ pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25492,7 +21269,7 @@ pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25504,7 +21281,7 @@ pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25516,7 +21293,7 @@ pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25528,7 +21305,7 @@ pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25540,7 +21317,7 @@ pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25552,7 +21329,7 @@ pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25564,7 +21341,7 @@ pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25576,7 +21353,7 @@ pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25588,7 +21365,7 @@ pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25600,7 +21377,7 @@ pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25612,7 +21389,7 @@ pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25624,7 +21401,7 @@ pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25636,7 +21413,7 @@ pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25648,7 +21425,7 @@ pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25660,7 +21437,7 @@ pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25672,7 +21449,7 @@ pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25684,7 +21461,7 @@ pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25696,7 +21473,7 @@ pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25708,7 +21485,7 @@ pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25720,7 +21497,7 @@ pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25732,7 +21509,7 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25744,7 +21521,7 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(str))]
@@ -25756,7 +21533,7 @@ pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st1))]
@@ -25774,7 +21551,7 @@ pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st1))]
@@ -25792,7 +21569,7 @@ pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st1))]
@@ -25810,7 +21587,7 @@ pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st1))]
@@ -25828,7 +21605,7 @@ pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st1))]
@@ -25852,7 +21629,7 @@ pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st1))]
@@ -25876,7 +21653,7 @@ pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -25889,7 +21666,7 @@ pub unsafe fn vst1_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(nop, LANE = 0))]
@@ -25902,25 +21679,18 @@ pub unsafe fn vst1q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(st1))]
+#[cfg_attr(test, assert_instr(stp))]
 pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v1f64.p0"
-        )]
-        fn _vst2_f64(a: float64x1_t, b: float64x1_t, ptr: *mut i8);
-    }
-    _vst2_f64(b.0, b.1, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -25940,7 +21710,7 @@ pub unsafe fn vst2_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -25960,7 +21730,7 @@ pub unsafe fn vst2_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -25973,7 +21743,7 @@ pub unsafe fn vst2_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -25986,43 +21756,29 @@ pub unsafe fn vst2_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2f64.p0"
-        )]
-        fn _vst2q_f64(a: float64x2_t, b: float64x2_t, ptr: *mut i8);
-    }
-    _vst2q_f64(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(f64, 2, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2i64.p0"
-        )]
-        fn _vst2q_s64(a: int64x2_t, b: int64x2_t, ptr: *mut i8);
-    }
-    _vst2q_s64(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i64, 2, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -26042,7 +21798,7 @@ pub unsafe fn vst2q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -26062,7 +21818,7 @@ pub unsafe fn vst2q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -26082,7 +21838,7 @@ pub unsafe fn vst2q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -26095,7 +21851,7 @@ pub unsafe fn vst2q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -26108,7 +21864,7 @@ pub unsafe fn vst2q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -26121,7 +21877,7 @@ pub unsafe fn vst2q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
@@ -26134,7 +21890,7 @@ pub unsafe fn vst2q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(test, assert_instr(st2))]
@@ -26145,7 +21901,7 @@ pub unsafe fn vst2q_p64(a: *mut p64, b: poly64x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26156,25 +21912,18 @@ pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v1f64.p0"
-        )]
-        fn _vst3_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8);
-    }
-    _vst3_f64(b.0, b.1, b.2, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st3, LANE = 0))]
@@ -26194,7 +21943,7 @@ pub unsafe fn vst3_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st3, LANE = 0))]
@@ -26214,7 +21963,7 @@ pub unsafe fn vst3_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
@@ -26227,7 +21976,7 @@ pub unsafe fn vst3_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26240,43 +21989,29 @@ pub unsafe fn vst3_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v2f64.p0"
-        )]
-        fn _vst3q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8);
-    }
-    _vst3q_f64(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(f64, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v2i64.p0"
-        )]
-        fn _vst3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8);
-    }
-    _vst3q_s64(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(i64, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st3, LANE = 0))]
@@ -26296,7 +22031,7 @@ pub unsafe fn vst3q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st3, LANE = 0))]
@@ -26316,7 +22051,7 @@ pub unsafe fn vst3q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st3, LANE = 0))]
@@ -26336,7 +22071,7 @@ pub unsafe fn vst3q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
@@ -26349,7 +22084,7 @@ pub unsafe fn vst3q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26362,7 +22097,7 @@ pub unsafe fn vst3q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26375,7 +22110,7 @@ pub unsafe fn vst3q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26388,7 +22123,7 @@ pub unsafe fn vst3q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
@@ -26399,7 +22134,7 @@ pub unsafe fn vst3q_p64(a: *mut p64, b: poly64x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26410,25 +22145,18 @@ pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v1f64.p0"
-        )]
-        fn _vst4_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8);
-    }
-    _vst4_f64(b.0, b.1, b.2, b.3, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st4, LANE = 0))]
@@ -26455,7 +22183,7 @@ pub unsafe fn vst4_lane_f64<const LANE: i32>(a: *mut f64, b: float64x1x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st4, LANE = 0))]
@@ -26482,7 +22210,7 @@ pub unsafe fn vst4_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
@@ -26495,7 +22223,7 @@ pub unsafe fn vst4_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26508,43 +22236,29 @@ pub unsafe fn vst4_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2f64.p0"
-        )]
-        fn _vst4q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8);
-    }
-    _vst4q_f64(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(f64, 2, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2i64.p0"
-        )]
-        fn _vst4q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8);
-    }
-    _vst4q_s64(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i64, 2, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st4, LANE = 0))]
@@ -26571,7 +22285,7 @@ pub unsafe fn vst4q_lane_f64<const LANE: i32>(a: *mut f64, b: float64x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st4, LANE = 0))]
@@ -26598,7 +22312,7 @@ pub unsafe fn vst4q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(st4, LANE = 0))]
@@ -26625,7 +22339,7 @@ pub unsafe fn vst4q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
@@ -26638,7 +22352,7 @@ pub unsafe fn vst4q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26651,7 +22365,7 @@ pub unsafe fn vst4q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26664,7 +22378,7 @@ pub unsafe fn vst4q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26677,7 +22391,7 @@ pub unsafe fn vst4q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[target_feature(enable = "neon,aes")]
@@ -26688,7 +22402,7 @@ pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
@@ -26696,6 +22410,122 @@ pub unsafe fn vst4q_p64(a: *mut p64, b: poly64x2x4_t) {
 pub unsafe fn vst4q_u64(a: *mut u64, b: uint64x2x4_t) {
     vst4q_s64(transmute(a), transmute(b))
 }
+#[doc = "Store-Release a single-element structure from one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."]
+#[inline]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vstl1_lane_f64<const LANE: i32>(ptr: *mut f64, val: float64x1_t) {
+    static_assert!(LANE == 0);
+    vstl1_lane_s64::<LANE>(ptr as *mut i64, transmute(val))
+}
+#[doc = "Store-Release a single-element structure from one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."]
+#[inline]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vstl1q_lane_f64<const LANE: i32>(ptr: *mut f64, val: float64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vstl1q_lane_s64::<LANE>(ptr as *mut i64, transmute(val))
+}
+#[doc = "Store-Release a single-element structure from one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."]
+#[inline]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vstl1_lane_u64<const LANE: i32>(ptr: *mut u64, val: uint64x1_t) {
+    static_assert!(LANE == 0);
+    vstl1_lane_s64::<LANE>(ptr as *mut i64, transmute(val))
+}
+#[doc = "Store-Release a single-element structure from one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."]
+#[inline]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vstl1q_lane_u64<const LANE: i32>(ptr: *mut u64, val: uint64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vstl1q_lane_s64::<LANE>(ptr as *mut i64, transmute(val))
+}
+#[doc = "Store-Release a single-element structure from one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."]
+#[inline]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vstl1_lane_p64<const LANE: i32>(ptr: *mut p64, val: poly64x1_t) {
+    static_assert!(LANE == 0);
+    vstl1_lane_s64::<LANE>(ptr as *mut i64, transmute(val))
+}
+#[doc = "Store-Release a single-element structure from one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."]
+#[inline]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vstl1q_lane_p64<const LANE: i32>(ptr: *mut p64, val: poly64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    vstl1q_lane_s64::<LANE>(ptr as *mut i64, transmute(val))
+}
+#[doc = "Store-Release a single-element structure from one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."]
+#[inline]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vstl1_lane_s64<const LANE: i32>(ptr: *mut i64, val: int64x1_t) {
+    static_assert!(LANE == 0);
+    let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64;
+    let lane: i64 = vget_lane_s64::<LANE>(val);
+    (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release)
+}
+#[doc = "Store-Release a single-element structure from one lane of one register."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstl1q_lane_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * The pointer in `ptr` must satisfy the requirements of [`core::ptr::write`]."]
+#[inline]
+#[target_feature(enable = "neon,rcpc3")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(stl1, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+#[cfg(target_has_atomic = "64")]
+pub unsafe fn vstl1q_lane_s64<const LANE: i32>(ptr: *mut i64, val: int64x2_t) {
+    static_assert_uimm_bits!(LANE, 1);
+    let atomic_dst = ptr as *mut crate::sync::atomic::AtomicI64;
+    let lane: i64 = vgetq_lane_s64::<LANE>(val);
+    (*atomic_dst).store(transmute(lane), crate::sync::atomic::Ordering::Release)
+}
 #[doc = "Subtract"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsub_f64)"]
 #[inline]
@@ -26719,7 +22549,7 @@ pub fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sub))]
 pub fn vsubd_s64(a: i64, b: i64) -> i64 {
     a.wrapping_sub(b)
 }
@@ -26728,7 +22558,7 @@ pub fn vsubd_s64(a: i64, b: i64) -> i64 {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(sub))]
 pub fn vsubd_u64(a: u64, b: u64) -> u64 {
     a.wrapping_sub(b)
 }
@@ -26738,7 +22568,7 @@ pub fn vsubd_u64(a: u64, b: u64) -> u64 {
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(test, assert_instr(fsub))]
 pub fn vsubh_f16(a: f16, b: f16) -> f16 {
     a - b
 }
@@ -26747,14 +22577,12 @@ pub fn vsubh_f16(a: f16, b: f16) -> f16 {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
 pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
     unsafe {
-        let c: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let d: int16x8_t = simd_cast(c);
-        let e: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let f: int16x8_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: int16x8_t = simd_cast(vget_high_s8(a));
+        let d: int16x8_t = simd_cast(vget_high_s8(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Signed Subtract Long"]
@@ -26762,14 +22590,12 @@ pub fn vsubl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
 pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     unsafe {
-        let c: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let d: int32x4_t = simd_cast(c);
-        let e: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let f: int32x4_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: int32x4_t = simd_cast(vget_high_s16(a));
+        let d: int32x4_t = simd_cast(vget_high_s16(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Signed Subtract Long"]
@@ -26777,14 +22603,12 @@ pub fn vsubl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubl2))]
 pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     unsafe {
-        let c: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let d: int64x2_t = simd_cast(c);
-        let e: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let f: int64x2_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: int64x2_t = simd_cast(vget_high_s32(a));
+        let d: int64x2_t = simd_cast(vget_high_s32(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Unsigned Subtract Long"]
@@ -26792,14 +22616,12 @@ pub fn vsubl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
 pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
     unsafe {
-        let c: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let d: uint16x8_t = simd_cast(c);
-        let e: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let f: uint16x8_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: uint16x8_t = simd_cast(vget_high_u8(a));
+        let d: uint16x8_t = simd_cast(vget_high_u8(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Unsigned Subtract Long"]
@@ -26807,14 +22629,12 @@ pub fn vsubl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
 pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     unsafe {
-        let c: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let d: uint32x4_t = simd_cast(c);
-        let e: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        let f: uint32x4_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: uint32x4_t = simd_cast(vget_high_u16(a));
+        let d: uint32x4_t = simd_cast(vget_high_u16(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Unsigned Subtract Long"]
@@ -26822,14 +22642,12 @@ pub fn vsubl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubl2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubl2))]
 pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     unsafe {
-        let c: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let d: uint64x2_t = simd_cast(c);
-        let e: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        let f: uint64x2_t = simd_cast(e);
-        simd_sub(d, f)
+        let c: uint64x2_t = simd_cast(vget_high_u32(a));
+        let d: uint64x2_t = simd_cast(vget_high_u32(b));
+        simd_sub(c, d)
     }
 }
 #[doc = "Signed Subtract Wide"]
@@ -26837,103 +22655,60 @@ pub fn vsubl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
 pub fn vsubw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
-    unsafe {
-        let c: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_s8(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Signed Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
 pub fn vsubw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
-    unsafe {
-        let c: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_s16(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Signed Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ssubw2))]
-pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
-    unsafe {
-        let c: int32x2_t = simd_shuffle!(b, b, [2, 3]);
-        simd_sub(a, simd_cast(c))
-    }
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(ssubw2))]
+pub fn vsubw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+    let c = vget_high_s32(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Unsigned Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
 pub fn vsubw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
-    unsafe {
-        let c: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_u8(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Unsigned Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
 pub fn vsubw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
-    unsafe {
-        let c: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
-        simd_sub(a, simd_cast(c))
-    }
+    let c = vget_high_u16(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Unsigned Subtract Wide"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubw_high_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(usubw2))]
+#[cfg_attr(all(test, target_endian = "little"), assert_instr(usubw2))]
 pub fn vsubw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
-    unsafe {
-        let c: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
-        simd_sub(a, simd_cast(c))
-    }
-}
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"]
-#[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(test, assert_instr(sudot, LANE = 3))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vsudot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: uint32x4_t = transmute(c);
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, transmute(c), b)
-    }
-}
-#[doc = "Dot product index form with signed and unsigned integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"]
-#[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(test, assert_instr(sudot, LANE = 3))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vsudotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: uint32x4_t = transmute(c);
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, transmute(c), b)
-    }
+    let c = vget_high_u32(b);
+    unsafe { simd_sub(a, simd_cast(c)) }
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_s8)"]
@@ -26973,61 +22748,25 @@ pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
-    unsafe { vqtbl1(transmute(vcombine_s8(a.0, a.1)), transmute(b)) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vqtbl1(transmute(vcombine_u8(a.0, a.1)), b)) }
+    vqtbl1_s8(vcombine_s8(a.0, a.1), vreinterpret_u8_s8(b))
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x8x2_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vqtbl1(transmute(vcombine_u8(a.0, a.1)), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+    vqtbl1_u8(vcombine_u8(a.0, a.1), b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
-    unsafe { transmute(vqtbl1(transmute(vcombine_p8(a.0, a.1)), b)) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x8x2_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vqtbl1(transmute(vcombine_p8(a.0, a.1)), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+    vqtbl1_p8(vcombine_p8(a.0, a.1), b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_s8)"]
@@ -27040,79 +22779,33 @@ pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
         vcombine_s8(a.0, a.1),
         vcombine_s8(a.2, unsafe { crate::mem::zeroed() }),
     );
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), transmute(b))) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
-    let x = uint8x16x2_t(
-        vcombine_u8(a.0, a.1),
-        vcombine_u8(a.2, unsafe { crate::mem::zeroed() }),
-    );
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) }
+    vqtbl2_s8(x, vreinterpret_u8_s8(b))
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x8x3_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
     let x = uint8x16x2_t(
         vcombine_u8(a.0, a.1),
         vcombine_u8(a.2, unsafe { crate::mem::zeroed() }),
     );
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vqtbl2(transmute(x.0), transmute(x.1), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
-    let x = poly8x16x2_t(
-        vcombine_p8(a.0, a.1),
-        vcombine_p8(a.2, unsafe { crate::mem::zeroed() }),
-    );
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) }
+    vqtbl2_u8(x, b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x8x3_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
     let x = poly8x16x2_t(
         vcombine_p8(a.0, a.1),
         vcombine_p8(a.2, unsafe { crate::mem::zeroed() }),
     );
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vqtbl2(transmute(x.0), transmute(x.1), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+    vqtbl2_p8(x, b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_s8)"]
@@ -27122,69 +22815,27 @@ pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
     let x = int8x16x2_t(vcombine_s8(a.0, a.1), vcombine_s8(a.2, a.3));
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), transmute(b))) }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
-    let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3));
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) }
+    vqtbl2_s8(x, vreinterpret_u8_s8(b))
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x8x4_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.3 = unsafe { simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
     let x = uint8x16x2_t(vcombine_u8(a.0, a.1), vcombine_u8(a.2, a.3));
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vqtbl2(transmute(x.0), transmute(x.1), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbl))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
-    let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3));
-    unsafe { transmute(vqtbl2(transmute(x.0), transmute(x.1), b)) }
+    vqtbl2_u8(x, b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbl))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x8x4_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.3 = unsafe { simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
     let x = poly8x16x2_t(vcombine_p8(a.0, a.1), vcombine_p8(a.2, a.3));
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vqtbl2(transmute(x.0), transmute(x.1), b));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+    vqtbl2_p8(x, b)
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_s8)"]
@@ -27196,11 +22847,11 @@ pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     unsafe {
         simd_select(
             simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(8))),
-            transmute(vqtbx1(
-                transmute(a),
-                transmute(vcombine_s8(b, crate::mem::zeroed())),
-                transmute(c),
-            )),
+            vqtbx1_s8(
+                a,
+                vcombine_s8(b, crate::mem::zeroed()),
+                vreinterpret_u8_s8(c),
+            ),
             a,
         )
     }
@@ -27215,11 +22866,7 @@ pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     unsafe {
         simd_select(
             simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
-            transmute(vqtbx1(
-                transmute(a),
-                transmute(vcombine_u8(b, crate::mem::zeroed())),
-                c,
-            )),
+            vqtbx1_u8(a, vcombine_u8(b, crate::mem::zeroed()), c),
             a,
         )
     }
@@ -27234,11 +22881,7 @@ pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
     unsafe {
         simd_select(
             simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(8))),
-            transmute(vqtbx1(
-                transmute(a),
-                transmute(vcombine_p8(b, crate::mem::zeroed())),
-                c,
-            )),
+            vqtbx1_p8(a, vcombine_p8(b, crate::mem::zeroed()), c),
             a,
         )
     }
@@ -27250,64 +22893,42 @@ pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
-    unsafe { vqtbx1(transmute(a), transmute(vcombine_s8(b.0, b.1)), transmute(c)) }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vqtbx1(transmute(a), transmute(vcombine_u8(b.0, b.1)), c)) }
+    unsafe {
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(16))),
+            vqtbx1_s8(a, vcombine_s8(b.0, b.1), vreinterpret_u8_s8(c)),
+            a,
+        )
+    }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x8x2_t = b;
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
     unsafe {
-        let ret_val: uint8x8_t =
-            transmute(vqtbx1(transmute(a), transmute(vcombine_u8(b.0, b.1)), c));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(16))),
+            vqtbx1_u8(a, vcombine_u8(b.0, b.1), c),
+            a,
+        )
     }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
-    unsafe { transmute(vqtbx1(transmute(a), transmute(vcombine_p8(b.0, b.1)), c)) }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x8x2_t = b;
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
     unsafe {
-        let ret_val: poly8x8_t =
-            transmute(vqtbx1(transmute(a), transmute(vcombine_p8(b.0, b.1)), c));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(16))),
+            vqtbx1_p8(a, vcombine_p8(b.0, b.1), c),
+            a,
+        )
     }
 }
 #[doc = "Extended table look-up"]
@@ -27322,110 +22943,49 @@ pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
         vcombine_s8(b.2, unsafe { crate::mem::zeroed() }),
     );
     unsafe {
-        transmute(simd_select(
-            simd_lt::<int8x8_t, int8x8_t>(transmute(c), transmute(i8x8::splat(24))),
-            transmute(vqtbx2(
-                transmute(a),
-                transmute(x.0),
-                transmute(x.1),
-                transmute(c),
-            )),
-            a,
-        ))
-    }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
-    let x = uint8x16x2_t(
-        vcombine_u8(b.0, b.1),
-        vcombine_u8(b.2, unsafe { crate::mem::zeroed() }),
-    );
-    unsafe {
-        transmute(simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
-            transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(24))),
+            vqtbx2_s8(a, x, vreinterpret_u8_s8(c)),
             a,
-        ))
+        )
     }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x8x3_t = b;
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
     let x = uint8x16x2_t(
         vcombine_u8(b.0, b.1),
         vcombine_u8(b.2, unsafe { crate::mem::zeroed() }),
     );
     unsafe {
-        let ret_val: uint8x8_t = transmute(simd_select(
-            simd_lt::<uint8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
-            transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
-            a,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
-    let x = poly8x16x2_t(
-        vcombine_p8(b.0, b.1),
-        vcombine_p8(b.2, unsafe { crate::mem::zeroed() }),
-    );
-    unsafe {
-        transmute(simd_select(
-            simd_lt::<poly8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
-            transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(24))),
+            vqtbx2_u8(a, x, c),
             a,
-        ))
+        )
     }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x8x3_t = b;
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
     let x = poly8x16x2_t(
         vcombine_p8(b.0, b.1),
         vcombine_p8(b.2, unsafe { crate::mem::zeroed() }),
     );
     unsafe {
-        let ret_val: poly8x8_t = transmute(simd_select(
-            simd_lt::<poly8x8_t, int8x8_t>(transmute(c), transmute(u8x8::splat(24))),
-            transmute(vqtbx2(transmute(a), transmute(x.0), transmute(x.1), c)),
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(24))),
+            vqtbx2_p8(a, x, c),
             a,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        )
     }
 }
 #[doc = "Extended table look-up"]
@@ -27435,106 +22995,57 @@ pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
+    let x = int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, b.3));
     unsafe {
-        vqtbx2(
-            transmute(a),
-            transmute(vcombine_s8(b.0, b.1)),
-            transmute(vcombine_s8(b.2, b.3)),
-            transmute(c),
+        simd_select(
+            simd_lt::<int8x8_t, int8x8_t>(c, transmute(i8x8::splat(32))),
+            vqtbx2_s8(a, x, vreinterpret_u8_s8(c)),
+            a,
         )
     }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        transmute(vqtbx2(
-            transmute(a),
-            transmute(vcombine_u8(b.0, b.1)),
-            transmute(vcombine_u8(b.2, b.3)),
-            c,
-        ))
-    }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x8x4_t = b;
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.3 = unsafe { simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vqtbx2(
-            transmute(a),
-            transmute(vcombine_u8(b.0, b.1)),
-            transmute(vcombine_u8(b.2, b.3)),
-            c,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(tbx))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
+    let x = uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, b.3));
     unsafe {
-        transmute(vqtbx2(
-            transmute(a),
-            transmute(vcombine_p8(b.0, b.1)),
-            transmute(vcombine_p8(b.2, b.3)),
-            c,
-        ))
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(32))),
+            vqtbx2_u8(a, x, c),
+            a,
+        )
     }
 }
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(tbx))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x8x4_t = b;
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.3 = unsafe { simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
+    let x = poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, b.3));
     unsafe {
-        let ret_val: poly8x8_t = transmute(vqtbx2(
-            transmute(a),
-            transmute(vcombine_p8(b.0, b.1)),
-            transmute(vcombine_p8(b.2, b.3)),
-            c,
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        simd_select(
+            simd_lt::<uint8x8_t, int8x8_t>(c, transmute(u8x8::splat(32))),
+            vqtbx2_p8(a, x, c),
+            a,
+        )
     }
 }
 #[doc = "Transpose vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -27542,9 +23053,12 @@ pub fn vtrn1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1q_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -27553,7 +23067,10 @@ pub fn vtrn1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27562,7 +23079,10 @@ pub fn vtrn1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27571,7 +23091,10 @@ pub fn vtrn1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27580,7 +23103,10 @@ pub fn vtrn1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27589,7 +23115,10 @@ pub fn vtrn1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27598,7 +23127,10 @@ pub fn vtrn1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27607,7 +23139,10 @@ pub fn vtrn1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -27616,7 +23151,10 @@ pub fn vtrn1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -27625,7 +23163,10 @@ pub fn vtrn1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -27634,7 +23175,10 @@ pub fn vtrn1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27649,7 +23193,10 @@ pub fn vtrn1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -27658,7 +23205,10 @@ pub fn vtrn1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -27667,7 +23217,10 @@ pub fn vtrn1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -27676,7 +23229,10 @@ pub fn vtrn1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -27685,7 +23241,10 @@ pub fn vtrn1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27700,7 +23259,10 @@ pub fn vtrn1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -27709,7 +23271,10 @@ pub fn vtrn1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -27718,7 +23283,10 @@ pub fn vtrn1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -27727,7 +23295,10 @@ pub fn vtrn1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -27736,7 +23307,10 @@ pub fn vtrn1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27751,7 +23325,10 @@ pub fn vtrn1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
@@ -27760,7 +23337,10 @@ pub fn vtrn1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn1)
+)]
 pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]) }
 }
@@ -27768,9 +23348,12 @@ pub fn vtrn1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -27778,9 +23361,12 @@ pub fn vtrn2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn2q_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -27789,7 +23375,10 @@ pub fn vtrn2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27798,7 +23387,10 @@ pub fn vtrn2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27807,7 +23399,10 @@ pub fn vtrn2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27816,7 +23411,10 @@ pub fn vtrn2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27825,7 +23423,10 @@ pub fn vtrn2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27834,7 +23435,10 @@ pub fn vtrn2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27843,7 +23447,10 @@ pub fn vtrn2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -27852,7 +23459,10 @@ pub fn vtrn2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -27861,7 +23471,10 @@ pub fn vtrn2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -27870,7 +23483,10 @@ pub fn vtrn2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27885,7 +23501,10 @@ pub fn vtrn2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -27894,7 +23513,10 @@ pub fn vtrn2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -27903,7 +23525,10 @@ pub fn vtrn2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -27912,7 +23537,10 @@ pub fn vtrn2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -27921,7 +23549,10 @@ pub fn vtrn2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27936,7 +23567,10 @@ pub fn vtrn2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -27945,7 +23579,10 @@ pub fn vtrn2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -27954,7 +23591,10 @@ pub fn vtrn2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -27963,7 +23603,10 @@ pub fn vtrn2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -27972,7 +23615,10 @@ pub fn vtrn2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -27987,7 +23633,10 @@ pub fn vtrn2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
@@ -27996,7 +23645,10 @@ pub fn vtrn2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(trn2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(trn2)
+)]
 pub fn vtrn2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]) }
 }
@@ -28231,7 +23883,7 @@ pub fn vuqaddq_s64(a: int64x2_t, b: uint64x2_t) -> int64x2_t {
 #[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vuqaddb_s8(a: i8, b: u8) -> i8 {
-    unsafe { simd_extract!(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)), 0) }
+    vget_lane_s8::<0>(vuqadd_s8(vdup_n_s8(a), vdup_n_u8(b)))
 }
 #[doc = "Signed saturating accumulate of unsigned value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddh_s16)"]
@@ -28240,7 +23892,7 @@ pub fn vuqaddb_s8(a: i8, b: u8) -> i8 {
 #[cfg_attr(test, assert_instr(suqadd))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 pub fn vuqaddh_s16(a: i16, b: u16) -> i16 {
-    unsafe { simd_extract!(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)), 0) }
+    vget_lane_s16::<0>(vuqadd_s16(vdup_n_s16(a), vdup_n_u16(b)))
 }
 #[doc = "Signed saturating accumulate of unsigned value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuqaddd_s64)"]
@@ -28274,44 +23926,16 @@ pub fn vuqadds_s32(a: i32, b: u32) -> i32 {
     }
     unsafe { _vuqadds_s32(a, b) }
 }
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"]
-#[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(test, assert_instr(usdot, LANE = 3))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t = transmute(c);
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, b, transmute(c))
-    }
-}
-#[doc = "Dot product index form with unsigned and signed integers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"]
-#[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(test, assert_instr(usdot, LANE = 3))]
-#[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
-pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let c: int32x4_t = transmute(c);
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, b, transmute(c))
-    }
-}
 #[doc = "Unzip vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -28319,9 +23943,12 @@ pub fn vuzp1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp1q_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -28330,7 +23957,10 @@ pub fn vuzp1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28339,7 +23969,10 @@ pub fn vuzp1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28348,7 +23981,10 @@ pub fn vuzp1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28357,7 +23993,10 @@ pub fn vuzp1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28366,7 +24005,10 @@ pub fn vuzp1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28375,7 +24017,10 @@ pub fn vuzp1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28384,7 +24029,10 @@ pub fn vuzp1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28393,7 +24041,10 @@ pub fn vuzp1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -28402,7 +24053,10 @@ pub fn vuzp1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -28411,7 +24065,10 @@ pub fn vuzp1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -28426,7 +24083,10 @@ pub fn vuzp1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -28435,7 +24095,10 @@ pub fn vuzp1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -28444,7 +24107,10 @@ pub fn vuzp1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -28453,7 +24119,10 @@ pub fn vuzp1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -28462,7 +24131,10 @@ pub fn vuzp1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -28477,7 +24149,10 @@ pub fn vuzp1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -28486,7 +24161,10 @@ pub fn vuzp1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -28495,7 +24173,10 @@ pub fn vuzp1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -28504,7 +24185,10 @@ pub fn vuzp1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -28513,7 +24197,10 @@ pub fn vuzp1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -28528,7 +24215,10 @@ pub fn vuzp1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6]) }
 }
@@ -28537,7 +24227,10 @@ pub fn vuzp1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp1)
+)]
 pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]) }
 }
@@ -28545,9 +24238,12 @@ pub fn vuzp1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -28555,9 +24251,12 @@ pub fn vuzp2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vuzp2q_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -28566,7 +24265,10 @@ pub fn vuzp2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -28575,7 +24277,10 @@ pub fn vuzp2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -28584,7 +24289,10 @@ pub fn vuzp2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -28593,7 +24301,10 @@ pub fn vuzp2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -28602,7 +24313,10 @@ pub fn vuzp2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -28611,7 +24325,10 @@ pub fn vuzp2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -28620,7 +24337,10 @@ pub fn vuzp2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -28629,7 +24349,10 @@ pub fn vuzp2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -28638,7 +24361,10 @@ pub fn vuzp2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -28647,7 +24373,10 @@ pub fn vuzp2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -28662,7 +24391,10 @@ pub fn vuzp2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -28671,7 +24403,10 @@ pub fn vuzp2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -28680,7 +24415,10 @@ pub fn vuzp2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -28689,7 +24427,10 @@ pub fn vuzp2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -28698,7 +24439,10 @@ pub fn vuzp2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -28713,7 +24457,10 @@ pub fn vuzp2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -28722,7 +24469,10 @@ pub fn vuzp2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -28731,7 +24481,10 @@ pub fn vuzp2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -28740,7 +24493,10 @@ pub fn vuzp2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -28749,7 +24505,10 @@ pub fn vuzp2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -28764,7 +24523,10 @@ pub fn vuzp2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7]) }
 }
@@ -28773,7 +24535,10 @@ pub fn vuzp2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(uzp2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(uzp2)
+)]
 pub fn vuzp2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]) }
 }
@@ -28799,9 +24564,12 @@ pub fn vxarq_u64<const IMM6: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -28809,9 +24577,12 @@ pub fn vzip1_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip1q_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -28820,7 +24591,10 @@ pub fn vzip1q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28829,7 +24603,10 @@ pub fn vzip1_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -28838,7 +24615,10 @@ pub fn vzip1q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28847,7 +24627,10 @@ pub fn vzip1q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -28856,7 +24639,10 @@ pub fn vzip1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -28871,7 +24657,10 @@ pub fn vzip1q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -28880,7 +24669,10 @@ pub fn vzip1_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -28889,7 +24681,10 @@ pub fn vzip1q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28898,7 +24693,10 @@ pub fn vzip1_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -28907,7 +24705,10 @@ pub fn vzip1q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28916,7 +24717,10 @@ pub fn vzip1q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -28925,7 +24729,10 @@ pub fn vzip1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -28940,7 +24747,10 @@ pub fn vzip1q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -28949,7 +24759,10 @@ pub fn vzip1_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -28958,7 +24771,10 @@ pub fn vzip1q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28967,7 +24783,10 @@ pub fn vzip1_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -28976,7 +24795,10 @@ pub fn vzip1q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -28985,7 +24807,10 @@ pub fn vzip1q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -28994,7 +24819,10 @@ pub fn vzip1_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -29009,7 +24837,10 @@ pub fn vzip1q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
@@ -29018,7 +24849,10 @@ pub fn vzip1_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]) }
 }
@@ -29027,7 +24861,10 @@ pub fn vzip1q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip1))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip1)
+)]
 pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
@@ -29035,9 +24872,12 @@ pub fn vzip1q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -29045,9 +24885,12 @@ pub fn vzip2_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vzip2q_f16)"]
 #[inline]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
 #[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -29056,7 +24899,10 @@ pub fn vzip2q_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -29065,7 +24911,10 @@ pub fn vzip2_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -29074,7 +24923,10 @@ pub fn vzip2q_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -29083,7 +24935,10 @@ pub fn vzip2q_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -29092,7 +24947,10 @@ pub fn vzip2_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe {
         simd_shuffle!(
@@ -29107,7 +24965,10 @@ pub fn vzip2q_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -29116,7 +24977,10 @@ pub fn vzip2_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -29125,7 +24989,10 @@ pub fn vzip2q_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -29134,7 +25001,10 @@ pub fn vzip2_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -29143,7 +25013,10 @@ pub fn vzip2q_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -29152,7 +25025,10 @@ pub fn vzip2q_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -29161,7 +25037,10 @@ pub fn vzip2_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unsafe {
         simd_shuffle!(
@@ -29176,7 +25055,10 @@ pub fn vzip2q_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -29185,7 +25067,10 @@ pub fn vzip2_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -29194,7 +25079,10 @@ pub fn vzip2q_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -29203,7 +25091,10 @@ pub fn vzip2_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -29212,7 +25103,10 @@ pub fn vzip2q_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
@@ -29221,7 +25115,10 @@ pub fn vzip2q_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -29230,7 +25127,10 @@ pub fn vzip2_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     unsafe {
         simd_shuffle!(
@@ -29245,7 +25145,10 @@ pub fn vzip2q_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
@@ -29254,7 +25157,10 @@ pub fn vzip2_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     unsafe { simd_shuffle!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]) }
 }
@@ -29263,7 +25169,10 @@ pub fn vzip2q_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 #[inline]
 #[target_feature(enable = "neon")]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(zip2))]
+#[cfg_attr(
+    all(test, not(target_env = "msvc"), target_endian = "little"),
+    assert_instr(zip2)
+)]
 pub fn vzip2q_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
diff --git a/crates/core_arch/src/aarch64/neon/mod.rs b/crates/core_arch/src/aarch64/neon/mod.rs
index b172b57f32..c66702814c 100644
--- a/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/crates/core_arch/src/aarch64/neon/mod.rs
@@ -12,7 +12,6 @@ pub use self::generated::*;
 
 use crate::{
     core_arch::{arm_shared::*, simd::*},
-    hint::unreachable_unchecked,
     intrinsics::{simd::*, *},
     mem::transmute,
 };
@@ -70,116 +69,29 @@ pub struct float64x2x4_t(
     pub float64x2_t,
 );
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_lane_s64<const N1: i32, const N2: i32>(_a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    static_assert!(N1 == 0);
-    static_assert!(N2 == 0);
-    b
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_lane_u64<const N1: i32, const N2: i32>(_a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    static_assert!(N1 == 0);
-    static_assert!(N2 == 0);
-    b
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_lane_p64<const N1: i32, const N2: i32>(_a: poly64x1_t, b: poly64x1_t) -> poly64x1_t {
-    static_assert!(N1 == 0);
-    static_assert!(N2 == 0);
-    b
-}
+/// Helper for the 'shift right and insert' functions.
+macro_rules! shift_right_and_insert {
+    ($ty:ty, $width:literal, $N:expr, $a:expr, $b:expr) => {{
+        type V = Simd<$ty, $width>;
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, N1 = 0, N2 = 0))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_lane_f64<const N1: i32, const N2: i32>(
-    _a: float64x1_t,
-    b: float64x1_t,
-) -> float64x1_t {
-    static_assert!(N1 == 0);
-    static_assert!(N2 == 0);
-    b
-}
+        if $N as u32 == <$ty>::BITS {
+            $a
+        } else {
+            let a: V = transmute($a);
+            let b: V = transmute($b);
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_laneq_s64<const LANE1: i32, const LANE2: i32>(
-    _a: int64x1_t,
-    b: int64x2_t,
-) -> int64x1_t {
-    static_assert!(LANE1 == 0);
-    static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<i64, _>(simd_extract!(b, LANE2 as u32)) }
-}
+            let mask = <$ty>::MAX >> $N;
+            let kept: V = simd_and(a, V::splat(!mask));
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_laneq_u64<const LANE1: i32, const LANE2: i32>(
-    _a: uint64x1_t,
-    b: uint64x2_t,
-) -> uint64x1_t {
-    static_assert!(LANE1 == 0);
-    static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
-}
+            let shift_counts = V::splat($N as $ty);
+            let shifted = simd_shr(b, shift_counts);
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_laneq_p64<const LANE1: i32, const LANE2: i32>(
-    _a: poly64x1_t,
-    b: poly64x2_t,
-) -> poly64x1_t {
-    static_assert!(LANE1 == 0);
-    static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(b, LANE2 as u32)) }
+            transmute(simd_or(kept, shifted))
+        }
+    }};
 }
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop, LANE1 = 0, LANE2 = 1))]
-#[rustc_legacy_const_generics(1, 3)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcopy_laneq_f64<const LANE1: i32, const LANE2: i32>(
-    _a: float64x1_t,
-    b: float64x2_t,
-) -> float64x1_t {
-    static_assert!(LANE1 == 0);
-    static_assert_uimm_bits!(LANE2, 1);
-    unsafe { transmute::<f64, _>(simd_extract!(b, LANE2 as u32)) }
-}
+pub(crate) use shift_right_and_insert;
 
 /// Load multiple single-element structures to one, two, three, or four registers
 #[inline]
@@ -419,42 +331,6 @@ pub fn vmovq_n_f64(value: f64) -> float64x2_t {
     vdupq_n_f64(value)
 }
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vget_high_f64(a: float64x2_t) -> float64x1_t {
-    unsafe { float64x1_t([simd_extract!(a, 1)]) }
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(ext))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
-    unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vget_low_f64(a: float64x2_t) -> float64x1_t {
-    unsafe { float64x1_t([simd_extract!(a, 0)]) }
-}
-
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(nop))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
-    unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
-}
-
 /// Duplicate vector element to vector or scalar
 #[inline]
 #[target_feature(enable = "neon")]
@@ -469,29 +345,6 @@ pub fn vget_lane_f64<const IMM5: i32>(v: float64x1_t) -> f64 {
     unsafe { simd_extract!(v, IMM5 as u32) }
 }
 
-/// Duplicate vector element to vector or scalar
-#[inline]
-#[target_feature(enable = "neon")]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop, IMM5 = 0)
-)]
-pub fn vgetq_lane_f64<const IMM5: i32>(v: float64x2_t) -> f64 {
-    static_assert_uimm_bits!(IMM5, 1);
-    unsafe { simd_extract!(v, IMM5 as u32) }
-}
-
-/// Vector combine
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(mov))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vcombine_f64(low: float64x1_t, high: float64x1_t) -> float64x2_t {
-    unsafe { simd_shuffle!(low, high, [0, 1]) }
-}
-
 /// Shift left
 #[inline]
 #[target_feature(enable = "neon")]
@@ -569,47 +422,46 @@ mod tests {
     use crate::core_arch::aarch64::test_support::*;
     use crate::core_arch::arm_shared::test_support::*;
     use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*};
-    use std::mem::transmute;
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_f64() {
-        let a = 1.;
-        let b = 8.;
-        let e = 9.;
-        let r: f64 = transmute(vadd_f64(transmute(a), transmute(b)));
+    fn test_vadd_f64() {
+        let a = f64x1::from_array([1.]);
+        let b = f64x1::from_array([8.]);
+        let e = f64x1::from_array([9.]);
+        let r = f64x1::from(vadd_f64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_f64() {
+    fn test_vaddq_f64() {
         let a = f64x2::new(1., 2.);
         let b = f64x2::new(8., 7.);
         let e = f64x2::new(9., 9.);
-        let r: f64x2 = transmute(vaddq_f64(transmute(a), transmute(b)));
+        let r = f64x2::from(vaddq_f64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_s64() {
-        let a = 1_i64;
-        let b = 8_i64;
-        let e = 9_i64;
-        let r: i64 = transmute(vadd_s64(transmute(a), transmute(b)));
+    fn test_vadd_s64() {
+        let a = i64x1::from_array([1]);
+        let b = i64x1::from_array([8]);
+        let e = i64x1::from_array([9]);
+        let r = i64x1::from(vadd_s64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_u64() {
-        let a = 1_u64;
-        let b = 8_u64;
-        let e = 9_u64;
-        let r: u64 = transmute(vadd_u64(transmute(a), transmute(b)));
+    fn test_vadd_u64() {
+        let a = u64x1::from_array([1]);
+        let b = u64x1::from_array([8]);
+        let e = u64x1::from_array([9]);
+        let r = u64x1::from(vadd_u64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddd_s64() {
+    fn test_vaddd_s64() {
         let a = 1_i64;
         let b = 8_i64;
         let e = 9_i64;
@@ -618,7 +470,7 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddd_u64() {
+    fn test_vaddd_u64() {
         let a = 1_u64;
         let b = 8_u64;
         let e = 9_u64;
@@ -627,25 +479,25 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_p64() {
-        let a: i64x1 = i64x1::new(0);
-        let b: i64x1 = i64x1::new(1);
-        let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vext_p64::<0>(transmute(a), transmute(b)));
+    fn test_vext_p64() {
+        let a = u64x1::new(0);
+        let b = u64x1::new(1);
+        let e = u64x1::new(0);
+        let r = u64x1::from(vext_p64::<0>(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_f64() {
-        let a: f64x1 = f64x1::new(0.);
-        let b: f64x1 = f64x1::new(1.);
-        let e: f64x1 = f64x1::new(0.);
-        let r: f64x1 = transmute(vext_f64::<0>(transmute(a), transmute(b)));
+    fn test_vext_f64() {
+        let a = f64x1::new(0.);
+        let b = f64x1::new(1.);
+        let e = f64x1::new(0.);
+        let r = f64x1::from(vext_f64::<0>(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vshld_n_s64() {
+    fn test_vshld_n_s64() {
         let a: i64 = 1;
         let e: i64 = 4;
         let r: i64 = vshld_n_s64::<2>(a);
@@ -653,7 +505,7 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vshld_n_u64() {
+    fn test_vshld_n_u64() {
         let a: u64 = 1;
         let e: u64 = 4;
         let r: u64 = vshld_n_u64::<2>(a);
@@ -661,7 +513,7 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vshrd_n_s64() {
+    fn test_vshrd_n_s64() {
         let a: i64 = 4;
         let e: i64 = 1;
         let r: i64 = vshrd_n_s64::<2>(a);
@@ -669,7 +521,7 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vshrd_n_u64() {
+    fn test_vshrd_n_u64() {
         let a: u64 = 4;
         let e: u64 = 1;
         let r: u64 = vshrd_n_u64::<2>(a);
@@ -677,7 +529,7 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsrad_n_s64() {
+    fn test_vsrad_n_s64() {
         let a: i64 = 1;
         let b: i64 = 4;
         let e: i64 = 2;
@@ -686,7 +538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsrad_n_u64() {
+    fn test_vsrad_n_u64() {
         let a: u64 = 1;
         let b: u64 = 4;
         let e: u64 = 2;
@@ -695,298 +547,551 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_f64() {
+    fn test_vdup_n_f64() {
         let a: f64 = 3.3;
         let e = f64x1::new(3.3);
-        let r: f64x1 = transmute(vdup_n_f64(a));
+        let r = f64x1::from(vdup_n_f64(a));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_p64() {
+    fn test_vdup_n_p64() {
         let a: u64 = 3;
         let e = u64x1::new(3);
-        let r: u64x1 = transmute(vdup_n_p64(a));
+        let r = u64x1::from(vdup_n_p64(a));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_f64() {
+    fn test_vdupq_n_f64() {
         let a: f64 = 3.3;
         let e = f64x2::new(3.3, 3.3);
-        let r: f64x2 = transmute(vdupq_n_f64(a));
+        let r = f64x2::from(vdupq_n_f64(a));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_p64() {
+    fn test_vdupq_n_p64() {
         let a: u64 = 3;
         let e = u64x2::new(3, 3);
-        let r: u64x2 = transmute(vdupq_n_p64(a));
+        let r = u64x2::from(vdupq_n_p64(a));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_p64() {
+    fn test_vmov_n_p64() {
         let a: u64 = 3;
         let e = u64x1::new(3);
-        let r: u64x1 = transmute(vmov_n_p64(a));
+        let r = u64x1::from(vmov_n_p64(a));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_f64() {
+    fn test_vmov_n_f64() {
         let a: f64 = 3.3;
         let e = f64x1::new(3.3);
-        let r: f64x1 = transmute(vmov_n_f64(a));
+        let r = f64x1::from(vmov_n_f64(a));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_p64() {
+    fn test_vmovq_n_p64() {
         let a: u64 = 3;
         let e = u64x2::new(3, 3);
-        let r: u64x2 = transmute(vmovq_n_p64(a));
+        let r = u64x2::from(vmovq_n_p64(a));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_f64() {
+    fn test_vmovq_n_f64() {
         let a: f64 = 3.3;
         let e = f64x2::new(3.3, 3.3);
-        let r: f64x2 = transmute(vmovq_n_f64(a));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_f64() {
-        let a = f64x2::new(1.0, 2.0);
-        let e = f64x1::new(2.0);
-        let r: f64x1 = transmute(vget_high_f64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_p64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(2);
-        let r: u64x1 = transmute(vget_high_p64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_f64() {
-        let a = f64x2::new(1.0, 2.0);
-        let e = f64x1::new(1.0);
-        let r: f64x1 = transmute(vget_low_f64(transmute(a)));
+        let r = f64x2::from(vmovq_n_f64(a));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_p64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(1);
-        let r: u64x1 = transmute(vget_low_p64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_f64() {
+    fn test_vget_lane_f64() {
         let v = f64x1::new(1.0);
-        let r = vget_lane_f64::<0>(transmute(v));
+        let r = vget_lane_f64::<0>(v.into());
         assert_eq!(r, 1.0);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_f64() {
-        let v = f64x2::new(0.0, 1.0);
-        let r = vgetq_lane_f64::<1>(transmute(v));
-        assert_eq!(r, 1.0);
-        let r = vgetq_lane_f64::<0>(transmute(v));
-        assert_eq!(r, 0.0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_s64() {
-        let a: i64x1 = i64x1::new(1);
-        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r: i64x1 = transmute(vcopy_lane_s64::<0, 0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_u64() {
-        let a: u64x1 = u64x1::new(1);
-        let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcopy_lane_u64::<0, 0>(transmute(a), transmute(b)));
+    fn test_vcopy_lane_s64() {
+        let a = i64x1::new(1);
+        let b = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r = i64x1::from(vcopy_lane_s64::<0, 0>(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_p64() {
-        let a: i64x1 = i64x1::new(1);
-        let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r: i64x1 = transmute(vcopy_lane_p64::<0, 0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_lane_f64() {
-        let a: f64 = 1.;
-        let b: f64 = 0.;
-        let e: f64 = 0.;
-        let r: f64 = transmute(vcopy_lane_f64::<0, 0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_s64() {
-        let a: i64x1 = i64x1::new(1);
-        let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r: i64x1 = transmute(vcopy_laneq_s64::<0, 1>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_u64() {
-        let a: u64x1 = u64x1::new(1);
-        let b: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
-        let r: u64x1 = transmute(vcopy_laneq_u64::<0, 1>(transmute(a), transmute(b)));
+    fn test_vcopy_lane_u64() {
+        let a = u64x1::new(1);
+        let b = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let e = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let r = u64x1::from(vcopy_lane_u64::<0, 0>(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_p64() {
-        let a: i64x1 = i64x1::new(1);
-        let b: i64x2 = i64x2::new(0, 0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let e: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
-        let r: i64x1 = transmute(vcopy_laneq_p64::<0, 1>(transmute(a), transmute(b)));
+    fn test_vcopy_lane_p64() {
+        let a = u64x1::new(1);
+        let b = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e = u64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let r = u64x1::from(vcopy_lane_p64::<0, 0>(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcopy_laneq_f64() {
-        let a: f64 = 1.;
-        let b: f64x2 = f64x2::new(0., 0.5);
-        let e: f64 = 0.5;
-        let r: f64 = transmute(vcopy_laneq_f64::<0, 1>(transmute(a), transmute(b)));
+    fn test_vcopy_lane_f64() {
+        let a = f64x1::from_array([1.]);
+        let b = f64x1::from_array([0.]);
+        let e = f64x1::from_array([0.]);
+        let r = f64x1::from(vcopy_lane_f64::<0, 0>(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_f64() {
+    fn test_vbsl_f64() {
         let a = u64x1::new(0x8000000000000000);
         let b = f64x1::new(-1.23f64);
         let c = f64x1::new(2.34f64);
         let e = f64x1::new(-2.34f64);
-        let r: f64x1 = transmute(vbsl_f64(transmute(a), transmute(b), transmute(c)));
+        let r = f64x1::from(vbsl_f64(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_p64() {
+    fn test_vbsl_p64() {
         let a = u64x1::new(1);
         let b = u64x1::new(u64::MAX);
         let c = u64x1::new(u64::MIN);
         let e = u64x1::new(1);
-        let r: u64x1 = transmute(vbsl_p64(transmute(a), transmute(b), transmute(c)));
+        let r = u64x1::from(vbsl_p64(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_f64() {
+    fn test_vbslq_f64() {
         let a = u64x2::new(1, 0x8000000000000000);
         let b = f64x2::new(f64::MAX, -1.23f64);
         let c = f64x2::new(f64::MIN, 2.34f64);
         let e = f64x2::new(f64::MIN, -2.34f64);
-        let r: f64x2 = transmute(vbslq_f64(transmute(a), transmute(b), transmute(c)));
+        let r = f64x2::from(vbslq_f64(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_p64() {
+    fn test_vbslq_p64() {
         let a = u64x2::new(u64::MAX, 1);
         let b = u64x2::new(u64::MAX, u64::MAX);
         let c = u64x2::new(u64::MIN, u64::MIN);
         let e = u64x2::new(u64::MAX, 1);
-        let r: u64x2 = transmute(vbslq_p64(transmute(a), transmute(b), transmute(c)));
+        let r = u64x2::from(vbslq_p64(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_f64() {
+    fn test_vld1_f64() {
         let a: [f64; 2] = [0., 1.];
         let e = f64x1::new(1.);
-        let r: f64x1 = transmute(vld1_f64(a[1..].as_ptr()));
+        let r = unsafe { f64x1::from(vld1_f64(a[1..].as_ptr())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_f64() {
+    fn test_vld1q_f64() {
         let a: [f64; 3] = [0., 1., 2.];
         let e = f64x2::new(1., 2.);
-        let r: f64x2 = transmute(vld1q_f64(a[1..].as_ptr()));
+        let r = unsafe { f64x2::from(vld1q_f64(a[1..].as_ptr())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_f64() {
+    fn test_vld1_dup_f64() {
         let a: [f64; 2] = [1., 42.];
         let e = f64x1::new(42.);
-        let r: f64x1 = transmute(vld1_dup_f64(a[1..].as_ptr()));
+        let r = unsafe { f64x1::from(vld1_dup_f64(a[1..].as_ptr())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_f64() {
+    fn test_vld1q_dup_f64() {
         let elem: f64 = 42.;
         let e = f64x2::new(42., 42.);
-        let r: f64x2 = transmute(vld1q_dup_f64(&elem));
+        let r = unsafe { f64x2::from(vld1q_dup_f64(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_f64() {
+    fn test_vld1_lane_f64() {
         let a = f64x1::new(0.);
         let elem: f64 = 42.;
         let e = f64x1::new(42.);
-        let r: f64x1 = transmute(vld1_lane_f64::<0>(&elem, transmute(a)));
+        let r = unsafe { f64x1::from(vld1_lane_f64::<0>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_f64() {
+    fn test_vld1q_lane_f64() {
         let a = f64x2::new(0., 1.);
         let elem: f64 = 42.;
         let e = f64x2::new(0., 42.);
-        let r: f64x2 = transmute(vld1q_lane_f64::<1>(&elem, transmute(a)));
+        let r = unsafe { f64x2::from(vld1q_lane_f64::<1>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vst1_f64() {
+    fn test_vst1_f64() {
         let mut vals = [0_f64; 2];
         let a = f64x1::new(1.);
 
-        vst1_f64(vals[1..].as_mut_ptr(), transmute(a));
+        unsafe {
+            vst1_f64(vals[1..].as_mut_ptr(), a.into());
+        }
 
         assert_eq!(vals[0], 0.);
         assert_eq!(vals[1], 1.);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vst1q_f64() {
+    fn test_vst1q_f64() {
         let mut vals = [0_f64; 3];
         let a = f64x2::new(1., 2.);
 
-        vst1q_f64(vals[1..].as_mut_ptr(), transmute(a));
+        unsafe {
+            vst1q_f64(vals[1..].as_mut_ptr(), a.into());
+        }
 
         assert_eq!(vals[0], 0.);
         assert_eq!(vals[1], 1.);
         assert_eq!(vals[2], 2.);
     }
+
+    macro_rules! wide_store_load_roundtrip {
+        ($elem_ty:ty, $len:expr, $vec_ty:ty, $store:expr, $load:expr) => {
+            let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
+            let a: $vec_ty = transmute(vals);
+            let mut tmp = core::mem::MaybeUninit::<[$elem_ty; $len]>::uninit();
+            $store(tmp.as_mut_ptr().cast(), a);
+
+            // With Miri this will check that all elements were initialized.
+            let tmp = tmp.assume_init();
+
+            let r: $vec_ty = $load(tmp.as_ptr().cast());
+            let out: [$elem_ty; $len] = transmute(r);
+            assert_eq!(out, vals);
+        };
+    }
+
+    macro_rules! wide_store_load_roundtrip_fp16 {
+        ($( $name:ident $args:tt);* $(;)?) => {
+            $(
+                #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
+                #[simd_test(enable = "neon,fp16")]
+                #[cfg(not(target_arch = "arm64ec"))]
+                unsafe fn $name() {
+                    wide_store_load_roundtrip! $args;
+                }
+            )*
+        };
+    }
+
+    wide_store_load_roundtrip_fp16! {
+        test_vld1_f16_x2(f16, 8, float16x4x2_t, vst1_f16_x2, vld1_f16_x2);
+        test_vld1_f16_x3(f16, 12, float16x4x3_t, vst1_f16_x3, vld1_f16_x3);
+        test_vld1_f16_x4(f16, 16, float16x4x4_t, vst1_f16_x4, vld1_f16_x4);
+
+        test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2);
+        test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3);
+        test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4);
+
+        test_vld2_f16(f16, 8, float16x4x2_t, vst2_f16, vld2_f16);
+        test_vld3_f16(f16, 12, float16x4x3_t, vst3_f16, vld3_f16);
+        test_vld4_f16(f16, 16, float16x4x4_t, vst4_f16, vld4_f16);
+
+        test_vld2q_f16(f16, 16, float16x8x2_t, vst2q_f16, vld2q_f16);
+        test_vld3q_f16(f16, 24, float16x8x3_t, vst3q_f16, vld3q_f16);
+        test_vld4q_f16(f16, 32, float16x8x4_t, vst4q_f16, vld4q_f16);
+    }
+
+    macro_rules! wide_store_load_roundtrip_aes {
+        ($( $name:ident $args:tt);* $(;)?) => {
+            $(
+                #[simd_test(enable = "neon,aes")]
+                unsafe fn $name() {
+                    wide_store_load_roundtrip! $args;
+                }
+            )*
+        };
+    }
+
+    wide_store_load_roundtrip_aes! {
+        test_vld1_p64_x2(p64, 2, poly64x1x2_t, vst1_p64_x2, vld1_p64_x2);
+        test_vld1_p64_x3(p64, 3, poly64x1x3_t, vst1_p64_x3, vld1_p64_x3);
+        test_vld1_p64_x4(p64, 4, poly64x1x4_t, vst1_p64_x4, vld1_p64_x4);
+
+        test_vld1q_p64_x2(p64, 4, poly64x2x2_t, vst1q_p64_x2, vld1q_p64_x2);
+        test_vld1q_p64_x3(p64, 6, poly64x2x3_t, vst1q_p64_x3, vld1q_p64_x3);
+        test_vld1q_p64_x4(p64, 8, poly64x2x4_t, vst1q_p64_x4, vld1q_p64_x4);
+    }
+
+    macro_rules! wide_store_load_roundtrip_neon {
+        ($( $name:ident $args:tt);* $(;)?) => {
+            $(
+                #[simd_test(enable = "neon")]
+                unsafe fn $name() {
+                    wide_store_load_roundtrip! $args;
+                }
+            )*
+        };
+    }
+
+    wide_store_load_roundtrip_neon! {
+        test_vld1_f32_x2(f32, 4, float32x2x2_t, vst1_f32_x2, vld1_f32_x2);
+        test_vld1_f32_x3(f32, 6, float32x2x3_t, vst1_f32_x3, vld1_f32_x3);
+        test_vld1_f32_x4(f32, 8, float32x2x4_t, vst1_f32_x4, vld1_f32_x4);
+
+        test_vld1q_f32_x2(f32, 8, float32x4x2_t, vst1q_f32_x2, vld1q_f32_x2);
+        test_vld1q_f32_x3(f32, 12, float32x4x3_t, vst1q_f32_x3, vld1q_f32_x3);
+        test_vld1q_f32_x4(f32, 16, float32x4x4_t, vst1q_f32_x4, vld1q_f32_x4);
+
+        test_vld1_f64_x2(f64, 2, float64x1x2_t, vst1_f64_x2, vld1_f64_x2);
+        test_vld1_f64_x3(f64, 3, float64x1x3_t, vst1_f64_x3, vld1_f64_x3);
+        test_vld1_f64_x4(f64, 4, float64x1x4_t, vst1_f64_x4, vld1_f64_x4);
+
+        test_vld1q_f64_x2(f64, 4, float64x2x2_t, vst1q_f64_x2, vld1q_f64_x2);
+        test_vld1q_f64_x3(f64, 6, float64x2x3_t, vst1q_f64_x3, vld1q_f64_x3);
+        test_vld1q_f64_x4(f64, 8, float64x2x4_t, vst1q_f64_x4, vld1q_f64_x4);
+
+        test_vld1_s8_x2(i8, 16, int8x8x2_t, vst1_s8_x2, vld1_s8_x2);
+        test_vld1_s8_x3(i8, 24, int8x8x3_t, vst1_s8_x3, vld1_s8_x3);
+        test_vld1_s8_x4(i8, 32, int8x8x4_t, vst1_s8_x4, vld1_s8_x4);
+
+        test_vld1q_s8_x2(i8, 32, int8x16x2_t, vst1q_s8_x2, vld1q_s8_x2);
+        test_vld1q_s8_x3(i8, 48, int8x16x3_t, vst1q_s8_x3, vld1q_s8_x3);
+        test_vld1q_s8_x4(i8, 64, int8x16x4_t, vst1q_s8_x4, vld1q_s8_x4);
+
+        test_vld1_s16_x2(i16, 8, int16x4x2_t, vst1_s16_x2, vld1_s16_x2);
+        test_vld1_s16_x3(i16, 12, int16x4x3_t, vst1_s16_x3, vld1_s16_x3);
+        test_vld1_s16_x4(i16, 16, int16x4x4_t, vst1_s16_x4, vld1_s16_x4);
+
+        test_vld1q_s16_x2(i16, 16, int16x8x2_t, vst1q_s16_x2, vld1q_s16_x2);
+        test_vld1q_s16_x3(i16, 24, int16x8x3_t, vst1q_s16_x3, vld1q_s16_x3);
+        test_vld1q_s16_x4(i16, 32, int16x8x4_t, vst1q_s16_x4, vld1q_s16_x4);
+
+        test_vld1_s32_x2(i32, 4, int32x2x2_t, vst1_s32_x2, vld1_s32_x2);
+        test_vld1_s32_x3(i32, 6, int32x2x3_t, vst1_s32_x3, vld1_s32_x3);
+        test_vld1_s32_x4(i32, 8, int32x2x4_t, vst1_s32_x4, vld1_s32_x4);
+
+        test_vld1q_s32_x2(i32, 8, int32x4x2_t, vst1q_s32_x2, vld1q_s32_x2);
+        test_vld1q_s32_x3(i32, 12, int32x4x3_t, vst1q_s32_x3, vld1q_s32_x3);
+        test_vld1q_s32_x4(i32, 16, int32x4x4_t, vst1q_s32_x4, vld1q_s32_x4);
+
+        test_vld1_s64_x2(i64, 2, int64x1x2_t, vst1_s64_x2, vld1_s64_x2);
+        test_vld1_s64_x3(i64, 3, int64x1x3_t, vst1_s64_x3, vld1_s64_x3);
+        test_vld1_s64_x4(i64, 4, int64x1x4_t, vst1_s64_x4, vld1_s64_x4);
+
+        test_vld1q_s64_x2(i64, 4, int64x2x2_t, vst1q_s64_x2, vld1q_s64_x2);
+        test_vld1q_s64_x3(i64, 6, int64x2x3_t, vst1q_s64_x3, vld1q_s64_x3);
+        test_vld1q_s64_x4(i64, 8, int64x2x4_t, vst1q_s64_x4, vld1q_s64_x4);
+
+        test_vld1_u8_x2(u8, 16, uint8x8x2_t, vst1_u8_x2, vld1_u8_x2);
+        test_vld1_u8_x3(u8, 24, uint8x8x3_t, vst1_u8_x3, vld1_u8_x3);
+        test_vld1_u8_x4(u8, 32, uint8x8x4_t, vst1_u8_x4, vld1_u8_x4);
+
+        test_vld1q_u8_x2(u8, 32, uint8x16x2_t, vst1q_u8_x2, vld1q_u8_x2);
+        test_vld1q_u8_x3(u8, 48, uint8x16x3_t, vst1q_u8_x3, vld1q_u8_x3);
+        test_vld1q_u8_x4(u8, 64, uint8x16x4_t, vst1q_u8_x4, vld1q_u8_x4);
+
+        test_vld1_u16_x2(u16, 8, uint16x4x2_t, vst1_u16_x2, vld1_u16_x2);
+        test_vld1_u16_x3(u16, 12, uint16x4x3_t, vst1_u16_x3, vld1_u16_x3);
+        test_vld1_u16_x4(u16, 16, uint16x4x4_t, vst1_u16_x4, vld1_u16_x4);
+
+        test_vld1q_u16_x2(u16, 16, uint16x8x2_t, vst1q_u16_x2, vld1q_u16_x2);
+        test_vld1q_u16_x3(u16, 24, uint16x8x3_t, vst1q_u16_x3, vld1q_u16_x3);
+        test_vld1q_u16_x4(u16, 32, uint16x8x4_t, vst1q_u16_x4, vld1q_u16_x4);
+
+        test_vld1_u32_x2(u32, 4, uint32x2x2_t, vst1_u32_x2, vld1_u32_x2);
+        test_vld1_u32_x3(u32, 6, uint32x2x3_t, vst1_u32_x3, vld1_u32_x3);
+        test_vld1_u32_x4(u32, 8, uint32x2x4_t, vst1_u32_x4, vld1_u32_x4);
+
+        test_vld1q_u32_x2(u32, 8, uint32x4x2_t, vst1q_u32_x2, vld1q_u32_x2);
+        test_vld1q_u32_x3(u32, 12, uint32x4x3_t, vst1q_u32_x3, vld1q_u32_x3);
+        test_vld1q_u32_x4(u32, 16, uint32x4x4_t, vst1q_u32_x4, vld1q_u32_x4);
+
+        test_vld1_u64_x2(u64, 2, uint64x1x2_t, vst1_u64_x2, vld1_u64_x2);
+        test_vld1_u64_x3(u64, 3, uint64x1x3_t, vst1_u64_x3, vld1_u64_x3);
+        test_vld1_u64_x4(u64, 4, uint64x1x4_t, vst1_u64_x4, vld1_u64_x4);
+
+        test_vld1q_u64_x2(u64, 4, uint64x2x2_t, vst1q_u64_x2, vld1q_u64_x2);
+        test_vld1q_u64_x3(u64, 6, uint64x2x3_t, vst1q_u64_x3, vld1q_u64_x3);
+        test_vld1q_u64_x4(u64, 8, uint64x2x4_t, vst1q_u64_x4, vld1q_u64_x4);
+
+        test_vld1_p8_x2(p8, 16, poly8x8x2_t, vst1_p8_x2, vld1_p8_x2);
+        test_vld1_p8_x3(p8, 24, poly8x8x3_t, vst1_p8_x3, vld1_p8_x3);
+        test_vld1_p8_x4(p8, 32, poly8x8x4_t, vst1_p8_x4, vld1_p8_x4);
+
+        test_vld1q_p8_x2(p8, 32, poly8x16x2_t, vst1q_p8_x2, vld1q_p8_x2);
+        test_vld1q_p8_x3(p8, 48, poly8x16x3_t, vst1q_p8_x3, vld1q_p8_x3);
+        test_vld1q_p8_x4(p8, 64, poly8x16x4_t, vst1q_p8_x4, vld1q_p8_x4);
+
+        test_vld1_p16_x2(p16, 8, poly16x4x2_t, vst1_p16_x2, vld1_p16_x2);
+        test_vld1_p16_x3(p16, 12, poly16x4x3_t, vst1_p16_x3, vld1_p16_x3);
+        test_vld1_p16_x4(p16, 16, poly16x4x4_t, vst1_p16_x4, vld1_p16_x4);
+
+        test_vld1q_p16_x2(p16, 16, poly16x8x2_t, vst1q_p16_x2, vld1q_p16_x2);
+        test_vld1q_p16_x3(p16, 24, poly16x8x3_t, vst1q_p16_x3, vld1q_p16_x3);
+        test_vld1q_p16_x4(p16, 32, poly16x8x4_t, vst1q_p16_x4, vld1q_p16_x4);
+    }
+
+    wide_store_load_roundtrip_neon! {
+        test_vld2_f32(f32, 4, float32x2x2_t, vst2_f32, vld2_f32);
+        test_vld3_f32(f32, 6, float32x2x3_t, vst3_f32, vld3_f32);
+        test_vld4_f32(f32, 8, float32x2x4_t, vst4_f32, vld4_f32);
+
+        test_vld2q_f32(f32, 8, float32x4x2_t, vst2q_f32, vld2q_f32);
+        test_vld3q_f32(f32, 12, float32x4x3_t, vst3q_f32, vld3q_f32);
+        test_vld4q_f32(f32, 16, float32x4x4_t, vst4q_f32, vld4q_f32);
+
+        test_vld2_f64(f64, 2, float64x1x2_t, vst2_f64, vld2_f64);
+        test_vld3_f64(f64, 3, float64x1x3_t, vst3_f64, vld3_f64);
+        test_vld4_f64(f64, 4, float64x1x4_t, vst4_f64, vld4_f64);
+
+        test_vld2q_f64(f64, 4, float64x2x2_t, vst2q_f64, vld2q_f64);
+        test_vld3q_f64(f64, 6, float64x2x3_t, vst3q_f64, vld3q_f64);
+        test_vld4q_f64(f64, 8, float64x2x4_t, vst4q_f64, vld4q_f64);
+
+        test_vld2_s8(i8, 16, int8x8x2_t, vst2_s8, vld2_s8);
+        test_vld3_s8(i8, 24, int8x8x3_t, vst3_s8, vld3_s8);
+        test_vld4_s8(i8, 32, int8x8x4_t, vst4_s8, vld4_s8);
+
+        test_vld2q_s8(i8, 32, int8x16x2_t, vst2q_s8, vld2q_s8);
+        test_vld3q_s8(i8, 48, int8x16x3_t, vst3q_s8, vld3q_s8);
+        test_vld4q_s8(i8, 64, int8x16x4_t, vst4q_s8, vld4q_s8);
+
+        test_vld2_s16(i16, 8, int16x4x2_t, vst2_s16, vld2_s16);
+        test_vld3_s16(i16, 12, int16x4x3_t, vst3_s16, vld3_s16);
+        test_vld4_s16(i16, 16, int16x4x4_t, vst4_s16, vld4_s16);
+
+        test_vld2q_s16(i16, 16, int16x8x2_t, vst2q_s16, vld2q_s16);
+        test_vld3q_s16(i16, 24, int16x8x3_t, vst3q_s16, vld3q_s16);
+        test_vld4q_s16(i16, 32, int16x8x4_t, vst4q_s16, vld4q_s16);
+
+        test_vld2_s32(i32, 4, int32x2x2_t, vst2_s32, vld2_s32);
+        test_vld3_s32(i32, 6, int32x2x3_t, vst3_s32, vld3_s32);
+        test_vld4_s32(i32, 8, int32x2x4_t, vst4_s32, vld4_s32);
+
+        test_vld2q_s32(i32, 8, int32x4x2_t, vst2q_s32, vld2q_s32);
+        test_vld3q_s32(i32, 12, int32x4x3_t, vst3q_s32, vld3q_s32);
+        test_vld4q_s32(i32, 16, int32x4x4_t, vst4q_s32, vld4q_s32);
+
+        test_vld2_s64(i64, 2, int64x1x2_t, vst2_s64, vld2_s64);
+        test_vld3_s64(i64, 3, int64x1x3_t, vst3_s64, vld3_s64);
+        test_vld4_s64(i64, 4, int64x1x4_t, vst4_s64, vld4_s64);
+
+        test_vld2q_s64(i64, 4, int64x2x2_t, vst2q_s64, vld2q_s64);
+        test_vld3q_s64(i64, 6, int64x2x3_t, vst3q_s64, vld3q_s64);
+        test_vld4q_s64(i64, 8, int64x2x4_t, vst4q_s64, vld4q_s64);
+
+        test_vld2_u8(u8, 16, uint8x8x2_t, vst2_u8, vld2_u8);
+        test_vld3_u8(u8, 24, uint8x8x3_t, vst3_u8, vld3_u8);
+        test_vld4_u8(u8, 32, uint8x8x4_t, vst4_u8, vld4_u8);
+
+        test_vld2q_u8(u8, 32, uint8x16x2_t, vst2q_u8, vld2q_u8);
+        test_vld3q_u8(u8, 48, uint8x16x3_t, vst3q_u8, vld3q_u8);
+        test_vld4q_u8(u8, 64, uint8x16x4_t, vst4q_u8, vld4q_u8);
+
+        test_vld2_u16(u16, 8, uint16x4x2_t, vst2_u16, vld2_u16);
+        test_vld3_u16(u16, 12, uint16x4x3_t, vst3_u16, vld3_u16);
+        test_vld4_u16(u16, 16, uint16x4x4_t, vst4_u16, vld4_u16);
+
+        test_vld2q_u16(u16, 16, uint16x8x2_t, vst2q_u16, vld2q_u16);
+        test_vld3q_u16(u16, 24, uint16x8x3_t, vst3q_u16, vld3q_u16);
+        test_vld4q_u16(u16, 32, uint16x8x4_t, vst4q_u16, vld4q_u16);
+
+        test_vld2_u32(u32, 4, uint32x2x2_t, vst2_u32, vld2_u32);
+        test_vld3_u32(u32, 6, uint32x2x3_t, vst3_u32, vld3_u32);
+        test_vld4_u32(u32, 8, uint32x2x4_t, vst4_u32, vld4_u32);
+
+        test_vld2q_u32(u32, 8, uint32x4x2_t, vst2q_u32, vld2q_u32);
+        test_vld3q_u32(u32, 12, uint32x4x3_t, vst3q_u32, vld3q_u32);
+        test_vld4q_u32(u32, 16, uint32x4x4_t, vst4q_u32, vld4q_u32);
+
+        test_vld2_u64(u64, 2, uint64x1x2_t, vst2_u64, vld2_u64);
+        test_vld3_u64(u64, 3, uint64x1x3_t, vst3_u64, vld3_u64);
+        test_vld4_u64(u64, 4, uint64x1x4_t, vst4_u64, vld4_u64);
+
+        test_vld2q_u64(u64, 4, uint64x2x2_t, vst2q_u64, vld2q_u64);
+        test_vld3q_u64(u64, 6, uint64x2x3_t, vst3q_u64, vld3q_u64);
+        test_vld4q_u64(u64, 8, uint64x2x4_t, vst4q_u64, vld4q_u64);
+
+        test_vld2_p8(p8, 16, poly8x8x2_t, vst2_p8, vld2_p8);
+        test_vld3_p8(p8, 24, poly8x8x3_t, vst3_p8, vld3_p8);
+        test_vld4_p8(p8, 32, poly8x8x4_t, vst4_p8, vld4_p8);
+
+        test_vld2q_p8(p8, 32, poly8x16x2_t, vst2q_p8, vld2q_p8);
+        test_vld3q_p8(p8, 48, poly8x16x3_t, vst3q_p8, vld3q_p8);
+        test_vld4q_p8(p8, 64, poly8x16x4_t, vst4q_p8, vld4q_p8);
+
+        test_vld2_p16(p16, 8, poly16x4x2_t, vst2_p16, vld2_p16);
+        test_vld3_p16(p16, 12, poly16x4x3_t, vst3_p16, vld3_p16);
+        test_vld4_p16(p16, 16, poly16x4x4_t, vst4_p16, vld4_p16);
+
+        test_vld2q_p16(p16, 16, poly16x8x2_t, vst2q_p16, vld2q_p16);
+        test_vld3q_p16(p16, 24, poly16x8x3_t, vst3q_p16, vld3q_p16);
+        test_vld4q_p16(p16, 32, poly16x8x4_t, vst4q_p16, vld4q_p16);
+    }
+
+    macro_rules! lane_wide_store_load_roundtrip {
+        ($elem_ty:ty, $len:expr, $idx:expr, $vec_ty:ty, $store:ident, $load:ident) => {
+            let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
+            let a: $vec_ty = transmute(vals);
+            let mut tmp = [0 as $elem_ty; 4];
+            $store::<$idx>(tmp.as_mut_ptr().cast(), a);
+            let r: $vec_ty = $load::<$idx>(tmp.as_ptr().cast(), a);
+            let out: [$elem_ty; $len] = transmute(r);
+            assert_eq!(out, vals);
+        };
+    }
+
+    macro_rules! lane_wide_store_load_roundtrip_neon {
+        ($( $name:ident $args:tt);* $(;)?) => {
+            $(
+                #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
+                #[simd_test(enable = "neon")]
+                unsafe fn $name() {
+                    lane_wide_store_load_roundtrip! $args;
+                }
+            )*
+        };
+    }
+
+    lane_wide_store_load_roundtrip_neon! {
+        test_vld2q_lane_s8(i8, 32, 15, int8x16x2_t, vst2q_lane_s8, vld2q_lane_s8);
+        test_vld3q_lane_s8(i8, 48, 15, int8x16x3_t, vst3q_lane_s8, vld3q_lane_s8);
+        test_vld4q_lane_s8(i8, 64, 15, int8x16x4_t, vst4q_lane_s8, vld4q_lane_s8);
+
+        test_vld2q_lane_u8(u8, 32, 15, uint8x16x2_t, vst2q_lane_u8, vld2q_lane_u8);
+        test_vld3q_lane_u8(u8, 48, 15, uint8x16x3_t, vst3q_lane_u8, vld3q_lane_u8);
+        test_vld4q_lane_u8(u8, 64, 15, uint8x16x4_t, vst4q_lane_u8, vld4q_lane_u8);
+
+        test_vld2_lane_s64(i64, 2, 0, int64x1x2_t, vst2_lane_s64, vld2_lane_s64);
+        test_vld3_lane_s64(i64, 3, 0, int64x1x3_t, vst3_lane_s64, vld3_lane_s64);
+        test_vld4_lane_s64(i64, 4, 0, int64x1x4_t, vst4_lane_s64, vld4_lane_s64);
+        test_vld2q_lane_s64(i64, 4, 1, int64x2x2_t, vst2q_lane_s64, vld2q_lane_s64);
+        test_vld3q_lane_s64(i64, 6, 1, int64x2x3_t, vst3q_lane_s64, vld3q_lane_s64);
+        test_vld4q_lane_s64(i64, 8, 1, int64x2x4_t, vst4q_lane_s64, vld4q_lane_s64);
+
+        test_vld2_lane_u64(u64, 2, 0, uint64x1x2_t, vst2_lane_u64, vld2_lane_u64);
+        test_vld3_lane_u64(u64, 3, 0, uint64x1x3_t, vst3_lane_u64, vld3_lane_u64);
+        test_vld4_lane_u64(u64, 4, 0, uint64x1x4_t, vst4_lane_u64, vld4_lane_u64);
+        test_vld2q_lane_u64(u64, 4, 1, uint64x2x2_t, vst2q_lane_u64, vld2q_lane_u64);
+        test_vld3q_lane_u64(u64, 6, 1, uint64x2x3_t, vst3q_lane_u64, vld3q_lane_u64);
+        test_vld4q_lane_u64(u64, 8, 1, uint64x2x4_t, vst4q_lane_u64, vld4q_lane_u64);
+    }
 }
 
 #[cfg(test)]
diff --git a/crates/core_arch/src/aarch64/rand.rs b/crates/core_arch/src/aarch64/rand.rs
new file mode 100644
index 0000000000..3f52cf2ce8
--- /dev/null
+++ b/crates/core_arch/src/aarch64/rand.rs
@@ -0,0 +1,48 @@
+//! AArch64 Random Number intrinsics
+//!
+//! [ACLE documentation](https://arm-software.github.io/acle/main/acle.html#random-number-generation-intrinsics)
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.aarch64.rndr"]
+    fn rndr_() -> Tuple;
+
+    #[link_name = "llvm.aarch64.rndrrs"]
+    fn rndrrs_() -> Tuple;
+}
+
+#[repr(C)]
+struct Tuple {
+    bits: u64,
+    status: bool,
+}
+
+/// Stores a 64-bit random number into the object pointed to by the argument and returns
+/// zero. If the implementation could not generate a random number within a reasonable
+/// period of time the object pointed to by the input is set to zero and a non-zero value
+/// is returned.
+#[inline]
+#[target_feature(enable = "rand")]
+#[cfg_attr(test, assert_instr(mrs))]
+#[unstable(feature = "stdarch_aarch64_rand", issue = "153514")]
+pub unsafe fn __rndr(value: *mut u64) -> i32 {
+    let Tuple { bits, status } = rndr_();
+    unsafe { *value = bits };
+    status as i32
+}
+
+/// Reseeds the random number generator. After that stores a 64-bit random number into
+/// the object pointed to by the argument and returns zero. If the implementation could
+/// not generate a random number within a reasonable period of time the object pointed
+/// to by the input is set to zero and a non-zero value is returned.
+#[inline]
+#[target_feature(enable = "rand")]
+#[cfg_attr(test, assert_instr(mrs))]
+#[unstable(feature = "stdarch_aarch64_rand", issue = "153514")]
+pub unsafe fn __rndrrs(value: *mut u64) -> i32 {
+    let Tuple { bits, status } = rndrrs_();
+    unsafe { *value = bits };
+    status as i32
+}
diff --git a/crates/core_arch/src/aarch64/sve/generated.rs b/crates/core_arch/src/aarch64/sve/generated.rs
new file mode 100644
index 0000000000..6c6a2476a4
--- /dev/null
+++ b/crates/core_arch/src/aarch64/sve/generated.rs
@@ -0,0 +1,44957 @@
+// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen-arm/spec/` and run the following command to re-generate this file:
+//
+// ```
+// cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec
+// ```
+#![allow(improper_ctypes)]
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+use super::*;
+use crate::core_arch::arch::aarch64::*;
+
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fabd.nxv4f32")]
+        fn _svabd_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svabd_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svabd_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svabd_f32_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svabd_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svabd_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svabd_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fabd.nxv2f64")]
+        fn _svabd_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svabd_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svabd_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svabd_f64_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svabd_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svabd_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabd))]
+pub fn svabd_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svabd_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabd.nxv16i8")]
+        fn _svabd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svabd_s8_m(pg, op1, op2) }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svabd_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svabd_s8_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svabd_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svabd_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svabd_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabd.nxv8i16")]
+        fn _svabd_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svabd_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svabd_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svabd_s16_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svabd_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svabd_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svabd_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabd.nxv4i32")]
+        fn _svabd_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svabd_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svabd_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svabd_s32_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svabd_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svabd_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svabd_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabd.nxv2i64")]
+        fn _svabd_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svabd_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svabd_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svabd_s64_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svabd_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svabd_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabd))]
+pub fn svabd_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svabd_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabd.nxv16i8")]
+        fn _svabd_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svabd_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svabd_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svabd_u8_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svabd_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svabd_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svabd_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabd.nxv8i16")]
+        fn _svabd_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svabd_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svabd_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svabd_u16_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svabd_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svabd_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svabd_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabd.nxv4i32")]
+        fn _svabd_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svabd_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svabd_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svabd_u32_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svabd_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svabd_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svabd_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabd.nxv2i64")]
+        fn _svabd_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svabd_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svabd_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svabd_u64_m(pg, op1, op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svabd_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svabd_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Absolute difference"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabd[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabd))]
+pub fn svabd_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svabd_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabs))]
+pub fn svabs_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fabs.nxv4f32")]
+        fn _svabs_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svabs_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabs))]
+pub fn svabs_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svabs_f32_m(op, pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabs))]
+pub fn svabs_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svabs_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabs))]
+pub fn svabs_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fabs.nxv2f64")]
+        fn _svabs_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svabs_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabs))]
+pub fn svabs_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svabs_f64_m(op, pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fabs))]
+pub fn svabs_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svabs_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.abs.nxv16i8")]
+        fn _svabs_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svabs_s8_m(inactive, pg, op) }
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s8_x(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svabs_s8_m(op, pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s8_z(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svabs_s8_m(svdup_n_s8(0), pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s16_m(inactive: svint16_t, pg: svbool_t, op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.abs.nxv8i16")]
+        fn _svabs_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svabs_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s16_x(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svabs_s16_m(op, pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s16_z(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svabs_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.abs.nxv4i32")]
+        fn _svabs_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svabs_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svabs_s32_m(op, pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svabs_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.abs.nxv2i64")]
+        fn _svabs_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svabs_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svabs_s64_m(op, pg, op)
+}
+#[doc = "Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabs[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(abs))]
+pub fn svabs_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svabs_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Absolute compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacge[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facge))]
+pub fn svacge_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.facge.nxv4f32")]
+        fn _svacge_f32(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool4_t;
+    }
+    unsafe { _svacge_f32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Absolute compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacge[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facge))]
+pub fn svacge_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svacge_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Absolute compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacge[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facge))]
+pub fn svacge_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.facge.nxv2f64")]
+        fn _svacge_f64(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool2_t;
+    }
+    unsafe { _svacge_f64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Absolute compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacge[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facge))]
+pub fn svacge_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svacge_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Absolute compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacgt[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facgt))]
+pub fn svacgt_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.facgt.nxv4f32")]
+        fn _svacgt_f32(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool4_t;
+    }
+    unsafe { _svacgt_f32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Absolute compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacgt[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facgt))]
+pub fn svacgt_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svacgt_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Absolute compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacgt[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facgt))]
+pub fn svacgt_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.facgt.nxv2f64")]
+        fn _svacgt_f64(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool2_t;
+    }
+    unsafe { _svacgt_f64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Absolute compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacgt[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facgt))]
+pub fn svacgt_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svacgt_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Absolute compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacle[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facge))]
+pub fn svacle_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    svacge_f32(pg, op2, op1)
+}
+#[doc = "Absolute compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacle[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facge))]
+pub fn svacle_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svacle_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Absolute compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacle[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facge))]
+pub fn svacle_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    svacge_f64(pg, op2, op1)
+}
+#[doc = "Absolute compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svacle[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facge))]
+pub fn svacle_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svacle_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Absolute compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaclt[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facgt))]
+pub fn svaclt_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    svacgt_f32(pg, op2, op1)
+}
+#[doc = "Absolute compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaclt[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facgt))]
+pub fn svaclt_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svaclt_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Absolute compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaclt[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facgt))]
+pub fn svaclt_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    svacgt_f64(pg, op2, op1)
+}
+#[doc = "Absolute compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaclt[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(facgt))]
+pub fn svaclt_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svaclt_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fadd.nxv4f32")]
+        fn _svadd_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svadd_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svadd_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svadd_f32_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svadd_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svadd_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svadd_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fadd.nxv2f64")]
+        fn _svadd_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svadd_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svadd_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svadd_f64_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svadd_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svadd_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadd))]
+pub fn svadd_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svadd_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.add.nxv16i8")]
+        fn _svadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svadd_s8_m(pg, op1, op2) }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svadd_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svadd_s8_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svadd_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svadd_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svadd_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.add.nxv8i16")]
+        fn _svadd_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svadd_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svadd_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svadd_s16_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svadd_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svadd_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svadd_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.add.nxv4i32")]
+        fn _svadd_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svadd_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svadd_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svadd_s32_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svadd_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svadd_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svadd_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.add.nxv2i64")]
+        fn _svadd_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svadd_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svadd_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svadd_s64_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svadd_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svadd_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svadd_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svadd_s8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svadd_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svadd_u8_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svadd_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svadd_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svadd_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svadd_s16_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svadd_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svadd_u16_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svadd_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svadd_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svadd_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svadd_s32_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svadd_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svadd_u32_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svadd_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svadd_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svadd_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svadd_s64_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svadd_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svadd_u64_m(pg, op1, op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svadd_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svadd_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadd[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(add))]
+pub fn svadd_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svadd_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Add reduction (strictly-ordered)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadda[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadda))]
+pub fn svadda_f32(pg: svbool_t, initial: f32, op: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fadda.nxv4f32")]
+        fn _svadda_f32(pg: svbool4_t, initial: f32, op: svfloat32_t) -> f32;
+    }
+    unsafe { _svadda_f32(pg.sve_into(), initial, op) }
+}
+#[doc = "Add reduction (strictly-ordered)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadda[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fadda))]
+pub fn svadda_f64(pg: svbool_t, initial: f64, op: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fadda.nxv2f64")]
+        fn _svadda_f64(pg: svbool2_t, initial: f64, op: svfloat64_t) -> f64;
+    }
+    unsafe { _svadda_f64(pg.sve_into(), initial, op) }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(faddv))]
+pub fn svaddv_f32(pg: svbool_t, op: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.faddv.nxv4f32")]
+        fn _svaddv_f32(pg: svbool4_t, op: svfloat32_t) -> f32;
+    }
+    unsafe { _svaddv_f32(pg.sve_into(), op) }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(faddv))]
+pub fn svaddv_f64(pg: svbool_t, op: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.faddv.nxv2f64")]
+        fn _svaddv_f64(pg: svbool2_t, op: svfloat64_t) -> f64;
+    }
+    unsafe { _svaddv_f64(pg.sve_into(), op) }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddv))]
+pub fn svaddv_s64(pg: svbool_t, op: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddv.nxv2i64")]
+        fn _svaddv_s64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svaddv_s64(pg.sve_into(), op) }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddv))]
+pub fn svaddv_u64(pg: svbool_t, op: svuint64_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddv.nxv2i64")]
+        fn _svaddv_u64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svaddv_u64(pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddv))]
+pub fn svaddv_s8(pg: svbool_t, op: svint8_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddv.nxv16i8")]
+        fn _svaddv_s8(pg: svbool_t, op: svint8_t) -> i64;
+    }
+    unsafe { _svaddv_s8(pg, op) }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddv))]
+pub fn svaddv_s16(pg: svbool_t, op: svint16_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddv.nxv8i16")]
+        fn _svaddv_s16(pg: svbool8_t, op: svint16_t) -> i64;
+    }
+    unsafe { _svaddv_s16(pg.sve_into(), op) }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddv))]
+pub fn svaddv_s32(pg: svbool_t, op: svint32_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddv.nxv4i32")]
+        fn _svaddv_s32(pg: svbool4_t, op: svint32_t) -> i64;
+    }
+    unsafe { _svaddv_s32(pg.sve_into(), op) }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddv))]
+pub fn svaddv_u8(pg: svbool_t, op: svuint8_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddv.nxv16i8")]
+        fn _svaddv_u8(pg: svbool_t, op: svint8_t) -> i64;
+    }
+    unsafe { _svaddv_u8(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddv))]
+pub fn svaddv_u16(pg: svbool_t, op: svuint16_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddv.nxv8i16")]
+        fn _svaddv_u16(pg: svbool8_t, op: svint16_t) -> i64;
+    }
+    unsafe { _svaddv_u16(pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Add reduction"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddv[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddv))]
+pub fn svaddv_u32(pg: svbool_t, op: svuint32_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddv.nxv4i32")]
+        fn _svaddv_u32(pg: svbool4_t, op: svint32_t) -> i64;
+    }
+    unsafe { _svaddv_u32(pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Compute vector addresses for 8-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrb[_u32base]_[s32]offset)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrb_u32base_s32offset(bases: svuint32_t, offsets: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adrb.nxv4i32")]
+        fn _svadrb_u32base_s32offset(bases: svint32_t, offsets: svint32_t) -> svint32_t;
+    }
+    unsafe { _svadrb_u32base_s32offset(bases.as_signed(), offsets).as_unsigned() }
+}
+#[doc = "Compute vector addresses for 16-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrh[_u32base]_[s32]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrh_u32base_s32index(bases: svuint32_t, indices: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adrh.nxv4i32")]
+        fn _svadrh_u32base_s32index(bases: svint32_t, indices: svint32_t) -> svint32_t;
+    }
+    unsafe { _svadrh_u32base_s32index(bases.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Compute vector addresses for 32-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrw[_u32base]_[s32]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrw_u32base_s32index(bases: svuint32_t, indices: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adrw.nxv4i32")]
+        fn _svadrw_u32base_s32index(bases: svint32_t, indices: svint32_t) -> svint32_t;
+    }
+    unsafe { _svadrw_u32base_s32index(bases.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Compute vector addresses for 64-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrd[_u32base]_[s32]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrd_u32base_s32index(bases: svuint32_t, indices: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adrd.nxv4i32")]
+        fn _svadrd_u32base_s32index(bases: svint32_t, indices: svint32_t) -> svint32_t;
+    }
+    unsafe { _svadrd_u32base_s32index(bases.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Compute vector addresses for 8-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrb[_u32base]_[u32]offset)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrb_u32base_u32offset(bases: svuint32_t, offsets: svuint32_t) -> svuint32_t {
+    unsafe { svadrb_u32base_s32offset(bases, offsets.as_signed()) }
+}
+#[doc = "Compute vector addresses for 16-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrh[_u32base]_[u32]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrh_u32base_u32index(bases: svuint32_t, indices: svuint32_t) -> svuint32_t {
+    unsafe { svadrh_u32base_s32index(bases, indices.as_signed()) }
+}
+#[doc = "Compute vector addresses for 32-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrw[_u32base]_[u32]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrw_u32base_u32index(bases: svuint32_t, indices: svuint32_t) -> svuint32_t {
+    unsafe { svadrw_u32base_s32index(bases, indices.as_signed()) }
+}
+#[doc = "Compute vector addresses for 64-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrd[_u32base]_[u32]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrd_u32base_u32index(bases: svuint32_t, indices: svuint32_t) -> svuint32_t {
+    unsafe { svadrd_u32base_s32index(bases, indices.as_signed()) }
+}
+#[doc = "Compute vector addresses for 8-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrb[_u64base]_[s64]offset)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrb_u64base_s64offset(bases: svuint64_t, offsets: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adrb.nxv2i64")]
+        fn _svadrb_u64base_s64offset(bases: svint64_t, offsets: svint64_t) -> svint64_t;
+    }
+    unsafe { _svadrb_u64base_s64offset(bases.as_signed(), offsets).as_unsigned() }
+}
+#[doc = "Compute vector addresses for 16-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrh[_u64base]_[s64]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrh_u64base_s64index(bases: svuint64_t, indices: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adrh.nxv2i64")]
+        fn _svadrh_u64base_s64index(bases: svint64_t, indices: svint64_t) -> svint64_t;
+    }
+    unsafe { _svadrh_u64base_s64index(bases.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Compute vector addresses for 32-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrw[_u64base]_[s64]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrw_u64base_s64index(bases: svuint64_t, indices: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adrw.nxv2i64")]
+        fn _svadrw_u64base_s64index(bases: svint64_t, indices: svint64_t) -> svint64_t;
+    }
+    unsafe { _svadrw_u64base_s64index(bases.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Compute vector addresses for 64-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrd[_u64base]_[s64]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrd_u64base_s64index(bases: svuint64_t, indices: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adrd.nxv2i64")]
+        fn _svadrd_u64base_s64index(bases: svint64_t, indices: svint64_t) -> svint64_t;
+    }
+    unsafe { _svadrd_u64base_s64index(bases.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Compute vector addresses for 8-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrb[_u64base]_[u64]offset)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrb_u64base_u64offset(bases: svuint64_t, offsets: svuint64_t) -> svuint64_t {
+    unsafe { svadrb_u64base_s64offset(bases, offsets.as_signed()) }
+}
+#[doc = "Compute vector addresses for 16-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrh[_u64base]_[u64]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrh_u64base_u64index(bases: svuint64_t, indices: svuint64_t) -> svuint64_t {
+    unsafe { svadrh_u64base_s64index(bases, indices.as_signed()) }
+}
+#[doc = "Compute vector addresses for 32-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrw[_u64base]_[u64]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrw_u64base_u64index(bases: svuint64_t, indices: svuint64_t) -> svuint64_t {
+    unsafe { svadrw_u64base_s64index(bases, indices.as_signed()) }
+}
+#[doc = "Compute vector addresses for 64-bit data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadrd[_u64base]_[u64]index)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adr))]
+pub fn svadrd_u64base_u64index(bases: svuint64_t, indices: svuint64_t) -> svuint64_t {
+    unsafe { svadrd_u64base_s64index(bases, indices.as_signed()) }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.and.z.nvx16i1")]
+        fn _svand_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svand_b_z(pg, op1, op2) }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.and.nxv16i8")]
+        fn _svand_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svand_s8_m(pg, op1, op2) }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svand_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svand_s8_m(pg, op1, op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svand_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svand_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svand_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.and.nxv8i16")]
+        fn _svand_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svand_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svand_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svand_s16_m(pg, op1, op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svand_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svand_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svand_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.and.nxv4i32")]
+        fn _svand_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svand_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svand_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svand_s32_m(pg, op1, op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svand_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svand_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svand_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.and.nxv2i64")]
+        fn _svand_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svand_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svand_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svand_s64_m(pg, op1, op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svand_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svand_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svand_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svand_s8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svand_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svand_u8_m(pg, op1, op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svand_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svand_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svand_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svand_s16_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svand_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svand_u16_m(pg, op1, op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svand_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svand_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svand_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svand_s32_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svand_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svand_u32_m(pg, op1, op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svand_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svand_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svand_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svand_s64_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svand_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svand_u64_m(pg, op1, op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svand_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svand_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Bitwise AND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svand[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(and))]
+pub fn svand_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svand_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise AND reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svandv[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(andv))]
+pub fn svandv_s8(pg: svbool_t, op: svint8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.andv.nxv16i8")]
+        fn _svandv_s8(pg: svbool_t, op: svint8_t) -> i8;
+    }
+    unsafe { _svandv_s8(pg, op) }
+}
+#[doc = "Bitwise AND reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svandv[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(andv))]
+pub fn svandv_s16(pg: svbool_t, op: svint16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.andv.nxv8i16")]
+        fn _svandv_s16(pg: svbool8_t, op: svint16_t) -> i16;
+    }
+    unsafe { _svandv_s16(pg.sve_into(), op) }
+}
+#[doc = "Bitwise AND reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svandv[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(andv))]
+pub fn svandv_s32(pg: svbool_t, op: svint32_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.andv.nxv4i32")]
+        fn _svandv_s32(pg: svbool4_t, op: svint32_t) -> i32;
+    }
+    unsafe { _svandv_s32(pg.sve_into(), op) }
+}
+#[doc = "Bitwise AND reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svandv[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(andv))]
+pub fn svandv_s64(pg: svbool_t, op: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.andv.nxv2i64")]
+        fn _svandv_s64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svandv_s64(pg.sve_into(), op) }
+}
+#[doc = "Bitwise AND reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svandv[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(andv))]
+pub fn svandv_u8(pg: svbool_t, op: svuint8_t) -> u8 {
+    unsafe { svandv_s8(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise AND reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svandv[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(andv))]
+pub fn svandv_u16(pg: svbool_t, op: svuint16_t) -> u16 {
+    unsafe { svandv_s16(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise AND reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svandv[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(andv))]
+pub fn svandv_u32(pg: svbool_t, op: svuint32_t) -> u32 {
+    unsafe { svandv_s32(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise AND reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svandv[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(andv))]
+pub fn svandv_u64(pg: svbool_t, op: svuint64_t) -> u64 {
+    unsafe { svandv_s64(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s8_m(pg: svbool_t, op1: svint8_t, op2: svuint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.asr.nxv16i8")]
+        fn _svasr_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svasr_s8_m(pg, op1, op2.as_signed()) }
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s8_m(pg: svbool_t, op1: svint8_t, op2: u8) -> svint8_t {
+    svasr_s8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s8_x(pg: svbool_t, op1: svint8_t, op2: svuint8_t) -> svint8_t {
+    svasr_s8_m(pg, op1, op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s8_x(pg: svbool_t, op1: svint8_t, op2: u8) -> svint8_t {
+    svasr_s8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s8_z(pg: svbool_t, op1: svint8_t, op2: svuint8_t) -> svint8_t {
+    svasr_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s8_z(pg: svbool_t, op1: svint8_t, op2: u8) -> svint8_t {
+    svasr_s8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s16_m(pg: svbool_t, op1: svint16_t, op2: svuint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.asr.nxv8i16")]
+        fn _svasr_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svasr_s16_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s16_m(pg: svbool_t, op1: svint16_t, op2: u16) -> svint16_t {
+    svasr_s16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s16_x(pg: svbool_t, op1: svint16_t, op2: svuint16_t) -> svint16_t {
+    svasr_s16_m(pg, op1, op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s16_x(pg: svbool_t, op1: svint16_t, op2: u16) -> svint16_t {
+    svasr_s16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s16_z(pg: svbool_t, op1: svint16_t, op2: svuint16_t) -> svint16_t {
+    svasr_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s16_z(pg: svbool_t, op1: svint16_t, op2: u16) -> svint16_t {
+    svasr_s16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s32_m(pg: svbool_t, op1: svint32_t, op2: svuint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.asr.nxv4i32")]
+        fn _svasr_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svasr_s32_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s32_m(pg: svbool_t, op1: svint32_t, op2: u32) -> svint32_t {
+    svasr_s32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s32_x(pg: svbool_t, op1: svint32_t, op2: svuint32_t) -> svint32_t {
+    svasr_s32_m(pg, op1, op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s32_x(pg: svbool_t, op1: svint32_t, op2: u32) -> svint32_t {
+    svasr_s32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s32_z(pg: svbool_t, op1: svint32_t, op2: svuint32_t) -> svint32_t {
+    svasr_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s32_z(pg: svbool_t, op1: svint32_t, op2: u32) -> svint32_t {
+    svasr_s32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s64_m(pg: svbool_t, op1: svint64_t, op2: svuint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.asr.nxv2i64")]
+        fn _svasr_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svasr_s64_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s64_m(pg: svbool_t, op1: svint64_t, op2: u64) -> svint64_t {
+    svasr_s64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s64_x(pg: svbool_t, op1: svint64_t, op2: svuint64_t) -> svint64_t {
+    svasr_s64_m(pg, op1, op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s64_x(pg: svbool_t, op1: svint64_t, op2: u64) -> svint64_t {
+    svasr_s64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_s64_z(pg: svbool_t, op1: svint64_t, op2: svuint64_t) -> svint64_t {
+    svasr_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_n_s64_z(pg: svbool_t, op1: svint64_t, op2: u64) -> svint64_t {
+    svasr_s64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_s8_m(pg: svbool_t, op1: svint8_t, op2: svuint64_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.asr.wide.nxv16i8"
+        )]
+        fn _svasr_wide_s8_m(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svint8_t;
+    }
+    unsafe { _svasr_wide_s8_m(pg, op1, op2.as_signed()) }
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_n_s8_m(pg: svbool_t, op1: svint8_t, op2: u64) -> svint8_t {
+    svasr_wide_s8_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_s8_x(pg: svbool_t, op1: svint8_t, op2: svuint64_t) -> svint8_t {
+    svasr_wide_s8_m(pg, op1, op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_n_s8_x(pg: svbool_t, op1: svint8_t, op2: u64) -> svint8_t {
+    svasr_wide_s8_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_s8_z(pg: svbool_t, op1: svint8_t, op2: svuint64_t) -> svint8_t {
+    svasr_wide_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_n_s8_z(pg: svbool_t, op1: svint8_t, op2: u64) -> svint8_t {
+    svasr_wide_s8_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_s16_m(pg: svbool_t, op1: svint16_t, op2: svuint64_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.asr.wide.nxv8i16"
+        )]
+        fn _svasr_wide_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svint16_t;
+    }
+    unsafe { _svasr_wide_s16_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_n_s16_m(pg: svbool_t, op1: svint16_t, op2: u64) -> svint16_t {
+    svasr_wide_s16_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_s16_x(pg: svbool_t, op1: svint16_t, op2: svuint64_t) -> svint16_t {
+    svasr_wide_s16_m(pg, op1, op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_n_s16_x(pg: svbool_t, op1: svint16_t, op2: u64) -> svint16_t {
+    svasr_wide_s16_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_s16_z(pg: svbool_t, op1: svint16_t, op2: svuint64_t) -> svint16_t {
+    svasr_wide_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_n_s16_z(pg: svbool_t, op1: svint16_t, op2: u64) -> svint16_t {
+    svasr_wide_s16_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_s32_m(pg: svbool_t, op1: svint32_t, op2: svuint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.asr.wide.nxv4i32"
+        )]
+        fn _svasr_wide_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svasr_wide_s32_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_n_s32_m(pg: svbool_t, op1: svint32_t, op2: u64) -> svint32_t {
+    svasr_wide_s32_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_s32_x(pg: svbool_t, op1: svint32_t, op2: svuint64_t) -> svint32_t {
+    svasr_wide_s32_m(pg, op1, op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_n_s32_x(pg: svbool_t, op1: svint32_t, op2: u64) -> svint32_t {
+    svasr_wide_s32_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_s32_z(pg: svbool_t, op1: svint32_t, op2: svuint64_t) -> svint32_t {
+    svasr_wide_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Arithmetic shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasr_wide[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asr))]
+pub fn svasr_wide_n_s32_z(pg: svbool_t, op1: svint32_t, op2: u64) -> svint32_t {
+    svasr_wide_s32_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s8_m<const IMM2: i32>(pg: svbool_t, op1: svint8_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.asrd.nxv16i8")]
+        fn _svasrd_n_s8_m(pg: svbool_t, op1: svint8_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svasrd_n_s8_m(pg, op1, IMM2) }
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s8_x<const IMM2: i32>(pg: svbool_t, op1: svint8_t) -> svint8_t {
+    svasrd_n_s8_m::<IMM2>(pg, op1)
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s8_z<const IMM2: i32>(pg: svbool_t, op1: svint8_t) -> svint8_t {
+    svasrd_n_s8_m::<IMM2>(pg, svsel_s8(pg, op1, svdup_n_s8(0)))
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s16_m<const IMM2: i32>(pg: svbool_t, op1: svint16_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.asrd.nxv8i16")]
+        fn _svasrd_n_s16_m(pg: svbool8_t, op1: svint16_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svasrd_n_s16_m(pg.sve_into(), op1, IMM2) }
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s16_x<const IMM2: i32>(pg: svbool_t, op1: svint16_t) -> svint16_t {
+    svasrd_n_s16_m::<IMM2>(pg, op1)
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s16_z<const IMM2: i32>(pg: svbool_t, op1: svint16_t) -> svint16_t {
+    svasrd_n_s16_m::<IMM2>(pg, svsel_s16(pg, op1, svdup_n_s16(0)))
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s32_m<const IMM2: i32>(pg: svbool_t, op1: svint32_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.asrd.nxv4i32")]
+        fn _svasrd_n_s32_m(pg: svbool4_t, op1: svint32_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svasrd_n_s32_m(pg.sve_into(), op1, IMM2) }
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s32_x<const IMM2: i32>(pg: svbool_t, op1: svint32_t) -> svint32_t {
+    svasrd_n_s32_m::<IMM2>(pg, op1)
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s32_z<const IMM2: i32>(pg: svbool_t, op1: svint32_t) -> svint32_t {
+    svasrd_n_s32_m::<IMM2>(pg, svsel_s32(pg, op1, svdup_n_s32(0)))
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s64_m<const IMM2: i32>(pg: svbool_t, op1: svint64_t) -> svint64_t {
+    static_assert_range!(IMM2, 1..=64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.asrd.nxv2i64")]
+        fn _svasrd_n_s64_m(pg: svbool2_t, op1: svint64_t, imm2: i32) -> svint64_t;
+    }
+    unsafe { _svasrd_n_s64_m(pg.sve_into(), op1, IMM2) }
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s64_x<const IMM2: i32>(pg: svbool_t, op1: svint64_t) -> svint64_t {
+    svasrd_n_s64_m::<IMM2>(pg, op1)
+}
+#[doc = "Arithmetic shift right for divide by immediate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svasrd[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(asrd, IMM2 = 1))]
+pub fn svasrd_n_s64_z<const IMM2: i32>(pg: svbool_t, op1: svint64_t) -> svint64_t {
+    svasrd_n_s64_m::<IMM2>(pg, svsel_s64(pg, op1, svdup_n_s64(0)))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bic.z.nvx16i1")]
+        fn _svbic_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svbic_b_z(pg, op1, op2) }
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bic.nxv16i8")]
+        fn _svbic_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svbic_s8_m(pg, op1, op2) }
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svbic_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svbic_s8_m(pg, op1, op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svbic_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svbic_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svbic_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bic.nxv8i16")]
+        fn _svbic_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svbic_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svbic_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svbic_s16_m(pg, op1, op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svbic_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svbic_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svbic_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bic.nxv4i32")]
+        fn _svbic_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svbic_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svbic_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svbic_s32_m(pg, op1, op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svbic_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svbic_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svbic_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bic.nxv2i64")]
+        fn _svbic_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svbic_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svbic_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svbic_s64_m(pg, op1, op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svbic_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svbic_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svbic_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svbic_s8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svbic_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svbic_u8_m(pg, op1, op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svbic_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svbic_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svbic_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svbic_s16_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svbic_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svbic_u16_m(pg, op1, op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svbic_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svbic_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svbic_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svbic_s32_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svbic_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svbic_u32_m(pg, op1, op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svbic_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svbic_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svbic_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svbic_s64_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svbic_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svbic_u64_m(pg, op1, op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svbic_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svbic_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Bitwise clear"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbic[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bic))]
+pub fn svbic_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svbic_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Break after first true condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbrka[_b]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(brka))]
+pub fn svbrka_b_m(inactive: svbool_t, pg: svbool_t, op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.brka.nxv16i1")]
+        fn _svbrka_b_m(inactive: svbool_t, pg: svbool_t, op: svbool_t) -> svbool_t;
+    }
+    unsafe { _svbrka_b_m(inactive, pg, op) }
+}
+#[doc = "Break after first true condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbrka[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(brka))]
+pub fn svbrka_b_z(pg: svbool_t, op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.brka.z.nxv16i1")]
+        fn _svbrka_b_z(pg: svbool_t, op: svbool_t) -> svbool_t;
+    }
+    unsafe { _svbrka_b_z(pg, op) }
+}
+#[doc = "Break before first true condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbrkb[_b]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(brkb))]
+pub fn svbrkb_b_m(inactive: svbool_t, pg: svbool_t, op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.brkb.nxv16i1")]
+        fn _svbrkb_b_m(inactive: svbool_t, pg: svbool_t, op: svbool_t) -> svbool_t;
+    }
+    unsafe { _svbrkb_b_m(inactive, pg, op) }
+}
+#[doc = "Break before first true condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbrkb[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(brkb))]
+pub fn svbrkb_b_z(pg: svbool_t, op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.brkb.z.nxv16i1")]
+        fn _svbrkb_b_z(pg: svbool_t, op: svbool_t) -> svbool_t;
+    }
+    unsafe { _svbrkb_b_z(pg, op) }
+}
+#[doc = "Propagate break to next partition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbrkn[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(brkn))]
+pub fn svbrkn_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.brkn.z.nxv16i1")]
+        fn _svbrkn_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svbrkn_b_z(pg, op1, op2) }
+}
+#[doc = "Break after first true condition, propagating from previous partition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbrkpa[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(brkpa))]
+pub fn svbrkpa_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.brkpa.z.nxv16i1"
+        )]
+        fn _svbrkpa_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svbrkpa_b_z(pg, op1, op2) }
+}
+#[doc = "Break before first true condition, propagating from previous partition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbrkpb[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(brkpb))]
+pub fn svbrkpb_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.brkpb.z.nxv16i1"
+        )]
+        fn _svbrkpb_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svbrkpb_b_z(pg, op1, op2) }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcadd, IMM_ROTATION = 90))]
+pub fn svcadd_f32_m<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+) -> svfloat32_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcadd.nxv4f32")]
+        fn _svcadd_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            imm_rotation: i32,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svcadd_f32_m(pg.sve_into(), op1, op2, IMM_ROTATION) }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcadd, IMM_ROTATION = 90))]
+pub fn svcadd_f32_x<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+) -> svfloat32_t {
+    svcadd_f32_m::<IMM_ROTATION>(pg, op1, op2)
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcadd, IMM_ROTATION = 90))]
+pub fn svcadd_f32_z<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+) -> svfloat32_t {
+    svcadd_f32_m::<IMM_ROTATION>(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcadd, IMM_ROTATION = 90))]
+pub fn svcadd_f64_m<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+) -> svfloat64_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcadd.nxv2f64")]
+        fn _svcadd_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            imm_rotation: i32,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svcadd_f64_m(pg.sve_into(), op1, op2, IMM_ROTATION) }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcadd, IMM_ROTATION = 90))]
+pub fn svcadd_f64_x<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+) -> svfloat64_t {
+    svcadd_f64_m::<IMM_ROTATION>(pg, op1, op2)
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcadd, IMM_ROTATION = 90))]
+pub fn svcadd_f64_z<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+) -> svfloat64_t {
+    svcadd_f64_m::<IMM_ROTATION>(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_f32(pg: svbool_t, fallback: svfloat32_t, data: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clasta.nxv4f32")]
+        fn _svclasta_f32(pg: svbool4_t, fallback: svfloat32_t, data: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svclasta_f32(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_f64(pg: svbool_t, fallback: svfloat64_t, data: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clasta.nxv2f64")]
+        fn _svclasta_f64(pg: svbool2_t, fallback: svfloat64_t, data: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svclasta_f64(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_s8(pg: svbool_t, fallback: svint8_t, data: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clasta.nxv16i8")]
+        fn _svclasta_s8(pg: svbool_t, fallback: svint8_t, data: svint8_t) -> svint8_t;
+    }
+    unsafe { _svclasta_s8(pg, fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_s16(pg: svbool_t, fallback: svint16_t, data: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clasta.nxv8i16")]
+        fn _svclasta_s16(pg: svbool8_t, fallback: svint16_t, data: svint16_t) -> svint16_t;
+    }
+    unsafe { _svclasta_s16(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_s32(pg: svbool_t, fallback: svint32_t, data: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clasta.nxv4i32")]
+        fn _svclasta_s32(pg: svbool4_t, fallback: svint32_t, data: svint32_t) -> svint32_t;
+    }
+    unsafe { _svclasta_s32(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_s64(pg: svbool_t, fallback: svint64_t, data: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clasta.nxv2i64")]
+        fn _svclasta_s64(pg: svbool2_t, fallback: svint64_t, data: svint64_t) -> svint64_t;
+    }
+    unsafe { _svclasta_s64(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_u8(pg: svbool_t, fallback: svuint8_t, data: svuint8_t) -> svuint8_t {
+    unsafe { svclasta_s8(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_u16(pg: svbool_t, fallback: svuint16_t, data: svuint16_t) -> svuint16_t {
+    unsafe { svclasta_s16(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_u32(pg: svbool_t, fallback: svuint32_t, data: svuint32_t) -> svuint32_t {
+    unsafe { svclasta_s32(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_u64(pg: svbool_t, fallback: svuint64_t, data: svuint64_t) -> svuint64_t {
+    unsafe { svclasta_s64(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_f32(pg: svbool_t, fallback: f32, data: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clasta.n.nxv4f32"
+        )]
+        fn _svclasta_n_f32(pg: svbool4_t, fallback: f32, data: svfloat32_t) -> f32;
+    }
+    unsafe { _svclasta_n_f32(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_f64(pg: svbool_t, fallback: f64, data: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clasta.n.nxv2f64"
+        )]
+        fn _svclasta_n_f64(pg: svbool2_t, fallback: f64, data: svfloat64_t) -> f64;
+    }
+    unsafe { _svclasta_n_f64(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_s8(pg: svbool_t, fallback: i8, data: svint8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clasta.n.nxv16i8"
+        )]
+        fn _svclasta_n_s8(pg: svbool_t, fallback: i8, data: svint8_t) -> i8;
+    }
+    unsafe { _svclasta_n_s8(pg, fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_s16(pg: svbool_t, fallback: i16, data: svint16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clasta.n.nxv8i16"
+        )]
+        fn _svclasta_n_s16(pg: svbool8_t, fallback: i16, data: svint16_t) -> i16;
+    }
+    unsafe { _svclasta_n_s16(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_s32(pg: svbool_t, fallback: i32, data: svint32_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clasta.n.nxv4i32"
+        )]
+        fn _svclasta_n_s32(pg: svbool4_t, fallback: i32, data: svint32_t) -> i32;
+    }
+    unsafe { _svclasta_n_s32(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_s64(pg: svbool_t, fallback: i64, data: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clasta.n.nxv2i64"
+        )]
+        fn _svclasta_n_s64(pg: svbool2_t, fallback: i64, data: svint64_t) -> i64;
+    }
+    unsafe { _svclasta_n_s64(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_u8(pg: svbool_t, fallback: u8, data: svuint8_t) -> u8 {
+    unsafe { svclasta_n_s8(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_u16(pg: svbool_t, fallback: u16, data: svuint16_t) -> u16 {
+    unsafe { svclasta_n_s16(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_u32(pg: svbool_t, fallback: u32, data: svuint32_t) -> u32 {
+    unsafe { svclasta_n_s32(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclasta[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clasta))]
+pub fn svclasta_n_u64(pg: svbool_t, fallback: u64, data: svuint64_t) -> u64 {
+    unsafe { svclasta_n_s64(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_f32(pg: svbool_t, fallback: svfloat32_t, data: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clastb.nxv4f32")]
+        fn _svclastb_f32(pg: svbool4_t, fallback: svfloat32_t, data: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svclastb_f32(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_f64(pg: svbool_t, fallback: svfloat64_t, data: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clastb.nxv2f64")]
+        fn _svclastb_f64(pg: svbool2_t, fallback: svfloat64_t, data: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svclastb_f64(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_s8(pg: svbool_t, fallback: svint8_t, data: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clastb.nxv16i8")]
+        fn _svclastb_s8(pg: svbool_t, fallback: svint8_t, data: svint8_t) -> svint8_t;
+    }
+    unsafe { _svclastb_s8(pg, fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_s16(pg: svbool_t, fallback: svint16_t, data: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clastb.nxv8i16")]
+        fn _svclastb_s16(pg: svbool8_t, fallback: svint16_t, data: svint16_t) -> svint16_t;
+    }
+    unsafe { _svclastb_s16(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_s32(pg: svbool_t, fallback: svint32_t, data: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clastb.nxv4i32")]
+        fn _svclastb_s32(pg: svbool4_t, fallback: svint32_t, data: svint32_t) -> svint32_t;
+    }
+    unsafe { _svclastb_s32(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_s64(pg: svbool_t, fallback: svint64_t, data: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clastb.nxv2i64")]
+        fn _svclastb_s64(pg: svbool2_t, fallback: svint64_t, data: svint64_t) -> svint64_t;
+    }
+    unsafe { _svclastb_s64(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_u8(pg: svbool_t, fallback: svuint8_t, data: svuint8_t) -> svuint8_t {
+    unsafe { svclastb_s8(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_u16(pg: svbool_t, fallback: svuint16_t, data: svuint16_t) -> svuint16_t {
+    unsafe { svclastb_s16(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_u32(pg: svbool_t, fallback: svuint32_t, data: svuint32_t) -> svuint32_t {
+    unsafe { svclastb_s32(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_u64(pg: svbool_t, fallback: svuint64_t, data: svuint64_t) -> svuint64_t {
+    unsafe { svclastb_s64(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_f32(pg: svbool_t, fallback: f32, data: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clastb.n.nxv4f32"
+        )]
+        fn _svclastb_n_f32(pg: svbool4_t, fallback: f32, data: svfloat32_t) -> f32;
+    }
+    unsafe { _svclastb_n_f32(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_f64(pg: svbool_t, fallback: f64, data: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clastb.n.nxv2f64"
+        )]
+        fn _svclastb_n_f64(pg: svbool2_t, fallback: f64, data: svfloat64_t) -> f64;
+    }
+    unsafe { _svclastb_n_f64(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_s8(pg: svbool_t, fallback: i8, data: svint8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clastb.n.nxv16i8"
+        )]
+        fn _svclastb_n_s8(pg: svbool_t, fallback: i8, data: svint8_t) -> i8;
+    }
+    unsafe { _svclastb_n_s8(pg, fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_s16(pg: svbool_t, fallback: i16, data: svint16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clastb.n.nxv8i16"
+        )]
+        fn _svclastb_n_s16(pg: svbool8_t, fallback: i16, data: svint16_t) -> i16;
+    }
+    unsafe { _svclastb_n_s16(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_s32(pg: svbool_t, fallback: i32, data: svint32_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clastb.n.nxv4i32"
+        )]
+        fn _svclastb_n_s32(pg: svbool4_t, fallback: i32, data: svint32_t) -> i32;
+    }
+    unsafe { _svclastb_n_s32(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_s64(pg: svbool_t, fallback: i64, data: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.clastb.n.nxv2i64"
+        )]
+        fn _svclastb_n_s64(pg: svbool2_t, fallback: i64, data: svint64_t) -> i64;
+    }
+    unsafe { _svclastb_n_s64(pg.sve_into(), fallback, data) }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_u8(pg: svbool_t, fallback: u8, data: svuint8_t) -> u8 {
+    unsafe { svclastb_n_s8(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_u16(pg: svbool_t, fallback: u16, data: svuint16_t) -> u16 {
+    unsafe { svclastb_n_s16(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_u32(pg: svbool_t, fallback: u32, data: svuint32_t) -> u32 {
+    unsafe { svclastb_n_s32(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Conditionally extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclastb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clastb))]
+pub fn svclastb_n_u64(pg: svbool_t, fallback: u64, data: svuint64_t) -> u64 {
+    unsafe { svclastb_n_s64(pg, fallback.as_signed(), data.as_signed()).as_unsigned() }
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s8_m(inactive: svuint8_t, pg: svbool_t, op: svint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cls.nxv16i8")]
+        fn _svcls_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svcls_s8_m(inactive.as_signed(), pg, op).as_unsigned() }
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s8_x(pg: svbool_t, op: svint8_t) -> svuint8_t {
+    unsafe { svcls_s8_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s8_z(pg: svbool_t, op: svint8_t) -> svuint8_t {
+    svcls_s8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s16_m(inactive: svuint16_t, pg: svbool_t, op: svint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cls.nxv8i16")]
+        fn _svcls_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svcls_s16_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s16_x(pg: svbool_t, op: svint16_t) -> svuint16_t {
+    unsafe { svcls_s16_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s16_z(pg: svbool_t, op: svint16_t) -> svuint16_t {
+    svcls_s16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s32_m(inactive: svuint32_t, pg: svbool_t, op: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cls.nxv4i32")]
+        fn _svcls_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svcls_s32_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s32_x(pg: svbool_t, op: svint32_t) -> svuint32_t {
+    unsafe { svcls_s32_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s32_z(pg: svbool_t, op: svint32_t) -> svuint32_t {
+    svcls_s32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s64_m(inactive: svuint64_t, pg: svbool_t, op: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cls.nxv2i64")]
+        fn _svcls_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svcls_s64_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s64_x(pg: svbool_t, op: svint64_t) -> svuint64_t {
+    unsafe { svcls_s64_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count leading sign bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcls[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cls))]
+pub fn svcls_s64_z(pg: svbool_t, op: svint64_t) -> svuint64_t {
+    svcls_s64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s8_m(inactive: svuint8_t, pg: svbool_t, op: svint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clz.nxv16i8")]
+        fn _svclz_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svclz_s8_m(inactive.as_signed(), pg, op).as_unsigned() }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s8_x(pg: svbool_t, op: svint8_t) -> svuint8_t {
+    unsafe { svclz_s8_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s8_z(pg: svbool_t, op: svint8_t) -> svuint8_t {
+    svclz_s8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s16_m(inactive: svuint16_t, pg: svbool_t, op: svint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clz.nxv8i16")]
+        fn _svclz_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svclz_s16_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s16_x(pg: svbool_t, op: svint16_t) -> svuint16_t {
+    unsafe { svclz_s16_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s16_z(pg: svbool_t, op: svint16_t) -> svuint16_t {
+    svclz_s16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s32_m(inactive: svuint32_t, pg: svbool_t, op: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clz.nxv4i32")]
+        fn _svclz_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svclz_s32_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s32_x(pg: svbool_t, op: svint32_t) -> svuint32_t {
+    unsafe { svclz_s32_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s32_z(pg: svbool_t, op: svint32_t) -> svuint32_t {
+    svclz_s32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s64_m(inactive: svuint64_t, pg: svbool_t, op: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.clz.nxv2i64")]
+        fn _svclz_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svclz_s64_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s64_x(pg: svbool_t, op: svint64_t) -> svuint64_t {
+    unsafe { svclz_s64_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_s64_z(pg: svbool_t, op: svint64_t) -> svuint64_t {
+    svclz_s64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u8_m(inactive: svuint8_t, pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    unsafe { svclz_s8_m(inactive, pg, op.as_signed()) }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u8_x(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svclz_u8_m(op, pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u8_z(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svclz_u8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u16_m(inactive: svuint16_t, pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    unsafe { svclz_s16_m(inactive, pg, op.as_signed()) }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u16_x(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svclz_u16_m(op, pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u16_z(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svclz_u16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe { svclz_s32_m(inactive, pg, op.as_signed()) }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svclz_u32_m(op, pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svclz_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe { svclz_s64_m(inactive, pg, op.as_signed()) }
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svclz_u64_m(op, pg, op)
+}
+#[doc = "Count leading zero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svclz[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(clz))]
+pub fn svclz_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svclz_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmla, IMM_ROTATION = 90))]
+pub fn svcmla_f32_m<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmla.nxv4f32")]
+        fn _svcmla_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+            imm_rotation: i32,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svcmla_f32_m(pg.sve_into(), op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmla, IMM_ROTATION = 90))]
+pub fn svcmla_f32_x<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svcmla_f32_m::<IMM_ROTATION>(pg, op1, op2, op3)
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmla, IMM_ROTATION = 90))]
+pub fn svcmla_f32_z<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svcmla_f32_m::<IMM_ROTATION>(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2, op3)
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmla, IMM_ROTATION = 90))]
+pub fn svcmla_f64_m<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmla.nxv2f64")]
+        fn _svcmla_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+            imm_rotation: i32,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svcmla_f64_m(pg.sve_into(), op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmla, IMM_ROTATION = 90))]
+pub fn svcmla_f64_x<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svcmla_f64_m::<IMM_ROTATION>(pg, op1, op2, op3)
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmla, IMM_ROTATION = 90))]
+pub fn svcmla_f64_z<const IMM_ROTATION: i32>(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svcmla_f64_m::<IMM_ROTATION>(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2, op3)
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla_lane[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmla, IMM_INDEX = 0, IMM_ROTATION = 90))]
+pub fn svcmla_lane_f32<const IMM_INDEX: i32, const IMM_ROTATION: i32>(
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fcmla.lane.x.nxv4f32"
+        )]
+        fn _svcmla_lane_f32(
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+            imm_index: i32,
+            imm_rotation: i32,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svcmla_lane_f32(op1, op2, op3, IMM_INDEX, IMM_ROTATION) }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+pub fn svcmpeq_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpeq.nxv4f32")]
+        fn _svcmpeq_f32(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpeq_f32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+pub fn svcmpeq_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svcmpeq_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+pub fn svcmpeq_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpeq.nxv2f64")]
+        fn _svcmpeq_f64(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpeq_f64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmeq))]
+pub fn svcmpeq_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svcmpeq_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpeq.nxv16i8")]
+        fn _svcmpeq_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t;
+    }
+    unsafe { _svcmpeq_s8(pg, op1, op2) }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_n_s8(pg: svbool_t, op1: svint8_t, op2: i8) -> svbool_t {
+    svcmpeq_s8(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpeq.nxv8i16")]
+        fn _svcmpeq_s16(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svbool8_t;
+    }
+    unsafe { _svcmpeq_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_n_s16(pg: svbool_t, op1: svint16_t, op2: i16) -> svbool_t {
+    svcmpeq_s16(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_s32(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpeq.nxv4i32")]
+        fn _svcmpeq_s32(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpeq_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_n_s32(pg: svbool_t, op1: svint32_t, op2: i32) -> svbool_t {
+    svcmpeq_s32(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_s64(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpeq.nxv2i64")]
+        fn _svcmpeq_s64(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpeq_s64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_n_s64(pg: svbool_t, op1: svint64_t, op2: i64) -> svbool_t {
+    svcmpeq_s64(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svbool_t {
+    unsafe { svcmpeq_s8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_n_u8(pg: svbool_t, op1: svuint8_t, op2: u8) -> svbool_t {
+    svcmpeq_u8(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svbool_t {
+    unsafe { svcmpeq_s16(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_n_u16(pg: svbool_t, op1: svuint16_t, op2: u16) -> svbool_t {
+    svcmpeq_u16(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_u32(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svbool_t {
+    unsafe { svcmpeq_s32(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_n_u32(pg: svbool_t, op1: svuint32_t, op2: u32) -> svbool_t {
+    svcmpeq_u32(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_u64(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svbool_t {
+    unsafe { svcmpeq_s64(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_n_u64(pg: svbool_t, op1: svuint64_t, op2: u64) -> svbool_t {
+    svcmpeq_u64(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq_wide[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpeq.wide.nxv16i8"
+        )]
+        fn _svcmpeq_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmpeq_wide_s8(pg, op1, op2) }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq_wide[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_wide_n_s8(pg: svbool_t, op1: svint8_t, op2: i64) -> svbool_t {
+    svcmpeq_wide_s8(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq_wide[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_wide_s16(pg: svbool_t, op1: svint16_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpeq.wide.nxv8i16"
+        )]
+        fn _svcmpeq_wide_s16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmpeq_wide_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq_wide[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_wide_n_s16(pg: svbool_t, op1: svint16_t, op2: i64) -> svbool_t {
+    svcmpeq_wide_s16(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq_wide[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_wide_s32(pg: svbool_t, op1: svint32_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpeq.wide.nxv4i32"
+        )]
+        fn _svcmpeq_wide_s32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmpeq_wide_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpeq_wide[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpeq))]
+pub fn svcmpeq_wide_n_s32(pg: svbool_t, op1: svint32_t, op2: i64) -> svbool_t {
+    svcmpeq_wide_s32(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmge))]
+pub fn svcmpge_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpge.nxv4f32")]
+        fn _svcmpge_f32(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpge_f32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmge))]
+pub fn svcmpge_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svcmpge_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmge))]
+pub fn svcmpge_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpge.nxv2f64")]
+        fn _svcmpge_f64(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpge_f64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmge))]
+pub fn svcmpge_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svcmpge_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpge.nxv16i8")]
+        fn _svcmpge_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t;
+    }
+    unsafe { _svcmpge_s8(pg, op1, op2) }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_n_s8(pg: svbool_t, op1: svint8_t, op2: i8) -> svbool_t {
+    svcmpge_s8(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpge.nxv8i16")]
+        fn _svcmpge_s16(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svbool8_t;
+    }
+    unsafe { _svcmpge_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_n_s16(pg: svbool_t, op1: svint16_t, op2: i16) -> svbool_t {
+    svcmpge_s16(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_s32(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpge.nxv4i32")]
+        fn _svcmpge_s32(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpge_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_n_s32(pg: svbool_t, op1: svint32_t, op2: i32) -> svbool_t {
+    svcmpge_s32(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_s64(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpge.nxv2i64")]
+        fn _svcmpge_s64(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpge_s64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_n_s64(pg: svbool_t, op1: svint64_t, op2: i64) -> svbool_t {
+    svcmpge_s64(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmphs.nxv16i8")]
+        fn _svcmpge_u8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t;
+    }
+    unsafe { _svcmpge_u8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_n_u8(pg: svbool_t, op1: svuint8_t, op2: u8) -> svbool_t {
+    svcmpge_u8(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmphs.nxv8i16")]
+        fn _svcmpge_u16(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svbool8_t;
+    }
+    unsafe { _svcmpge_u16(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_n_u16(pg: svbool_t, op1: svuint16_t, op2: u16) -> svbool_t {
+    svcmpge_u16(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_u32(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmphs.nxv4i32")]
+        fn _svcmpge_u32(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpge_u32(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_n_u32(pg: svbool_t, op1: svuint32_t, op2: u32) -> svbool_t {
+    svcmpge_u32(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_u64(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmphs.nxv2i64")]
+        fn _svcmpge_u64(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpge_u64(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_n_u64(pg: svbool_t, op1: svuint64_t, op2: u64) -> svbool_t {
+    svcmpge_u64(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpge.wide.nxv16i8"
+        )]
+        fn _svcmpge_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmpge_wide_s8(pg, op1, op2) }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_wide_n_s8(pg: svbool_t, op1: svint8_t, op2: i64) -> svbool_t {
+    svcmpge_wide_s8(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_wide_s16(pg: svbool_t, op1: svint16_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpge.wide.nxv8i16"
+        )]
+        fn _svcmpge_wide_s16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmpge_wide_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_wide_n_s16(pg: svbool_t, op1: svint16_t, op2: i64) -> svbool_t {
+    svcmpge_wide_s16(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_wide_s32(pg: svbool_t, op1: svint32_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpge.wide.nxv4i32"
+        )]
+        fn _svcmpge_wide_s32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmpge_wide_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmpge_wide_n_s32(pg: svbool_t, op1: svint32_t, op2: i64) -> svbool_t {
+    svcmpge_wide_s32(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_wide_u8(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmphs.wide.nxv16i8"
+        )]
+        fn _svcmpge_wide_u8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmpge_wide_u8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_wide_n_u8(pg: svbool_t, op1: svuint8_t, op2: u64) -> svbool_t {
+    svcmpge_wide_u8(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_wide_u16(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmphs.wide.nxv8i16"
+        )]
+        fn _svcmpge_wide_u16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmpge_wide_u16(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_wide_n_u16(pg: svbool_t, op1: svuint16_t, op2: u64) -> svbool_t {
+    svcmpge_wide_u16(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_wide_u32(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmphs.wide.nxv4i32"
+        )]
+        fn _svcmpge_wide_u32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmpge_wide_u32(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpge_wide[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmpge_wide_n_u32(pg: svbool_t, op1: svuint32_t, op2: u64) -> svbool_t {
+    svcmpge_wide_u32(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+pub fn svcmpgt_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpgt.nxv4f32")]
+        fn _svcmpgt_f32(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpgt_f32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+pub fn svcmpgt_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svcmpgt_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+pub fn svcmpgt_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpgt.nxv2f64")]
+        fn _svcmpgt_f64(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpgt_f64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+pub fn svcmpgt_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svcmpgt_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpgt.nxv16i8")]
+        fn _svcmpgt_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t;
+    }
+    unsafe { _svcmpgt_s8(pg, op1, op2) }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_n_s8(pg: svbool_t, op1: svint8_t, op2: i8) -> svbool_t {
+    svcmpgt_s8(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpgt.nxv8i16")]
+        fn _svcmpgt_s16(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svbool8_t;
+    }
+    unsafe { _svcmpgt_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_n_s16(pg: svbool_t, op1: svint16_t, op2: i16) -> svbool_t {
+    svcmpgt_s16(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_s32(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpgt.nxv4i32")]
+        fn _svcmpgt_s32(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpgt_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_n_s32(pg: svbool_t, op1: svint32_t, op2: i32) -> svbool_t {
+    svcmpgt_s32(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_s64(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpgt.nxv2i64")]
+        fn _svcmpgt_s64(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpgt_s64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_n_s64(pg: svbool_t, op1: svint64_t, op2: i64) -> svbool_t {
+    svcmpgt_s64(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmphi.nxv16i8")]
+        fn _svcmpgt_u8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t;
+    }
+    unsafe { _svcmpgt_u8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_n_u8(pg: svbool_t, op1: svuint8_t, op2: u8) -> svbool_t {
+    svcmpgt_u8(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmphi.nxv8i16")]
+        fn _svcmpgt_u16(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svbool8_t;
+    }
+    unsafe { _svcmpgt_u16(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_n_u16(pg: svbool_t, op1: svuint16_t, op2: u16) -> svbool_t {
+    svcmpgt_u16(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_u32(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmphi.nxv4i32")]
+        fn _svcmpgt_u32(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpgt_u32(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_n_u32(pg: svbool_t, op1: svuint32_t, op2: u32) -> svbool_t {
+    svcmpgt_u32(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_u64(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmphi.nxv2i64")]
+        fn _svcmpgt_u64(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpgt_u64(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_n_u64(pg: svbool_t, op1: svuint64_t, op2: u64) -> svbool_t {
+    svcmpgt_u64(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpgt.wide.nxv16i8"
+        )]
+        fn _svcmpgt_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmpgt_wide_s8(pg, op1, op2) }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_wide_n_s8(pg: svbool_t, op1: svint8_t, op2: i64) -> svbool_t {
+    svcmpgt_wide_s8(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_wide_s16(pg: svbool_t, op1: svint16_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpgt.wide.nxv8i16"
+        )]
+        fn _svcmpgt_wide_s16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmpgt_wide_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_wide_n_s16(pg: svbool_t, op1: svint16_t, op2: i64) -> svbool_t {
+    svcmpgt_wide_s16(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_wide_s32(pg: svbool_t, op1: svint32_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpgt.wide.nxv4i32"
+        )]
+        fn _svcmpgt_wide_s32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmpgt_wide_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmpgt_wide_n_s32(pg: svbool_t, op1: svint32_t, op2: i64) -> svbool_t {
+    svcmpgt_wide_s32(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_wide_u8(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmphi.wide.nxv16i8"
+        )]
+        fn _svcmpgt_wide_u8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmpgt_wide_u8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_wide_n_u8(pg: svbool_t, op1: svuint8_t, op2: u64) -> svbool_t {
+    svcmpgt_wide_u8(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_wide_u16(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmphi.wide.nxv8i16"
+        )]
+        fn _svcmpgt_wide_u16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmpgt_wide_u16(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_wide_n_u16(pg: svbool_t, op1: svuint16_t, op2: u64) -> svbool_t {
+    svcmpgt_wide_u16(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_wide_u32(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmphi.wide.nxv4i32"
+        )]
+        fn _svcmpgt_wide_u32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmpgt_wide_u32(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpgt_wide[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmpgt_wide_n_u32(pg: svbool_t, op1: svuint32_t, op2: u64) -> svbool_t {
+    svcmpgt_wide_u32(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmge))]
+pub fn svcmple_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    svcmpge_f32(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmge))]
+pub fn svcmple_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svcmple_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmge))]
+pub fn svcmple_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    svcmpge_f64(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmge))]
+pub fn svcmple_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svcmple_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmple_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t {
+    svcmpge_s8(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmple_n_s8(pg: svbool_t, op1: svint8_t, op2: i8) -> svbool_t {
+    svcmple_s8(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmple_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svbool_t {
+    svcmpge_s16(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmple_n_s16(pg: svbool_t, op1: svint16_t, op2: i16) -> svbool_t {
+    svcmple_s16(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmple_s32(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svbool_t {
+    svcmpge_s32(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmple_n_s32(pg: svbool_t, op1: svint32_t, op2: i32) -> svbool_t {
+    svcmple_s32(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmple_s64(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svbool_t {
+    svcmpge_s64(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpge))]
+pub fn svcmple_n_s64(pg: svbool_t, op1: svint64_t, op2: i64) -> svbool_t {
+    svcmple_s64(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmple_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svbool_t {
+    svcmpge_u8(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmple_n_u8(pg: svbool_t, op1: svuint8_t, op2: u8) -> svbool_t {
+    svcmple_u8(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmple_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svbool_t {
+    svcmpge_u16(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmple_n_u16(pg: svbool_t, op1: svuint16_t, op2: u16) -> svbool_t {
+    svcmple_u16(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmple_u32(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svbool_t {
+    svcmpge_u32(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmple_n_u32(pg: svbool_t, op1: svuint32_t, op2: u32) -> svbool_t {
+    svcmple_u32(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmple_u64(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svbool_t {
+    svcmpge_u64(pg, op2, op1)
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphs))]
+pub fn svcmple_n_u64(pg: svbool_t, op1: svuint64_t, op2: u64) -> svbool_t {
+    svcmple_u64(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmple))]
+pub fn svcmple_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmple.wide.nxv16i8"
+        )]
+        fn _svcmple_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmple_wide_s8(pg, op1, op2) }
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmple))]
+pub fn svcmple_wide_n_s8(pg: svbool_t, op1: svint8_t, op2: i64) -> svbool_t {
+    svcmple_wide_s8(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmple))]
+pub fn svcmple_wide_s16(pg: svbool_t, op1: svint16_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmple.wide.nxv8i16"
+        )]
+        fn _svcmple_wide_s16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmple_wide_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmple))]
+pub fn svcmple_wide_n_s16(pg: svbool_t, op1: svint16_t, op2: i64) -> svbool_t {
+    svcmple_wide_s16(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmple))]
+pub fn svcmple_wide_s32(pg: svbool_t, op1: svint32_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmple.wide.nxv4i32"
+        )]
+        fn _svcmple_wide_s32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmple_wide_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmple))]
+pub fn svcmple_wide_n_s32(pg: svbool_t, op1: svint32_t, op2: i64) -> svbool_t {
+    svcmple_wide_s32(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpls))]
+pub fn svcmple_wide_u8(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpls.wide.nxv16i8"
+        )]
+        fn _svcmple_wide_u8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmple_wide_u8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpls))]
+pub fn svcmple_wide_n_u8(pg: svbool_t, op1: svuint8_t, op2: u64) -> svbool_t {
+    svcmple_wide_u8(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpls))]
+pub fn svcmple_wide_u16(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpls.wide.nxv8i16"
+        )]
+        fn _svcmple_wide_u16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmple_wide_u16(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpls))]
+pub fn svcmple_wide_n_u16(pg: svbool_t, op1: svuint16_t, op2: u64) -> svbool_t {
+    svcmple_wide_u16(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpls))]
+pub fn svcmple_wide_u32(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpls.wide.nxv4i32"
+        )]
+        fn _svcmple_wide_u32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmple_wide_u32(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmple_wide[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpls))]
+pub fn svcmple_wide_n_u32(pg: svbool_t, op1: svuint32_t, op2: u64) -> svbool_t {
+    svcmple_wide_u32(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+pub fn svcmplt_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    svcmpgt_f32(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+pub fn svcmplt_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svcmplt_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+pub fn svcmplt_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    svcmpgt_f64(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmgt))]
+pub fn svcmplt_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svcmplt_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmplt_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t {
+    svcmpgt_s8(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmplt_n_s8(pg: svbool_t, op1: svint8_t, op2: i8) -> svbool_t {
+    svcmplt_s8(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmplt_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svbool_t {
+    svcmpgt_s16(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmplt_n_s16(pg: svbool_t, op1: svint16_t, op2: i16) -> svbool_t {
+    svcmplt_s16(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmplt_s32(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svbool_t {
+    svcmpgt_s32(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmplt_n_s32(pg: svbool_t, op1: svint32_t, op2: i32) -> svbool_t {
+    svcmplt_s32(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmplt_s64(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svbool_t {
+    svcmpgt_s64(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpgt))]
+pub fn svcmplt_n_s64(pg: svbool_t, op1: svint64_t, op2: i64) -> svbool_t {
+    svcmplt_s64(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmplt_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svbool_t {
+    svcmpgt_u8(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmplt_n_u8(pg: svbool_t, op1: svuint8_t, op2: u8) -> svbool_t {
+    svcmplt_u8(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmplt_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svbool_t {
+    svcmpgt_u16(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmplt_n_u16(pg: svbool_t, op1: svuint16_t, op2: u16) -> svbool_t {
+    svcmplt_u16(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmplt_u32(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svbool_t {
+    svcmpgt_u32(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmplt_n_u32(pg: svbool_t, op1: svuint32_t, op2: u32) -> svbool_t {
+    svcmplt_u32(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmplt_u64(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svbool_t {
+    svcmpgt_u64(pg, op2, op1)
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmphi))]
+pub fn svcmplt_n_u64(pg: svbool_t, op1: svuint64_t, op2: u64) -> svbool_t {
+    svcmplt_u64(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplt))]
+pub fn svcmplt_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmplt.wide.nxv16i8"
+        )]
+        fn _svcmplt_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmplt_wide_s8(pg, op1, op2) }
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplt))]
+pub fn svcmplt_wide_n_s8(pg: svbool_t, op1: svint8_t, op2: i64) -> svbool_t {
+    svcmplt_wide_s8(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplt))]
+pub fn svcmplt_wide_s16(pg: svbool_t, op1: svint16_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmplt.wide.nxv8i16"
+        )]
+        fn _svcmplt_wide_s16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmplt_wide_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplt))]
+pub fn svcmplt_wide_n_s16(pg: svbool_t, op1: svint16_t, op2: i64) -> svbool_t {
+    svcmplt_wide_s16(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplt))]
+pub fn svcmplt_wide_s32(pg: svbool_t, op1: svint32_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmplt.wide.nxv4i32"
+        )]
+        fn _svcmplt_wide_s32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmplt_wide_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplt))]
+pub fn svcmplt_wide_n_s32(pg: svbool_t, op1: svint32_t, op2: i64) -> svbool_t {
+    svcmplt_wide_s32(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplo))]
+pub fn svcmplt_wide_u8(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmplo.wide.nxv16i8"
+        )]
+        fn _svcmplt_wide_u8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmplt_wide_u8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplo))]
+pub fn svcmplt_wide_n_u8(pg: svbool_t, op1: svuint8_t, op2: u64) -> svbool_t {
+    svcmplt_wide_u8(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplo))]
+pub fn svcmplt_wide_u16(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmplo.wide.nxv8i16"
+        )]
+        fn _svcmplt_wide_u16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmplt_wide_u16(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplo))]
+pub fn svcmplt_wide_n_u16(pg: svbool_t, op1: svuint16_t, op2: u64) -> svbool_t {
+    svcmplt_wide_u16(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplo))]
+pub fn svcmplt_wide_u32(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmplo.wide.nxv4i32"
+        )]
+        fn _svcmplt_wide_u32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmplt_wide_u32(pg.sve_into(), op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Compare less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmplt_wide[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmplo))]
+pub fn svcmplt_wide_n_u32(pg: svbool_t, op1: svuint32_t, op2: u64) -> svbool_t {
+    svcmplt_wide_u32(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmne))]
+pub fn svcmpne_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpne.nxv4f32")]
+        fn _svcmpne_f32(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpne_f32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmne))]
+pub fn svcmpne_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svcmpne_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmne))]
+pub fn svcmpne_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpne.nxv2f64")]
+        fn _svcmpne_f64(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpne_f64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmne))]
+pub fn svcmpne_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svcmpne_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpne.nxv16i8")]
+        fn _svcmpne_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t;
+    }
+    unsafe { _svcmpne_s8(pg, op1, op2) }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_n_s8(pg: svbool_t, op1: svint8_t, op2: i8) -> svbool_t {
+    svcmpne_s8(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpne.nxv8i16")]
+        fn _svcmpne_s16(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svbool8_t;
+    }
+    unsafe { _svcmpne_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_n_s16(pg: svbool_t, op1: svint16_t, op2: i16) -> svbool_t {
+    svcmpne_s16(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_s32(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpne.nxv4i32")]
+        fn _svcmpne_s32(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpne_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_n_s32(pg: svbool_t, op1: svint32_t, op2: i32) -> svbool_t {
+    svcmpne_s32(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_s64(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmpne.nxv2i64")]
+        fn _svcmpne_s64(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpne_s64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_n_s64(pg: svbool_t, op1: svint64_t, op2: i64) -> svbool_t {
+    svcmpne_s64(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svbool_t {
+    unsafe { svcmpne_s8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_n_u8(pg: svbool_t, op1: svuint8_t, op2: u8) -> svbool_t {
+    svcmpne_u8(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svbool_t {
+    unsafe { svcmpne_s16(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_n_u16(pg: svbool_t, op1: svuint16_t, op2: u16) -> svbool_t {
+    svcmpne_u16(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_u32(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svbool_t {
+    unsafe { svcmpne_s32(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_n_u32(pg: svbool_t, op1: svuint32_t, op2: u32) -> svbool_t {
+    svcmpne_u32(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_u64(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svbool_t {
+    unsafe { svcmpne_s64(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_n_u64(pg: svbool_t, op1: svuint64_t, op2: u64) -> svbool_t {
+    svcmpne_u64(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne_wide[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpne.wide.nxv16i8"
+        )]
+        fn _svcmpne_wide_s8(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svbool_t;
+    }
+    unsafe { _svcmpne_wide_s8(pg, op1, op2) }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne_wide[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_wide_n_s8(pg: svbool_t, op1: svint8_t, op2: i64) -> svbool_t {
+    svcmpne_wide_s8(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne_wide[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_wide_s16(pg: svbool_t, op1: svint16_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpne.wide.nxv8i16"
+        )]
+        fn _svcmpne_wide_s16(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svbool8_t;
+    }
+    unsafe { _svcmpne_wide_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne_wide[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_wide_n_s16(pg: svbool_t, op1: svint16_t, op2: i64) -> svbool_t {
+    svcmpne_wide_s16(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne_wide[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_wide_s32(pg: svbool_t, op1: svint32_t, op2: svint64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmpne.wide.nxv4i32"
+        )]
+        fn _svcmpne_wide_s32(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svbool4_t;
+    }
+    unsafe { _svcmpne_wide_s32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare not equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpne_wide[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmpne))]
+pub fn svcmpne_wide_n_s32(pg: svbool_t, op1: svint32_t, op2: i64) -> svbool_t {
+    svcmpne_wide_s32(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Compare unordered with"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpuo[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmuo))]
+pub fn svcmpuo_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpuo.nxv4f32")]
+        fn _svcmpuo_f32(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svbool4_t;
+    }
+    unsafe { _svcmpuo_f32(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare unordered with"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpuo[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmuo))]
+pub fn svcmpuo_n_f32(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svbool_t {
+    svcmpuo_f32(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Compare unordered with"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpuo[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmuo))]
+pub fn svcmpuo_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcmpuo.nxv2f64")]
+        fn _svcmpuo_f64(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svbool2_t;
+    }
+    unsafe { _svcmpuo_f64(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Compare unordered with"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmpuo[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcmuo))]
+pub fn svcmpuo_n_f64(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svbool_t {
+    svcmpuo_f64(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnot.nxv16i8")]
+        fn _svcnot_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svcnot_s8_m(inactive, pg, op) }
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s8_x(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svcnot_s8_m(op, pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s8_z(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svcnot_s8_m(svdup_n_s8(0), pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s16_m(inactive: svint16_t, pg: svbool_t, op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnot.nxv8i16")]
+        fn _svcnot_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svcnot_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s16_x(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svcnot_s16_m(op, pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s16_z(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svcnot_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnot.nxv4i32")]
+        fn _svcnot_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svcnot_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svcnot_s32_m(op, pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svcnot_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnot.nxv2i64")]
+        fn _svcnot_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svcnot_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svcnot_s64_m(op, pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svcnot_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u8_m(inactive: svuint8_t, pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    unsafe { svcnot_s8_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u8_x(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svcnot_u8_m(op, pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u8_z(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svcnot_u8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u16_m(inactive: svuint16_t, pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    unsafe { svcnot_s16_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u16_x(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svcnot_u16_m(op, pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u16_z(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svcnot_u16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe { svcnot_s32_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svcnot_u32_m(op, pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svcnot_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe { svcnot_s64_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svcnot_u64_m(op, pg, op)
+}
+#[doc = "Logically invert boolean condition"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnot[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnot))]
+pub fn svcnot_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svcnot_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_f32_m(inactive: svuint32_t, pg: svbool_t, op: svfloat32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnt.nxv4f32")]
+        fn _svcnt_f32_m(inactive: svint32_t, pg: svbool4_t, op: svfloat32_t) -> svint32_t;
+    }
+    unsafe { _svcnt_f32_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_f32_x(pg: svbool_t, op: svfloat32_t) -> svuint32_t {
+    unsafe { svcnt_f32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_f32_z(pg: svbool_t, op: svfloat32_t) -> svuint32_t {
+    svcnt_f32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_f64_m(inactive: svuint64_t, pg: svbool_t, op: svfloat64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnt.nxv2f64")]
+        fn _svcnt_f64_m(inactive: svint64_t, pg: svbool2_t, op: svfloat64_t) -> svint64_t;
+    }
+    unsafe { _svcnt_f64_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_f64_x(pg: svbool_t, op: svfloat64_t) -> svuint64_t {
+    unsafe { svcnt_f64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_f64_z(pg: svbool_t, op: svfloat64_t) -> svuint64_t {
+    svcnt_f64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s8_m(inactive: svuint8_t, pg: svbool_t, op: svint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnt.nxv16i8")]
+        fn _svcnt_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svcnt_s8_m(inactive.as_signed(), pg, op).as_unsigned() }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s8_x(pg: svbool_t, op: svint8_t) -> svuint8_t {
+    unsafe { svcnt_s8_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s8_z(pg: svbool_t, op: svint8_t) -> svuint8_t {
+    svcnt_s8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s16_m(inactive: svuint16_t, pg: svbool_t, op: svint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnt.nxv8i16")]
+        fn _svcnt_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svcnt_s16_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s16_x(pg: svbool_t, op: svint16_t) -> svuint16_t {
+    unsafe { svcnt_s16_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s16_z(pg: svbool_t, op: svint16_t) -> svuint16_t {
+    svcnt_s16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s32_m(inactive: svuint32_t, pg: svbool_t, op: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnt.nxv4i32")]
+        fn _svcnt_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svcnt_s32_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s32_x(pg: svbool_t, op: svint32_t) -> svuint32_t {
+    unsafe { svcnt_s32_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s32_z(pg: svbool_t, op: svint32_t) -> svuint32_t {
+    svcnt_s32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s64_m(inactive: svuint64_t, pg: svbool_t, op: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnt.nxv2i64")]
+        fn _svcnt_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svcnt_s64_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s64_x(pg: svbool_t, op: svint64_t) -> svuint64_t {
+    unsafe { svcnt_s64_m(op.as_unsigned(), pg, op) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_s64_z(pg: svbool_t, op: svint64_t) -> svuint64_t {
+    svcnt_s64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u8_m(inactive: svuint8_t, pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    unsafe { svcnt_s8_m(inactive, pg, op.as_signed()) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u8_x(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svcnt_u8_m(op, pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u8_z(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svcnt_u8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u16_m(inactive: svuint16_t, pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    unsafe { svcnt_s16_m(inactive, pg, op.as_signed()) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u16_x(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svcnt_u16_m(op, pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u16_z(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svcnt_u16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe { svcnt_s32_m(inactive, pg, op.as_signed()) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svcnt_u32_m(op, pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svcnt_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe { svcnt_s64_m(inactive, pg, op.as_signed()) }
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svcnt_u64_m(op, pg, op)
+}
+#[doc = "Count nonzero bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnt[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnt))]
+pub fn svcnt_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svcnt_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Count the number of 8-bit elements in a vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntb)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rdvl))]
+pub fn svcntb() -> u64 {
+    svcntb_pat::<{ svpattern::SV_ALL }>()
+}
+#[doc = "Count the number of 16-bit elements in a vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnth)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnth))]
+pub fn svcnth() -> u64 {
+    svcnth_pat::<{ svpattern::SV_ALL }>()
+}
+#[doc = "Count the number of 32-bit elements in a vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntw)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntw))]
+pub fn svcntw() -> u64 {
+    svcntw_pat::<{ svpattern::SV_ALL }>()
+}
+#[doc = "Count the number of 64-bit elements in a vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntd)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntd))]
+pub fn svcntd() -> u64 {
+    svcntd_pat::<{ svpattern::SV_ALL }>()
+}
+#[doc = "Count the number of 8-bit elements in a vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntb_pat)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (rdvl , PATTERN = { svpattern :: SV_ALL }))]
+# [cfg_attr (test , assert_instr (cntb , PATTERN = { svpattern :: SV_MUL4 }))]
+pub fn svcntb_pat<const PATTERN: svpattern>() -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cntb")]
+        fn _svcntb_pat(pattern: svpattern) -> i64;
+    }
+    unsafe { _svcntb_pat(PATTERN).as_unsigned() }
+}
+#[doc = "Count the number of 16-bit elements in a vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcnth_pat)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (cnth , PATTERN = { svpattern :: SV_ALL }))]
+pub fn svcnth_pat<const PATTERN: svpattern>() -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cnth")]
+        fn _svcnth_pat(pattern: svpattern) -> i64;
+    }
+    unsafe { _svcnth_pat(PATTERN).as_unsigned() }
+}
+#[doc = "Count the number of 32-bit elements in a vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntw_pat)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (cntw , PATTERN = { svpattern :: SV_ALL }))]
+pub fn svcntw_pat<const PATTERN: svpattern>() -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cntw")]
+        fn _svcntw_pat(pattern: svpattern) -> i64;
+    }
+    unsafe { _svcntw_pat(PATTERN).as_unsigned() }
+}
+#[doc = "Count the number of 64-bit elements in a vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntd_pat)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (cntd , PATTERN = { svpattern :: SV_ALL }))]
+pub fn svcntd_pat<const PATTERN: svpattern>() -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cntd")]
+        fn _svcntd_pat(pattern: svpattern) -> i64;
+    }
+    unsafe { _svcntd_pat(PATTERN).as_unsigned() }
+}
+#[doc = "Count set predicate bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntp_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntp))]
+pub fn svcntp_b8(pg: svbool_t, op: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cntp.nxv16i1")]
+        fn _svcntp_b8(pg: svbool_t, op: svbool_t) -> i64;
+    }
+    unsafe { _svcntp_b8(pg, op).as_unsigned() }
+}
+#[doc = "Count set predicate bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntp_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntp))]
+pub fn svcntp_b16(pg: svbool_t, op: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cntp.nxv8i1")]
+        fn _svcntp_b16(pg: svbool8_t, op: svbool8_t) -> i64;
+    }
+    unsafe { _svcntp_b16(pg.sve_into(), op.sve_into()).as_unsigned() }
+}
+#[doc = "Count set predicate bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntp_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntp))]
+pub fn svcntp_b32(pg: svbool_t, op: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cntp.nxv4i1")]
+        fn _svcntp_b32(pg: svbool4_t, op: svbool4_t) -> i64;
+    }
+    unsafe { _svcntp_b32(pg.sve_into(), op.sve_into()).as_unsigned() }
+}
+#[doc = "Count set predicate bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcntp_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntp))]
+pub fn svcntp_b64(pg: svbool_t, op: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cntp.nxv2i1")]
+        fn _svcntp_b64(pg: svbool2_t, op: svbool2_t) -> i64;
+    }
+    unsafe { _svcntp_b64(pg.sve_into(), op.sve_into()).as_unsigned() }
+}
+#[doc = "Shuffle active elements of vector to the right and fill with zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcompact[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(compact))]
+pub fn svcompact_f32(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.compact.nxv4f32"
+        )]
+        fn _svcompact_f32(pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svcompact_f32(pg.sve_into(), op) }
+}
+#[doc = "Shuffle active elements of vector to the right and fill with zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcompact[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(compact))]
+pub fn svcompact_f64(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.compact.nxv2f64"
+        )]
+        fn _svcompact_f64(pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svcompact_f64(pg.sve_into(), op) }
+}
+#[doc = "Shuffle active elements of vector to the right and fill with zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcompact[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(compact))]
+pub fn svcompact_s32(pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.compact.nxv4i32"
+        )]
+        fn _svcompact_s32(pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svcompact_s32(pg.sve_into(), op) }
+}
+#[doc = "Shuffle active elements of vector to the right and fill with zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcompact[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(compact))]
+pub fn svcompact_s64(pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.compact.nxv2i64"
+        )]
+        fn _svcompact_s64(pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svcompact_s64(pg.sve_into(), op) }
+}
+#[doc = "Shuffle active elements of vector to the right and fill with zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcompact[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(compact))]
+pub fn svcompact_u32(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe { svcompact_s32(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Shuffle active elements of vector to the right and fill with zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcompact[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(compact))]
+pub fn svcompact_u64(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe { svcompact_s64(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_f32(x0: svfloat32_t, x1: svfloat32_t) -> svfloat32x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_f64(x0: svfloat64_t, x1: svfloat64_t) -> svfloat64x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_s8(x0: svint8_t, x1: svint8_t) -> svint8x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_s16(x0: svint16_t, x1: svint16_t) -> svint16x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_s32(x0: svint32_t, x1: svint32_t) -> svint32x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_s64(x0: svint64_t, x1: svint64_t) -> svint64x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_u8(x0: svuint8_t, x1: svuint8_t) -> svuint8x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_u16(x0: svuint16_t, x1: svuint16_t) -> svuint16x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_u32(x0: svuint32_t, x1: svuint32_t) -> svuint32x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate2[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate2_u64(x0: svuint64_t, x1: svuint64_t) -> svuint64x2_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create2(x0, x1) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_f32(x0: svfloat32_t, x1: svfloat32_t, x2: svfloat32_t) -> svfloat32x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_f64(x0: svfloat64_t, x1: svfloat64_t, x2: svfloat64_t) -> svfloat64x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_s8(x0: svint8_t, x1: svint8_t, x2: svint8_t) -> svint8x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_s16(x0: svint16_t, x1: svint16_t, x2: svint16_t) -> svint16x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_s32(x0: svint32_t, x1: svint32_t, x2: svint32_t) -> svint32x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_s64(x0: svint64_t, x1: svint64_t, x2: svint64_t) -> svint64x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_u8(x0: svuint8_t, x1: svuint8_t, x2: svuint8_t) -> svuint8x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_u16(x0: svuint16_t, x1: svuint16_t, x2: svuint16_t) -> svuint16x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_u32(x0: svuint32_t, x1: svuint32_t, x2: svuint32_t) -> svuint32x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate3[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate3_u64(x0: svuint64_t, x1: svuint64_t, x2: svuint64_t) -> svuint64x3_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create3(x0, x1, x2) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_f32(
+    x0: svfloat32_t,
+    x1: svfloat32_t,
+    x2: svfloat32_t,
+    x3: svfloat32_t,
+) -> svfloat32x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_f64(
+    x0: svfloat64_t,
+    x1: svfloat64_t,
+    x2: svfloat64_t,
+    x3: svfloat64_t,
+) -> svfloat64x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_s8(x0: svint8_t, x1: svint8_t, x2: svint8_t, x3: svint8_t) -> svint8x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_s16(x0: svint16_t, x1: svint16_t, x2: svint16_t, x3: svint16_t) -> svint16x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_s32(x0: svint32_t, x1: svint32_t, x2: svint32_t, x3: svint32_t) -> svint32x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_s64(x0: svint64_t, x1: svint64_t, x2: svint64_t, x3: svint64_t) -> svint64x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_u8(x0: svuint8_t, x1: svuint8_t, x2: svuint8_t, x3: svuint8_t) -> svuint8x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_u16(
+    x0: svuint16_t,
+    x1: svuint16_t,
+    x2: svuint16_t,
+    x3: svuint16_t,
+) -> svuint16x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_u32(
+    x0: svuint32_t,
+    x1: svuint32_t,
+    x2: svuint32_t,
+    x3: svuint32_t,
+) -> svuint32x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Create a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcreate4[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svcreate4_u64(
+    x0: svuint64_t,
+    x1: svuint64_t,
+    x2: svuint64_t,
+    x3: svuint64_t,
+) -> svuint64x4_t {
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_create4(x0, x1, x2, x3) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvt))]
+pub fn svcvt_f32_f64_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvt.f32f64")]
+        fn _svcvt_f32_f64_m(inactive: svfloat32_t, pg: svbool2_t, op: svfloat64_t) -> svfloat32_t;
+    }
+    unsafe { _svcvt_f32_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvt))]
+pub fn svcvt_f32_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    unsafe { svcvt_f32_f64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvt))]
+pub fn svcvt_f32_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    svcvt_f32_f64_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvt))]
+pub fn svcvt_f64_f32_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat32_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvt.f64f32")]
+        fn _svcvt_f64_f32_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat32_t) -> svfloat64_t;
+    }
+    unsafe { _svcvt_f64_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvt))]
+pub fn svcvt_f64_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat64_t {
+    unsafe { svcvt_f64_f32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvt))]
+pub fn svcvt_f64_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat64_t {
+    svcvt_f64_f32_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f32_s32_m(inactive: svfloat32_t, pg: svbool_t, op: svint32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.scvtf.f32i32")]
+        fn _svcvt_f32_s32_m(inactive: svfloat32_t, pg: svbool4_t, op: svint32_t) -> svfloat32_t;
+    }
+    unsafe { _svcvt_f32_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f32_s32_x(pg: svbool_t, op: svint32_t) -> svfloat32_t {
+    unsafe { svcvt_f32_s32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f32_s32_z(pg: svbool_t, op: svint32_t) -> svfloat32_t {
+    svcvt_f32_s32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f32_s64_m(inactive: svfloat32_t, pg: svbool_t, op: svint64_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.scvtf.f32i64")]
+        fn _svcvt_f32_s64_m(inactive: svfloat32_t, pg: svbool2_t, op: svint64_t) -> svfloat32_t;
+    }
+    unsafe { _svcvt_f32_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f32_s64_x(pg: svbool_t, op: svint64_t) -> svfloat32_t {
+    unsafe { svcvt_f32_s64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f32_s64_z(pg: svbool_t, op: svint64_t) -> svfloat32_t {
+    svcvt_f32_s64_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f32_u32_m(inactive: svfloat32_t, pg: svbool_t, op: svuint32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ucvtf.f32i32")]
+        fn _svcvt_f32_u32_m(inactive: svfloat32_t, pg: svbool4_t, op: svint32_t) -> svfloat32_t;
+    }
+    unsafe { _svcvt_f32_u32_m(inactive, pg.sve_into(), op.as_signed()) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f32_u32_x(pg: svbool_t, op: svuint32_t) -> svfloat32_t {
+    unsafe { svcvt_f32_u32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f32_u32_z(pg: svbool_t, op: svuint32_t) -> svfloat32_t {
+    svcvt_f32_u32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f32_u64_m(inactive: svfloat32_t, pg: svbool_t, op: svuint64_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ucvtf.f32i64")]
+        fn _svcvt_f32_u64_m(inactive: svfloat32_t, pg: svbool2_t, op: svint64_t) -> svfloat32_t;
+    }
+    unsafe { _svcvt_f32_u64_m(inactive, pg.sve_into(), op.as_signed()) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f32_u64_x(pg: svbool_t, op: svuint64_t) -> svfloat32_t {
+    unsafe { svcvt_f32_u64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f32[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f32_u64_z(pg: svbool_t, op: svuint64_t) -> svfloat32_t {
+    svcvt_f32_u64_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f64_s32_m(inactive: svfloat64_t, pg: svbool_t, op: svint32_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.scvtf.f64i32")]
+        fn _svcvt_f64_s32_m(inactive: svfloat64_t, pg: svbool2_t, op: svint32_t) -> svfloat64_t;
+    }
+    unsafe { _svcvt_f64_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f64_s32_x(pg: svbool_t, op: svint32_t) -> svfloat64_t {
+    unsafe { svcvt_f64_s32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f64_s32_z(pg: svbool_t, op: svint32_t) -> svfloat64_t {
+    svcvt_f64_s32_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f64_s64_m(inactive: svfloat64_t, pg: svbool_t, op: svint64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.scvtf.f64i64")]
+        fn _svcvt_f64_s64_m(inactive: svfloat64_t, pg: svbool2_t, op: svint64_t) -> svfloat64_t;
+    }
+    unsafe { _svcvt_f64_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f64_s64_x(pg: svbool_t, op: svint64_t) -> svfloat64_t {
+    unsafe { svcvt_f64_s64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(scvtf))]
+pub fn svcvt_f64_s64_z(pg: svbool_t, op: svint64_t) -> svfloat64_t {
+    svcvt_f64_s64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f64_u32_m(inactive: svfloat64_t, pg: svbool_t, op: svuint32_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ucvtf.f64i32")]
+        fn _svcvt_f64_u32_m(inactive: svfloat64_t, pg: svbool2_t, op: svint32_t) -> svfloat64_t;
+    }
+    unsafe { _svcvt_f64_u32_m(inactive, pg.sve_into(), op.as_signed()) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f64_u32_x(pg: svbool_t, op: svuint32_t) -> svfloat64_t {
+    unsafe { svcvt_f64_u32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f64_u32_z(pg: svbool_t, op: svuint32_t) -> svfloat64_t {
+    svcvt_f64_u32_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f64_u64_m(inactive: svfloat64_t, pg: svbool_t, op: svuint64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ucvtf.f64i64")]
+        fn _svcvt_f64_u64_m(inactive: svfloat64_t, pg: svbool2_t, op: svint64_t) -> svfloat64_t;
+    }
+    unsafe { _svcvt_f64_u64_m(inactive, pg.sve_into(), op.as_signed()) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f64_u64_x(pg: svbool_t, op: svuint64_t) -> svfloat64_t {
+    unsafe { svcvt_f64_u64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_f64[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ucvtf))]
+pub fn svcvt_f64_u64_z(pg: svbool_t, op: svuint64_t) -> svfloat64_t {
+    svcvt_f64_u64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s32[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s32_f32_m(inactive: svint32_t, pg: svbool_t, op: svfloat32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtzs.i32f32")]
+        fn _svcvt_s32_f32_m(inactive: svint32_t, pg: svbool4_t, op: svfloat32_t) -> svint32_t;
+    }
+    unsafe { _svcvt_s32_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s32[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s32_f32_x(pg: svbool_t, op: svfloat32_t) -> svint32_t {
+    unsafe { svcvt_s32_f32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s32[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s32_f32_z(pg: svbool_t, op: svfloat32_t) -> svint32_t {
+    svcvt_s32_f32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s32[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s32_f64_m(inactive: svint32_t, pg: svbool_t, op: svfloat64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtzs.i32f64")]
+        fn _svcvt_s32_f64_m(inactive: svint32_t, pg: svbool2_t, op: svfloat64_t) -> svint32_t;
+    }
+    unsafe { _svcvt_s32_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s32[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s32_f64_x(pg: svbool_t, op: svfloat64_t) -> svint32_t {
+    unsafe { svcvt_s32_f64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s32[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s32_f64_z(pg: svbool_t, op: svfloat64_t) -> svint32_t {
+    svcvt_s32_f64_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s64[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s64_f32_m(inactive: svint64_t, pg: svbool_t, op: svfloat32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtzs.i64f32")]
+        fn _svcvt_s64_f32_m(inactive: svint64_t, pg: svbool2_t, op: svfloat32_t) -> svint64_t;
+    }
+    unsafe { _svcvt_s64_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s64[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s64_f32_x(pg: svbool_t, op: svfloat32_t) -> svint64_t {
+    unsafe { svcvt_s64_f32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s64[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s64_f32_z(pg: svbool_t, op: svfloat32_t) -> svint64_t {
+    svcvt_s64_f32_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s64[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s64_f64_m(inactive: svint64_t, pg: svbool_t, op: svfloat64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtzs.i64f64")]
+        fn _svcvt_s64_f64_m(inactive: svint64_t, pg: svbool2_t, op: svfloat64_t) -> svint64_t;
+    }
+    unsafe { _svcvt_s64_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s64[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s64_f64_x(pg: svbool_t, op: svfloat64_t) -> svint64_t {
+    unsafe { svcvt_s64_f64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_s64[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzs))]
+pub fn svcvt_s64_f64_z(pg: svbool_t, op: svfloat64_t) -> svint64_t {
+    svcvt_s64_f64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u32[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u32_f32_m(inactive: svuint32_t, pg: svbool_t, op: svfloat32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtzu.i32f32")]
+        fn _svcvt_u32_f32_m(inactive: svint32_t, pg: svbool4_t, op: svfloat32_t) -> svint32_t;
+    }
+    unsafe { _svcvt_u32_f32_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u32[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u32_f32_x(pg: svbool_t, op: svfloat32_t) -> svuint32_t {
+    unsafe { svcvt_u32_f32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u32[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u32_f32_z(pg: svbool_t, op: svfloat32_t) -> svuint32_t {
+    svcvt_u32_f32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u32[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u32_f64_m(inactive: svuint32_t, pg: svbool_t, op: svfloat64_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtzu.i32f64")]
+        fn _svcvt_u32_f64_m(inactive: svint32_t, pg: svbool2_t, op: svfloat64_t) -> svint32_t;
+    }
+    unsafe { _svcvt_u32_f64_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u32[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u32_f64_x(pg: svbool_t, op: svfloat64_t) -> svuint32_t {
+    unsafe { svcvt_u32_f64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u32[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u32_f64_z(pg: svbool_t, op: svfloat64_t) -> svuint32_t {
+    svcvt_u32_f64_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u64[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u64_f32_m(inactive: svuint64_t, pg: svbool_t, op: svfloat32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtzu.i64f32")]
+        fn _svcvt_u64_f32_m(inactive: svint64_t, pg: svbool2_t, op: svfloat32_t) -> svint64_t;
+    }
+    unsafe { _svcvt_u64_f32_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u64[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u64_f32_x(pg: svbool_t, op: svfloat32_t) -> svuint64_t {
+    unsafe { svcvt_u64_f32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u64[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u64_f32_z(pg: svbool_t, op: svfloat32_t) -> svuint64_t {
+    svcvt_u64_f32_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u64[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u64_f64_m(inactive: svuint64_t, pg: svbool_t, op: svfloat64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtzu.i64f64")]
+        fn _svcvt_u64_f64_m(inactive: svint64_t, pg: svbool2_t, op: svfloat64_t) -> svint64_t;
+    }
+    unsafe { _svcvt_u64_f64_m(inactive.as_signed(), pg.sve_into(), op).as_unsigned() }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u64[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u64_f64_x(pg: svbool_t, op: svfloat64_t) -> svuint64_t {
+    unsafe { svcvt_u64_f64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Floating-point convert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvt_u64[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtzu))]
+pub fn svcvt_u64_f64_z(pg: svbool_t, op: svfloat64_t) -> svuint64_t {
+    svcvt_u64_f64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fdiv.nxv4f32")]
+        fn _svdiv_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svdiv_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svdiv_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svdiv_f32_m(pg, op1, op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svdiv_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svdiv_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svdiv_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fdiv.nxv2f64")]
+        fn _svdiv_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svdiv_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svdiv_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svdiv_f64_m(pg, op1, op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svdiv_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svdiv_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdiv))]
+pub fn svdiv_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svdiv_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sdiv.nxv4i32")]
+        fn _svdiv_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svdiv_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svdiv_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svdiv_s32_m(pg, op1, op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svdiv_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svdiv_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svdiv_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sdiv.nxv2i64")]
+        fn _svdiv_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svdiv_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svdiv_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svdiv_s64_m(pg, op1, op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svdiv_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svdiv_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdiv))]
+pub fn svdiv_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svdiv_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.udiv.nxv4i32")]
+        fn _svdiv_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svdiv_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svdiv_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svdiv_u32_m(pg, op1, op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svdiv_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svdiv_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svdiv_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.udiv.nxv2i64")]
+        fn _svdiv_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svdiv_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svdiv_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svdiv_u64_m(pg, op1, op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svdiv_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svdiv_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Divide"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdiv[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udiv))]
+pub fn svdiv_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svdiv_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fdivr.nxv4f32")]
+        fn _svdivr_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svdivr_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svdivr_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svdivr_f32_m(pg, op1, op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svdivr_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svdivr_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svdivr_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fdivr.nxv2f64")]
+        fn _svdivr_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svdivr_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svdivr_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svdivr_f64_m(pg, op1, op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svdivr_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svdivr_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fdivr))]
+pub fn svdivr_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svdivr_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sdivr.nxv4i32")]
+        fn _svdivr_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svdivr_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svdivr_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svdivr_s32_m(pg, op1, op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svdivr_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svdivr_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svdivr_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sdivr.nxv2i64")]
+        fn _svdivr_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svdivr_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svdivr_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svdivr_s64_m(pg, op1, op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svdivr_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svdivr_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdivr))]
+pub fn svdivr_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svdivr_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.udivr.nxv4i32")]
+        fn _svdivr_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svdivr_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svdivr_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svdivr_u32_m(pg, op1, op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svdivr_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svdivr_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svdivr_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.udivr.nxv2i64")]
+        fn _svdivr_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svdivr_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svdivr_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svdivr_u64_m(pg, op1, op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svdivr_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svdivr_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Divide reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdivr[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udivr))]
+pub fn svdivr_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svdivr_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdot, IMM_INDEX = 0))]
+pub fn svdot_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint8_t,
+    op3: svint8_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sdot.lane.nxv4i32"
+        )]
+        fn _svdot_lane_s32(
+            op1: svint32_t,
+            op2: svint8_t,
+            op3: svint8_t,
+            imm_index: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svdot_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdot, IMM_INDEX = 0))]
+pub fn svdot_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sdot.lane.nxv2i64"
+        )]
+        fn _svdot_lane_s64(
+            op1: svint64_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            imm_index: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svdot_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udot, IMM_INDEX = 0))]
+pub fn svdot_lane_u32<const IMM_INDEX: i32>(
+    op1: svuint32_t,
+    op2: svuint8_t,
+    op3: svuint8_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.udot.lane.nxv4i32"
+        )]
+        fn _svdot_lane_u32(
+            op1: svint32_t,
+            op2: svint8_t,
+            op3: svint8_t,
+            imm_index: i32,
+        ) -> svint32_t;
+    }
+    unsafe {
+        _svdot_lane_u32(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX).as_unsigned()
+    }
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udot, IMM_INDEX = 0))]
+pub fn svdot_lane_u64<const IMM_INDEX: i32>(
+    op1: svuint64_t,
+    op2: svuint16_t,
+    op3: svuint16_t,
+) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.udot.lane.nxv2i64"
+        )]
+        fn _svdot_lane_u64(
+            op1: svint64_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            imm_index: i32,
+        ) -> svint64_t;
+    }
+    unsafe {
+        _svdot_lane_u64(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX).as_unsigned()
+    }
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdot))]
+pub fn svdot_s32(op1: svint32_t, op2: svint8_t, op3: svint8_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sdot.nxv4i32")]
+        fn _svdot_s32(op1: svint32_t, op2: svint8_t, op3: svint8_t) -> svint32_t;
+    }
+    unsafe { _svdot_s32(op1, op2, op3) }
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdot))]
+pub fn svdot_n_s32(op1: svint32_t, op2: svint8_t, op3: i8) -> svint32_t {
+    svdot_s32(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdot))]
+pub fn svdot_s64(op1: svint64_t, op2: svint16_t, op3: svint16_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sdot.nxv2i64")]
+        fn _svdot_s64(op1: svint64_t, op2: svint16_t, op3: svint16_t) -> svint64_t;
+    }
+    unsafe { _svdot_s64(op1, op2, op3) }
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sdot))]
+pub fn svdot_n_s64(op1: svint64_t, op2: svint16_t, op3: i16) -> svint64_t {
+    svdot_s64(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udot))]
+pub fn svdot_u32(op1: svuint32_t, op2: svuint8_t, op3: svuint8_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.udot.nxv4i32")]
+        fn _svdot_u32(op1: svint32_t, op2: svint8_t, op3: svint8_t) -> svint32_t;
+    }
+    unsafe { _svdot_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udot))]
+pub fn svdot_n_u32(op1: svuint32_t, op2: svuint8_t, op3: u8) -> svuint32_t {
+    svdot_u32(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udot))]
+pub fn svdot_u64(op1: svuint64_t, op2: svuint16_t, op3: svuint16_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.udot.nxv2i64")]
+        fn _svdot_u64(op1: svint64_t, op2: svint16_t, op3: svint16_t) -> svint64_t;
+    }
+    unsafe { _svdot_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdot[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(udot))]
+pub fn svdot_n_u64(op1: svuint64_t, op2: svuint16_t, op3: u16) -> svuint64_t {
+    svdot_u64(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_f32(data: svfloat32_t, index: u32) -> svfloat32_t {
+    svtbl_f32(data, svdup_n_u32(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_f64(data: svfloat64_t, index: u64) -> svfloat64_t {
+    svtbl_f64(data, svdup_n_u64(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_s8(data: svint8_t, index: u8) -> svint8_t {
+    svtbl_s8(data, svdup_n_u8(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_s16(data: svint16_t, index: u16) -> svint16_t {
+    svtbl_s16(data, svdup_n_u16(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_s32(data: svint32_t, index: u32) -> svint32_t {
+    svtbl_s32(data, svdup_n_u32(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_s64(data: svint64_t, index: u64) -> svint64_t {
+    svtbl_s64(data, svdup_n_u64(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_u8(data: svuint8_t, index: u8) -> svuint8_t {
+    svtbl_u8(data, svdup_n_u8(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_u16(data: svuint16_t, index: u16) -> svuint16_t {
+    svtbl_u16(data, svdup_n_u16(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_u32(data: svuint32_t, index: u32) -> svuint32_t {
+    svtbl_u32(data, svdup_n_u32(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdup_lane_u64(data: svuint64_t, index: u64) -> svuint64_t {
+    svtbl_u64(data, svdup_n_u64(index))
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbfx))]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svdup_n_b8(op: bool) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv16i1")]
+        fn _svdup_n_b8(op: bool) -> svbool_t;
+    }
+    unsafe { _svdup_n_b8(op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbfx))]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svdup_n_b16(op: bool) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv8i1")]
+        fn _svdup_n_b16(op: bool) -> svbool8_t;
+    }
+    unsafe { _svdup_n_b16(op).sve_into() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbfx))]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svdup_n_b32(op: bool) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv4i1")]
+        fn _svdup_n_b32(op: bool) -> svbool4_t;
+    }
+    unsafe { _svdup_n_b32(op).sve_into() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbfx))]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svdup_n_b64(op: bool) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv2i1")]
+        fn _svdup_n_b64(op: bool) -> svbool2_t;
+    }
+    unsafe { _svdup_n_b64(op).sve_into() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_f32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_f32(op: f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv4f32")]
+        fn _svdup_n_f32(op: f32) -> svfloat32_t;
+    }
+    unsafe { _svdup_n_f32(op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_f64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_f64(op: f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv2f64")]
+        fn _svdup_n_f64(op: f64) -> svfloat64_t;
+    }
+    unsafe { _svdup_n_f64(op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s8(op: i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv16i8")]
+        fn _svdup_n_s8(op: i8) -> svint8_t;
+    }
+    unsafe { _svdup_n_s8(op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s16(op: i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv8i16")]
+        fn _svdup_n_s16(op: i16) -> svint16_t;
+    }
+    unsafe { _svdup_n_s16(op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s32(op: i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv4i32")]
+        fn _svdup_n_s32(op: i32) -> svint32_t;
+    }
+    unsafe { _svdup_n_s32(op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s64(op: i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.x.nxv2i64")]
+        fn _svdup_n_s64(op: i64) -> svint64_t;
+    }
+    unsafe { _svdup_n_s64(op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u8(op: u8) -> svuint8_t {
+    unsafe { svdup_n_s8(op.as_signed()).as_unsigned() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u16(op: u16) -> svuint16_t {
+    unsafe { svdup_n_s16(op.as_signed()).as_unsigned() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u32(op: u32) -> svuint32_t {
+    unsafe { svdup_n_s32(op.as_signed()).as_unsigned() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u64(op: u64) -> svuint64_t {
+    unsafe { svdup_n_s64(op.as_signed()).as_unsigned() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_f32_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_f32_m(inactive: svfloat32_t, pg: svbool_t, op: f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.nxv4f32")]
+        fn _svdup_n_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: f32) -> svfloat32_t;
+    }
+    unsafe { _svdup_n_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_f32_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_f32_x(pg: svbool_t, op: f32) -> svfloat32_t {
+    svdup_n_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_f32_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_f32_z(pg: svbool_t, op: f32) -> svfloat32_t {
+    svdup_n_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_f64_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_f64_m(inactive: svfloat64_t, pg: svbool_t, op: f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.nxv2f64")]
+        fn _svdup_n_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: f64) -> svfloat64_t;
+    }
+    unsafe { _svdup_n_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_f64_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_f64_x(pg: svbool_t, op: f64) -> svfloat64_t {
+    svdup_n_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_f64_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_f64_z(pg: svbool_t, op: f64) -> svfloat64_t {
+    svdup_n_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s8_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s8_m(inactive: svint8_t, pg: svbool_t, op: i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.nxv16i8")]
+        fn _svdup_n_s8_m(inactive: svint8_t, pg: svbool_t, op: i8) -> svint8_t;
+    }
+    unsafe { _svdup_n_s8_m(inactive, pg, op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s8_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s8_x(pg: svbool_t, op: i8) -> svint8_t {
+    svdup_n_s8_m(svdup_n_s8(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s8_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s8_z(pg: svbool_t, op: i8) -> svint8_t {
+    svdup_n_s8_m(svdup_n_s8(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s16_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s16_m(inactive: svint16_t, pg: svbool_t, op: i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.nxv8i16")]
+        fn _svdup_n_s16_m(inactive: svint16_t, pg: svbool8_t, op: i16) -> svint16_t;
+    }
+    unsafe { _svdup_n_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s16_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s16_x(pg: svbool_t, op: i16) -> svint16_t {
+    svdup_n_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s16_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s16_z(pg: svbool_t, op: i16) -> svint16_t {
+    svdup_n_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s32_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s32_m(inactive: svint32_t, pg: svbool_t, op: i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.nxv4i32")]
+        fn _svdup_n_s32_m(inactive: svint32_t, pg: svbool4_t, op: i32) -> svint32_t;
+    }
+    unsafe { _svdup_n_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s32_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s32_x(pg: svbool_t, op: i32) -> svint32_t {
+    svdup_n_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s32_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s32_z(pg: svbool_t, op: i32) -> svint32_t {
+    svdup_n_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s64_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s64_m(inactive: svint64_t, pg: svbool_t, op: i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.dup.nxv2i64")]
+        fn _svdup_n_s64_m(inactive: svint64_t, pg: svbool2_t, op: i64) -> svint64_t;
+    }
+    unsafe { _svdup_n_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s64_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s64_x(pg: svbool_t, op: i64) -> svint64_t {
+    svdup_n_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_s64_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_s64_z(pg: svbool_t, op: i64) -> svint64_t {
+    svdup_n_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u8_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u8_m(inactive: svuint8_t, pg: svbool_t, op: u8) -> svuint8_t {
+    unsafe { svdup_n_s8_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u8_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u8_x(pg: svbool_t, op: u8) -> svuint8_t {
+    svdup_n_u8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u8_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u8_z(pg: svbool_t, op: u8) -> svuint8_t {
+    svdup_n_u8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u16_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u16_m(inactive: svuint16_t, pg: svbool_t, op: u16) -> svuint16_t {
+    unsafe { svdup_n_s16_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u16_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u16_x(pg: svbool_t, op: u16) -> svuint16_t {
+    svdup_n_u16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u16_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u16_z(pg: svbool_t, op: u16) -> svuint16_t {
+    svdup_n_u16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u32_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u32_m(inactive: svuint32_t, pg: svbool_t, op: u32) -> svuint32_t {
+    unsafe { svdup_n_s32_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u32_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u32_x(pg: svbool_t, op: u32) -> svuint32_t {
+    svdup_n_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u32_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u32_z(pg: svbool_t, op: u32) -> svuint32_t {
+    svdup_n_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u64_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u64_m(inactive: svuint64_t, pg: svbool_t, op: u64) -> svuint64_t {
+    unsafe { svdup_n_s64_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u64_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u64_x(pg: svbool_t, op: u64) -> svuint64_t {
+    svdup_n_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Broadcast a scalar value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdup[_n]_u64_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svdup_n_u64_z(pg: svbool_t, op: u64) -> svuint64_t {
+    svdup_n_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_f32(data: svfloat32_t, index: u64) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.dupq.lane.nxv4f32"
+        )]
+        fn _svdupq_lane_f32(data: svfloat32_t, index: i64) -> svfloat32_t;
+    }
+    unsafe { _svdupq_lane_f32(data, index.as_signed()) }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_f64(data: svfloat64_t, index: u64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.dupq.lane.nxv2f64"
+        )]
+        fn _svdupq_lane_f64(data: svfloat64_t, index: i64) -> svfloat64_t;
+    }
+    unsafe { _svdupq_lane_f64(data, index.as_signed()) }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_s8(data: svint8_t, index: u64) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.dupq.lane.nxv16i8"
+        )]
+        fn _svdupq_lane_s8(data: svint8_t, index: i64) -> svint8_t;
+    }
+    unsafe { _svdupq_lane_s8(data, index.as_signed()) }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_s16(data: svint16_t, index: u64) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.dupq.lane.nxv8i16"
+        )]
+        fn _svdupq_lane_s16(data: svint16_t, index: i64) -> svint16_t;
+    }
+    unsafe { _svdupq_lane_s16(data, index.as_signed()) }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_s32(data: svint32_t, index: u64) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.dupq.lane.nxv4i32"
+        )]
+        fn _svdupq_lane_s32(data: svint32_t, index: i64) -> svint32_t;
+    }
+    unsafe { _svdupq_lane_s32(data, index.as_signed()) }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_s64(data: svint64_t, index: u64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.dupq.lane.nxv2i64"
+        )]
+        fn _svdupq_lane_s64(data: svint64_t, index: i64) -> svint64_t;
+    }
+    unsafe { _svdupq_lane_s64(data, index.as_signed()) }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_u8(data: svuint8_t, index: u64) -> svuint8_t {
+    unsafe { svdupq_lane_s8(data.as_signed(), index).as_unsigned() }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_u16(data: svuint16_t, index: u64) -> svuint16_t {
+    unsafe { svdupq_lane_s16(data.as_signed(), index).as_unsigned() }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_u32(data: svuint32_t, index: u64) -> svuint32_t {
+    unsafe { svdupq_lane_s32(data.as_signed(), index).as_unsigned() }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svdupq_lane_u64(data: svuint64_t, index: u64) -> svuint64_t {
+    unsafe { svdupq_lane_s64(data.as_signed(), index).as_unsigned() }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_b16(
+    x0: bool,
+    x1: bool,
+    x2: bool,
+    x3: bool,
+    x4: bool,
+    x5: bool,
+    x6: bool,
+    x7: bool,
+) -> svbool_t {
+    let op1 = svdupq_n_s16(
+        x0 as i16, x1 as i16, x2 as i16, x3 as i16, x4 as i16, x5 as i16, x6 as i16, x7 as i16,
+    );
+    svcmpne_wide_s16(svptrue_b16(), op1, svdup_n_s64(0))
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_b32(x0: bool, x1: bool, x2: bool, x3: bool) -> svbool_t {
+    let op1 = svdupq_n_s32(x0 as i32, x1 as i32, x2 as i32, x3 as i32);
+    svcmpne_wide_s32(svptrue_b32(), op1, svdup_n_s64(0))
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_b64(x0: bool, x1: bool) -> svbool_t {
+    let op1 = svdupq_n_s64(x0 as i64, x1 as i64);
+    svcmpne_s64(svptrue_b64(), op1, svdup_n_s64(0))
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_b8(
+    x0: bool,
+    x1: bool,
+    x2: bool,
+    x3: bool,
+    x4: bool,
+    x5: bool,
+    x6: bool,
+    x7: bool,
+    x8: bool,
+    x9: bool,
+    x10: bool,
+    x11: bool,
+    x12: bool,
+    x13: bool,
+    x14: bool,
+    x15: bool,
+) -> svbool_t {
+    let op1 = svdupq_n_s8(
+        x0 as i8, x1 as i8, x2 as i8, x3 as i8, x4 as i8, x5 as i8, x6 as i8, x7 as i8, x8 as i8,
+        x9 as i8, x10 as i8, x11 as i8, x12 as i8, x13 as i8, x14 as i8, x15 as i8,
+    );
+    svcmpne_wide_s8(svptrue_b8(), op1, svdup_n_s64(0))
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_f32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_f32(x0: f32, x1: f32, x2: f32, x3: f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.vector.insert.nxv4f32.v4f32"
+        )]
+        fn _svdupq_n_f32(op0: svfloat32_t, op1: float32x4_t, idx: i64) -> svfloat32_t;
+    }
+    unsafe {
+        let op = _svdupq_n_f32(svundef_f32(), crate::mem::transmute([x0, x1, x2, x3]), 0);
+        svdupq_lane_f32(op, 0)
+    }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_s32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_s32(x0: i32, x1: i32, x2: i32, x3: i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.vector.insert.nxv4i32.v4i32"
+        )]
+        fn _svdupq_n_s32(op0: svint32_t, op1: int32x4_t, idx: i64) -> svint32_t;
+    }
+    unsafe {
+        let op = _svdupq_n_s32(svundef_s32(), crate::mem::transmute([x0, x1, x2, x3]), 0);
+        svdupq_lane_s32(op, 0)
+    }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_u32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_u32(x0: u32, x1: u32, x2: u32, x3: u32) -> svuint32_t {
+    unsafe {
+        svdupq_n_s32(
+            x0.as_signed(),
+            x1.as_signed(),
+            x2.as_signed(),
+            x3.as_signed(),
+        )
+        .as_unsigned()
+    }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_f64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_f64(x0: f64, x1: f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.vector.insert.nxv2f64.v2f64"
+        )]
+        fn _svdupq_n_f64(op0: svfloat64_t, op1: float64x2_t, idx: i64) -> svfloat64_t;
+    }
+    unsafe {
+        let op = _svdupq_n_f64(svundef_f64(), crate::mem::transmute([x0, x1]), 0);
+        svdupq_lane_f64(op, 0)
+    }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_s64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_s64(x0: i64, x1: i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.vector.insert.nxv2i64.v2i64"
+        )]
+        fn _svdupq_n_s64(op0: svint64_t, op1: int64x2_t, idx: i64) -> svint64_t;
+    }
+    unsafe {
+        let op = _svdupq_n_s64(svundef_s64(), crate::mem::transmute([x0, x1]), 0);
+        svdupq_lane_s64(op, 0)
+    }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_u64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_u64(x0: u64, x1: u64) -> svuint64_t {
+    unsafe { svdupq_n_s64(x0.as_signed(), x1.as_signed()).as_unsigned() }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_s16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_s16(
+    x0: i16,
+    x1: i16,
+    x2: i16,
+    x3: i16,
+    x4: i16,
+    x5: i16,
+    x6: i16,
+    x7: i16,
+) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.vector.insert.nxv8i16.v8i16"
+        )]
+        fn _svdupq_n_s16(op0: svint16_t, op1: int16x8_t, idx: i64) -> svint16_t;
+    }
+    unsafe {
+        let op = _svdupq_n_s16(
+            svundef_s16(),
+            crate::mem::transmute([x0, x1, x2, x3, x4, x5, x6, x7]),
+            0,
+        );
+        svdupq_lane_s16(op, 0)
+    }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_u16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_u16(
+    x0: u16,
+    x1: u16,
+    x2: u16,
+    x3: u16,
+    x4: u16,
+    x5: u16,
+    x6: u16,
+    x7: u16,
+) -> svuint16_t {
+    unsafe {
+        svdupq_n_s16(
+            x0.as_signed(),
+            x1.as_signed(),
+            x2.as_signed(),
+            x3.as_signed(),
+            x4.as_signed(),
+            x5.as_signed(),
+            x6.as_signed(),
+            x7.as_signed(),
+        )
+        .as_unsigned()
+    }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_s8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_s8(
+    x0: i8,
+    x1: i8,
+    x2: i8,
+    x3: i8,
+    x4: i8,
+    x5: i8,
+    x6: i8,
+    x7: i8,
+    x8: i8,
+    x9: i8,
+    x10: i8,
+    x11: i8,
+    x12: i8,
+    x13: i8,
+    x14: i8,
+    x15: i8,
+) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.vector.insert.nxv16i8.v16i8"
+        )]
+        fn _svdupq_n_s8(op0: svint8_t, op1: int8x16_t, idx: i64) -> svint8_t;
+    }
+    unsafe {
+        let op = _svdupq_n_s8(
+            svundef_s8(),
+            crate::mem::transmute([
+                x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+            ]),
+            0,
+        );
+        svdupq_lane_s8(op, 0)
+    }
+}
+#[doc = "Broadcast a quadword of scalars"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svdupq[_n]_u8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svdupq_n_u8(
+    x0: u8,
+    x1: u8,
+    x2: u8,
+    x3: u8,
+    x4: u8,
+    x5: u8,
+    x6: u8,
+    x7: u8,
+    x8: u8,
+    x9: u8,
+    x10: u8,
+    x11: u8,
+    x12: u8,
+    x13: u8,
+    x14: u8,
+    x15: u8,
+) -> svuint8_t {
+    unsafe {
+        svdupq_n_s8(
+            x0.as_signed(),
+            x1.as_signed(),
+            x2.as_signed(),
+            x3.as_signed(),
+            x4.as_signed(),
+            x5.as_signed(),
+            x6.as_signed(),
+            x7.as_signed(),
+            x8.as_signed(),
+            x9.as_signed(),
+            x10.as_signed(),
+            x11.as_signed(),
+            x12.as_signed(),
+            x13.as_signed(),
+            x14.as_signed(),
+            x15.as_signed(),
+        )
+        .as_unsigned()
+    }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eor.z.nvx16i1")]
+        fn _sveor_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _sveor_b_z(pg, op1, op2) }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eor.nxv16i8")]
+        fn _sveor_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _sveor_s8_m(pg, op1, op2) }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    sveor_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    sveor_s8_m(pg, op1, op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    sveor_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    sveor_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    sveor_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eor.nxv8i16")]
+        fn _sveor_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _sveor_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    sveor_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    sveor_s16_m(pg, op1, op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    sveor_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    sveor_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    sveor_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eor.nxv4i32")]
+        fn _sveor_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _sveor_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    sveor_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    sveor_s32_m(pg, op1, op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    sveor_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    sveor_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    sveor_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eor.nxv2i64")]
+        fn _sveor_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _sveor_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    sveor_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    sveor_s64_m(pg, op1, op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    sveor_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    sveor_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    sveor_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { sveor_s8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    sveor_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    sveor_u8_m(pg, op1, op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    sveor_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    sveor_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    sveor_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { sveor_s16_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    sveor_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    sveor_u16_m(pg, op1, op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    sveor_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    sveor_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    sveor_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { sveor_s32_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    sveor_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    sveor_u32_m(pg, op1, op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    sveor_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    sveor_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    sveor_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { sveor_s64_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    sveor_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    sveor_u64_m(pg, op1, op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    sveor_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    sveor_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Bitwise exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor))]
+pub fn sveor_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    sveor_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise exclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorv[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorv))]
+pub fn sveorv_s8(pg: svbool_t, op: svint8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eorv.nxv16i8")]
+        fn _sveorv_s8(pg: svbool_t, op: svint8_t) -> i8;
+    }
+    unsafe { _sveorv_s8(pg, op) }
+}
+#[doc = "Bitwise exclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorv[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorv))]
+pub fn sveorv_s16(pg: svbool_t, op: svint16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eorv.nxv8i16")]
+        fn _sveorv_s16(pg: svbool8_t, op: svint16_t) -> i16;
+    }
+    unsafe { _sveorv_s16(pg.sve_into(), op) }
+}
+#[doc = "Bitwise exclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorv[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorv))]
+pub fn sveorv_s32(pg: svbool_t, op: svint32_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eorv.nxv4i32")]
+        fn _sveorv_s32(pg: svbool4_t, op: svint32_t) -> i32;
+    }
+    unsafe { _sveorv_s32(pg.sve_into(), op) }
+}
+#[doc = "Bitwise exclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorv[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorv))]
+pub fn sveorv_s64(pg: svbool_t, op: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eorv.nxv2i64")]
+        fn _sveorv_s64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _sveorv_s64(pg.sve_into(), op) }
+}
+#[doc = "Bitwise exclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorv[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorv))]
+pub fn sveorv_u8(pg: svbool_t, op: svuint8_t) -> u8 {
+    unsafe { sveorv_s8(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorv[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorv))]
+pub fn sveorv_u16(pg: svbool_t, op: svuint16_t) -> u16 {
+    unsafe { sveorv_s16(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorv[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorv))]
+pub fn sveorv_u32(pg: svbool_t, op: svuint32_t) -> u32 {
+    unsafe { sveorv_s32(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorv[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorv))]
+pub fn sveorv_u64(pg: svbool_t, op: svuint64_t) -> u64 {
+    unsafe { sveorv_s64(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Floating-point exponential accelerator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexpa[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fexpa))]
+pub fn svexpa_f32(op: svuint32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fexpa.x.nxv4f32 "
+        )]
+        fn _svexpa_f32(op: svint32_t) -> svfloat32_t;
+    }
+    unsafe { _svexpa_f32(op.as_signed()) }
+}
+#[doc = "Floating-point exponential accelerator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexpa[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fexpa))]
+pub fn svexpa_f64(op: svuint64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fexpa.x.nxv2f64 "
+        )]
+        fn _svexpa_f64(op: svint64_t) -> svfloat64_t;
+    }
+    unsafe { _svexpa_f64(op.as_signed()) }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_f32<const IMM3: i32>(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    static_assert_range!(IMM3, 0..=63);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ext.nxv4f32")]
+        fn _svext_f32(op1: svfloat32_t, op2: svfloat32_t, imm3: i32) -> svfloat32_t;
+    }
+    unsafe { _svext_f32(op1, op2, IMM3) }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_f64<const IMM3: i32>(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    static_assert_range!(IMM3, 0..=31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ext.nxv2f64")]
+        fn _svext_f64(op1: svfloat64_t, op2: svfloat64_t, imm3: i32) -> svfloat64_t;
+    }
+    unsafe { _svext_f64(op1, op2, IMM3) }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_s8<const IMM3: i32>(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    static_assert_range!(IMM3, 0..=255);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ext.nxv16i8")]
+        fn _svext_s8(op1: svint8_t, op2: svint8_t, imm3: i32) -> svint8_t;
+    }
+    unsafe { _svext_s8(op1, op2, IMM3) }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_s16<const IMM3: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert_range!(IMM3, 0..=127);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ext.nxv8i16")]
+        fn _svext_s16(op1: svint16_t, op2: svint16_t, imm3: i32) -> svint16_t;
+    }
+    unsafe { _svext_s16(op1, op2, IMM3) }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_s32<const IMM3: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert_range!(IMM3, 0..=63);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ext.nxv4i32")]
+        fn _svext_s32(op1: svint32_t, op2: svint32_t, imm3: i32) -> svint32_t;
+    }
+    unsafe { _svext_s32(op1, op2, IMM3) }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_s64<const IMM3: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert_range!(IMM3, 0..=31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ext.nxv2i64")]
+        fn _svext_s64(op1: svint64_t, op2: svint64_t, imm3: i32) -> svint64_t;
+    }
+    unsafe { _svext_s64(op1, op2, IMM3) }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_u8<const IMM3: i32>(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    static_assert_range!(IMM3, 0..=255);
+    unsafe { svext_s8::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_u16<const IMM3: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    static_assert_range!(IMM3, 0..=127);
+    unsafe { svext_s16::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_u32<const IMM3: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    static_assert_range!(IMM3, 0..=63);
+    unsafe { svext_s32::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Extract vector from pair of vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svext[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ext, IMM3 = 1))]
+pub fn svext_u64<const IMM3: i32>(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    static_assert_range!(IMM3, 0..=31);
+    unsafe { svext_s64::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Sign-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtb))]
+pub fn svextb_s16_m(inactive: svint16_t, pg: svbool_t, op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sxtb.nxv8i16")]
+        fn _svextb_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svextb_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Sign-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtb))]
+pub fn svextb_s16_x(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svextb_s16_m(op, pg, op)
+}
+#[doc = "Sign-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtb))]
+pub fn svextb_s16_z(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svextb_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Sign-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtb))]
+pub fn svextb_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sxtb.nxv4i32")]
+        fn _svextb_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svextb_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Sign-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtb))]
+pub fn svextb_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svextb_s32_m(op, pg, op)
+}
+#[doc = "Sign-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtb))]
+pub fn svextb_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svextb_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Sign-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxth))]
+pub fn svexth_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sxth.nxv4i32")]
+        fn _svexth_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svexth_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Sign-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxth))]
+pub fn svexth_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svexth_s32_m(op, pg, op)
+}
+#[doc = "Sign-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxth))]
+pub fn svexth_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svexth_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Sign-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtb))]
+pub fn svextb_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sxtb.nxv2i64")]
+        fn _svextb_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svextb_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Sign-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtb))]
+pub fn svextb_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svextb_s64_m(op, pg, op)
+}
+#[doc = "Sign-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtb))]
+pub fn svextb_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svextb_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Sign-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxth))]
+pub fn svexth_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sxth.nxv2i64")]
+        fn _svexth_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svexth_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Sign-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxth))]
+pub fn svexth_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svexth_s64_m(op, pg, op)
+}
+#[doc = "Sign-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxth))]
+pub fn svexth_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svexth_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Sign-extend the low 32 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextw[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtw))]
+pub fn svextw_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sxtw.nxv2i64")]
+        fn _svextw_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svextw_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Sign-extend the low 32 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextw[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtw))]
+pub fn svextw_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svextw_s64_m(op, pg, op)
+}
+#[doc = "Sign-extend the low 32 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextw[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sxtw))]
+pub fn svextw_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svextw_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Zero-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtb))]
+pub fn svextb_u16_m(inactive: svuint16_t, pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uxtb.nxv8i16")]
+        fn _svextb_u16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svextb_u16_m(inactive.as_signed(), pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Zero-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtb))]
+pub fn svextb_u16_x(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svextb_u16_m(op, pg, op)
+}
+#[doc = "Zero-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtb))]
+pub fn svextb_u16_z(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svextb_u16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Zero-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtb))]
+pub fn svextb_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uxtb.nxv4i32")]
+        fn _svextb_u32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svextb_u32_m(inactive.as_signed(), pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Zero-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtb))]
+pub fn svextb_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svextb_u32_m(op, pg, op)
+}
+#[doc = "Zero-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtb))]
+pub fn svextb_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svextb_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Zero-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxth))]
+pub fn svexth_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uxth.nxv4i32")]
+        fn _svexth_u32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svexth_u32_m(inactive.as_signed(), pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Zero-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxth))]
+pub fn svexth_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svexth_u32_m(op, pg, op)
+}
+#[doc = "Zero-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxth))]
+pub fn svexth_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svexth_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Zero-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtb))]
+pub fn svextb_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uxtb.nxv2i64")]
+        fn _svextb_u64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svextb_u64_m(inactive.as_signed(), pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Zero-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtb))]
+pub fn svextb_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svextb_u64_m(op, pg, op)
+}
+#[doc = "Zero-extend the low 8 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextb[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtb))]
+pub fn svextb_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svextb_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Zero-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxth))]
+pub fn svexth_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uxth.nxv2i64")]
+        fn _svexth_u64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svexth_u64_m(inactive.as_signed(), pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Zero-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxth))]
+pub fn svexth_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svexth_u64_m(op, pg, op)
+}
+#[doc = "Zero-extend the low 16 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svexth[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxth))]
+pub fn svexth_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svexth_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Zero-extend the low 32 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextw[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtw))]
+pub fn svextw_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uxtw.nxv2i64")]
+        fn _svextw_u64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svextw_u64_m(inactive.as_signed(), pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Zero-extend the low 32 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextw[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtw))]
+pub fn svextw_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svextw_u64_m(op, pg, op)
+}
+#[doc = "Zero-extend the low 32 bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svextw[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uxtw))]
+pub fn svextw_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svextw_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_f32<const IMM_INDEX: i32>(tuple: svfloat32x2_t) -> svfloat32_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_f64<const IMM_INDEX: i32>(tuple: svfloat64x2_t) -> svfloat64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_s8<const IMM_INDEX: i32>(tuple: svint8x2_t) -> svint8_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_s16<const IMM_INDEX: i32>(tuple: svint16x2_t) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_s32<const IMM_INDEX: i32>(tuple: svint32x2_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_s64<const IMM_INDEX: i32>(tuple: svint64x2_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_u8<const IMM_INDEX: i32>(tuple: svuint8x2_t) -> svuint8_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_u16<const IMM_INDEX: i32>(tuple: svuint16x2_t) -> svuint16_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_u32<const IMM_INDEX: i32>(tuple: svuint32x2_t) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget2[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget2_u64<const IMM_INDEX: i32>(tuple: svuint64x2_t) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_f32<const IMM_INDEX: i32>(tuple: svfloat32x3_t) -> svfloat32_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_f64<const IMM_INDEX: i32>(tuple: svfloat64x3_t) -> svfloat64_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_s8<const IMM_INDEX: i32>(tuple: svint8x3_t) -> svint8_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_s16<const IMM_INDEX: i32>(tuple: svint16x3_t) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_s32<const IMM_INDEX: i32>(tuple: svint32x3_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_s64<const IMM_INDEX: i32>(tuple: svint64x3_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_u8<const IMM_INDEX: i32>(tuple: svuint8x3_t) -> svuint8_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_u16<const IMM_INDEX: i32>(tuple: svuint16x3_t) -> svuint16_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_u32<const IMM_INDEX: i32>(tuple: svuint32x3_t) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget3[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget3_u64<const IMM_INDEX: i32>(tuple: svuint64x3_t) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_f32<const IMM_INDEX: i32>(tuple: svfloat32x4_t) -> svfloat32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_f64<const IMM_INDEX: i32>(tuple: svfloat64x4_t) -> svfloat64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_s8<const IMM_INDEX: i32>(tuple: svint8x4_t) -> svint8_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_s16<const IMM_INDEX: i32>(tuple: svint16x4_t) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_s32<const IMM_INDEX: i32>(tuple: svint32x4_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_s64<const IMM_INDEX: i32>(tuple: svint64x4_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_u8<const IMM_INDEX: i32>(tuple: svuint8x4_t) -> svuint8_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_u16<const IMM_INDEX: i32>(tuple: svuint16x4_t) -> svuint16_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_u32<const IMM_INDEX: i32>(tuple: svuint32x4_t) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Extract one vector from a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svget4[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svget4_u64<const IMM_INDEX: i32>(tuple: svuint64x4_t) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_get::<_, _, { IMM_INDEX }>(tuple) }
+}
+#[doc = "Create linear series"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svindex_s8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(index))]
+pub fn svindex_s8(base: i8, step: i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.index.nxv16i8")]
+        fn _svindex_s8(base: i8, step: i8) -> svint8_t;
+    }
+    unsafe { _svindex_s8(base, step) }
+}
+#[doc = "Create linear series"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svindex_s16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(index))]
+pub fn svindex_s16(base: i16, step: i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.index.nxv8i16")]
+        fn _svindex_s16(base: i16, step: i16) -> svint16_t;
+    }
+    unsafe { _svindex_s16(base, step) }
+}
+#[doc = "Create linear series"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svindex_s32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(index))]
+pub fn svindex_s32(base: i32, step: i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.index.nxv4i32")]
+        fn _svindex_s32(base: i32, step: i32) -> svint32_t;
+    }
+    unsafe { _svindex_s32(base, step) }
+}
+#[doc = "Create linear series"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svindex_s64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(index))]
+pub fn svindex_s64(base: i64, step: i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.index.nxv2i64")]
+        fn _svindex_s64(base: i64, step: i64) -> svint64_t;
+    }
+    unsafe { _svindex_s64(base, step) }
+}
+#[doc = "Create linear series"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svindex_u8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(index))]
+pub fn svindex_u8(base: u8, step: u8) -> svuint8_t {
+    unsafe { svindex_s8(base.as_signed(), step.as_signed()).as_unsigned() }
+}
+#[doc = "Create linear series"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svindex_u16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(index))]
+pub fn svindex_u16(base: u16, step: u16) -> svuint16_t {
+    unsafe { svindex_s16(base.as_signed(), step.as_signed()).as_unsigned() }
+}
+#[doc = "Create linear series"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svindex_u32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(index))]
+pub fn svindex_u32(base: u32, step: u32) -> svuint32_t {
+    unsafe { svindex_s32(base.as_signed(), step.as_signed()).as_unsigned() }
+}
+#[doc = "Create linear series"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svindex_u64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(index))]
+pub fn svindex_u64(base: u64, step: u64) -> svuint64_t {
+    unsafe { svindex_s64(base.as_signed(), step.as_signed()).as_unsigned() }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_f32(op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.insr.nxv4f32")]
+        fn _svinsr_n_f32(op1: svfloat32_t, op2: f32) -> svfloat32_t;
+    }
+    unsafe { _svinsr_n_f32(op1, op2) }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_f64(op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.insr.nxv2f64")]
+        fn _svinsr_n_f64(op1: svfloat64_t, op2: f64) -> svfloat64_t;
+    }
+    unsafe { _svinsr_n_f64(op1, op2) }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_s8(op1: svint8_t, op2: i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.insr.nxv16i8")]
+        fn _svinsr_n_s8(op1: svint8_t, op2: i8) -> svint8_t;
+    }
+    unsafe { _svinsr_n_s8(op1, op2) }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_s16(op1: svint16_t, op2: i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.insr.nxv8i16")]
+        fn _svinsr_n_s16(op1: svint16_t, op2: i16) -> svint16_t;
+    }
+    unsafe { _svinsr_n_s16(op1, op2) }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_s32(op1: svint32_t, op2: i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.insr.nxv4i32")]
+        fn _svinsr_n_s32(op1: svint32_t, op2: i32) -> svint32_t;
+    }
+    unsafe { _svinsr_n_s32(op1, op2) }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_s64(op1: svint64_t, op2: i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.insr.nxv2i64")]
+        fn _svinsr_n_s64(op1: svint64_t, op2: i64) -> svint64_t;
+    }
+    unsafe { _svinsr_n_s64(op1, op2) }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_u8(op1: svuint8_t, op2: u8) -> svuint8_t {
+    unsafe { svinsr_n_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_u16(op1: svuint16_t, op2: u16) -> svuint16_t {
+    unsafe { svinsr_n_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_u32(op1: svuint32_t, op2: u32) -> svuint32_t {
+    unsafe { svinsr_n_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Insert scalar in shifted vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svinsr[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(insr))]
+pub fn svinsr_n_u64(op1: svuint64_t, op2: u64) -> svuint64_t {
+    unsafe { svinsr_n_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_f32(pg: svbool_t, op: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lasta.nxv4f32")]
+        fn _svlasta_f32(pg: svbool4_t, op: svfloat32_t) -> f32;
+    }
+    unsafe { _svlasta_f32(pg.sve_into(), op) }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_f64(pg: svbool_t, op: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lasta.nxv2f64")]
+        fn _svlasta_f64(pg: svbool2_t, op: svfloat64_t) -> f64;
+    }
+    unsafe { _svlasta_f64(pg.sve_into(), op) }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_s8(pg: svbool_t, op: svint8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lasta.nxv16i8")]
+        fn _svlasta_s8(pg: svbool_t, op: svint8_t) -> i8;
+    }
+    unsafe { _svlasta_s8(pg, op) }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_s16(pg: svbool_t, op: svint16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lasta.nxv8i16")]
+        fn _svlasta_s16(pg: svbool8_t, op: svint16_t) -> i16;
+    }
+    unsafe { _svlasta_s16(pg.sve_into(), op) }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_s32(pg: svbool_t, op: svint32_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lasta.nxv4i32")]
+        fn _svlasta_s32(pg: svbool4_t, op: svint32_t) -> i32;
+    }
+    unsafe { _svlasta_s32(pg.sve_into(), op) }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_s64(pg: svbool_t, op: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lasta.nxv2i64")]
+        fn _svlasta_s64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svlasta_s64(pg.sve_into(), op) }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_u8(pg: svbool_t, op: svuint8_t) -> u8 {
+    unsafe { svlasta_s8(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_u16(pg: svbool_t, op: svuint16_t) -> u16 {
+    unsafe { svlasta_s16(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_u32(pg: svbool_t, op: svuint32_t) -> u32 {
+    unsafe { svlasta_s32(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Extract element after last"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlasta[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lasta))]
+pub fn svlasta_u64(pg: svbool_t, op: svuint64_t) -> u64 {
+    unsafe { svlasta_s64(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_f32(pg: svbool_t, op: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lastb.nxv4f32")]
+        fn _svlastb_f32(pg: svbool4_t, op: svfloat32_t) -> f32;
+    }
+    unsafe { _svlastb_f32(pg.sve_into(), op) }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_f64(pg: svbool_t, op: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lastb.nxv2f64")]
+        fn _svlastb_f64(pg: svbool2_t, op: svfloat64_t) -> f64;
+    }
+    unsafe { _svlastb_f64(pg.sve_into(), op) }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_s8(pg: svbool_t, op: svint8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lastb.nxv16i8")]
+        fn _svlastb_s8(pg: svbool_t, op: svint8_t) -> i8;
+    }
+    unsafe { _svlastb_s8(pg, op) }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_s16(pg: svbool_t, op: svint16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lastb.nxv8i16")]
+        fn _svlastb_s16(pg: svbool8_t, op: svint16_t) -> i16;
+    }
+    unsafe { _svlastb_s16(pg.sve_into(), op) }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_s32(pg: svbool_t, op: svint32_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lastb.nxv4i32")]
+        fn _svlastb_s32(pg: svbool4_t, op: svint32_t) -> i32;
+    }
+    unsafe { _svlastb_s32(pg.sve_into(), op) }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_s64(pg: svbool_t, op: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lastb.nxv2i64")]
+        fn _svlastb_s64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svlastb_s64(pg.sve_into(), op) }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_u8(pg: svbool_t, op: svuint8_t) -> u8 {
+    unsafe { svlastb_s8(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_u16(pg: svbool_t, op: svuint16_t) -> u16 {
+    unsafe { svlastb_s16(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_u32(pg: svbool_t, op: svuint32_t) -> u32 {
+    unsafe { svlastb_s32(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Extract last element"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlastb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lastb))]
+pub fn svlastb_u64(pg: svbool_t, op: svuint64_t) -> u64 {
+    unsafe { svlastb_s64(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_f32(pg: svbool_t, base: *const f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv4f32")]
+        fn _svld1_f32(pg: svbool4_t, base: *const f32) -> svfloat32_t;
+    }
+    _svld1_f32(pg.sve_into(), base)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_f64(pg: svbool_t, base: *const f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv2f64")]
+        fn _svld1_f64(pg: svbool2_t, base: *const f64) -> svfloat64_t;
+    }
+    _svld1_f64(pg.sve_into(), base)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1_s8(pg: svbool_t, base: *const i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv16i8")]
+        fn _svld1_s8(pg: svbool_t, base: *const i8) -> svint8_t;
+    }
+    _svld1_s8(pg, base)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1_s16(pg: svbool_t, base: *const i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv8i16")]
+        fn _svld1_s16(pg: svbool8_t, base: *const i16) -> svint16_t;
+    }
+    _svld1_s16(pg.sve_into(), base)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_s32(pg: svbool_t, base: *const i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv4i32")]
+        fn _svld1_s32(pg: svbool4_t, base: *const i32) -> svint32_t;
+    }
+    _svld1_s32(pg.sve_into(), base)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_s64(pg: svbool_t, base: *const i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv2i64")]
+        fn _svld1_s64(pg: svbool2_t, base: *const i64) -> svint64_t;
+    }
+    _svld1_s64(pg.sve_into(), base)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1_u8(pg: svbool_t, base: *const u8) -> svuint8_t {
+    svld1_s8(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1_u16(pg: svbool_t, base: *const u16) -> svuint16_t {
+    svld1_s16(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_u32(pg: svbool_t, base: *const u32) -> svuint32_t {
+    svld1_s32(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_u64(pg: svbool_t, base: *const u64) -> svuint64_t {
+    svld1_s64(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s32]index[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_s32index_f32(
+    pg: svbool_t,
+    base: *const f32,
+    indices: svint32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32"
+        )]
+        fn _svld1_gather_s32index_f32(
+            pg: svbool4_t,
+            base: *const f32,
+            indices: svint32_t,
+        ) -> svfloat32_t;
+    }
+    _svld1_gather_s32index_f32(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s32]index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_s32index_s32(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32"
+        )]
+        fn _svld1_gather_s32index_s32(
+            pg: svbool4_t,
+            base: *const i32,
+            indices: svint32_t,
+        ) -> svint32_t;
+    }
+    _svld1_gather_s32index_s32(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s32]index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_s32index_u32(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svint32_t,
+) -> svuint32_t {
+    svld1_gather_s32index_s32(pg, base.as_signed(), indices).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_s64index_f64(
+    pg: svbool_t,
+    base: *const f64,
+    indices: svint64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.index.nxv2f64"
+        )]
+        fn _svld1_gather_s64index_f64(
+            pg: svbool2_t,
+            base: *const f64,
+            indices: svint64_t,
+        ) -> svfloat64_t;
+    }
+    _svld1_gather_s64index_f64(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const i64,
+    indices: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.index.nxv2i64"
+        )]
+        fn _svld1_gather_s64index_s64(
+            pg: svbool2_t,
+            base: *const i64,
+            indices: svint64_t,
+        ) -> svint64_t;
+    }
+    _svld1_gather_s64index_s64(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const u64,
+    indices: svint64_t,
+) -> svuint64_t {
+    svld1_gather_s64index_s64(pg, base.as_signed(), indices).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u32]index[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32index_f32(
+    pg: svbool_t,
+    base: *const f32,
+    indices: svuint32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32"
+        )]
+        fn _svld1_gather_u32index_f32(
+            pg: svbool4_t,
+            base: *const f32,
+            indices: svint32_t,
+        ) -> svfloat32_t;
+    }
+    _svld1_gather_u32index_f32(pg.sve_into(), base, indices.as_signed())
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u32]index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32index_s32(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32"
+        )]
+        fn _svld1_gather_u32index_s32(
+            pg: svbool4_t,
+            base: *const i32,
+            indices: svint32_t,
+        ) -> svint32_t;
+    }
+    _svld1_gather_u32index_s32(pg.sve_into(), base, indices.as_signed())
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u32]index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32index_u32(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svuint32_t,
+) -> svuint32_t {
+    svld1_gather_u32index_s32(pg, base.as_signed(), indices).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64index_f64(
+    pg: svbool_t,
+    base: *const f64,
+    indices: svuint64_t,
+) -> svfloat64_t {
+    svld1_gather_s64index_f64(pg, base, indices.as_signed())
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const i64,
+    indices: svuint64_t,
+) -> svint64_t {
+    svld1_gather_s64index_s64(pg, base, indices.as_signed())
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const u64,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svld1_gather_s64index_s64(pg, base.as_signed(), indices.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s32]offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_s32offset_f32(
+    pg: svbool_t,
+    base: *const f32,
+    offsets: svint32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.nxv4f32"
+        )]
+        fn _svld1_gather_s32offset_f32(
+            pg: svbool4_t,
+            base: *const f32,
+            offsets: svint32_t,
+        ) -> svfloat32_t;
+    }
+    _svld1_gather_s32offset_f32(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.nxv4i32"
+        )]
+        fn _svld1_gather_s32offset_s32(
+            pg: svbool4_t,
+            base: *const i32,
+            offsets: svint32_t,
+        ) -> svint32_t;
+    }
+    _svld1_gather_s32offset_s32(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svint32_t,
+) -> svuint32_t {
+    svld1_gather_s32offset_s32(pg, base.as_signed(), offsets).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_s64offset_f64(
+    pg: svbool_t,
+    base: *const f64,
+    offsets: svint64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.nxv2f64"
+        )]
+        fn _svld1_gather_s64offset_f64(
+            pg: svbool2_t,
+            base: *const f64,
+            offsets: svint64_t,
+        ) -> svfloat64_t;
+    }
+    _svld1_gather_s64offset_f64(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i64,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.nxv2i64"
+        )]
+        fn _svld1_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i64,
+            offsets: svint64_t,
+        ) -> svint64_t;
+    }
+    _svld1_gather_s64offset_s64(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u64,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svld1_gather_s64offset_s64(pg, base.as_signed(), offsets).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u32]offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32offset_f32(
+    pg: svbool_t,
+    base: *const f32,
+    offsets: svuint32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.nxv4f32"
+        )]
+        fn _svld1_gather_u32offset_f32(
+            pg: svbool4_t,
+            base: *const f32,
+            offsets: svint32_t,
+        ) -> svfloat32_t;
+    }
+    _svld1_gather_u32offset_f32(pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.nxv4i32"
+        )]
+        fn _svld1_gather_u32offset_s32(
+            pg: svbool4_t,
+            base: *const i32,
+            offsets: svint32_t,
+        ) -> svint32_t;
+    }
+    _svld1_gather_u32offset_s32(pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    svld1_gather_u32offset_s32(pg, base.as_signed(), offsets).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64offset_f64(
+    pg: svbool_t,
+    base: *const f64,
+    offsets: svuint64_t,
+) -> svfloat64_t {
+    svld1_gather_s64offset_f64(pg, base, offsets.as_signed())
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i64,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svld1_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u64,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svld1_gather_s64offset_s64(pg, base.as_signed(), offsets.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u32base]_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32base_f32(pg: svbool_t, bases: svuint32_t) -> svfloat32_t {
+    svld1_gather_u32base_offset_f32(pg, bases, 0)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svld1_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svld1_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u64base]_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64base_f64(pg: svbool_t, bases: svuint64_t) -> svfloat64_t {
+    svld1_gather_u64base_offset_f64(pg, bases, 0)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svld1_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svld1_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u32base]_index_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32base_index_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svfloat32_t {
+    svld1_gather_u32base_offset_f32(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u32base]_index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svint32_t {
+    svld1_gather_u32base_offset_s32(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u32base]_index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svuint32_t {
+    svld1_gather_u32base_offset_u32(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u64base]_index_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64base_index_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svfloat64_t {
+    svld1_gather_u64base_offset_f64(pg, bases, index.unchecked_shl(3))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svld1_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(3))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svld1_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(3))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u32base]_offset_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32base_offset_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32"
+        )]
+        fn _svld1_gather_u32base_offset_f32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> svfloat32_t;
+    }
+    _svld1_gather_u32base_offset_f32(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32"
+        )]
+        fn _svld1_gather_u32base_offset_s32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> svint32_t;
+    }
+    _svld1_gather_u32base_offset_s32(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    svld1_gather_u32base_offset_s32(pg, bases, offset).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u64base]_offset_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64base_offset_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64"
+        )]
+        fn _svld1_gather_u64base_offset_f64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> svfloat64_t;
+    }
+    _svld1_gather_u64base_offset_f64(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64"
+        )]
+        fn _svld1_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> svint64_t;
+    }
+    _svld1_gather_u64base_offset_s64(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svld1_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_vnum_f32(pg: svbool_t, base: *const f32, vnum: i64) -> svfloat32_t {
+    svld1_f32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_vnum_f64(pg: svbool_t, base: *const f64, vnum: i64) -> svfloat64_t {
+    svld1_f64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1_vnum_s8(pg: svbool_t, base: *const i8, vnum: i64) -> svint8_t {
+    svld1_s8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1_vnum_s16(pg: svbool_t, base: *const i16, vnum: i64) -> svint16_t {
+    svld1_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_vnum_s32(pg: svbool_t, base: *const i32, vnum: i64) -> svint32_t {
+    svld1_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_vnum_s64(pg: svbool_t, base: *const i64, vnum: i64) -> svint64_t {
+    svld1_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1_vnum_u8(pg: svbool_t, base: *const u8, vnum: i64) -> svuint8_t {
+    svld1_u8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1_vnum_u16(pg: svbool_t, base: *const u16, vnum: i64) -> svuint16_t {
+    svld1_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1_vnum_u32(pg: svbool_t, base: *const u32, vnum: i64) -> svuint32_t {
+    svld1_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1d))]
+pub unsafe fn svld1_vnum_u64(pg: svbool_t, base: *const u64, vnum: i64) -> svuint64_t {
+    svld1_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1row))]
+pub unsafe fn svld1ro_f32(pg: svbool_t, base: *const f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1ro.nxv4f32")]
+        fn _svld1ro_f32(pg: svbool4_t, base: *const f32) -> svfloat32_t;
+    }
+    _svld1ro_f32(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rod))]
+pub unsafe fn svld1ro_f64(pg: svbool_t, base: *const f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1ro.nxv2f64")]
+        fn _svld1ro_f64(pg: svbool2_t, base: *const f64) -> svfloat64_t;
+    }
+    _svld1ro_f64(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rob))]
+pub unsafe fn svld1ro_s8(pg: svbool_t, base: *const i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1ro.nxv16i8")]
+        fn _svld1ro_s8(pg: svbool_t, base: *const i8) -> svint8_t;
+    }
+    _svld1ro_s8(pg, base)
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1roh))]
+pub unsafe fn svld1ro_s16(pg: svbool_t, base: *const i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1ro.nxv8i16")]
+        fn _svld1ro_s16(pg: svbool8_t, base: *const i16) -> svint16_t;
+    }
+    _svld1ro_s16(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1row))]
+pub unsafe fn svld1ro_s32(pg: svbool_t, base: *const i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1ro.nxv4i32")]
+        fn _svld1ro_s32(pg: svbool4_t, base: *const i32) -> svint32_t;
+    }
+    _svld1ro_s32(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rod))]
+pub unsafe fn svld1ro_s64(pg: svbool_t, base: *const i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1ro.nxv2i64")]
+        fn _svld1ro_s64(pg: svbool2_t, base: *const i64) -> svint64_t;
+    }
+    _svld1ro_s64(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rob))]
+pub unsafe fn svld1ro_u8(pg: svbool_t, base: *const u8) -> svuint8_t {
+    svld1ro_s8(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1roh))]
+pub unsafe fn svld1ro_u16(pg: svbool_t, base: *const u16) -> svuint16_t {
+    svld1ro_s16(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1row))]
+pub unsafe fn svld1ro_u32(pg: svbool_t, base: *const u32) -> svuint32_t {
+    svld1ro_s32(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load and replicate 256 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ro[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rod))]
+pub unsafe fn svld1ro_u64(pg: svbool_t, base: *const u64) -> svuint64_t {
+    svld1ro_s64(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqw))]
+pub unsafe fn svld1rq_f32(pg: svbool_t, base: *const f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1rq.nxv4f32")]
+        fn _svld1rq_f32(pg: svbool4_t, base: *const f32) -> svfloat32_t;
+    }
+    _svld1rq_f32(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqd))]
+pub unsafe fn svld1rq_f64(pg: svbool_t, base: *const f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1rq.nxv2f64")]
+        fn _svld1rq_f64(pg: svbool2_t, base: *const f64) -> svfloat64_t;
+    }
+    _svld1rq_f64(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqb))]
+pub unsafe fn svld1rq_s8(pg: svbool_t, base: *const i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1rq.nxv16i8")]
+        fn _svld1rq_s8(pg: svbool_t, base: *const i8) -> svint8_t;
+    }
+    _svld1rq_s8(pg, base)
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqh))]
+pub unsafe fn svld1rq_s16(pg: svbool_t, base: *const i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1rq.nxv8i16")]
+        fn _svld1rq_s16(pg: svbool8_t, base: *const i16) -> svint16_t;
+    }
+    _svld1rq_s16(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqw))]
+pub unsafe fn svld1rq_s32(pg: svbool_t, base: *const i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1rq.nxv4i32")]
+        fn _svld1rq_s32(pg: svbool4_t, base: *const i32) -> svint32_t;
+    }
+    _svld1rq_s32(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqd))]
+pub unsafe fn svld1rq_s64(pg: svbool_t, base: *const i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1rq.nxv2i64")]
+        fn _svld1rq_s64(pg: svbool2_t, base: *const i64) -> svint64_t;
+    }
+    _svld1rq_s64(pg.sve_into(), base)
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqb))]
+pub unsafe fn svld1rq_u8(pg: svbool_t, base: *const u8) -> svuint8_t {
+    svld1rq_s8(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqh))]
+pub unsafe fn svld1rq_u16(pg: svbool_t, base: *const u16) -> svuint16_t {
+    svld1rq_s16(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqw))]
+pub unsafe fn svld1rq_u32(pg: svbool_t, base: *const u32) -> svuint32_t {
+    svld1rq_s32(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load and replicate 128 bits of data"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1rq[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1rqd))]
+pub unsafe fn svld1rq_u64(pg: svbool_t, base: *const u64) -> svuint64_t {
+    svld1rq_s64(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather_[s32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8"
+        )]
+        fn _svld1sb_gather_s32offset_s32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sb_gather_s32offset_s32(pg.sve_into(), base, offsets))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[s32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16"
+        )]
+        fn _svld1sh_gather_s32offset_s32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_gather_s32offset_s32(pg.sve_into(), base, offsets))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather_[s32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint32_t,
+) -> svuint32_t {
+    svld1sb_gather_s32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[s32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint32_t,
+) -> svuint32_t {
+    svld1sh_gather_s32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.nxv2i8"
+        )]
+        fn _svld1sb_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i8,
+            offsets: svint64_t,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sb_gather_s64offset_s64(pg.sve_into(), base, offsets))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.nxv2i16"
+        )]
+        fn _svld1sh_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i16,
+            offsets: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_gather_s64offset_s64(pg.sve_into(), base, offsets))
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.nxv2i32"
+        )]
+        fn _svld1sw_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i32,
+            offsets: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sw_gather_s64offset_s64(pg.sve_into(), base, offsets))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svld1sb_gather_s64offset_s64(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svld1sh_gather_s64offset_s64(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svld1sw_gather_s64offset_s64(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8"
+        )]
+        fn _svld1sb_gather_u32offset_s32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sb_gather_u32offset_s32(
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16"
+        )]
+        fn _svld1sh_gather_u32offset_s32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_gather_u32offset_s32(
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    svld1sb_gather_u32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    svld1sh_gather_u32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svld1sb_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svld1sh_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svld1sw_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svld1sb_gather_s64offset_s64(pg, base, offsets.as_signed()).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svld1sh_gather_s64offset_s64(pg, base, offsets.as_signed()).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svld1sw_gather_s64offset_s64(pg, base, offsets.as_signed()).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32"
+        )]
+        fn _svld1sb_gather_u32base_offset_s32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sb_gather_u32base_offset_s32(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32"
+        )]
+        fn _svld1sh_gather_u32base_offset_s32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_gather_u32base_offset_s32(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    svld1sb_gather_u32base_offset_s32(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    svld1sh_gather_u32base_offset_s32(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64"
+        )]
+        fn _svld1sb_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sb_gather_u64base_offset_s64(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64"
+        )]
+        fn _svld1sh_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_gather_u64base_offset_s64(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64"
+        )]
+        fn _svld1sw_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sw_gather_u64base_offset_s64(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svld1sb_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svld1sh_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svld1sw_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svld1sb_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svld1sh_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svld1sb_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svld1sh_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svld1sb_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svld1sh_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svld1sw_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svld1sb_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svld1sh_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svld1sw_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_s16(pg: svbool_t, base: *const i8) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv8i8")]
+        fn _svld1sb_s16(pg: svbool8_t, base: *const i8) -> nxv8i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sb_s16(pg.sve_into(), base))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_s32(pg: svbool_t, base: *const i8) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv4i8")]
+        fn _svld1sb_s32(pg: svbool4_t, base: *const i8) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sb_s32(pg.sve_into(), base))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_s32(pg: svbool_t, base: *const i16) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv4i16")]
+        fn _svld1sh_s32(pg: svbool4_t, base: *const i16) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_s32(pg.sve_into(), base))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_s64(pg: svbool_t, base: *const i8) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv2i8")]
+        fn _svld1sb_s64(pg: svbool2_t, base: *const i8) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sb_s64(pg.sve_into(), base))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_s64(pg: svbool_t, base: *const i16) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv2i16")]
+        fn _svld1sh_s64(pg: svbool2_t, base: *const i16) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_s64(pg.sve_into(), base))
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_s64(pg: svbool_t, base: *const i32) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv2i32")]
+        fn _svld1sw_s64(pg: svbool2_t, base: *const i32) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sw_s64(pg.sve_into(), base))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_u16(pg: svbool_t, base: *const i8) -> svuint16_t {
+    svld1sb_s16(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_u32(pg: svbool_t, base: *const i8) -> svuint32_t {
+    svld1sb_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_u32(pg: svbool_t, base: *const i16) -> svuint32_t {
+    svld1sh_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_u64(pg: svbool_t, base: *const i8) -> svuint64_t {
+    svld1sb_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_u64(pg: svbool_t, base: *const i16) -> svuint64_t {
+    svld1sh_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_u64(pg: svbool_t, base: *const i32) -> svuint64_t {
+    svld1sw_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_vnum_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_vnum_s16(pg: svbool_t, base: *const i8, vnum: i64) -> svint16_t {
+    svld1sb_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_vnum_s32(pg: svbool_t, base: *const i8, vnum: i64) -> svint32_t {
+    svld1sb_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_vnum_s32(pg: svbool_t, base: *const i16, vnum: i64) -> svint32_t {
+    svld1sh_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_vnum_s64(pg: svbool_t, base: *const i8, vnum: i64) -> svint64_t {
+    svld1sb_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_vnum_s64(pg: svbool_t, base: *const i16, vnum: i64) -> svint64_t {
+    svld1sh_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_vnum_s64(pg: svbool_t, base: *const i32, vnum: i64) -> svint64_t {
+    svld1sw_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_vnum_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_vnum_u16(pg: svbool_t, base: *const i8, vnum: i64) -> svuint16_t {
+    svld1sb_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_vnum_u32(pg: svbool_t, base: *const i8, vnum: i64) -> svuint32_t {
+    svld1sb_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_vnum_u32(pg: svbool_t, base: *const i16, vnum: i64) -> svuint32_t {
+    svld1sh_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sb_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sb))]
+pub unsafe fn svld1sb_vnum_u64(pg: svbool_t, base: *const i8, vnum: i64) -> svuint64_t {
+    svld1sb_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_vnum_u64(pg: svbool_t, base: *const i16, vnum: i64) -> svuint64_t {
+    svld1sh_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_vnum_u64(pg: svbool_t, base: *const i32, vnum: i64) -> svuint64_t {
+    svld1sw_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[s32]index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_s32index_s32(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16"
+        )]
+        fn _svld1sh_gather_s32index_s32(
+            pg: svbool4_t,
+            base: *const i16,
+            indices: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_gather_s32index_s32(pg.sve_into(), base, indices))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[s32]index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_s32index_u32(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint32_t,
+) -> svuint32_t {
+    svld1sh_gather_s32index_s32(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.index.nxv2i16"
+        )]
+        fn _svld1sh_gather_s64index_s64(
+            pg: svbool2_t,
+            base: *const i16,
+            indices: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_gather_s64index_s64(pg.sve_into(), base, indices))
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.index.nxv2i32"
+        )]
+        fn _svld1sw_gather_s64index_s64(
+            pg: svbool2_t,
+            base: *const i32,
+            indices: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sw_gather_s64index_s64(pg.sve_into(), base, indices))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint64_t,
+) -> svuint64_t {
+    svld1sh_gather_s64index_s64(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svint64_t,
+) -> svuint64_t {
+    svld1sw_gather_s64index_s64(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[u32]index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32index_s32(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16"
+        )]
+        fn _svld1sh_gather_u32index_s32(
+            pg: svbool4_t,
+            base: *const i16,
+            indices: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svld1sh_gather_u32index_s32(
+        pg.sve_into(),
+        base,
+        indices.as_signed(),
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[u32]index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32index_u32(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint32_t,
+) -> svuint32_t {
+    svld1sh_gather_u32index_s32(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint64_t,
+) -> svint64_t {
+    svld1sh_gather_s64index_s64(pg, base, indices.as_signed())
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svuint64_t,
+) -> svint64_t {
+    svld1sw_gather_s64index_s64(pg, base, indices.as_signed())
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svld1sh_gather_s64index_s64(pg, base, indices.as_signed()).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svld1sw_gather_s64index_s64(pg, base, indices.as_signed()).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u32base]_index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svint32_t {
+    svld1sh_gather_u32base_offset_s32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u32base]_index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svuint32_t {
+    svld1sh_gather_u32base_offset_u32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svld1sh_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svld1sw_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load 16-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sh_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sh))]
+pub unsafe fn svld1sh_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svld1sh_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and sign-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1sw_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1sw))]
+pub unsafe fn svld1sw_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svld1sw_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather_[s32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint32_t,
+) -> svint32_t {
+    svld1ub_gather_s32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[s32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint32_t,
+) -> svint32_t {
+    svld1uh_gather_s32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather_[s32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.nxv4i8"
+        )]
+        fn _svld1ub_gather_s32offset_u32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svld1ub_gather_s32offset_u32(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[s32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.nxv4i16"
+        )]
+        fn _svld1uh_gather_s32offset_u32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svld1uh_gather_s32offset_u32(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint64_t,
+) -> svint64_t {
+    svld1ub_gather_s64offset_u64(pg, base, offsets).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint64_t,
+) -> svint64_t {
+    svld1uh_gather_s64offset_u64(pg, base, offsets).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svint64_t,
+) -> svint64_t {
+    svld1uw_gather_s64offset_u64(pg, base, offsets).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.nxv2i8"
+        )]
+        fn _svld1ub_gather_s64offset_u64(
+            pg: svbool2_t,
+            base: *const i8,
+            offsets: svint64_t,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u8, _>(
+        _svld1ub_gather_s64offset_u64(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.nxv2i16"
+        )]
+        fn _svld1uh_gather_s64offset_u64(
+            pg: svbool2_t,
+            base: *const i16,
+            offsets: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svld1uh_gather_s64offset_u64(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.nxv2i32"
+        )]
+        fn _svld1uw_gather_s64offset_u64(
+            pg: svbool2_t,
+            base: *const i32,
+            offsets: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svld1uw_gather_s64offset_u64(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint32_t,
+) -> svint32_t {
+    svld1ub_gather_u32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint32_t,
+) -> svint32_t {
+    svld1uh_gather_u32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.nxv4i8"
+        )]
+        fn _svld1ub_gather_u32offset_u32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svld1ub_gather_u32offset_u32(pg.sve_into(), base.as_signed(), offsets.as_signed())
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.nxv4i16"
+        )]
+        fn _svld1uh_gather_u32offset_u32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svld1uh_gather_u32offset_u32(pg.sve_into(), base.as_signed(), offsets.as_signed())
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svld1ub_gather_s64offset_u64(pg, base, offsets.as_signed()).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svld1uh_gather_s64offset_u64(pg, base, offsets.as_signed()).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svld1uw_gather_s64offset_u64(pg, base, offsets.as_signed()).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svld1ub_gather_s64offset_u64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svld1uh_gather_s64offset_u64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svld1uw_gather_s64offset_u64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    svld1ub_gather_u32base_offset_u32(pg, bases, offset).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    svld1uh_gather_u32base_offset_u32(pg, bases, offset).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32"
+        )]
+        fn _svld1ub_gather_u32base_offset_u32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svld1ub_gather_u32base_offset_u32(pg.sve_into(), bases.as_signed(), offset).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32"
+        )]
+        fn _svld1uh_gather_u32base_offset_u32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svld1uh_gather_u32base_offset_u32(pg.sve_into(), bases.as_signed(), offset).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    svld1ub_gather_u64base_offset_u64(pg, bases, offset).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    svld1uh_gather_u64base_offset_u64(pg, bases, offset).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    svld1uw_gather_u64base_offset_u64(pg, bases, offset).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64"
+        )]
+        fn _svld1ub_gather_u64base_offset_u64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u8, _>(
+        _svld1ub_gather_u64base_offset_u64(pg.sve_into(), bases.as_signed(), offset).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64"
+        )]
+        fn _svld1uh_gather_u64base_offset_u64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svld1uh_gather_u64base_offset_u64(pg.sve_into(), bases.as_signed(), offset).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64"
+        )]
+        fn _svld1uw_gather_u64base_offset_u64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svld1uw_gather_u64base_offset_u64(pg.sve_into(), bases.as_signed(), offset).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svld1ub_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svld1uh_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svld1ub_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svld1uh_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svld1ub_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svld1uh_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svld1uw_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svld1ub_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svld1uh_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svld1uw_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_s16(pg: svbool_t, base: *const u8) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv8i8")]
+        fn _svld1ub_s16(pg: svbool8_t, base: *const i8) -> nxv8i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv8u8, _>(
+        _svld1ub_s16(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_s32(pg: svbool_t, base: *const u8) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv4i8")]
+        fn _svld1ub_s32(pg: svbool4_t, base: *const i8) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svld1ub_s32(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_s32(pg: svbool_t, base: *const u16) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv4i16")]
+        fn _svld1uh_s32(pg: svbool4_t, base: *const i16) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svld1uh_s32(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_s64(pg: svbool_t, base: *const u8) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv2i8")]
+        fn _svld1ub_s64(pg: svbool2_t, base: *const i8) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u8, _>(
+        _svld1ub_s64(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_s64(pg: svbool_t, base: *const u16) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv2i16")]
+        fn _svld1uh_s64(pg: svbool2_t, base: *const i16) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svld1uh_s64(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_s64(pg: svbool_t, base: *const u32) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ld1.nxv2i32")]
+        fn _svld1uw_s64(pg: svbool2_t, base: *const i32) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svld1uw_s64(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_u16(pg: svbool_t, base: *const u8) -> svuint16_t {
+    svld1ub_s16(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_u32(pg: svbool_t, base: *const u8) -> svuint32_t {
+    svld1ub_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_u32(pg: svbool_t, base: *const u16) -> svuint32_t {
+    svld1uh_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_u64(pg: svbool_t, base: *const u8) -> svuint64_t {
+    svld1ub_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_u64(pg: svbool_t, base: *const u16) -> svuint64_t {
+    svld1uh_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_u64(pg: svbool_t, base: *const u32) -> svuint64_t {
+    svld1uw_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_vnum_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_vnum_s16(pg: svbool_t, base: *const u8, vnum: i64) -> svint16_t {
+    svld1ub_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_vnum_s32(pg: svbool_t, base: *const u8, vnum: i64) -> svint32_t {
+    svld1ub_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_vnum_s32(pg: svbool_t, base: *const u16, vnum: i64) -> svint32_t {
+    svld1uh_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_vnum_s64(pg: svbool_t, base: *const u8, vnum: i64) -> svint64_t {
+    svld1ub_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_vnum_s64(pg: svbool_t, base: *const u16, vnum: i64) -> svint64_t {
+    svld1uh_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_vnum_s64(pg: svbool_t, base: *const u32, vnum: i64) -> svint64_t {
+    svld1uw_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_vnum_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_vnum_u16(pg: svbool_t, base: *const u8, vnum: i64) -> svuint16_t {
+    svld1ub_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_vnum_u32(pg: svbool_t, base: *const u8, vnum: i64) -> svuint32_t {
+    svld1ub_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_vnum_u32(pg: svbool_t, base: *const u16, vnum: i64) -> svuint32_t {
+    svld1uh_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1ub_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1b))]
+pub unsafe fn svld1ub_vnum_u64(pg: svbool_t, base: *const u8, vnum: i64) -> svuint64_t {
+    svld1ub_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_vnum_u64(pg: svbool_t, base: *const u16, vnum: i64) -> svuint64_t {
+    svld1uh_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_vnum_u64(pg: svbool_t, base: *const u32, vnum: i64) -> svuint64_t {
+    svld1uw_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[s32]index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_s32index_s32(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint32_t,
+) -> svint32_t {
+    svld1uh_gather_s32index_u32(pg, base, indices).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[s32]index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_s32index_u32(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16"
+        )]
+        fn _svld1uh_gather_s32index_u32(
+            pg: svbool4_t,
+            base: *const i16,
+            indices: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svld1uh_gather_s32index_u32(pg.sve_into(), base.as_signed(), indices).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint64_t,
+) -> svint64_t {
+    svld1uh_gather_s64index_u64(pg, base, indices).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svint64_t,
+) -> svint64_t {
+    svld1uw_gather_s64index_u64(pg, base, indices).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.index.nxv2i16"
+        )]
+        fn _svld1uh_gather_s64index_u64(
+            pg: svbool2_t,
+            base: *const i16,
+            indices: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svld1uh_gather_s64index_u64(pg.sve_into(), base.as_signed(), indices).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.index.nxv2i32"
+        )]
+        fn _svld1uw_gather_s64index_u64(
+            pg: svbool2_t,
+            base: *const i32,
+            indices: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svld1uw_gather_s64index_u64(pg.sve_into(), base.as_signed(), indices).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[u32]index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32index_s32(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint32_t,
+) -> svint32_t {
+    svld1uh_gather_u32index_u32(pg, base, indices).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[u32]index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32index_u32(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16"
+        )]
+        fn _svld1uh_gather_u32index_u32(
+            pg: svbool4_t,
+            base: *const i16,
+            indices: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svld1uh_gather_u32index_u32(pg.sve_into(), base.as_signed(), indices.as_signed())
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint64_t,
+) -> svint64_t {
+    svld1uh_gather_s64index_u64(pg, base, indices.as_signed()).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svuint64_t,
+) -> svint64_t {
+    svld1uw_gather_s64index_u64(pg, base, indices.as_signed()).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svld1uh_gather_s64index_u64(pg, base, indices.as_signed())
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svld1uw_gather_s64index_u64(pg, base, indices.as_signed())
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u32base]_index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svint32_t {
+    svld1uh_gather_u32base_offset_s32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u32base]_index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svuint32_t {
+    svld1uh_gather_u32base_offset_u32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svld1uh_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svld1uw_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load 16-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uh_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1h))]
+pub unsafe fn svld1uh_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svld1uh_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and zero-extend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld1uw_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld1w))]
+pub unsafe fn svld1uw_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svld1uw_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2w))]
+pub unsafe fn svld2_f32(pg: svbool_t, base: *const f32) -> svfloat32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld2.sret.nxv4f32"
+        )]
+        fn _svld2_f32(pg: svbool4_t, base: *const f32) -> svfloat32x2_t;
+    }
+    _svld2_f32(pg.sve_into(), base)
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2d))]
+pub unsafe fn svld2_f64(pg: svbool_t, base: *const f64) -> svfloat64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld2.sret.nxv2f64"
+        )]
+        fn _svld2_f64(pg: svbool2_t, base: *const f64) -> svfloat64x2_t;
+    }
+    _svld2_f64(pg.sve_into(), base)
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2b))]
+pub unsafe fn svld2_s8(pg: svbool_t, base: *const i8) -> svint8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld2.sret.nxv16i8"
+        )]
+        fn _svld2_s8(pg: svbool_t, base: *const i8) -> svint8x2_t;
+    }
+    _svld2_s8(pg, base)
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2h))]
+pub unsafe fn svld2_s16(pg: svbool_t, base: *const i16) -> svint16x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld2.sret.nxv8i16"
+        )]
+        fn _svld2_s16(pg: svbool8_t, base: *const i16) -> svint16x2_t;
+    }
+    _svld2_s16(pg.sve_into(), base)
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2w))]
+pub unsafe fn svld2_s32(pg: svbool_t, base: *const i32) -> svint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld2.sret.nxv4i32"
+        )]
+        fn _svld2_s32(pg: svbool4_t, base: *const i32) -> svint32x2_t;
+    }
+    _svld2_s32(pg.sve_into(), base)
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2d))]
+pub unsafe fn svld2_s64(pg: svbool_t, base: *const i64) -> svint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld2.sret.nxv2i64"
+        )]
+        fn _svld2_s64(pg: svbool2_t, base: *const i64) -> svint64x2_t;
+    }
+    _svld2_s64(pg.sve_into(), base)
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2b))]
+pub unsafe fn svld2_u8(pg: svbool_t, base: *const u8) -> svuint8x2_t {
+    svld2_s8(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2h))]
+pub unsafe fn svld2_u16(pg: svbool_t, base: *const u16) -> svuint16x2_t {
+    svld2_s16(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2w))]
+pub unsafe fn svld2_u32(pg: svbool_t, base: *const u32) -> svuint32x2_t {
+    svld2_s32(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2d))]
+pub unsafe fn svld2_u64(pg: svbool_t, base: *const u64) -> svuint64x2_t {
+    svld2_s64(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2w))]
+pub unsafe fn svld2_vnum_f32(pg: svbool_t, base: *const f32, vnum: i64) -> svfloat32x2_t {
+    svld2_f32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2d))]
+pub unsafe fn svld2_vnum_f64(pg: svbool_t, base: *const f64, vnum: i64) -> svfloat64x2_t {
+    svld2_f64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2b))]
+pub unsafe fn svld2_vnum_s8(pg: svbool_t, base: *const i8, vnum: i64) -> svint8x2_t {
+    svld2_s8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2h))]
+pub unsafe fn svld2_vnum_s16(pg: svbool_t, base: *const i16, vnum: i64) -> svint16x2_t {
+    svld2_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2w))]
+pub unsafe fn svld2_vnum_s32(pg: svbool_t, base: *const i32, vnum: i64) -> svint32x2_t {
+    svld2_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2d))]
+pub unsafe fn svld2_vnum_s64(pg: svbool_t, base: *const i64, vnum: i64) -> svint64x2_t {
+    svld2_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2b))]
+pub unsafe fn svld2_vnum_u8(pg: svbool_t, base: *const u8, vnum: i64) -> svuint8x2_t {
+    svld2_u8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2h))]
+pub unsafe fn svld2_vnum_u16(pg: svbool_t, base: *const u16, vnum: i64) -> svuint16x2_t {
+    svld2_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2w))]
+pub unsafe fn svld2_vnum_u32(pg: svbool_t, base: *const u32, vnum: i64) -> svuint32x2_t {
+    svld2_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load two-element tuples into two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld2_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld2d))]
+pub unsafe fn svld2_vnum_u64(pg: svbool_t, base: *const u64, vnum: i64) -> svuint64x2_t {
+    svld2_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3w))]
+pub unsafe fn svld3_f32(pg: svbool_t, base: *const f32) -> svfloat32x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld3.sret.nxv4f32"
+        )]
+        fn _svld3_f32(pg: svbool4_t, base: *const f32) -> svfloat32x3_t;
+    }
+    _svld3_f32(pg.sve_into(), base)
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3d))]
+pub unsafe fn svld3_f64(pg: svbool_t, base: *const f64) -> svfloat64x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld3.sret.nxv2f64"
+        )]
+        fn _svld3_f64(pg: svbool2_t, base: *const f64) -> svfloat64x3_t;
+    }
+    _svld3_f64(pg.sve_into(), base)
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3b))]
+pub unsafe fn svld3_s8(pg: svbool_t, base: *const i8) -> svint8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld3.sret.nxv16i8"
+        )]
+        fn _svld3_s8(pg: svbool_t, base: *const i8) -> svint8x3_t;
+    }
+    _svld3_s8(pg, base)
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3h))]
+pub unsafe fn svld3_s16(pg: svbool_t, base: *const i16) -> svint16x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld3.sret.nxv8i16"
+        )]
+        fn _svld3_s16(pg: svbool8_t, base: *const i16) -> svint16x3_t;
+    }
+    _svld3_s16(pg.sve_into(), base)
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3w))]
+pub unsafe fn svld3_s32(pg: svbool_t, base: *const i32) -> svint32x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld3.sret.nxv4i32"
+        )]
+        fn _svld3_s32(pg: svbool4_t, base: *const i32) -> svint32x3_t;
+    }
+    _svld3_s32(pg.sve_into(), base)
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3d))]
+pub unsafe fn svld3_s64(pg: svbool_t, base: *const i64) -> svint64x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld3.sret.nxv2i64"
+        )]
+        fn _svld3_s64(pg: svbool2_t, base: *const i64) -> svint64x3_t;
+    }
+    _svld3_s64(pg.sve_into(), base)
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3b))]
+pub unsafe fn svld3_u8(pg: svbool_t, base: *const u8) -> svuint8x3_t {
+    svld3_s8(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3h))]
+pub unsafe fn svld3_u16(pg: svbool_t, base: *const u16) -> svuint16x3_t {
+    svld3_s16(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3w))]
+pub unsafe fn svld3_u32(pg: svbool_t, base: *const u32) -> svuint32x3_t {
+    svld3_s32(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3d))]
+pub unsafe fn svld3_u64(pg: svbool_t, base: *const u64) -> svuint64x3_t {
+    svld3_s64(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3w))]
+pub unsafe fn svld3_vnum_f32(pg: svbool_t, base: *const f32, vnum: i64) -> svfloat32x3_t {
+    svld3_f32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3d))]
+pub unsafe fn svld3_vnum_f64(pg: svbool_t, base: *const f64, vnum: i64) -> svfloat64x3_t {
+    svld3_f64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3b))]
+pub unsafe fn svld3_vnum_s8(pg: svbool_t, base: *const i8, vnum: i64) -> svint8x3_t {
+    svld3_s8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3h))]
+pub unsafe fn svld3_vnum_s16(pg: svbool_t, base: *const i16, vnum: i64) -> svint16x3_t {
+    svld3_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3w))]
+pub unsafe fn svld3_vnum_s32(pg: svbool_t, base: *const i32, vnum: i64) -> svint32x3_t {
+    svld3_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3d))]
+pub unsafe fn svld3_vnum_s64(pg: svbool_t, base: *const i64, vnum: i64) -> svint64x3_t {
+    svld3_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3b))]
+pub unsafe fn svld3_vnum_u8(pg: svbool_t, base: *const u8, vnum: i64) -> svuint8x3_t {
+    svld3_u8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3h))]
+pub unsafe fn svld3_vnum_u16(pg: svbool_t, base: *const u16, vnum: i64) -> svuint16x3_t {
+    svld3_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3w))]
+pub unsafe fn svld3_vnum_u32(pg: svbool_t, base: *const u32, vnum: i64) -> svuint32x3_t {
+    svld3_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load three-element tuples into three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld3_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld3d))]
+pub unsafe fn svld3_vnum_u64(pg: svbool_t, base: *const u64, vnum: i64) -> svuint64x3_t {
+    svld3_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4w))]
+pub unsafe fn svld4_f32(pg: svbool_t, base: *const f32) -> svfloat32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld4.sret.nxv4f32"
+        )]
+        fn _svld4_f32(pg: svbool4_t, base: *const f32) -> svfloat32x4_t;
+    }
+    _svld4_f32(pg.sve_into(), base)
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4d))]
+pub unsafe fn svld4_f64(pg: svbool_t, base: *const f64) -> svfloat64x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld4.sret.nxv2f64"
+        )]
+        fn _svld4_f64(pg: svbool2_t, base: *const f64) -> svfloat64x4_t;
+    }
+    _svld4_f64(pg.sve_into(), base)
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4b))]
+pub unsafe fn svld4_s8(pg: svbool_t, base: *const i8) -> svint8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld4.sret.nxv16i8"
+        )]
+        fn _svld4_s8(pg: svbool_t, base: *const i8) -> svint8x4_t;
+    }
+    _svld4_s8(pg, base)
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4h))]
+pub unsafe fn svld4_s16(pg: svbool_t, base: *const i16) -> svint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld4.sret.nxv8i16"
+        )]
+        fn _svld4_s16(pg: svbool8_t, base: *const i16) -> svint16x4_t;
+    }
+    _svld4_s16(pg.sve_into(), base)
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4w))]
+pub unsafe fn svld4_s32(pg: svbool_t, base: *const i32) -> svint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld4.sret.nxv4i32"
+        )]
+        fn _svld4_s32(pg: svbool4_t, base: *const i32) -> svint32x4_t;
+    }
+    _svld4_s32(pg.sve_into(), base)
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4d))]
+pub unsafe fn svld4_s64(pg: svbool_t, base: *const i64) -> svint64x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ld4.sret.nxv2i64"
+        )]
+        fn _svld4_s64(pg: svbool2_t, base: *const i64) -> svint64x4_t;
+    }
+    _svld4_s64(pg.sve_into(), base)
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4b))]
+pub unsafe fn svld4_u8(pg: svbool_t, base: *const u8) -> svuint8x4_t {
+    svld4_s8(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4h))]
+pub unsafe fn svld4_u16(pg: svbool_t, base: *const u16) -> svuint16x4_t {
+    svld4_s16(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4w))]
+pub unsafe fn svld4_u32(pg: svbool_t, base: *const u32) -> svuint32x4_t {
+    svld4_s32(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4d))]
+pub unsafe fn svld4_u64(pg: svbool_t, base: *const u64) -> svuint64x4_t {
+    svld4_s64(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4w))]
+pub unsafe fn svld4_vnum_f32(pg: svbool_t, base: *const f32, vnum: i64) -> svfloat32x4_t {
+    svld4_f32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4d))]
+pub unsafe fn svld4_vnum_f64(pg: svbool_t, base: *const f64, vnum: i64) -> svfloat64x4_t {
+    svld4_f64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4b))]
+pub unsafe fn svld4_vnum_s8(pg: svbool_t, base: *const i8, vnum: i64) -> svint8x4_t {
+    svld4_s8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4h))]
+pub unsafe fn svld4_vnum_s16(pg: svbool_t, base: *const i16, vnum: i64) -> svint16x4_t {
+    svld4_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4w))]
+pub unsafe fn svld4_vnum_s32(pg: svbool_t, base: *const i32, vnum: i64) -> svint32x4_t {
+    svld4_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4d))]
+pub unsafe fn svld4_vnum_s64(pg: svbool_t, base: *const i64, vnum: i64) -> svint64x4_t {
+    svld4_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4b))]
+pub unsafe fn svld4_vnum_u8(pg: svbool_t, base: *const u8, vnum: i64) -> svuint8x4_t {
+    svld4_u8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4h))]
+pub unsafe fn svld4_vnum_u16(pg: svbool_t, base: *const u16, vnum: i64) -> svuint16x4_t {
+    svld4_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4w))]
+pub unsafe fn svld4_vnum_u32(pg: svbool_t, base: *const u32, vnum: i64) -> svuint32x4_t {
+    svld4_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load four-element tuples into four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svld4_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ld4d))]
+pub unsafe fn svld4_vnum_u64(pg: svbool_t, base: *const u64, vnum: i64) -> svuint64x4_t {
+    svld4_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_f32(pg: svbool_t, base: *const f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv4f32")]
+        fn _svldff1_f32(pg: svbool4_t, base: *const f32) -> svfloat32_t;
+    }
+    _svldff1_f32(pg.sve_into(), base)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_f64(pg: svbool_t, base: *const f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv2f64")]
+        fn _svldff1_f64(pg: svbool2_t, base: *const f64) -> svfloat64_t;
+    }
+    _svldff1_f64(pg.sve_into(), base)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1_s8(pg: svbool_t, base: *const i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv16i8")]
+        fn _svldff1_s8(pg: svbool_t, base: *const i8) -> svint8_t;
+    }
+    _svldff1_s8(pg, base)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1_s16(pg: svbool_t, base: *const i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv8i16")]
+        fn _svldff1_s16(pg: svbool8_t, base: *const i16) -> svint16_t;
+    }
+    _svldff1_s16(pg.sve_into(), base)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_s32(pg: svbool_t, base: *const i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv4i32")]
+        fn _svldff1_s32(pg: svbool4_t, base: *const i32) -> svint32_t;
+    }
+    _svldff1_s32(pg.sve_into(), base)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_s64(pg: svbool_t, base: *const i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv2i64")]
+        fn _svldff1_s64(pg: svbool2_t, base: *const i64) -> svint64_t;
+    }
+    _svldff1_s64(pg.sve_into(), base)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1_u8(pg: svbool_t, base: *const u8) -> svuint8_t {
+    svldff1_s8(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1_u16(pg: svbool_t, base: *const u16) -> svuint16_t {
+    svldff1_s16(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_u32(pg: svbool_t, base: *const u32) -> svuint32_t {
+    svldff1_s32(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_u64(pg: svbool_t, base: *const u64) -> svuint64_t {
+    svldff1_s64(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s32]index[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_s32index_f32(
+    pg: svbool_t,
+    base: *const f32,
+    indices: svint32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4f32"
+        )]
+        fn _svldff1_gather_s32index_f32(
+            pg: svbool4_t,
+            base: *const f32,
+            indices: svint32_t,
+        ) -> svfloat32_t;
+    }
+    _svldff1_gather_s32index_f32(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s32]index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_s32index_s32(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i32"
+        )]
+        fn _svldff1_gather_s32index_s32(
+            pg: svbool4_t,
+            base: *const i32,
+            indices: svint32_t,
+        ) -> svint32_t;
+    }
+    _svldff1_gather_s32index_s32(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s32]index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_s32index_u32(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svint32_t,
+) -> svuint32_t {
+    svldff1_gather_s32index_s32(pg, base.as_signed(), indices).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_s64index_f64(
+    pg: svbool_t,
+    base: *const f64,
+    indices: svint64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.index.nxv2f64"
+        )]
+        fn _svldff1_gather_s64index_f64(
+            pg: svbool2_t,
+            base: *const f64,
+            indices: svint64_t,
+        ) -> svfloat64_t;
+    }
+    _svldff1_gather_s64index_f64(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const i64,
+    indices: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.index.nxv2i64"
+        )]
+        fn _svldff1_gather_s64index_s64(
+            pg: svbool2_t,
+            base: *const i64,
+            indices: svint64_t,
+        ) -> svint64_t;
+    }
+    _svldff1_gather_s64index_s64(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const u64,
+    indices: svint64_t,
+) -> svuint64_t {
+    svldff1_gather_s64index_s64(pg, base.as_signed(), indices).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u32]index[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32index_f32(
+    pg: svbool_t,
+    base: *const f32,
+    indices: svuint32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4f32"
+        )]
+        fn _svldff1_gather_u32index_f32(
+            pg: svbool4_t,
+            base: *const f32,
+            indices: svint32_t,
+        ) -> svfloat32_t;
+    }
+    _svldff1_gather_u32index_f32(pg.sve_into(), base, indices.as_signed())
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u32]index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32index_s32(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i32"
+        )]
+        fn _svldff1_gather_u32index_s32(
+            pg: svbool4_t,
+            base: *const i32,
+            indices: svint32_t,
+        ) -> svint32_t;
+    }
+    _svldff1_gather_u32index_s32(pg.sve_into(), base, indices.as_signed())
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u32]index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32index_u32(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svuint32_t,
+) -> svuint32_t {
+    svldff1_gather_u32index_s32(pg, base.as_signed(), indices).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64index_f64(
+    pg: svbool_t,
+    base: *const f64,
+    indices: svuint64_t,
+) -> svfloat64_t {
+    svldff1_gather_s64index_f64(pg, base, indices.as_signed())
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const i64,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldff1_gather_s64index_s64(pg, base, indices.as_signed())
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const u64,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldff1_gather_s64index_s64(pg, base.as_signed(), indices.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s32]offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_s32offset_f32(
+    pg: svbool_t,
+    base: *const f32,
+    offsets: svint32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.nxv4f32"
+        )]
+        fn _svldff1_gather_s32offset_f32(
+            pg: svbool4_t,
+            base: *const f32,
+            offsets: svint32_t,
+        ) -> svfloat32_t;
+    }
+    _svldff1_gather_s32offset_f32(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i32"
+        )]
+        fn _svldff1_gather_s32offset_s32(
+            pg: svbool4_t,
+            base: *const i32,
+            offsets: svint32_t,
+        ) -> svint32_t;
+    }
+    _svldff1_gather_s32offset_s32(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svint32_t,
+) -> svuint32_t {
+    svldff1_gather_s32offset_s32(pg, base.as_signed(), offsets).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_s64offset_f64(
+    pg: svbool_t,
+    base: *const f64,
+    offsets: svint64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.nxv2f64"
+        )]
+        fn _svldff1_gather_s64offset_f64(
+            pg: svbool2_t,
+            base: *const f64,
+            offsets: svint64_t,
+        ) -> svfloat64_t;
+    }
+    _svldff1_gather_s64offset_f64(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i64,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.nxv2i64"
+        )]
+        fn _svldff1_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i64,
+            offsets: svint64_t,
+        ) -> svint64_t;
+    }
+    _svldff1_gather_s64offset_s64(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u64,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svldff1_gather_s64offset_s64(pg, base.as_signed(), offsets).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u32]offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32offset_f32(
+    pg: svbool_t,
+    base: *const f32,
+    offsets: svuint32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.nxv4f32"
+        )]
+        fn _svldff1_gather_u32offset_f32(
+            pg: svbool4_t,
+            base: *const f32,
+            offsets: svint32_t,
+        ) -> svfloat32_t;
+    }
+    _svldff1_gather_u32offset_f32(pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i32"
+        )]
+        fn _svldff1_gather_u32offset_s32(
+            pg: svbool4_t,
+            base: *const i32,
+            offsets: svint32_t,
+        ) -> svint32_t;
+    }
+    _svldff1_gather_u32offset_s32(pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    svldff1_gather_u32offset_s32(pg, base.as_signed(), offsets).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64offset_f64(
+    pg: svbool_t,
+    base: *const f64,
+    offsets: svuint64_t,
+) -> svfloat64_t {
+    svldff1_gather_s64offset_f64(pg, base, offsets.as_signed())
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i64,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldff1_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u64,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldff1_gather_s64offset_s64(pg, base.as_signed(), offsets.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u32base]_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32base_f32(pg: svbool_t, bases: svuint32_t) -> svfloat32_t {
+    svldff1_gather_u32base_offset_f32(pg, bases, 0)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldff1_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldff1_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u64base]_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64base_f64(pg: svbool_t, bases: svuint64_t) -> svfloat64_t {
+    svldff1_gather_u64base_offset_f64(pg, bases, 0)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldff1_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldff1_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u32base]_index_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32base_index_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svfloat32_t {
+    svldff1_gather_u32base_offset_f32(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u32base]_index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svint32_t {
+    svldff1_gather_u32base_offset_s32(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u32base]_index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svuint32_t {
+    svldff1_gather_u32base_offset_u32(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u64base]_index_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64base_index_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svfloat64_t {
+    svldff1_gather_u64base_offset_f64(pg, bases, index.unchecked_shl(3))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldff1_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(3))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldff1_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(3))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u32base]_offset_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32base_offset_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32"
+        )]
+        fn _svldff1_gather_u32base_offset_f32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> svfloat32_t;
+    }
+    _svldff1_gather_u32base_offset_f32(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32"
+        )]
+        fn _svldff1_gather_u32base_offset_s32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> svint32_t;
+    }
+    _svldff1_gather_u32base_offset_s32(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    svldff1_gather_u32base_offset_s32(pg, bases, offset).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u64base]_offset_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64base_offset_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64"
+        )]
+        fn _svldff1_gather_u64base_offset_f64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> svfloat64_t;
+    }
+    _svldff1_gather_u64base_offset_f64(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64"
+        )]
+        fn _svldff1_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> svint64_t;
+    }
+    _svldff1_gather_u64base_offset_s64(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svldff1_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_vnum_f32(pg: svbool_t, base: *const f32, vnum: i64) -> svfloat32_t {
+    svldff1_f32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_vnum_f64(pg: svbool_t, base: *const f64, vnum: i64) -> svfloat64_t {
+    svldff1_f64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1_vnum_s8(pg: svbool_t, base: *const i8, vnum: i64) -> svint8_t {
+    svldff1_s8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1_vnum_s16(pg: svbool_t, base: *const i16, vnum: i64) -> svint16_t {
+    svldff1_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_vnum_s32(pg: svbool_t, base: *const i32, vnum: i64) -> svint32_t {
+    svldff1_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_vnum_s64(pg: svbool_t, base: *const i64, vnum: i64) -> svint64_t {
+    svldff1_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1_vnum_u8(pg: svbool_t, base: *const u8, vnum: i64) -> svuint8_t {
+    svldff1_u8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1_vnum_u16(pg: svbool_t, base: *const u16, vnum: i64) -> svuint16_t {
+    svldff1_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1_vnum_u32(pg: svbool_t, base: *const u32, vnum: i64) -> svuint32_t {
+    svldff1_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1d))]
+pub unsafe fn svldff1_vnum_u64(pg: svbool_t, base: *const u64, vnum: i64) -> svuint64_t {
+    svldff1_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather_[s32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8"
+        )]
+        fn _svldff1sb_gather_s32offset_s32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sb_gather_s32offset_s32(
+        pg.sve_into(),
+        base,
+        offsets,
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[s32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16"
+        )]
+        fn _svldff1sh_gather_s32offset_s32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_gather_s32offset_s32(
+        pg.sve_into(),
+        base,
+        offsets,
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather_[s32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint32_t,
+) -> svuint32_t {
+    svldff1sb_gather_s32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[s32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint32_t,
+) -> svuint32_t {
+    svldff1sh_gather_s32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.nxv2i8"
+        )]
+        fn _svldff1sb_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i8,
+            offsets: svint64_t,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sb_gather_s64offset_s64(
+        pg.sve_into(),
+        base,
+        offsets,
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.nxv2i16"
+        )]
+        fn _svldff1sh_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i16,
+            offsets: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_gather_s64offset_s64(
+        pg.sve_into(),
+        base,
+        offsets,
+    ))
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.nxv2i32"
+        )]
+        fn _svldff1sw_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i32,
+            offsets: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sw_gather_s64offset_s64(
+        pg.sve_into(),
+        base,
+        offsets,
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svldff1sb_gather_s64offset_s64(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svldff1sh_gather_s64offset_s64(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svldff1sw_gather_s64offset_s64(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8"
+        )]
+        fn _svldff1sb_gather_u32offset_s32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sb_gather_u32offset_s32(
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16"
+        )]
+        fn _svldff1sh_gather_u32offset_s32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_gather_u32offset_s32(
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    svldff1sb_gather_u32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    svldff1sh_gather_u32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldff1sb_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldff1sh_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldff1sw_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldff1sb_gather_s64offset_s64(pg, base, offsets.as_signed()).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldff1sh_gather_s64offset_s64(pg, base, offsets.as_signed()).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldff1sw_gather_s64offset_s64(pg, base, offsets.as_signed()).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32"
+        )]
+        fn _svldff1sb_gather_u32base_offset_s32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sb_gather_u32base_offset_s32(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32"
+        )]
+        fn _svldff1sh_gather_u32base_offset_s32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_gather_u32base_offset_s32(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    svldff1sb_gather_u32base_offset_s32(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    svldff1sh_gather_u32base_offset_s32(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64"
+        )]
+        fn _svldff1sb_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sb_gather_u64base_offset_s64(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64"
+        )]
+        fn _svldff1sh_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_gather_u64base_offset_s64(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64"
+        )]
+        fn _svldff1sw_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sw_gather_u64base_offset_s64(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svldff1sb_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svldff1sh_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svldff1sw_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldff1sb_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldff1sh_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldff1sb_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldff1sh_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldff1sb_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldff1sh_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldff1sw_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldff1sb_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldff1sh_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldff1sw_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_s16(pg: svbool_t, base: *const i8) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv8i8")]
+        fn _svldff1sb_s16(pg: svbool8_t, base: *const i8) -> nxv8i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sb_s16(pg.sve_into(), base))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_s32(pg: svbool_t, base: *const i8) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv4i8")]
+        fn _svldff1sb_s32(pg: svbool4_t, base: *const i8) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sb_s32(pg.sve_into(), base))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_s32(pg: svbool_t, base: *const i16) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv4i16")]
+        fn _svldff1sh_s32(pg: svbool4_t, base: *const i16) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_s32(pg.sve_into(), base))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_s64(pg: svbool_t, base: *const i8) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv2i8")]
+        fn _svldff1sb_s64(pg: svbool2_t, base: *const i8) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sb_s64(pg.sve_into(), base))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_s64(pg: svbool_t, base: *const i16) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv2i16")]
+        fn _svldff1sh_s64(pg: svbool2_t, base: *const i16) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_s64(pg.sve_into(), base))
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_s64(pg: svbool_t, base: *const i32) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv2i32")]
+        fn _svldff1sw_s64(pg: svbool2_t, base: *const i32) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sw_s64(pg.sve_into(), base))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_u16(pg: svbool_t, base: *const i8) -> svuint16_t {
+    svldff1sb_s16(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_u32(pg: svbool_t, base: *const i8) -> svuint32_t {
+    svldff1sb_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_u32(pg: svbool_t, base: *const i16) -> svuint32_t {
+    svldff1sh_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_u64(pg: svbool_t, base: *const i8) -> svuint64_t {
+    svldff1sb_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_u64(pg: svbool_t, base: *const i16) -> svuint64_t {
+    svldff1sh_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_u64(pg: svbool_t, base: *const i32) -> svuint64_t {
+    svldff1sw_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_vnum_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_vnum_s16(pg: svbool_t, base: *const i8, vnum: i64) -> svint16_t {
+    svldff1sb_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_vnum_s32(pg: svbool_t, base: *const i8, vnum: i64) -> svint32_t {
+    svldff1sb_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_vnum_s32(pg: svbool_t, base: *const i16, vnum: i64) -> svint32_t {
+    svldff1sh_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_vnum_s64(pg: svbool_t, base: *const i8, vnum: i64) -> svint64_t {
+    svldff1sb_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_vnum_s64(pg: svbool_t, base: *const i16, vnum: i64) -> svint64_t {
+    svldff1sh_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_vnum_s64(pg: svbool_t, base: *const i32, vnum: i64) -> svint64_t {
+    svldff1sw_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_vnum_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_vnum_u16(pg: svbool_t, base: *const i8, vnum: i64) -> svuint16_t {
+    svldff1sb_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_vnum_u32(pg: svbool_t, base: *const i8, vnum: i64) -> svuint32_t {
+    svldff1sb_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_vnum_u32(pg: svbool_t, base: *const i16, vnum: i64) -> svuint32_t {
+    svldff1sh_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sb_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sb))]
+pub unsafe fn svldff1sb_vnum_u64(pg: svbool_t, base: *const i8, vnum: i64) -> svuint64_t {
+    svldff1sb_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_vnum_u64(pg: svbool_t, base: *const i16, vnum: i64) -> svuint64_t {
+    svldff1sh_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_vnum_u64(pg: svbool_t, base: *const i32, vnum: i64) -> svuint64_t {
+    svldff1sw_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[s32]index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_s32index_s32(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16"
+        )]
+        fn _svldff1sh_gather_s32index_s32(
+            pg: svbool4_t,
+            base: *const i16,
+            indices: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_gather_s32index_s32(pg.sve_into(), base, indices))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[s32]index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_s32index_u32(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint32_t,
+) -> svuint32_t {
+    svldff1sh_gather_s32index_s32(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.index.nxv2i16"
+        )]
+        fn _svldff1sh_gather_s64index_s64(
+            pg: svbool2_t,
+            base: *const i16,
+            indices: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_gather_s64index_s64(pg.sve_into(), base, indices))
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.index.nxv2i32"
+        )]
+        fn _svldff1sw_gather_s64index_s64(
+            pg: svbool2_t,
+            base: *const i32,
+            indices: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sw_gather_s64index_s64(pg.sve_into(), base, indices))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint64_t,
+) -> svuint64_t {
+    svldff1sh_gather_s64index_s64(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svint64_t,
+) -> svuint64_t {
+    svldff1sw_gather_s64index_s64(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[u32]index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32index_s32(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16"
+        )]
+        fn _svldff1sh_gather_u32index_s32(
+            pg: svbool4_t,
+            base: *const i16,
+            indices: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldff1sh_gather_u32index_s32(
+        pg.sve_into(),
+        base,
+        indices.as_signed(),
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[u32]index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32index_u32(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint32_t,
+) -> svuint32_t {
+    svldff1sh_gather_u32index_s32(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldff1sh_gather_s64index_s64(pg, base, indices.as_signed())
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldff1sw_gather_s64index_s64(pg, base, indices.as_signed())
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldff1sh_gather_s64index_s64(pg, base, indices.as_signed()).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldff1sw_gather_s64index_s64(pg, base, indices.as_signed()).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u32base]_index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svint32_t {
+    svldff1sh_gather_u32base_offset_s32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u32base]_index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svuint32_t {
+    svldff1sh_gather_u32base_offset_u32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldff1sh_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldff1sw_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load 16-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sh_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sh))]
+pub unsafe fn svldff1sh_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldff1sh_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and sign-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1sw_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1sw))]
+pub unsafe fn svldff1sw_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldff1sw_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather_[s32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint32_t,
+) -> svint32_t {
+    svldff1ub_gather_s32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[s32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_s32offset_s32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint32_t,
+) -> svint32_t {
+    svldff1uh_gather_s32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather_[s32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i8"
+        )]
+        fn _svldff1ub_gather_s32offset_u32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svldff1ub_gather_s32offset_u32(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[s32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_s32offset_u32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.nxv4i16"
+        )]
+        fn _svldff1uh_gather_s32offset_u32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svldff1uh_gather_s32offset_u32(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint64_t,
+) -> svint64_t {
+    svldff1ub_gather_s64offset_u64(pg, base, offsets).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint64_t,
+) -> svint64_t {
+    svldff1uh_gather_s64offset_u64(pg, base, offsets).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svint64_t,
+) -> svint64_t {
+    svldff1uw_gather_s64offset_u64(pg, base, offsets).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.nxv2i8"
+        )]
+        fn _svldff1ub_gather_s64offset_u64(
+            pg: svbool2_t,
+            base: *const i8,
+            offsets: svint64_t,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u8, _>(
+        _svldff1ub_gather_s64offset_u64(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.nxv2i16"
+        )]
+        fn _svldff1uh_gather_s64offset_u64(
+            pg: svbool2_t,
+            base: *const i16,
+            offsets: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svldff1uh_gather_s64offset_u64(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.nxv2i32"
+        )]
+        fn _svldff1uw_gather_s64offset_u64(
+            pg: svbool2_t,
+            base: *const i32,
+            offsets: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svldff1uw_gather_s64offset_u64(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint32_t,
+) -> svint32_t {
+    svldff1ub_gather_u32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint32_t,
+) -> svint32_t {
+    svldff1uh_gather_u32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i8"
+        )]
+        fn _svldff1ub_gather_u32offset_u32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svldff1ub_gather_u32offset_u32(pg.sve_into(), base.as_signed(), offsets.as_signed())
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.nxv4i16"
+        )]
+        fn _svldff1uh_gather_u32offset_u32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svldff1uh_gather_u32offset_u32(pg.sve_into(), base.as_signed(), offsets.as_signed())
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldff1ub_gather_s64offset_u64(pg, base, offsets.as_signed()).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldff1uh_gather_s64offset_u64(pg, base, offsets.as_signed()).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldff1uw_gather_s64offset_u64(pg, base, offsets.as_signed()).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldff1ub_gather_s64offset_u64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldff1uh_gather_s64offset_u64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldff1uw_gather_s64offset_u64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    svldff1ub_gather_u32base_offset_u32(pg, bases, offset).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    svldff1uh_gather_u32base_offset_u32(pg, bases, offset).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32"
+        )]
+        fn _svldff1ub_gather_u32base_offset_u32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svldff1ub_gather_u32base_offset_u32(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32"
+        )]
+        fn _svldff1uh_gather_u32base_offset_u32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svldff1uh_gather_u32base_offset_u32(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    svldff1ub_gather_u64base_offset_u64(pg, bases, offset).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    svldff1uh_gather_u64base_offset_u64(pg, bases, offset).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    svldff1uw_gather_u64base_offset_u64(pg, bases, offset).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64"
+        )]
+        fn _svldff1ub_gather_u64base_offset_u64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u8, _>(
+        _svldff1ub_gather_u64base_offset_u64(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64"
+        )]
+        fn _svldff1uh_gather_u64base_offset_u64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svldff1uh_gather_u64base_offset_u64(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64"
+        )]
+        fn _svldff1uw_gather_u64base_offset_u64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svldff1uw_gather_u64base_offset_u64(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldff1ub_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldff1uh_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldff1ub_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldff1uh_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldff1ub_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldff1uh_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldff1uw_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldff1ub_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldff1uh_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldff1uw_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_s16(pg: svbool_t, base: *const u8) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv8i8")]
+        fn _svldff1ub_s16(pg: svbool8_t, base: *const i8) -> nxv8i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv8u8, _>(
+        _svldff1ub_s16(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_s32(pg: svbool_t, base: *const u8) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv4i8")]
+        fn _svldff1ub_s32(pg: svbool4_t, base: *const i8) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svldff1ub_s32(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_s32(pg: svbool_t, base: *const u16) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv4i16")]
+        fn _svldff1uh_s32(pg: svbool4_t, base: *const i16) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svldff1uh_s32(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_s64(pg: svbool_t, base: *const u8) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv2i8")]
+        fn _svldff1ub_s64(pg: svbool2_t, base: *const i8) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u8, _>(
+        _svldff1ub_s64(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_s64(pg: svbool_t, base: *const u16) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv2i16")]
+        fn _svldff1uh_s64(pg: svbool2_t, base: *const i16) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svldff1uh_s64(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_s64(pg: svbool_t, base: *const u32) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldff1.nxv2i32")]
+        fn _svldff1uw_s64(pg: svbool2_t, base: *const i32) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svldff1uw_s64(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_u16(pg: svbool_t, base: *const u8) -> svuint16_t {
+    svldff1ub_s16(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_u32(pg: svbool_t, base: *const u8) -> svuint32_t {
+    svldff1ub_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_u32(pg: svbool_t, base: *const u16) -> svuint32_t {
+    svldff1uh_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_u64(pg: svbool_t, base: *const u8) -> svuint64_t {
+    svldff1ub_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_u64(pg: svbool_t, base: *const u16) -> svuint64_t {
+    svldff1uh_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_u64(pg: svbool_t, base: *const u32) -> svuint64_t {
+    svldff1uw_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_vnum_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_vnum_s16(pg: svbool_t, base: *const u8, vnum: i64) -> svint16_t {
+    svldff1ub_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_vnum_s32(pg: svbool_t, base: *const u8, vnum: i64) -> svint32_t {
+    svldff1ub_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_vnum_s32(pg: svbool_t, base: *const u16, vnum: i64) -> svint32_t {
+    svldff1uh_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_vnum_s64(pg: svbool_t, base: *const u8, vnum: i64) -> svint64_t {
+    svldff1ub_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_vnum_s64(pg: svbool_t, base: *const u16, vnum: i64) -> svint64_t {
+    svldff1uh_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_vnum_s64(pg: svbool_t, base: *const u32, vnum: i64) -> svint64_t {
+    svldff1uw_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_vnum_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_vnum_u16(pg: svbool_t, base: *const u8, vnum: i64) -> svuint16_t {
+    svldff1ub_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_vnum_u32(pg: svbool_t, base: *const u8, vnum: i64) -> svuint32_t {
+    svldff1ub_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_vnum_u32(pg: svbool_t, base: *const u16, vnum: i64) -> svuint32_t {
+    svldff1uh_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1ub_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1b))]
+pub unsafe fn svldff1ub_vnum_u64(pg: svbool_t, base: *const u8, vnum: i64) -> svuint64_t {
+    svldff1ub_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_vnum_u64(pg: svbool_t, base: *const u16, vnum: i64) -> svuint64_t {
+    svldff1uh_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_vnum_u64(pg: svbool_t, base: *const u32, vnum: i64) -> svuint64_t {
+    svldff1uw_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[s32]index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_s32index_s32(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint32_t,
+) -> svint32_t {
+    svldff1uh_gather_s32index_u32(pg, base, indices).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[s32]index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_s32index_u32(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.sxtw.index.nxv4i16"
+        )]
+        fn _svldff1uh_gather_s32index_u32(
+            pg: svbool4_t,
+            base: *const i16,
+            indices: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svldff1uh_gather_s32index_u32(pg.sve_into(), base.as_signed(), indices).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint64_t,
+) -> svint64_t {
+    svldff1uh_gather_s64index_u64(pg, base, indices).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svint64_t,
+) -> svint64_t {
+    svldff1uw_gather_s64index_u64(pg, base, indices).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.index.nxv2i16"
+        )]
+        fn _svldff1uh_gather_s64index_u64(
+            pg: svbool2_t,
+            base: *const i16,
+            indices: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svldff1uh_gather_s64index_u64(pg.sve_into(), base.as_signed(), indices).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.index.nxv2i32"
+        )]
+        fn _svldff1uw_gather_s64index_u64(
+            pg: svbool2_t,
+            base: *const i32,
+            indices: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svldff1uw_gather_s64index_u64(pg.sve_into(), base.as_signed(), indices).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[u32]index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32index_s32(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint32_t,
+) -> svint32_t {
+    svldff1uh_gather_u32index_u32(pg, base, indices).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[u32]index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32index_u32(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldff1.gather.uxtw.index.nxv4i16"
+        )]
+        fn _svldff1uh_gather_u32index_u32(
+            pg: svbool4_t,
+            base: *const i16,
+            indices: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svldff1uh_gather_u32index_u32(pg.sve_into(), base.as_signed(), indices.as_signed())
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldff1uh_gather_s64index_u64(pg, base, indices.as_signed()).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldff1uw_gather_s64index_u64(pg, base, indices.as_signed()).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldff1uh_gather_s64index_u64(pg, base, indices.as_signed())
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldff1uw_gather_s64index_u64(pg, base, indices.as_signed())
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u32base]_index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svint32_t {
+    svldff1uh_gather_u32base_offset_s32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u32base]_index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svuint32_t {
+    svldff1uh_gather_u32base_offset_u32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldff1uh_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldff1uw_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load 16-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uh_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1h))]
+pub unsafe fn svldff1uh_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldff1uh_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and zero-extend, first-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldff1uw_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and first-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldff1w))]
+pub unsafe fn svldff1uw_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldff1uw_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1_f32(pg: svbool_t, base: *const f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv4f32")]
+        fn _svldnf1_f32(pg: svbool4_t, base: *const f32) -> svfloat32_t;
+    }
+    _svldnf1_f32(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1d))]
+pub unsafe fn svldnf1_f64(pg: svbool_t, base: *const f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv2f64")]
+        fn _svldnf1_f64(pg: svbool2_t, base: *const f64) -> svfloat64_t;
+    }
+    _svldnf1_f64(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1_s8(pg: svbool_t, base: *const i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv16i8")]
+        fn _svldnf1_s8(pg: svbool_t, base: *const i8) -> svint8_t;
+    }
+    _svldnf1_s8(pg, base)
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1_s16(pg: svbool_t, base: *const i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv8i16")]
+        fn _svldnf1_s16(pg: svbool8_t, base: *const i16) -> svint16_t;
+    }
+    _svldnf1_s16(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1_s32(pg: svbool_t, base: *const i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv4i32")]
+        fn _svldnf1_s32(pg: svbool4_t, base: *const i32) -> svint32_t;
+    }
+    _svldnf1_s32(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1d))]
+pub unsafe fn svldnf1_s64(pg: svbool_t, base: *const i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv2i64")]
+        fn _svldnf1_s64(pg: svbool2_t, base: *const i64) -> svint64_t;
+    }
+    _svldnf1_s64(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1_u8(pg: svbool_t, base: *const u8) -> svuint8_t {
+    svldnf1_s8(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1_u16(pg: svbool_t, base: *const u16) -> svuint16_t {
+    svldnf1_s16(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1_u32(pg: svbool_t, base: *const u32) -> svuint32_t {
+    svldnf1_s32(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1d))]
+pub unsafe fn svldnf1_u64(pg: svbool_t, base: *const u64) -> svuint64_t {
+    svldnf1_s64(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1_vnum_f32(pg: svbool_t, base: *const f32, vnum: i64) -> svfloat32_t {
+    svldnf1_f32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1d))]
+pub unsafe fn svldnf1_vnum_f64(pg: svbool_t, base: *const f64, vnum: i64) -> svfloat64_t {
+    svldnf1_f64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1_vnum_s8(pg: svbool_t, base: *const i8, vnum: i64) -> svint8_t {
+    svldnf1_s8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1_vnum_s16(pg: svbool_t, base: *const i16, vnum: i64) -> svint16_t {
+    svldnf1_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1_vnum_s32(pg: svbool_t, base: *const i32, vnum: i64) -> svint32_t {
+    svldnf1_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1d))]
+pub unsafe fn svldnf1_vnum_s64(pg: svbool_t, base: *const i64, vnum: i64) -> svint64_t {
+    svldnf1_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1_vnum_u8(pg: svbool_t, base: *const u8, vnum: i64) -> svuint8_t {
+    svldnf1_u8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1_vnum_u16(pg: svbool_t, base: *const u16, vnum: i64) -> svuint16_t {
+    svldnf1_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1_vnum_u32(pg: svbool_t, base: *const u32, vnum: i64) -> svuint32_t {
+    svldnf1_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1d))]
+pub unsafe fn svldnf1_vnum_u64(pg: svbool_t, base: *const u64, vnum: i64) -> svuint64_t {
+    svldnf1_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_s16(pg: svbool_t, base: *const i8) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv8i8")]
+        fn _svldnf1sb_s16(pg: svbool8_t, base: *const i8) -> nxv8i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnf1sb_s16(pg.sve_into(), base))
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_s32(pg: svbool_t, base: *const i8) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv4i8")]
+        fn _svldnf1sb_s32(pg: svbool4_t, base: *const i8) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnf1sb_s32(pg.sve_into(), base))
+}
+#[doc = "Load 16-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sh_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sh))]
+pub unsafe fn svldnf1sh_s32(pg: svbool_t, base: *const i16) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv4i16")]
+        fn _svldnf1sh_s32(pg: svbool4_t, base: *const i16) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnf1sh_s32(pg.sve_into(), base))
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_s64(pg: svbool_t, base: *const i8) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv2i8")]
+        fn _svldnf1sb_s64(pg: svbool2_t, base: *const i8) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnf1sb_s64(pg.sve_into(), base))
+}
+#[doc = "Load 16-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sh_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sh))]
+pub unsafe fn svldnf1sh_s64(pg: svbool_t, base: *const i16) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv2i16")]
+        fn _svldnf1sh_s64(pg: svbool2_t, base: *const i16) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnf1sh_s64(pg.sve_into(), base))
+}
+#[doc = "Load 32-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sw_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sw))]
+pub unsafe fn svldnf1sw_s64(pg: svbool_t, base: *const i32) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv2i32")]
+        fn _svldnf1sw_s64(pg: svbool2_t, base: *const i32) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnf1sw_s64(pg.sve_into(), base))
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_u16(pg: svbool_t, base: *const i8) -> svuint16_t {
+    svldnf1sb_s16(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_u32(pg: svbool_t, base: *const i8) -> svuint32_t {
+    svldnf1sb_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sh_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sh))]
+pub unsafe fn svldnf1sh_u32(pg: svbool_t, base: *const i16) -> svuint32_t {
+    svldnf1sh_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_u64(pg: svbool_t, base: *const i8) -> svuint64_t {
+    svldnf1sb_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sh_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sh))]
+pub unsafe fn svldnf1sh_u64(pg: svbool_t, base: *const i16) -> svuint64_t {
+    svldnf1sh_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sw_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sw))]
+pub unsafe fn svldnf1sw_u64(pg: svbool_t, base: *const i32) -> svuint64_t {
+    svldnf1sw_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_vnum_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_vnum_s16(pg: svbool_t, base: *const i8, vnum: i64) -> svint16_t {
+    svldnf1sb_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_vnum_s32(pg: svbool_t, base: *const i8, vnum: i64) -> svint32_t {
+    svldnf1sb_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sh_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sh))]
+pub unsafe fn svldnf1sh_vnum_s32(pg: svbool_t, base: *const i16, vnum: i64) -> svint32_t {
+    svldnf1sh_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_vnum_s64(pg: svbool_t, base: *const i8, vnum: i64) -> svint64_t {
+    svldnf1sb_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sh_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sh))]
+pub unsafe fn svldnf1sh_vnum_s64(pg: svbool_t, base: *const i16, vnum: i64) -> svint64_t {
+    svldnf1sh_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sw_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sw))]
+pub unsafe fn svldnf1sw_vnum_s64(pg: svbool_t, base: *const i32, vnum: i64) -> svint64_t {
+    svldnf1sw_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_vnum_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_vnum_u16(pg: svbool_t, base: *const i8, vnum: i64) -> svuint16_t {
+    svldnf1sb_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_vnum_u32(pg: svbool_t, base: *const i8, vnum: i64) -> svuint32_t {
+    svldnf1sb_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sh_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sh))]
+pub unsafe fn svldnf1sh_vnum_u32(pg: svbool_t, base: *const i16, vnum: i64) -> svuint32_t {
+    svldnf1sh_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sb_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sb))]
+pub unsafe fn svldnf1sb_vnum_u64(pg: svbool_t, base: *const i8, vnum: i64) -> svuint64_t {
+    svldnf1sb_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sh_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sh))]
+pub unsafe fn svldnf1sh_vnum_u64(pg: svbool_t, base: *const i16, vnum: i64) -> svuint64_t {
+    svldnf1sh_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and sign-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1sw_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1sw))]
+pub unsafe fn svldnf1sw_vnum_u64(pg: svbool_t, base: *const i32, vnum: i64) -> svuint64_t {
+    svldnf1sw_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_s16(pg: svbool_t, base: *const u8) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv8i8")]
+        fn _svldnf1ub_s16(pg: svbool8_t, base: *const i8) -> nxv8i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv8u8, _>(
+        _svldnf1ub_s16(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_s32(pg: svbool_t, base: *const u8) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv4i8")]
+        fn _svldnf1ub_s32(pg: svbool4_t, base: *const i8) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svldnf1ub_s32(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uh_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1uh_s32(pg: svbool_t, base: *const u16) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv4i16")]
+        fn _svldnf1uh_s32(pg: svbool4_t, base: *const i16) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svldnf1uh_s32(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_s64(pg: svbool_t, base: *const u8) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv2i8")]
+        fn _svldnf1ub_s64(pg: svbool2_t, base: *const i8) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u8, _>(
+        _svldnf1ub_s64(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uh_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1uh_s64(pg: svbool_t, base: *const u16) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv2i16")]
+        fn _svldnf1uh_s64(pg: svbool2_t, base: *const i16) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svldnf1uh_s64(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uw_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1uw_s64(pg: svbool_t, base: *const u32) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnf1.nxv2i32")]
+        fn _svldnf1uw_s64(pg: svbool2_t, base: *const i32) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svldnf1uw_s64(pg.sve_into(), base.as_signed()).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_u16(pg: svbool_t, base: *const u8) -> svuint16_t {
+    svldnf1ub_s16(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_u32(pg: svbool_t, base: *const u8) -> svuint32_t {
+    svldnf1ub_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uh_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1uh_u32(pg: svbool_t, base: *const u16) -> svuint32_t {
+    svldnf1uh_s32(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_u64(pg: svbool_t, base: *const u8) -> svuint64_t {
+    svldnf1ub_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 16-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uh_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1uh_u64(pg: svbool_t, base: *const u16) -> svuint64_t {
+    svldnf1uh_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 32-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uw_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1uw_u64(pg: svbool_t, base: *const u32) -> svuint64_t {
+    svldnf1uw_s64(pg, base).as_unsigned()
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_vnum_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_vnum_s16(pg: svbool_t, base: *const u8, vnum: i64) -> svint16_t {
+    svldnf1ub_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_vnum_s32(pg: svbool_t, base: *const u8, vnum: i64) -> svint32_t {
+    svldnf1ub_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uh_vnum_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1uh_vnum_s32(pg: svbool_t, base: *const u16, vnum: i64) -> svint32_t {
+    svldnf1uh_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_vnum_s64(pg: svbool_t, base: *const u8, vnum: i64) -> svint64_t {
+    svldnf1ub_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uh_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1uh_vnum_s64(pg: svbool_t, base: *const u16, vnum: i64) -> svint64_t {
+    svldnf1uh_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uw_vnum_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1uw_vnum_s64(pg: svbool_t, base: *const u32, vnum: i64) -> svint64_t {
+    svldnf1uw_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_vnum_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_vnum_u16(pg: svbool_t, base: *const u8, vnum: i64) -> svuint16_t {
+    svldnf1ub_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_vnum_u32(pg: svbool_t, base: *const u8, vnum: i64) -> svuint32_t {
+    svldnf1ub_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uh_vnum_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1uh_vnum_u32(pg: svbool_t, base: *const u16, vnum: i64) -> svuint32_t {
+    svldnf1uh_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Load 8-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1ub_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1b))]
+pub unsafe fn svldnf1ub_vnum_u64(pg: svbool_t, base: *const u8, vnum: i64) -> svuint64_t {
+    svldnf1ub_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 16-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uh_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1h))]
+pub unsafe fn svldnf1uh_vnum_u64(pg: svbool_t, base: *const u16, vnum: i64) -> svuint64_t {
+    svldnf1uh_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Load 32-bit data and zero-extend, non-faulting"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnf1uw_vnum_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`, the first-fault register (`FFR`) and non-faulting behaviour)."]
+#[doc = "  * Result lanes corresponding to inactive FFR lanes (either before or as a result of this intrinsic) have \"CONSTRAINED UNPREDICTABLE\" values, irrespective of predication. Refer to architectural documentation for details."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnf1w))]
+pub unsafe fn svldnf1uw_vnum_u64(pg: svbool_t, base: *const u32, vnum: i64) -> svuint64_t {
+    svldnf1uw_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_f32(pg: svbool_t, base: *const f32) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnt1.nxv4f32")]
+        fn _svldnt1_f32(pg: svbool4_t, base: *const f32) -> svfloat32_t;
+    }
+    _svldnt1_f32(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_f64(pg: svbool_t, base: *const f64) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnt1.nxv2f64")]
+        fn _svldnt1_f64(pg: svbool2_t, base: *const f64) -> svfloat64_t;
+    }
+    _svldnt1_f64(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1_s8(pg: svbool_t, base: *const i8) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnt1.nxv16i8")]
+        fn _svldnt1_s8(pg: svbool_t, base: *const i8) -> svint8_t;
+    }
+    _svldnt1_s8(pg, base)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1_s16(pg: svbool_t, base: *const i16) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnt1.nxv8i16")]
+        fn _svldnt1_s16(pg: svbool8_t, base: *const i16) -> svint16_t;
+    }
+    _svldnt1_s16(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_s32(pg: svbool_t, base: *const i32) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnt1.nxv4i32")]
+        fn _svldnt1_s32(pg: svbool4_t, base: *const i32) -> svint32_t;
+    }
+    _svldnt1_s32(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_s64(pg: svbool_t, base: *const i64) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ldnt1.nxv2i64")]
+        fn _svldnt1_s64(pg: svbool2_t, base: *const i64) -> svint64_t;
+    }
+    _svldnt1_s64(pg.sve_into(), base)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1_u8(pg: svbool_t, base: *const u8) -> svuint8_t {
+    svldnt1_s8(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1_u16(pg: svbool_t, base: *const u16) -> svuint16_t {
+    svldnt1_s16(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_u32(pg: svbool_t, base: *const u32) -> svuint32_t {
+    svldnt1_s32(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_u64(pg: svbool_t, base: *const u64) -> svuint64_t {
+    svldnt1_s64(pg, base.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_vnum_f32(pg: svbool_t, base: *const f32, vnum: i64) -> svfloat32_t {
+    svldnt1_f32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_vnum_f64(pg: svbool_t, base: *const f64, vnum: i64) -> svfloat64_t {
+    svldnt1_f64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1_vnum_s8(pg: svbool_t, base: *const i8, vnum: i64) -> svint8_t {
+    svldnt1_s8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1_vnum_s16(pg: svbool_t, base: *const i16, vnum: i64) -> svint16_t {
+    svldnt1_s16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_vnum_s32(pg: svbool_t, base: *const i32, vnum: i64) -> svint32_t {
+    svldnt1_s32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_vnum_s64(pg: svbool_t, base: *const i64, vnum: i64) -> svint64_t {
+    svldnt1_s64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1_vnum_u8(pg: svbool_t, base: *const u8, vnum: i64) -> svuint8_t {
+    svldnt1_u8(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1_vnum_u16(pg: svbool_t, base: *const u16, vnum: i64) -> svuint16_t {
+    svldnt1_u16(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_vnum_u32(pg: svbool_t, base: *const u32, vnum: i64) -> svuint32_t {
+    svldnt1_u32(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_vnum_u64(pg: svbool_t, base: *const u64, vnum: i64) -> svuint64_t {
+    svldnt1_u64(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntw))]
+pub fn svlen_f32(_op: svfloat32_t) -> u64 {
+    svcntw()
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntd))]
+pub fn svlen_f64(_op: svfloat64_t) -> u64 {
+    svcntd()
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rdvl))]
+pub fn svlen_s8(_op: svint8_t) -> u64 {
+    svcntb()
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnth))]
+pub fn svlen_s16(_op: svint16_t) -> u64 {
+    svcnth()
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntw))]
+pub fn svlen_s32(_op: svint32_t) -> u64 {
+    svcntw()
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntd))]
+pub fn svlen_s64(_op: svint64_t) -> u64 {
+    svcntd()
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rdvl))]
+pub fn svlen_u8(_op: svuint8_t) -> u64 {
+    svcntb()
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cnth))]
+pub fn svlen_u16(_op: svuint16_t) -> u64 {
+    svcnth()
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntw))]
+pub fn svlen_u32(_op: svuint32_t) -> u64 {
+    svcntw()
+}
+#[doc = "Count the number of elements in a full vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlen[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cntd))]
+pub fn svlen_u64(_op: svuint64_t) -> u64 {
+    svcntd()
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s8_m(pg: svbool_t, op1: svint8_t, op2: svuint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lsl.nxv16i8")]
+        fn _svlsl_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svlsl_s8_m(pg, op1, op2.as_signed()) }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s8_m(pg: svbool_t, op1: svint8_t, op2: u8) -> svint8_t {
+    svlsl_s8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s8_x(pg: svbool_t, op1: svint8_t, op2: svuint8_t) -> svint8_t {
+    svlsl_s8_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s8_x(pg: svbool_t, op1: svint8_t, op2: u8) -> svint8_t {
+    svlsl_s8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s8_z(pg: svbool_t, op1: svint8_t, op2: svuint8_t) -> svint8_t {
+    svlsl_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s8_z(pg: svbool_t, op1: svint8_t, op2: u8) -> svint8_t {
+    svlsl_s8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s16_m(pg: svbool_t, op1: svint16_t, op2: svuint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lsl.nxv8i16")]
+        fn _svlsl_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svlsl_s16_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s16_m(pg: svbool_t, op1: svint16_t, op2: u16) -> svint16_t {
+    svlsl_s16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s16_x(pg: svbool_t, op1: svint16_t, op2: svuint16_t) -> svint16_t {
+    svlsl_s16_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s16_x(pg: svbool_t, op1: svint16_t, op2: u16) -> svint16_t {
+    svlsl_s16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s16_z(pg: svbool_t, op1: svint16_t, op2: svuint16_t) -> svint16_t {
+    svlsl_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s16_z(pg: svbool_t, op1: svint16_t, op2: u16) -> svint16_t {
+    svlsl_s16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s32_m(pg: svbool_t, op1: svint32_t, op2: svuint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lsl.nxv4i32")]
+        fn _svlsl_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svlsl_s32_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s32_m(pg: svbool_t, op1: svint32_t, op2: u32) -> svint32_t {
+    svlsl_s32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s32_x(pg: svbool_t, op1: svint32_t, op2: svuint32_t) -> svint32_t {
+    svlsl_s32_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s32_x(pg: svbool_t, op1: svint32_t, op2: u32) -> svint32_t {
+    svlsl_s32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s32_z(pg: svbool_t, op1: svint32_t, op2: svuint32_t) -> svint32_t {
+    svlsl_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s32_z(pg: svbool_t, op1: svint32_t, op2: u32) -> svint32_t {
+    svlsl_s32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s64_m(pg: svbool_t, op1: svint64_t, op2: svuint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lsl.nxv2i64")]
+        fn _svlsl_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svlsl_s64_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s64_m(pg: svbool_t, op1: svint64_t, op2: u64) -> svint64_t {
+    svlsl_s64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s64_x(pg: svbool_t, op1: svint64_t, op2: svuint64_t) -> svint64_t {
+    svlsl_s64_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s64_x(pg: svbool_t, op1: svint64_t, op2: u64) -> svint64_t {
+    svlsl_s64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_s64_z(pg: svbool_t, op1: svint64_t, op2: svuint64_t) -> svint64_t {
+    svlsl_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_s64_z(pg: svbool_t, op1: svint64_t, op2: u64) -> svint64_t {
+    svlsl_s64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svlsl_s8_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svlsl_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svlsl_u8_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svlsl_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svlsl_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svlsl_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svlsl_s16_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svlsl_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svlsl_u16_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svlsl_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svlsl_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svlsl_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svlsl_s32_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svlsl_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svlsl_u32_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svlsl_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svlsl_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svlsl_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svlsl_s64_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svlsl_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svlsl_u64_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svlsl_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svlsl_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svlsl_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_s8_m(pg: svbool_t, op1: svint8_t, op2: svuint64_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.lsl.wide.nxv16i8"
+        )]
+        fn _svlsl_wide_s8_m(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svint8_t;
+    }
+    unsafe { _svlsl_wide_s8_m(pg, op1, op2.as_signed()) }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_s8_m(pg: svbool_t, op1: svint8_t, op2: u64) -> svint8_t {
+    svlsl_wide_s8_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_s8_x(pg: svbool_t, op1: svint8_t, op2: svuint64_t) -> svint8_t {
+    svlsl_wide_s8_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_s8_x(pg: svbool_t, op1: svint8_t, op2: u64) -> svint8_t {
+    svlsl_wide_s8_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_s8_z(pg: svbool_t, op1: svint8_t, op2: svuint64_t) -> svint8_t {
+    svlsl_wide_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_s8_z(pg: svbool_t, op1: svint8_t, op2: u64) -> svint8_t {
+    svlsl_wide_s8_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_s16_m(pg: svbool_t, op1: svint16_t, op2: svuint64_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.lsl.wide.nxv8i16"
+        )]
+        fn _svlsl_wide_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svint16_t;
+    }
+    unsafe { _svlsl_wide_s16_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_s16_m(pg: svbool_t, op1: svint16_t, op2: u64) -> svint16_t {
+    svlsl_wide_s16_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_s16_x(pg: svbool_t, op1: svint16_t, op2: svuint64_t) -> svint16_t {
+    svlsl_wide_s16_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_s16_x(pg: svbool_t, op1: svint16_t, op2: u64) -> svint16_t {
+    svlsl_wide_s16_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_s16_z(pg: svbool_t, op1: svint16_t, op2: svuint64_t) -> svint16_t {
+    svlsl_wide_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_s16_z(pg: svbool_t, op1: svint16_t, op2: u64) -> svint16_t {
+    svlsl_wide_s16_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_s32_m(pg: svbool_t, op1: svint32_t, op2: svuint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.lsl.wide.nxv4i32"
+        )]
+        fn _svlsl_wide_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svlsl_wide_s32_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_s32_m(pg: svbool_t, op1: svint32_t, op2: u64) -> svint32_t {
+    svlsl_wide_s32_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_s32_x(pg: svbool_t, op1: svint32_t, op2: svuint64_t) -> svint32_t {
+    svlsl_wide_s32_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_s32_x(pg: svbool_t, op1: svint32_t, op2: u64) -> svint32_t {
+    svlsl_wide_s32_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_s32_z(pg: svbool_t, op1: svint32_t, op2: svuint64_t) -> svint32_t {
+    svlsl_wide_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_s32_z(pg: svbool_t, op1: svint32_t, op2: u64) -> svint32_t {
+    svlsl_wide_s32_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svuint8_t {
+    unsafe { svlsl_wide_s8_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u64) -> svuint8_t {
+    svlsl_wide_u8_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svuint8_t {
+    svlsl_wide_u8_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u64) -> svuint8_t {
+    svlsl_wide_u8_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svuint8_t {
+    svlsl_wide_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u64) -> svuint8_t {
+    svlsl_wide_u8_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svuint16_t {
+    unsafe { svlsl_wide_s16_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u64) -> svuint16_t {
+    svlsl_wide_u16_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svuint16_t {
+    svlsl_wide_u16_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u64) -> svuint16_t {
+    svlsl_wide_u16_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svuint16_t {
+    svlsl_wide_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u64) -> svuint16_t {
+    svlsl_wide_u16_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svuint32_t {
+    unsafe { svlsl_wide_s32_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u64) -> svuint32_t {
+    svlsl_wide_u32_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svuint32_t {
+    svlsl_wide_u32_m(pg, op1, op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u64) -> svuint32_t {
+    svlsl_wide_u32_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svuint32_t {
+    svlsl_wide_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Logical shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsl_wide[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsl))]
+pub fn svlsl_wide_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u64) -> svuint32_t {
+    svlsl_wide_u32_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lsr.nxv16i8")]
+        fn _svlsr_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svlsr_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svlsr_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svlsr_u8_m(pg, op1, op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svlsr_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svlsr_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svlsr_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lsr.nxv8i16")]
+        fn _svlsr_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svlsr_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svlsr_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svlsr_u16_m(pg, op1, op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svlsr_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svlsr_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svlsr_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lsr.nxv4i32")]
+        fn _svlsr_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svlsr_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svlsr_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svlsr_u32_m(pg, op1, op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svlsr_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svlsr_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svlsr_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.lsr.nxv2i64")]
+        fn _svlsr_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svlsr_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svlsr_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svlsr_u64_m(pg, op1, op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svlsr_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svlsr_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svlsr_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.lsr.wide.nxv16i8"
+        )]
+        fn _svlsr_wide_u8_m(pg: svbool_t, op1: svint8_t, op2: svint64_t) -> svint8_t;
+    }
+    unsafe { _svlsr_wide_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u64) -> svuint8_t {
+    svlsr_wide_u8_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svuint8_t {
+    svlsr_wide_u8_m(pg, op1, op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u64) -> svuint8_t {
+    svlsr_wide_u8_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint64_t) -> svuint8_t {
+    svlsr_wide_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u64) -> svuint8_t {
+    svlsr_wide_u8_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.lsr.wide.nxv8i16"
+        )]
+        fn _svlsr_wide_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint64_t) -> svint16_t;
+    }
+    unsafe { _svlsr_wide_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u64) -> svuint16_t {
+    svlsr_wide_u16_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svuint16_t {
+    svlsr_wide_u16_m(pg, op1, op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u64) -> svuint16_t {
+    svlsr_wide_u16_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint64_t) -> svuint16_t {
+    svlsr_wide_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u64) -> svuint16_t {
+    svlsr_wide_u16_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.lsr.wide.nxv4i32"
+        )]
+        fn _svlsr_wide_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svlsr_wide_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u64) -> svuint32_t {
+    svlsr_wide_u32_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svuint32_t {
+    svlsr_wide_u32_m(pg, op1, op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u64) -> svuint32_t {
+    svlsr_wide_u32_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint64_t) -> svuint32_t {
+    svlsr_wide_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Logical shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlsr_wide[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(lsr))]
+pub fn svlsr_wide_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u64) -> svuint32_t {
+    svlsr_wide_u32_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_f32_m(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmad.nxv4f32")]
+        fn _svmad_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svmad_f32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmad_f32_m(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_f32_x(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svmad_f32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmad_f32_x(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_f32_z(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svmad_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmad_f32_z(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_f64_m(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmad.nxv2f64")]
+        fn _svmad_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svmad_f64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmad_f64_m(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_f64_x(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svmad_f64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmad_f64_x(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_f64_z(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svmad_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmad))]
+pub fn svmad_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmad_f64_z(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mad.nxv16i8")]
+        fn _svmad_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmad_s8_m(pg, op1, op2, op3) }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmad_s8_m(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    svmad_s8_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmad_s8_x(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    svmad_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmad_s8_z(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mad.nxv8i16")]
+        fn _svmad_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t, op3: svint16_t)
+            -> svint16_t;
+    }
+    unsafe { _svmad_s16_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmad_s16_m(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    svmad_s16_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmad_s16_x(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    svmad_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmad_s16_z(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mad.nxv4i32")]
+        fn _svmad_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t, op3: svint32_t)
+            -> svint32_t;
+    }
+    unsafe { _svmad_s32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmad_s32_m(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    svmad_s32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmad_s32_x(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    svmad_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmad_s32_z(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mad.nxv2i64")]
+        fn _svmad_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t, op3: svint64_t)
+            -> svint64_t;
+    }
+    unsafe { _svmad_s64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmad_s64_m(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    svmad_s64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmad_s64_x(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    svmad_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmad_s64_z(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { svmad_s8_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmad_u8_m(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    svmad_u8_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmad_u8_x(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    svmad_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmad_u8_z(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { svmad_s16_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmad_u16_m(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    svmad_u16_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmad_u16_x(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    svmad_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmad_u16_z(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { svmad_s32_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmad_u32_m(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    svmad_u32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmad_u32_x(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    svmad_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmad_u32_z(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { svmad_s64_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmad_u64_m(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    svmad_u64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmad_u64_x(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    svmad_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2, op3)
+}
+#[doc = "Multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmad[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mad))]
+pub fn svmad_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmad_u64_z(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmax.nxv4f32")]
+        fn _svmax_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svmax_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmax_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmax_f32_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmax_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmax_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmax_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmax.nxv2f64")]
+        fn _svmax_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svmax_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmax_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmax_f64_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmax_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmax_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmax))]
+pub fn svmax_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmax_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smax.nxv16i8")]
+        fn _svmax_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmax_s8_m(pg, op1, op2) }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmax_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svmax_s8_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmax_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svmax_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmax_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smax.nxv8i16")]
+        fn _svmax_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svmax_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmax_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svmax_s16_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmax_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svmax_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmax_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smax.nxv4i32")]
+        fn _svmax_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svmax_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmax_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svmax_s32_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmax_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svmax_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmax_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smax.nxv2i64")]
+        fn _svmax_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svmax_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmax_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svmax_s64_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmax_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svmax_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smax))]
+pub fn svmax_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmax_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umax.nxv16i8")]
+        fn _svmax_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmax_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmax_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svmax_u8_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmax_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svmax_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmax_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umax.nxv8i16")]
+        fn _svmax_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svmax_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmax_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svmax_u16_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmax_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svmax_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmax_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umax.nxv4i32")]
+        fn _svmax_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svmax_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmax_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svmax_u32_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmax_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svmax_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmax_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umax.nxv2i64")]
+        fn _svmax_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svmax_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmax_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svmax_u64_m(pg, op1, op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmax_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svmax_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Maximum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmax[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umax))]
+pub fn svmax_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmax_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmaxnm.nxv4f32")]
+        fn _svmaxnm_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svmaxnm_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmaxnm_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmaxnm_f32_m(pg, op1, op2)
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmaxnm_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmaxnm_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmaxnm_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmaxnm.nxv2f64")]
+        fn _svmaxnm_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svmaxnm_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmaxnm_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmaxnm_f64_m(pg, op1, op2)
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmaxnm_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmaxnm_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Maximum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnm[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnm))]
+pub fn svmaxnm_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmaxnm_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Maximum number reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnmv[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnmv))]
+pub fn svmaxnmv_f32(pg: svbool_t, op: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmaxnmv.nxv4f32"
+        )]
+        fn _svmaxnmv_f32(pg: svbool4_t, op: svfloat32_t) -> f32;
+    }
+    unsafe { _svmaxnmv_f32(pg.sve_into(), op) }
+}
+#[doc = "Maximum number reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnmv[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnmv))]
+pub fn svmaxnmv_f64(pg: svbool_t, op: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmaxnmv.nxv2f64"
+        )]
+        fn _svmaxnmv_f64(pg: svbool2_t, op: svfloat64_t) -> f64;
+    }
+    unsafe { _svmaxnmv_f64(pg.sve_into(), op) }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxv))]
+pub fn svmaxv_f32(pg: svbool_t, op: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmaxv.nxv4f32")]
+        fn _svmaxv_f32(pg: svbool4_t, op: svfloat32_t) -> f32;
+    }
+    unsafe { _svmaxv_f32(pg.sve_into(), op) }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxv))]
+pub fn svmaxv_f64(pg: svbool_t, op: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmaxv.nxv2f64")]
+        fn _svmaxv_f64(pg: svbool2_t, op: svfloat64_t) -> f64;
+    }
+    unsafe { _svmaxv_f64(pg.sve_into(), op) }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxv))]
+pub fn svmaxv_s8(pg: svbool_t, op: svint8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smaxv.nxv16i8")]
+        fn _svmaxv_s8(pg: svbool_t, op: svint8_t) -> i8;
+    }
+    unsafe { _svmaxv_s8(pg, op) }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxv))]
+pub fn svmaxv_s16(pg: svbool_t, op: svint16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smaxv.nxv8i16")]
+        fn _svmaxv_s16(pg: svbool8_t, op: svint16_t) -> i16;
+    }
+    unsafe { _svmaxv_s16(pg.sve_into(), op) }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxv))]
+pub fn svmaxv_s32(pg: svbool_t, op: svint32_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smaxv.nxv4i32")]
+        fn _svmaxv_s32(pg: svbool4_t, op: svint32_t) -> i32;
+    }
+    unsafe { _svmaxv_s32(pg.sve_into(), op) }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxv))]
+pub fn svmaxv_s64(pg: svbool_t, op: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smaxv.nxv2i64")]
+        fn _svmaxv_s64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svmaxv_s64(pg.sve_into(), op) }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxv))]
+pub fn svmaxv_u8(pg: svbool_t, op: svuint8_t) -> u8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umaxv.nxv16i8")]
+        fn _svmaxv_u8(pg: svbool_t, op: svint8_t) -> i8;
+    }
+    unsafe { _svmaxv_u8(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxv))]
+pub fn svmaxv_u16(pg: svbool_t, op: svuint16_t) -> u16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umaxv.nxv8i16")]
+        fn _svmaxv_u16(pg: svbool8_t, op: svint16_t) -> i16;
+    }
+    unsafe { _svmaxv_u16(pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxv))]
+pub fn svmaxv_u32(pg: svbool_t, op: svuint32_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umaxv.nxv4i32")]
+        fn _svmaxv_u32(pg: svbool4_t, op: svint32_t) -> i32;
+    }
+    unsafe { _svmaxv_u32(pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxv[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxv))]
+pub fn svmaxv_u64(pg: svbool_t, op: svuint64_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umaxv.nxv2i64")]
+        fn _svmaxv_u64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svmaxv_u64(pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmin.nxv4f32")]
+        fn _svmin_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svmin_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmin_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmin_f32_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmin_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmin_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmin_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmin.nxv2f64")]
+        fn _svmin_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svmin_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmin_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmin_f64_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmin_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmin_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmin))]
+pub fn svmin_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmin_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smin.nxv16i8")]
+        fn _svmin_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmin_s8_m(pg, op1, op2) }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmin_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svmin_s8_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmin_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svmin_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmin_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smin.nxv8i16")]
+        fn _svmin_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svmin_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmin_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svmin_s16_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmin_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svmin_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmin_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smin.nxv4i32")]
+        fn _svmin_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svmin_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmin_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svmin_s32_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmin_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svmin_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmin_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smin.nxv2i64")]
+        fn _svmin_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svmin_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmin_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svmin_s64_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmin_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svmin_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smin))]
+pub fn svmin_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmin_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umin.nxv16i8")]
+        fn _svmin_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmin_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmin_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svmin_u8_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmin_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svmin_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmin_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umin.nxv8i16")]
+        fn _svmin_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svmin_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmin_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svmin_u16_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmin_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svmin_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmin_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umin.nxv4i32")]
+        fn _svmin_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svmin_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmin_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svmin_u32_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmin_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svmin_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmin_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umin.nxv2i64")]
+        fn _svmin_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svmin_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmin_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svmin_u64_m(pg, op1, op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmin_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svmin_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Minimum"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmin[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umin))]
+pub fn svmin_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmin_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fminnm.nxv4f32")]
+        fn _svminnm_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svminnm_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svminnm_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svminnm_f32_m(pg, op1, op2)
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svminnm_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svminnm_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svminnm_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fminnm.nxv2f64")]
+        fn _svminnm_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svminnm_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svminnm_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svminnm_f64_m(pg, op1, op2)
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svminnm_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svminnm_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Minimum number"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnm[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnm))]
+pub fn svminnm_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svminnm_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Minimum number reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnmv[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnmv))]
+pub fn svminnmv_f32(pg: svbool_t, op: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fminnmv.nxv4f32"
+        )]
+        fn _svminnmv_f32(pg: svbool4_t, op: svfloat32_t) -> f32;
+    }
+    unsafe { _svminnmv_f32(pg.sve_into(), op) }
+}
+#[doc = "Minimum number reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnmv[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnmv))]
+pub fn svminnmv_f64(pg: svbool_t, op: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fminnmv.nxv2f64"
+        )]
+        fn _svminnmv_f64(pg: svbool2_t, op: svfloat64_t) -> f64;
+    }
+    unsafe { _svminnmv_f64(pg.sve_into(), op) }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminv))]
+pub fn svminv_f32(pg: svbool_t, op: svfloat32_t) -> f32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fminv.nxv4f32")]
+        fn _svminv_f32(pg: svbool4_t, op: svfloat32_t) -> f32;
+    }
+    unsafe { _svminv_f32(pg.sve_into(), op) }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminv))]
+pub fn svminv_f64(pg: svbool_t, op: svfloat64_t) -> f64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fminv.nxv2f64")]
+        fn _svminv_f64(pg: svbool2_t, op: svfloat64_t) -> f64;
+    }
+    unsafe { _svminv_f64(pg.sve_into(), op) }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminv))]
+pub fn svminv_s8(pg: svbool_t, op: svint8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sminv.nxv16i8")]
+        fn _svminv_s8(pg: svbool_t, op: svint8_t) -> i8;
+    }
+    unsafe { _svminv_s8(pg, op) }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminv))]
+pub fn svminv_s16(pg: svbool_t, op: svint16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sminv.nxv8i16")]
+        fn _svminv_s16(pg: svbool8_t, op: svint16_t) -> i16;
+    }
+    unsafe { _svminv_s16(pg.sve_into(), op) }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminv))]
+pub fn svminv_s32(pg: svbool_t, op: svint32_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sminv.nxv4i32")]
+        fn _svminv_s32(pg: svbool4_t, op: svint32_t) -> i32;
+    }
+    unsafe { _svminv_s32(pg.sve_into(), op) }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminv))]
+pub fn svminv_s64(pg: svbool_t, op: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sminv.nxv2i64")]
+        fn _svminv_s64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svminv_s64(pg.sve_into(), op) }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminv))]
+pub fn svminv_u8(pg: svbool_t, op: svuint8_t) -> u8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uminv.nxv16i8")]
+        fn _svminv_u8(pg: svbool_t, op: svint8_t) -> i8;
+    }
+    unsafe { _svminv_u8(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminv))]
+pub fn svminv_u16(pg: svbool_t, op: svuint16_t) -> u16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uminv.nxv8i16")]
+        fn _svminv_u16(pg: svbool8_t, op: svint16_t) -> i16;
+    }
+    unsafe { _svminv_u16(pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminv))]
+pub fn svminv_u32(pg: svbool_t, op: svuint32_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uminv.nxv4i32")]
+        fn _svminv_u32(pg: svbool4_t, op: svint32_t) -> i32;
+    }
+    unsafe { _svminv_u32(pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminv[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminv))]
+pub fn svminv_u64(pg: svbool_t, op: svuint64_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uminv.nxv2i64")]
+        fn _svminv_u64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svminv_u64(pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_f32_m(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmla.nxv4f32")]
+        fn _svmla_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svmla_f32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmla_f32_m(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_f32_x(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svmla_f32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmla_f32_x(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_f32_z(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svmla_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmla_f32_z(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_f64_m(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmla.nxv2f64")]
+        fn _svmla_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svmla_f64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmla_f64_m(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_f64_x(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svmla_f64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmla_f64_x(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_f64_z(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svmla_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla))]
+pub fn svmla_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmla_f64_z(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mla.nxv16i8")]
+        fn _svmla_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmla_s8_m(pg, op1, op2, op3) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmla_s8_m(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    svmla_s8_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmla_s8_x(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    svmla_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmla_s8_z(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mla.nxv8i16")]
+        fn _svmla_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t, op3: svint16_t)
+            -> svint16_t;
+    }
+    unsafe { _svmla_s16_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmla_s16_m(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    svmla_s16_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmla_s16_x(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    svmla_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmla_s16_z(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mla.nxv4i32")]
+        fn _svmla_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t, op3: svint32_t)
+            -> svint32_t;
+    }
+    unsafe { _svmla_s32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmla_s32_m(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    svmla_s32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmla_s32_x(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    svmla_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmla_s32_z(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mla.nxv2i64")]
+        fn _svmla_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t, op3: svint64_t)
+            -> svint64_t;
+    }
+    unsafe { _svmla_s64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmla_s64_m(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    svmla_s64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmla_s64_x(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    svmla_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmla_s64_z(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { svmla_s8_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmla_u8_m(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    svmla_u8_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmla_u8_x(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    svmla_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmla_u8_z(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { svmla_s16_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmla_u16_m(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    svmla_u16_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmla_u16_x(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    svmla_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmla_u16_z(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { svmla_s32_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmla_u32_m(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    svmla_u32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmla_u32_x(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    svmla_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmla_u32_z(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { svmla_s64_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmla_u64_m(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    svmla_u64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmla_u64_x(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    svmla_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2, op3)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla))]
+pub fn svmla_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmla_u64_z(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla_lane[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla, IMM_INDEX = 0))]
+pub fn svmla_lane_f32<const IMM_INDEX: i32>(
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmla.lane.nxv4f32"
+        )]
+        fn _svmla_lane_f32(
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+            IMM_INDEX: i32,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svmla_lane_f32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla_lane[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmla, IMM_INDEX = 0))]
+pub fn svmla_lane_f64<const IMM_INDEX: i32>(
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmla.lane.nxv2f64"
+        )]
+        fn _svmla_lane_f64(
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+            IMM_INDEX: i32,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svmla_lane_f64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_f32_m(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmls.nxv4f32")]
+        fn _svmls_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svmls_f32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmls_f32_m(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_f32_x(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svmls_f32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmls_f32_x(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_f32_z(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svmls_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmls_f32_z(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_f64_m(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmls.nxv2f64")]
+        fn _svmls_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svmls_f64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmls_f64_m(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_f64_x(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svmls_f64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmls_f64_x(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_f64_z(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svmls_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls))]
+pub fn svmls_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmls_f64_z(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mls.nxv16i8")]
+        fn _svmls_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmls_s8_m(pg, op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmls_s8_m(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    svmls_s8_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmls_s8_x(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    svmls_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmls_s8_z(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mls.nxv8i16")]
+        fn _svmls_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t, op3: svint16_t)
+            -> svint16_t;
+    }
+    unsafe { _svmls_s16_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmls_s16_m(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    svmls_s16_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmls_s16_x(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    svmls_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmls_s16_z(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mls.nxv4i32")]
+        fn _svmls_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t, op3: svint32_t)
+            -> svint32_t;
+    }
+    unsafe { _svmls_s32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmls_s32_m(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    svmls_s32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmls_s32_x(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    svmls_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmls_s32_z(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mls.nxv2i64")]
+        fn _svmls_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t, op3: svint64_t)
+            -> svint64_t;
+    }
+    unsafe { _svmls_s64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmls_s64_m(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    svmls_s64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmls_s64_x(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    svmls_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmls_s64_z(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { svmls_s8_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmls_u8_m(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    svmls_u8_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmls_u8_x(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    svmls_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmls_u8_z(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { svmls_s16_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmls_u16_m(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    svmls_u16_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmls_u16_x(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    svmls_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmls_u16_z(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { svmls_s32_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmls_u32_m(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    svmls_u32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmls_u32_x(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    svmls_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmls_u32_z(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { svmls_s64_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmls_u64_m(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    svmls_u64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmls_u64_x(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    svmls_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls))]
+pub fn svmls_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmls_u64_z(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls_lane[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls, IMM_INDEX = 0))]
+pub fn svmls_lane_f32<const IMM_INDEX: i32>(
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmls.lane.nxv4f32"
+        )]
+        fn _svmls_lane_f32(
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+            IMM_INDEX: i32,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svmls_lane_f32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls_lane[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmls, IMM_INDEX = 0))]
+pub fn svmls_lane_f64<const IMM_INDEX: i32>(
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmls.lane.nxv2f64"
+        )]
+        fn _svmls_lane_f64(
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+            IMM_INDEX: i32,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svmls_lane_f64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmmla[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,f32mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmmla))]
+pub fn svmmla_f32(op1: svfloat32_t, op2: svfloat32_t, op3: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmmla.nxv4f32")]
+        fn _svmmla_f32(op1: svfloat32_t, op2: svfloat32_t, op3: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svmmla_f32(op1, op2, op3) }
+}
+#[doc = "Matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmmla[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmmla))]
+pub fn svmmla_f64(op1: svfloat64_t, op2: svfloat64_t, op3: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmmla.nxv2f64")]
+        fn _svmmla_f64(op1: svfloat64_t, op2: svfloat64_t, op3: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svmmla_f64(op1, op2, op3) }
+}
+#[doc = "Matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmmla[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,i8mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smmla))]
+pub fn svmmla_s32(op1: svint32_t, op2: svint8_t, op3: svint8_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smmla.nxv4i32")]
+        fn _svmmla_s32(op1: svint32_t, op2: svint8_t, op3: svint8_t) -> svint32_t;
+    }
+    unsafe { _svmmla_s32(op1, op2, op3) }
+}
+#[doc = "Matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmmla[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,i8mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ummla))]
+pub fn svmmla_u32(op1: svuint32_t, op2: svuint8_t, op3: svuint8_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ummla.nxv4i32")]
+        fn _svmmla_u32(op1: svint32_t, op2: svint8_t, op3: svint8_t) -> svint32_t;
+    }
+    unsafe { _svmmla_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Move"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmov[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mov))]
+pub fn svmov_b_z(pg: svbool_t, op: svbool_t) -> svbool_t {
+    svand_b_z(pg, op, op)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_f32_m(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmsb.nxv4f32")]
+        fn _svmsb_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svmsb_f32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmsb_f32_m(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_f32_x(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svmsb_f32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmsb_f32_x(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_f32_z(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svmsb_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svmsb_f32_z(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_f64_m(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmsb.nxv2f64")]
+        fn _svmsb_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svmsb_f64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmsb_f64_m(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_f64_x(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svmsb_f64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmsb_f64_x(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_f64_z(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svmsb_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmsb))]
+pub fn svmsb_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svmsb_f64_z(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.msb.nxv16i8")]
+        fn _svmsb_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmsb_s8_m(pg, op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmsb_s8_m(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    svmsb_s8_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmsb_s8_x(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    svmsb_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svmsb_s8_z(pg, op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.msb.nxv8i16")]
+        fn _svmsb_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t, op3: svint16_t)
+            -> svint16_t;
+    }
+    unsafe { _svmsb_s16_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmsb_s16_m(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    svmsb_s16_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmsb_s16_x(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    svmsb_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svmsb_s16_z(pg, op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.msb.nxv4i32")]
+        fn _svmsb_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t, op3: svint32_t)
+            -> svint32_t;
+    }
+    unsafe { _svmsb_s32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmsb_s32_m(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    svmsb_s32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmsb_s32_x(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    svmsb_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svmsb_s32_z(pg, op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.msb.nxv2i64")]
+        fn _svmsb_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t, op3: svint64_t)
+            -> svint64_t;
+    }
+    unsafe { _svmsb_s64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmsb_s64_m(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    svmsb_s64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmsb_s64_x(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    svmsb_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svmsb_s64_z(pg, op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { svmsb_s8_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmsb_u8_m(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    svmsb_u8_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmsb_u8_x(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    svmsb_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svmsb_u8_z(pg, op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { svmsb_s16_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmsb_u16_m(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    svmsb_u16_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmsb_u16_x(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    svmsb_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svmsb_u16_z(pg, op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { svmsb_s32_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmsb_u32_m(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    svmsb_u32_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmsb_u32_x(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    svmsb_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svmsb_u32_z(pg, op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { svmsb_s64_m(pg, op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmsb_u64_m(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    svmsb_u64_m(pg, op1, op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmsb_u64_x(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    svmsb_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2, op3)
+}
+#[doc = "Multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmsb[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(msb))]
+pub fn svmsb_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svmsb_u64_z(pg, op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmul.nxv4f32")]
+        fn _svmul_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svmul_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmul_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmul_f32_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmul_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmul_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmul_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmul.nxv2f64")]
+        fn _svmul_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svmul_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmul_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmul_f64_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmul_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmul_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul))]
+pub fn svmul_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmul_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mul.nxv16i8")]
+        fn _svmul_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmul_s8_m(pg, op1, op2) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmul_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svmul_s8_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmul_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svmul_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmul_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mul.nxv8i16")]
+        fn _svmul_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svmul_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmul_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svmul_s16_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmul_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svmul_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmul_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mul.nxv4i32")]
+        fn _svmul_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svmul_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmul_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svmul_s32_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmul_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svmul_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmul_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.mul.nxv2i64")]
+        fn _svmul_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svmul_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmul_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svmul_s64_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmul_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svmul_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmul_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svmul_s8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmul_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svmul_u8_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmul_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svmul_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmul_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svmul_s16_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmul_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svmul_u16_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmul_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svmul_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmul_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svmul_s32_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmul_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svmul_u32_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmul_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svmul_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmul_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svmul_s64_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmul_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svmul_u64_m(pg, op1, op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmul_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svmul_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul))]
+pub fn svmul_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmul_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smulh.nxv16i8")]
+        fn _svmulh_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmulh_s8_m(pg, op1, op2) }
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmulh_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svmulh_s8_m(pg, op1, op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmulh_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svmulh_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svmulh_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smulh.nxv8i16")]
+        fn _svmulh_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svmulh_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmulh_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svmulh_s16_m(pg, op1, op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmulh_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svmulh_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svmulh_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smulh.nxv4i32")]
+        fn _svmulh_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svmulh_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmulh_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svmulh_s32_m(pg, op1, op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmulh_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svmulh_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svmulh_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smulh.nxv2i64")]
+        fn _svmulh_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svmulh_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmulh_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svmulh_s64_m(pg, op1, op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmulh_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svmulh_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smulh))]
+pub fn svmulh_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svmulh_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umulh.nxv16i8")]
+        fn _svmulh_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmulh_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmulh_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svmulh_u8_m(pg, op1, op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmulh_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svmulh_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svmulh_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umulh.nxv8i16")]
+        fn _svmulh_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svmulh_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmulh_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svmulh_u16_m(pg, op1, op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmulh_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svmulh_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svmulh_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umulh.nxv4i32")]
+        fn _svmulh_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svmulh_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmulh_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svmulh_u32_m(pg, op1, op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmulh_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svmulh_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svmulh_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umulh.nxv2i64")]
+        fn _svmulh_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svmulh_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmulh_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svmulh_u64_m(pg, op1, op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmulh_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svmulh_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Multiply, returning high-half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulh[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umulh))]
+pub fn svmulh_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svmulh_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmulx.nxv4f32")]
+        fn _svmulx_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svmulx_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmulx_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmulx_f32_m(pg, op1, op2)
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmulx_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmulx_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svmulx_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmulx.nxv2f64")]
+        fn _svmulx_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svmulx_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmulx_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmulx_f64_m(pg, op1, op2)
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmulx_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmulx_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Multiply extended (∞×0=2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmulx[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmulx))]
+pub fn svmulx_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svmulx_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Bitwise NAND"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnand[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nand))]
+pub fn svnand_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.nand.z.nxv16i1")]
+        fn _svnand_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svnand_b_z(pg, op1, op2) }
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fneg))]
+pub fn svneg_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fneg.nxv4f32")]
+        fn _svneg_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svneg_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fneg))]
+pub fn svneg_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svneg_f32_m(op, pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fneg))]
+pub fn svneg_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svneg_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fneg))]
+pub fn svneg_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fneg.nxv2f64")]
+        fn _svneg_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svneg_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fneg))]
+pub fn svneg_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svneg_f64_m(op, pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fneg))]
+pub fn svneg_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svneg_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.neg.nxv16i8")]
+        fn _svneg_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svneg_s8_m(inactive, pg, op) }
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s8_x(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svneg_s8_m(op, pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s8_z(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svneg_s8_m(svdup_n_s8(0), pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s16_m(inactive: svint16_t, pg: svbool_t, op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.neg.nxv8i16")]
+        fn _svneg_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svneg_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s16_x(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svneg_s16_m(op, pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s16_z(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svneg_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.neg.nxv4i32")]
+        fn _svneg_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svneg_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svneg_s32_m(op, pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svneg_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.neg.nxv2i64")]
+        fn _svneg_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svneg_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svneg_s64_m(op, pg, op)
+}
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svneg[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(neg))]
+pub fn svneg_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svneg_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_f32_m(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fnmad.nxv4f32")]
+        fn _svnmad_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svnmad_f32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmad_f32_m(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_f32_x(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svnmad_f32_m(pg, op1, op2, op3)
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmad_f32_x(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_f32_z(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svnmad_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2, op3)
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmad_f32_z(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_f64_m(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fnmad.nxv2f64")]
+        fn _svnmad_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svnmad_f64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmad_f64_m(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_f64_x(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svnmad_f64_m(pg, op1, op2, op3)
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmad_f64_x(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_f64_z(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svnmad_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2, op3)
+}
+#[doc = "Negated multiply-add, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmad[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmad))]
+pub fn svnmad_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmad_f64_z(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_f32_m(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fnmla.nxv4f32")]
+        fn _svnmla_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svnmla_f32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmla_f32_m(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_f32_x(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svnmla_f32_m(pg, op1, op2, op3)
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmla_f32_x(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_f32_z(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svnmla_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2, op3)
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmla_f32_z(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_f64_m(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fnmla.nxv2f64")]
+        fn _svnmla_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svnmla_f64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmla_f64_m(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_f64_x(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svnmla_f64_m(pg, op1, op2, op3)
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmla_f64_x(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_f64_z(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svnmla_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2, op3)
+}
+#[doc = "Negated multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmla[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmla))]
+pub fn svnmla_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmla_f64_z(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_f32_m(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fnmls.nxv4f32")]
+        fn _svnmls_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svnmls_f32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmls_f32_m(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_f32_x(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svnmls_f32_m(pg, op1, op2, op3)
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmls_f32_x(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_f32_z(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svnmls_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2, op3)
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmls_f32_z(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_f64_m(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fnmls.nxv2f64")]
+        fn _svnmls_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svnmls_f64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmls_f64_m(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_f64_x(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svnmls_f64_m(pg, op1, op2, op3)
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmls_f64_x(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_f64_z(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svnmls_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2, op3)
+}
+#[doc = "Negated multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmls[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmls))]
+pub fn svnmls_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmls_f64_z(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_f32_m(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fnmsb.nxv4f32")]
+        fn _svnmsb_f32_m(
+            pg: svbool4_t,
+            op1: svfloat32_t,
+            op2: svfloat32_t,
+            op3: svfloat32_t,
+        ) -> svfloat32_t;
+    }
+    unsafe { _svnmsb_f32_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmsb_f32_m(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_f32_x(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svnmsb_f32_m(pg, op1, op2, op3)
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmsb_f32_x(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_f32_z(
+    pg: svbool_t,
+    op1: svfloat32_t,
+    op2: svfloat32_t,
+    op3: svfloat32_t,
+) -> svfloat32_t {
+    svnmsb_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2, op3)
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t, op3: f32) -> svfloat32_t {
+    svnmsb_f32_z(pg, op1, op2, svdup_n_f32(op3))
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_f64_m(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fnmsb.nxv2f64")]
+        fn _svnmsb_f64_m(
+            pg: svbool2_t,
+            op1: svfloat64_t,
+            op2: svfloat64_t,
+            op3: svfloat64_t,
+        ) -> svfloat64_t;
+    }
+    unsafe { _svnmsb_f64_m(pg.sve_into(), op1, op2, op3) }
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmsb_f64_m(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_f64_x(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svnmsb_f64_m(pg, op1, op2, op3)
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmsb_f64_x(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_f64_z(
+    pg: svbool_t,
+    op1: svfloat64_t,
+    op2: svfloat64_t,
+    op3: svfloat64_t,
+) -> svfloat64_t {
+    svnmsb_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2, op3)
+}
+#[doc = "Negated multiply-subtract, multiplicand first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmsb[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fnmsb))]
+pub fn svnmsb_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t, op3: f64) -> svfloat64_t {
+    svnmsb_f64_z(pg, op1, op2, svdup_n_f64(op3))
+}
+#[doc = "Bitwise NOR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnor[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nor))]
+pub fn svnor_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.nor.z.nxv16i1")]
+        fn _svnor_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svnor_b_z(pg, op1, op2) }
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_b_z(pg: svbool_t, op: svbool_t) -> svbool_t {
+    sveor_b_z(pg, op, pg)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.not.nxv16i8")]
+        fn _svnot_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svnot_s8_m(inactive, pg, op) }
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s8_x(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svnot_s8_m(op, pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s8_z(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svnot_s8_m(svdup_n_s8(0), pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s16_m(inactive: svint16_t, pg: svbool_t, op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.not.nxv8i16")]
+        fn _svnot_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svnot_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s16_x(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svnot_s16_m(op, pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s16_z(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svnot_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.not.nxv4i32")]
+        fn _svnot_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svnot_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svnot_s32_m(op, pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svnot_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.not.nxv2i64")]
+        fn _svnot_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svnot_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svnot_s64_m(op, pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svnot_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u8_m(inactive: svuint8_t, pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    unsafe { svnot_s8_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u8_x(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svnot_u8_m(op, pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u8_z(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svnot_u8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u16_m(inactive: svuint16_t, pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    unsafe { svnot_s16_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u16_x(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svnot_u16_m(op, pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u16_z(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svnot_u16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe { svnot_s32_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svnot_u32_m(op, pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svnot_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe { svnot_s64_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svnot_u64_m(op, pg, op)
+}
+#[doc = "Bitwise invert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnot[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(not))]
+pub fn svnot_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svnot_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Bitwise inclusive OR, inverting second argument"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorn[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orn))]
+pub fn svorn_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orn.z.nvx16i1")]
+        fn _svorn_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svorn_b_z(pg, op1, op2) }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_b]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orr.z.nvx16i1")]
+        fn _svorr_b_z(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svorr_b_z(pg, op1, op2) }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orr.nxv16i8")]
+        fn _svorr_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svorr_s8_m(pg, op1, op2) }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svorr_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svorr_s8_m(pg, op1, op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svorr_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svorr_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svorr_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orr.nxv8i16")]
+        fn _svorr_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svorr_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svorr_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svorr_s16_m(pg, op1, op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svorr_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svorr_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svorr_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orr.nxv4i32")]
+        fn _svorr_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svorr_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svorr_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svorr_s32_m(pg, op1, op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svorr_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svorr_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svorr_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orr.nxv2i64")]
+        fn _svorr_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svorr_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svorr_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svorr_s64_m(pg, op1, op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svorr_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svorr_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svorr_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svorr_s8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svorr_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svorr_u8_m(pg, op1, op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svorr_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svorr_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svorr_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svorr_s16_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svorr_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svorr_u16_m(pg, op1, op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svorr_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svorr_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svorr_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svorr_s32_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svorr_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svorr_u32_m(pg, op1, op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svorr_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svorr_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svorr_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svorr_s64_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svorr_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svorr_u64_m(pg, op1, op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svorr_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svorr_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Bitwise inclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorr[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orr))]
+pub fn svorr_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svorr_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise inclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorv[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orv))]
+pub fn svorv_s8(pg: svbool_t, op: svint8_t) -> i8 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orv.nxv16i8")]
+        fn _svorv_s8(pg: svbool_t, op: svint8_t) -> i8;
+    }
+    unsafe { _svorv_s8(pg, op) }
+}
+#[doc = "Bitwise inclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorv[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orv))]
+pub fn svorv_s16(pg: svbool_t, op: svint16_t) -> i16 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orv.nxv8i16")]
+        fn _svorv_s16(pg: svbool8_t, op: svint16_t) -> i16;
+    }
+    unsafe { _svorv_s16(pg.sve_into(), op) }
+}
+#[doc = "Bitwise inclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorv[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orv))]
+pub fn svorv_s32(pg: svbool_t, op: svint32_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orv.nxv4i32")]
+        fn _svorv_s32(pg: svbool4_t, op: svint32_t) -> i32;
+    }
+    unsafe { _svorv_s32(pg.sve_into(), op) }
+}
+#[doc = "Bitwise inclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorv[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orv))]
+pub fn svorv_s64(pg: svbool_t, op: svint64_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.orv.nxv2i64")]
+        fn _svorv_s64(pg: svbool2_t, op: svint64_t) -> i64;
+    }
+    unsafe { _svorv_s64(pg.sve_into(), op) }
+}
+#[doc = "Bitwise inclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorv[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orv))]
+pub fn svorv_u8(pg: svbool_t, op: svuint8_t) -> u8 {
+    unsafe { svorv_s8(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise inclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorv[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orv))]
+pub fn svorv_u16(pg: svbool_t, op: svuint16_t) -> u16 {
+    unsafe { svorv_s16(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise inclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorv[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orv))]
+pub fn svorv_u32(pg: svbool_t, op: svuint32_t) -> u32 {
+    unsafe { svorv_s32(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise inclusive OR reduction to scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svorv[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(orv))]
+pub fn svorv_u64(pg: svbool_t, op: svuint64_t) -> u64 {
+    unsafe { svorv_s64(pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Set all predicate elements to false"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpfalse[_b])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svpfalse_b() -> svbool_t {
+    svdupq_n_b8(
+        false, false, false, false, false, false, false, false, false, false, false, false, false,
+        false, false, false,
+    )
+}
+#[doc = "Set the first active predicate element to true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpfirst[_b])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pfirst))]
+pub fn svpfirst_b(pg: svbool_t, op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.pfirst.nxv16i1")]
+        fn _svpfirst_b(pg: svbool_t, op: svbool_t) -> svbool_t;
+    }
+    unsafe { _svpfirst_b(pg, op) }
+}
+#[doc = "Find next active predicate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpnext_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pnext))]
+pub fn svpnext_b8(pg: svbool_t, op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.pnext.nxv16i1")]
+        fn _svpnext_b8(pg: svbool_t, op: svbool_t) -> svbool_t;
+    }
+    unsafe { _svpnext_b8(pg, op) }
+}
+#[doc = "Find next active predicate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpnext_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pnext))]
+pub fn svpnext_b16(pg: svbool_t, op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.pnext.nxv8i1")]
+        fn _svpnext_b16(pg: svbool8_t, op: svbool8_t) -> svbool8_t;
+    }
+    unsafe { _svpnext_b16(pg.sve_into(), op.sve_into()).sve_into() }
+}
+#[doc = "Find next active predicate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpnext_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pnext))]
+pub fn svpnext_b32(pg: svbool_t, op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.pnext.nxv4i1")]
+        fn _svpnext_b32(pg: svbool4_t, op: svbool4_t) -> svbool4_t;
+    }
+    unsafe { _svpnext_b32(pg.sve_into(), op.sve_into()).sve_into() }
+}
+#[doc = "Find next active predicate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpnext_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pnext))]
+pub fn svpnext_b64(pg: svbool_t, op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.pnext.nxv2i1")]
+        fn _svpnext_b64(pg: svbool2_t, op: svbool2_t) -> svbool2_t;
+    }
+    unsafe { _svpnext_b64(pg.sve_into(), op.sve_into()).sve_into() }
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfb<const OP: svprfop, T>(pg: svbool_t, base: *const T) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.prf.nxv16i1")]
+        fn _svprfb(pg: svbool_t, base: *const crate::ffi::c_void, op: svprfop);
+    }
+    _svprfb(pg, base as *const crate::ffi::c_void, OP)
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfh , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfh<const OP: svprfop, T>(pg: svbool_t, base: *const T) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.prf.nxv8i1")]
+        fn _svprfh(pg: svbool8_t, base: *const crate::ffi::c_void, op: svprfop);
+    }
+    _svprfh(pg.sve_into(), base as *const crate::ffi::c_void, OP)
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfw , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfw<const OP: svprfop, T>(pg: svbool_t, base: *const T) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.prf.nxv4i1")]
+        fn _svprfw(pg: svbool4_t, base: *const crate::ffi::c_void, op: svprfop);
+    }
+    _svprfw(pg.sve_into(), base as *const crate::ffi::c_void, OP)
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfd , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfd<const OP: svprfop, T>(pg: svbool_t, base: *const T) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.prf.nxv2i1")]
+        fn _svprfd(pg: svbool2_t, base: *const crate::ffi::c_void, op: svprfop);
+    }
+    _svprfd(pg.sve_into(), base as *const crate::ffi::c_void, OP)
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb_gather_[s32]offset)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfb_gather_s32offset<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    offsets: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfb.gather.sxtw.index.nxv4i32"
+        )]
+        fn _svprfb_gather_s32offset(
+            pg: svbool4_t,
+            base: *const crate::ffi::c_void,
+            offsets: svint32_t,
+            op: svprfop,
+        );
+    }
+    _svprfb_gather_s32offset(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        offsets,
+        OP,
+    )
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh_gather_[s32]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfh , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfh_gather_s32index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfh.gather.sxtw.index.nxv4i32"
+        )]
+        fn _svprfh_gather_s32index(
+            pg: svbool4_t,
+            base: *const crate::ffi::c_void,
+            indices: svint32_t,
+            op: svprfop,
+        );
+    }
+    _svprfh_gather_s32index(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        indices,
+        OP,
+    )
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw_gather_[s32]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfw , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfw_gather_s32index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfw.gather.sxtw.index.nxv4i32"
+        )]
+        fn _svprfw_gather_s32index(
+            pg: svbool4_t,
+            base: *const crate::ffi::c_void,
+            indices: svint32_t,
+            op: svprfop,
+        );
+    }
+    _svprfw_gather_s32index(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        indices,
+        OP,
+    )
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd_gather_[s32]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfd , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfd_gather_s32index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfd.gather.sxtw.index.nxv4i32"
+        )]
+        fn _svprfd_gather_s32index(
+            pg: svbool4_t,
+            base: *const crate::ffi::c_void,
+            indices: svint32_t,
+            op: svprfop,
+        );
+    }
+    _svprfd_gather_s32index(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        indices,
+        OP,
+    )
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb_gather_[s64]offset)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfb_gather_s64offset<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    offsets: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfb.gather.index.nxv2i64"
+        )]
+        fn _svprfb_gather_s64offset(
+            pg: svbool2_t,
+            base: *const crate::ffi::c_void,
+            offsets: svint64_t,
+            op: svprfop,
+        );
+    }
+    _svprfb_gather_s64offset(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        offsets,
+        OP,
+    )
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh_gather_[s64]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfh , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfh_gather_s64index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfh.gather.index.nxv2i64"
+        )]
+        fn _svprfh_gather_s64index(
+            pg: svbool2_t,
+            base: *const crate::ffi::c_void,
+            indices: svint64_t,
+            op: svprfop,
+        );
+    }
+    _svprfh_gather_s64index(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        indices,
+        OP,
+    )
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw_gather_[s64]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfw , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfw_gather_s64index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfw.gather.index.nxv2i64"
+        )]
+        fn _svprfw_gather_s64index(
+            pg: svbool2_t,
+            base: *const crate::ffi::c_void,
+            indices: svint64_t,
+            op: svprfop,
+        );
+    }
+    _svprfw_gather_s64index(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        indices,
+        OP,
+    )
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd_gather_[s64]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfd , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfd_gather_s64index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfd.gather.index.nxv2i64"
+        )]
+        fn _svprfd_gather_s64index(
+            pg: svbool2_t,
+            base: *const crate::ffi::c_void,
+            indices: svint64_t,
+            op: svprfop,
+        );
+    }
+    _svprfd_gather_s64index(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        indices,
+        OP,
+    )
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb_gather_[u32]offset)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfb_gather_u32offset<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    offsets: svuint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfb.gather.uxtw.index.nxv4i32"
+        )]
+        fn _svprfb_gather_u32offset(
+            pg: svbool4_t,
+            base: *const crate::ffi::c_void,
+            offsets: svint32_t,
+            op: svprfop,
+        );
+    }
+    _svprfb_gather_u32offset(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        offsets.as_signed(),
+        OP,
+    )
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh_gather_[u32]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfh , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfh_gather_u32index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svuint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfh.gather.uxtw.index.nxv4i32"
+        )]
+        fn _svprfh_gather_u32index(
+            pg: svbool4_t,
+            base: *const crate::ffi::c_void,
+            indices: svint32_t,
+            op: svprfop,
+        );
+    }
+    _svprfh_gather_u32index(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        indices.as_signed(),
+        OP,
+    )
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw_gather_[u32]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfw , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfw_gather_u32index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svuint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfw.gather.uxtw.index.nxv4i32"
+        )]
+        fn _svprfw_gather_u32index(
+            pg: svbool4_t,
+            base: *const crate::ffi::c_void,
+            indices: svint32_t,
+            op: svprfop,
+        );
+    }
+    _svprfw_gather_u32index(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        indices.as_signed(),
+        OP,
+    )
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd_gather_[u32]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfd , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfd_gather_u32index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svuint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfd.gather.uxtw.index.nxv4i32"
+        )]
+        fn _svprfd_gather_u32index(
+            pg: svbool4_t,
+            base: *const crate::ffi::c_void,
+            indices: svint32_t,
+            op: svprfop,
+        );
+    }
+    _svprfd_gather_u32index(
+        pg.sve_into(),
+        base as *const crate::ffi::c_void,
+        indices.as_signed(),
+        OP,
+    )
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb_gather_[u64]offset)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfb_gather_u64offset<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    offsets: svuint64_t,
+) {
+    svprfb_gather_s64offset::<OP, T>(pg, base, offsets.as_signed())
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh_gather_[u64]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfh , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfh_gather_u64index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svuint64_t,
+) {
+    svprfh_gather_s64index::<OP, T>(pg, base, indices.as_signed())
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw_gather_[u64]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfw , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfw_gather_u64index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svuint64_t,
+) {
+    svprfw_gather_s64index::<OP, T>(pg, base, indices.as_signed())
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd_gather_[u64]index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfd , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfd_gather_u64index<const OP: svprfop, T>(
+    pg: svbool_t,
+    base: *const T,
+    indices: svuint64_t,
+) {
+    svprfd_gather_s64index::<OP, T>(pg, base, indices.as_signed())
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb_gather[_u32base])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfb_gather_u32base<const OP: svprfop>(pg: svbool_t, bases: svuint32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32"
+        )]
+        fn _svprfb_gather_u32base(pg: svbool4_t, bases: svint32_t, index: i64, op: svprfop);
+    }
+    _svprfb_gather_u32base(pg.sve_into(), bases.as_signed(), 0, OP)
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh_gather[_u32base])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfh , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfh_gather_u32base<const OP: svprfop>(pg: svbool_t, bases: svuint32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32"
+        )]
+        fn _svprfh_gather_u32base(pg: svbool4_t, bases: svint32_t, index: i64, op: svprfop);
+    }
+    _svprfh_gather_u32base(pg.sve_into(), bases.as_signed(), 0, OP)
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw_gather[_u32base])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfw , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfw_gather_u32base<const OP: svprfop>(pg: svbool_t, bases: svuint32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32"
+        )]
+        fn _svprfw_gather_u32base(pg: svbool4_t, bases: svint32_t, index: i64, op: svprfop);
+    }
+    _svprfw_gather_u32base(pg.sve_into(), bases.as_signed(), 0, OP)
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd_gather[_u32base])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfd , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfd_gather_u32base<const OP: svprfop>(pg: svbool_t, bases: svuint32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32"
+        )]
+        fn _svprfd_gather_u32base(pg: svbool4_t, bases: svint32_t, index: i64, op: svprfop);
+    }
+    _svprfd_gather_u32base(pg.sve_into(), bases.as_signed(), 0, OP)
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb_gather[_u64base])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfb_gather_u64base<const OP: svprfop>(pg: svbool_t, bases: svuint64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64"
+        )]
+        fn _svprfb_gather_u64base(pg: svbool2_t, bases: svint64_t, index: i64, op: svprfop);
+    }
+    _svprfb_gather_u64base(pg.sve_into(), bases.as_signed(), 0, OP)
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh_gather[_u64base])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfh , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfh_gather_u64base<const OP: svprfop>(pg: svbool_t, bases: svuint64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64"
+        )]
+        fn _svprfh_gather_u64base(pg: svbool2_t, bases: svint64_t, index: i64, op: svprfop);
+    }
+    _svprfh_gather_u64base(pg.sve_into(), bases.as_signed(), 0, OP)
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw_gather[_u64base])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfw , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfw_gather_u64base<const OP: svprfop>(pg: svbool_t, bases: svuint64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64"
+        )]
+        fn _svprfw_gather_u64base(pg: svbool2_t, bases: svint64_t, index: i64, op: svprfop);
+    }
+    _svprfw_gather_u64base(pg.sve_into(), bases.as_signed(), 0, OP)
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd_gather[_u64base])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfd , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfd_gather_u64base<const OP: svprfop>(pg: svbool_t, bases: svuint64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64"
+        )]
+        fn _svprfd_gather_u64base(pg: svbool2_t, bases: svint64_t, index: i64, op: svprfop);
+    }
+    _svprfd_gather_u64base(pg.sve_into(), bases.as_signed(), 0, OP)
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb_gather[_u32base]_offset)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfb_gather_u32base_offset<const OP: svprfop>(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfb.gather.scalar.offset.nxv4i32"
+        )]
+        fn _svprfb_gather_u32base_offset(pg: svbool4_t, bases: svint32_t, offset: i64, op: svprfop);
+    }
+    _svprfb_gather_u32base_offset(pg.sve_into(), bases.as_signed(), offset, OP)
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh_gather[_u32base]_index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfh_gather_u32base_index<const OP: svprfop>(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfh.gather.scalar.offset.nxv4i32"
+        )]
+        fn _svprfh_gather_u32base_index(pg: svbool4_t, bases: svint32_t, index: i64, op: svprfop);
+    }
+    _svprfh_gather_u32base_index(pg.sve_into(), bases.as_signed(), index.unchecked_shl(1), OP)
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw_gather[_u32base]_index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfw_gather_u32base_index<const OP: svprfop>(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfw.gather.scalar.offset.nxv4i32"
+        )]
+        fn _svprfw_gather_u32base_index(pg: svbool4_t, bases: svint32_t, index: i64, op: svprfop);
+    }
+    _svprfw_gather_u32base_index(pg.sve_into(), bases.as_signed(), index.unchecked_shl(2), OP)
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd_gather[_u32base]_index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfd_gather_u32base_index<const OP: svprfop>(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfd.gather.scalar.offset.nxv4i32"
+        )]
+        fn _svprfd_gather_u32base_index(pg: svbool4_t, bases: svint32_t, index: i64, op: svprfop);
+    }
+    _svprfd_gather_u32base_index(pg.sve_into(), bases.as_signed(), index.unchecked_shl(3), OP)
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb_gather[_u64base]_offset)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfb_gather_u64base_offset<const OP: svprfop>(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfb.gather.scalar.offset.nxv2i64"
+        )]
+        fn _svprfb_gather_u64base_offset(pg: svbool2_t, bases: svint64_t, offset: i64, op: svprfop);
+    }
+    _svprfb_gather_u64base_offset(pg.sve_into(), bases.as_signed(), offset, OP)
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh_gather[_u64base]_index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfh_gather_u64base_index<const OP: svprfop>(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfh.gather.scalar.offset.nxv2i64"
+        )]
+        fn _svprfh_gather_u64base_index(pg: svbool2_t, bases: svint64_t, index: i64, op: svprfop);
+    }
+    _svprfh_gather_u64base_index(pg.sve_into(), bases.as_signed(), index.unchecked_shl(1), OP)
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw_gather[_u64base]_index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfw_gather_u64base_index<const OP: svprfop>(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfw.gather.scalar.offset.nxv2i64"
+        )]
+        fn _svprfw_gather_u64base_index(pg: svbool2_t, bases: svint64_t, index: i64, op: svprfop);
+    }
+    _svprfw_gather_u64base_index(pg.sve_into(), bases.as_signed(), index.unchecked_shl(2), OP)
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd_gather[_u64base]_index)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP }))]
+pub unsafe fn svprfd_gather_u64base_index<const OP: svprfop>(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.prfd.gather.scalar.offset.nxv2i64"
+        )]
+        fn _svprfd_gather_u64base_index(pg: svbool2_t, bases: svint64_t, index: i64, op: svprfop);
+    }
+    _svprfd_gather_u64base_index(pg.sve_into(), bases.as_signed(), index.unchecked_shl(3), OP)
+}
+#[doc = "Prefetch bytes"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfb_vnum)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfb , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfb_vnum<const OP: svprfop, T>(pg: svbool_t, base: *const T, vnum: i64) {
+    svprfb::<OP, _>(pg, base.offset(svcntb() as isize * vnum as isize))
+}
+#[doc = "Prefetch halfwords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfh_vnum)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfh , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfh_vnum<const OP: svprfop, T>(pg: svbool_t, base: *const T, vnum: i64) {
+    svprfh::<OP, _>(pg, base.offset(svcnth() as isize * vnum as isize))
+}
+#[doc = "Prefetch words"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfw_vnum)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfw , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfw_vnum<const OP: svprfop, T>(pg: svbool_t, base: *const T, vnum: i64) {
+    svprfw::<OP, _>(pg, base.offset(svcntw() as isize * vnum as isize))
+}
+#[doc = "Prefetch doublewords"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svprfd_vnum)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (prfd , OP = { svprfop :: SV_PLDL1KEEP } , T = i64))]
+pub unsafe fn svprfd_vnum<const OP: svprfop, T>(pg: svbool_t, base: *const T, vnum: i64) {
+    svprfd::<OP, _>(pg, base.offset(svcntd() as isize * vnum as isize))
+}
+#[doc = "Test whether any active element is true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptest_any)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ptest))]
+pub fn svptest_any(pg: svbool_t, op: svbool_t) -> bool {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ptest.any.nxv16i1"
+        )]
+        fn _svptest_any(pg: svbool_t, op: svbool_t) -> bool;
+    }
+    unsafe { _svptest_any(pg, op) }
+}
+#[doc = "Test whether first active element is true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptest_first)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ptest))]
+pub fn svptest_first(pg: svbool_t, op: svbool_t) -> bool {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ptest.first.nxv16i1"
+        )]
+        fn _svptest_first(pg: svbool_t, op: svbool_t) -> bool;
+    }
+    unsafe { _svptest_first(pg, op) }
+}
+#[doc = "Test whether last active element is true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptest_last)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ptest))]
+pub fn svptest_last(pg: svbool_t, op: svbool_t) -> bool {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ptest.last.nxv16i1"
+        )]
+        fn _svptest_last(pg: svbool_t, op: svbool_t) -> bool;
+    }
+    unsafe { _svptest_last(pg, op) }
+}
+#[doc = "Set predicate elements to true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptrue_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ptrue))]
+pub fn svptrue_b8() -> svbool_t {
+    svptrue_pat_b8::<{ svpattern::SV_ALL }>()
+}
+#[doc = "Set predicate elements to true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptrue_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ptrue))]
+pub fn svptrue_b16() -> svbool_t {
+    svptrue_pat_b16::<{ svpattern::SV_ALL }>()
+}
+#[doc = "Set predicate elements to true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptrue_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ptrue))]
+pub fn svptrue_b32() -> svbool_t {
+    svptrue_pat_b32::<{ svpattern::SV_ALL }>()
+}
+#[doc = "Set predicate elements to true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptrue_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ptrue))]
+pub fn svptrue_b64() -> svbool_t {
+    svptrue_pat_b64::<{ svpattern::SV_ALL }>()
+}
+#[doc = "Set predicate elements to true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptrue_pat_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (ptrue , PATTERN = { svpattern :: SV_ALL }))]
+pub fn svptrue_pat_b8<const PATTERN: svpattern>() -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ptrue.nxv16i1")]
+        fn _svptrue_pat_b8(pattern: svpattern) -> svbool_t;
+    }
+    unsafe { _svptrue_pat_b8(PATTERN) }
+}
+#[doc = "Set predicate elements to true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptrue_pat_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (ptrue , PATTERN = { svpattern :: SV_ALL }))]
+pub fn svptrue_pat_b16<const PATTERN: svpattern>() -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ptrue.nxv8i1")]
+        fn _svptrue_pat_b16(pattern: svpattern) -> svbool8_t;
+    }
+    unsafe { _svptrue_pat_b16(PATTERN).sve_into() }
+}
+#[doc = "Set predicate elements to true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptrue_pat_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (ptrue , PATTERN = { svpattern :: SV_ALL }))]
+pub fn svptrue_pat_b32<const PATTERN: svpattern>() -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ptrue.nxv4i1")]
+        fn _svptrue_pat_b32(pattern: svpattern) -> svbool4_t;
+    }
+    unsafe { _svptrue_pat_b32(PATTERN).sve_into() }
+}
+#[doc = "Set predicate elements to true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svptrue_pat_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (ptrue , PATTERN = { svpattern :: SV_ALL }))]
+pub fn svptrue_pat_b64<const PATTERN: svpattern>() -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ptrue.nxv2i1")]
+        fn _svptrue_pat_b64(pattern: svpattern) -> svbool2_t;
+    }
+    unsafe { _svptrue_pat_b64(PATTERN).sve_into() }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqadd.x.nxv16i8"
+        )]
+        fn _svqadd_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqadd_s8(op1, op2) }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s8(op1: svint8_t, op2: i8) -> svint8_t {
+    svqadd_s8(op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqadd.x.nxv8i16"
+        )]
+        fn _svqadd_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqadd_s16(op1, op2) }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s16(op1: svint16_t, op2: i16) -> svint16_t {
+    svqadd_s16(op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqadd.x.nxv4i32"
+        )]
+        fn _svqadd_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqadd_s32(op1, op2) }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s32(op1: svint32_t, op2: i32) -> svint32_t {
+    svqadd_s32(op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqadd.x.nxv2i64"
+        )]
+        fn _svqadd_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqadd_s64(op1, op2) }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s64(op1: svint64_t, op2: i64) -> svint64_t {
+    svqadd_s64(op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqadd.x.nxv16i8"
+        )]
+        fn _svqadd_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqadd_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u8(op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqadd_u8(op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqadd.x.nxv8i16"
+        )]
+        fn _svqadd_u16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqadd_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u16(op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqadd_u16(op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqadd.x.nxv4i32"
+        )]
+        fn _svqadd_u32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqadd_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u32(op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqadd_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqadd.x.nxv2i64"
+        )]
+        fn _svqadd_u64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqadd_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u64(op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqadd_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating decrement by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecb, IMM_FACTOR = 1))]
+pub fn svqdecb_n_s32<const IMM_FACTOR: i32>(op: i32) -> i32 {
+    svqdecb_pat_n_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdech, IMM_FACTOR = 1))]
+pub fn svqdech_n_s32<const IMM_FACTOR: i32>(op: i32) -> i32 {
+    svqdech_pat_n_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecw, IMM_FACTOR = 1))]
+pub fn svqdecw_n_s32<const IMM_FACTOR: i32>(op: i32) -> i32 {
+    svqdecw_pat_n_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecd, IMM_FACTOR = 1))]
+pub fn svqdecd_n_s32<const IMM_FACTOR: i32>(op: i32) -> i32 {
+    svqdecd_pat_n_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecb, IMM_FACTOR = 1))]
+pub fn svqdecb_n_s64<const IMM_FACTOR: i32>(op: i64) -> i64 {
+    svqdecb_pat_n_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdech, IMM_FACTOR = 1))]
+pub fn svqdech_n_s64<const IMM_FACTOR: i32>(op: i64) -> i64 {
+    svqdech_pat_n_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecw, IMM_FACTOR = 1))]
+pub fn svqdecw_n_s64<const IMM_FACTOR: i32>(op: i64) -> i64 {
+    svqdecw_pat_n_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecd, IMM_FACTOR = 1))]
+pub fn svqdecd_n_s64<const IMM_FACTOR: i32>(op: i64) -> i64 {
+    svqdecd_pat_n_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecb, IMM_FACTOR = 1))]
+pub fn svqdecb_n_u32<const IMM_FACTOR: i32>(op: u32) -> u32 {
+    svqdecb_pat_n_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdech, IMM_FACTOR = 1))]
+pub fn svqdech_n_u32<const IMM_FACTOR: i32>(op: u32) -> u32 {
+    svqdech_pat_n_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecw, IMM_FACTOR = 1))]
+pub fn svqdecw_n_u32<const IMM_FACTOR: i32>(op: u32) -> u32 {
+    svqdecw_pat_n_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecd, IMM_FACTOR = 1))]
+pub fn svqdecd_n_u32<const IMM_FACTOR: i32>(op: u32) -> u32 {
+    svqdecd_pat_n_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecb, IMM_FACTOR = 1))]
+pub fn svqdecb_n_u64<const IMM_FACTOR: i32>(op: u64) -> u64 {
+    svqdecb_pat_n_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdech, IMM_FACTOR = 1))]
+pub fn svqdech_n_u64<const IMM_FACTOR: i32>(op: u64) -> u64 {
+    svqdech_pat_n_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecw, IMM_FACTOR = 1))]
+pub fn svqdecw_n_u64<const IMM_FACTOR: i32>(op: u64) -> u64 {
+    svqdecw_pat_n_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecd, IMM_FACTOR = 1))]
+pub fn svqdecd_n_u64<const IMM_FACTOR: i32>(op: u64) -> u64 {
+    svqdecd_pat_n_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecb_pat[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdecb , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecb_pat_n_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i32) -> i32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecb.n32")]
+        fn _svqdecb_pat_n_s32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqdecb_pat_n_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech_pat[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdech , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdech_pat_n_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i32) -> i32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdech.n32")]
+        fn _svqdech_pat_n_s32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqdech_pat_n_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw_pat[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdecw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecw_pat_n_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i32) -> i32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecw.n32")]
+        fn _svqdecw_pat_n_s32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqdecw_pat_n_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd_pat[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdecd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecd_pat_n_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i32) -> i32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecd.n32")]
+        fn _svqdecd_pat_n_s32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqdecd_pat_n_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecb_pat[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdecb , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecb_pat_n_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i64) -> i64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecb.n64")]
+        fn _svqdecb_pat_n_s64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqdecb_pat_n_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech_pat[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdech , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdech_pat_n_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i64) -> i64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdech.n64")]
+        fn _svqdech_pat_n_s64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqdech_pat_n_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw_pat[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdecw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecw_pat_n_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i64) -> i64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecw.n64")]
+        fn _svqdecw_pat_n_s64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqdecw_pat_n_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd_pat[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdecd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecd_pat_n_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i64) -> i64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecd.n64")]
+        fn _svqdecd_pat_n_s64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqdecd_pat_n_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecb_pat[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdecb , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecb_pat_n_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u32) -> u32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecb.n32")]
+        fn _svqdecb_pat_n_u32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqdecb_pat_n_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech_pat[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdech , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdech_pat_n_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u32) -> u32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdech.n32")]
+        fn _svqdech_pat_n_u32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqdech_pat_n_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw_pat[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdecw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecw_pat_n_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u32) -> u32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecw.n32")]
+        fn _svqdecw_pat_n_u32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqdecw_pat_n_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd_pat[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdecd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecd_pat_n_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u32) -> u32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecd.n32")]
+        fn _svqdecd_pat_n_u32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqdecd_pat_n_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecb_pat[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdecb , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecb_pat_n_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u64) -> u64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecb.n64")]
+        fn _svqdecb_pat_n_u64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqdecb_pat_n_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech_pat[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdech , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdech_pat_n_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u64) -> u64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdech.n64")]
+        fn _svqdech_pat_n_u64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqdech_pat_n_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw_pat[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdecw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecw_pat_n_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u64) -> u64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecw.n64")]
+        fn _svqdecw_pat_n_u64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqdecw_pat_n_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd_pat[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdecd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecd_pat_n_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u64) -> u64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecd.n64")]
+        fn _svqdecd_pat_n_u64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqdecd_pat_n_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech_pat[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdech , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdech_pat_s16<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svint16_t,
+) -> svint16_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdech.nxv8i16")]
+        fn _svqdech_pat_s16(op: svint16_t, pattern: svpattern, imm_factor: i32) -> svint16_t;
+    }
+    unsafe { _svqdech_pat_s16(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw_pat[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdecw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecw_pat_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svint32_t,
+) -> svint32_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecw.nxv4i32")]
+        fn _svqdecw_pat_s32(op: svint32_t, pattern: svpattern, imm_factor: i32) -> svint32_t;
+    }
+    unsafe { _svqdecw_pat_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd_pat[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqdecd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecd_pat_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svint64_t,
+) -> svint64_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecd.nxv2i64")]
+        fn _svqdecd_pat_s64(op: svint64_t, pattern: svpattern, imm_factor: i32) -> svint64_t;
+    }
+    unsafe { _svqdecd_pat_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech_pat[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdech , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdech_pat_u16<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svuint16_t,
+) -> svuint16_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdech.nxv8i16")]
+        fn _svqdech_pat_u16(op: svint16_t, pattern: svpattern, imm_factor: i32) -> svint16_t;
+    }
+    unsafe { _svqdech_pat_u16(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw_pat[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdecw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecw_pat_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svuint32_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecw.nxv4i32")]
+        fn _svqdecw_pat_u32(op: svint32_t, pattern: svpattern, imm_factor: i32) -> svint32_t;
+    }
+    unsafe { _svqdecw_pat_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd_pat[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqdecd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqdecd_pat_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svuint64_t,
+) -> svuint64_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecd.nxv2i64")]
+        fn _svqdecd_pat_u64(op: svint64_t, pattern: svpattern, imm_factor: i32) -> svint64_t;
+    }
+    unsafe { _svqdecd_pat_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdech, IMM_FACTOR = 1))]
+pub fn svqdech_s16<const IMM_FACTOR: i32>(op: svint16_t) -> svint16_t {
+    svqdech_pat_s16::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecw, IMM_FACTOR = 1))]
+pub fn svqdecw_s32<const IMM_FACTOR: i32>(op: svint32_t) -> svint32_t {
+    svqdecw_pat_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecd, IMM_FACTOR = 1))]
+pub fn svqdecd_s64<const IMM_FACTOR: i32>(op: svint64_t) -> svint64_t {
+    svqdecd_pat_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdech[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdech, IMM_FACTOR = 1))]
+pub fn svqdech_u16<const IMM_FACTOR: i32>(op: svuint16_t) -> svuint16_t {
+    svqdech_pat_u16::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecw[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecw, IMM_FACTOR = 1))]
+pub fn svqdecw_u32<const IMM_FACTOR: i32>(op: svuint32_t) -> svuint32_t {
+    svqdecw_pat_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecd[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecd, IMM_FACTOR = 1))]
+pub fn svqdecd_u64<const IMM_FACTOR: i32>(op: svuint64_t) -> svuint64_t {
+    svqdecd_pat_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_s32]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_n_s32_b8(op: i32, pg: svbool_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdecp.n32.nxv16i1"
+        )]
+        fn _svqdecp_n_s32_b8(op: i32, pg: svbool_t) -> i32;
+    }
+    unsafe { _svqdecp_n_s32_b8(op, pg) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_s32]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_n_s32_b16(op: i32, pg: svbool_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdecp.n32.nxv8i1"
+        )]
+        fn _svqdecp_n_s32_b16(op: i32, pg: svbool8_t) -> i32;
+    }
+    unsafe { _svqdecp_n_s32_b16(op, pg.sve_into()) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_s32]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_n_s32_b32(op: i32, pg: svbool_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdecp.n32.nxv4i1"
+        )]
+        fn _svqdecp_n_s32_b32(op: i32, pg: svbool4_t) -> i32;
+    }
+    unsafe { _svqdecp_n_s32_b32(op, pg.sve_into()) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_s32]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_n_s32_b64(op: i32, pg: svbool_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdecp.n32.nxv2i1"
+        )]
+        fn _svqdecp_n_s32_b64(op: i32, pg: svbool2_t) -> i32;
+    }
+    unsafe { _svqdecp_n_s32_b64(op, pg.sve_into()) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_s64]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_n_s64_b8(op: i64, pg: svbool_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdecp.n64.nxv16i1"
+        )]
+        fn _svqdecp_n_s64_b8(op: i64, pg: svbool_t) -> i64;
+    }
+    unsafe { _svqdecp_n_s64_b8(op, pg) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_s64]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_n_s64_b16(op: i64, pg: svbool_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdecp.n64.nxv8i1"
+        )]
+        fn _svqdecp_n_s64_b16(op: i64, pg: svbool8_t) -> i64;
+    }
+    unsafe { _svqdecp_n_s64_b16(op, pg.sve_into()) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_s64]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_n_s64_b32(op: i64, pg: svbool_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdecp.n64.nxv4i1"
+        )]
+        fn _svqdecp_n_s64_b32(op: i64, pg: svbool4_t) -> i64;
+    }
+    unsafe { _svqdecp_n_s64_b32(op, pg.sve_into()) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_s64]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_n_s64_b64(op: i64, pg: svbool_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdecp.n64.nxv2i1"
+        )]
+        fn _svqdecp_n_s64_b64(op: i64, pg: svbool2_t) -> i64;
+    }
+    unsafe { _svqdecp_n_s64_b64(op, pg.sve_into()) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_u32]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_n_u32_b8(op: u32, pg: svbool_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqdecp.n32.nxv16i1"
+        )]
+        fn _svqdecp_n_u32_b8(op: i32, pg: svbool_t) -> i32;
+    }
+    unsafe { _svqdecp_n_u32_b8(op.as_signed(), pg).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_u32]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_n_u32_b16(op: u32, pg: svbool_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqdecp.n32.nxv8i1"
+        )]
+        fn _svqdecp_n_u32_b16(op: i32, pg: svbool8_t) -> i32;
+    }
+    unsafe { _svqdecp_n_u32_b16(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_u32]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_n_u32_b32(op: u32, pg: svbool_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqdecp.n32.nxv4i1"
+        )]
+        fn _svqdecp_n_u32_b32(op: i32, pg: svbool4_t) -> i32;
+    }
+    unsafe { _svqdecp_n_u32_b32(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_u32]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_n_u32_b64(op: u32, pg: svbool_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqdecp.n32.nxv2i1"
+        )]
+        fn _svqdecp_n_u32_b64(op: i32, pg: svbool2_t) -> i32;
+    }
+    unsafe { _svqdecp_n_u32_b64(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_u64]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_n_u64_b8(op: u64, pg: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqdecp.n64.nxv16i1"
+        )]
+        fn _svqdecp_n_u64_b8(op: i64, pg: svbool_t) -> i64;
+    }
+    unsafe { _svqdecp_n_u64_b8(op.as_signed(), pg).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_u64]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_n_u64_b16(op: u64, pg: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqdecp.n64.nxv8i1"
+        )]
+        fn _svqdecp_n_u64_b16(op: i64, pg: svbool8_t) -> i64;
+    }
+    unsafe { _svqdecp_n_u64_b16(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_u64]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_n_u64_b32(op: u64, pg: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqdecp.n64.nxv4i1"
+        )]
+        fn _svqdecp_n_u64_b32(op: i64, pg: svbool4_t) -> i64;
+    }
+    unsafe { _svqdecp_n_u64_b32(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_n_u64]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_n_u64_b64(op: u64, pg: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqdecp.n64.nxv2i1"
+        )]
+        fn _svqdecp_n_u64_b64(op: i64, pg: svbool2_t) -> i64;
+    }
+    unsafe { _svqdecp_n_u64_b64(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_s16(op: svint16_t, pg: svbool_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecp.nxv8i16")]
+        fn _svqdecp_s16(op: svint16_t, pg: svbool8_t) -> svint16_t;
+    }
+    unsafe { _svqdecp_s16(op, pg.sve_into()) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_s32(op: svint32_t, pg: svbool_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecp.nxv4i32")]
+        fn _svqdecp_s32(op: svint32_t, pg: svbool4_t) -> svint32_t;
+    }
+    unsafe { _svqdecp_s32(op, pg.sve_into()) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdecp))]
+pub fn svqdecp_s64(op: svint64_t, pg: svbool_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqdecp.nxv2i64")]
+        fn _svqdecp_s64(op: svint64_t, pg: svbool2_t) -> svint64_t;
+    }
+    unsafe { _svqdecp_s64(op, pg.sve_into()) }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_u16(op: svuint16_t, pg: svbool_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecp.nxv8i16")]
+        fn _svqdecp_u16(op: svint16_t, pg: svbool8_t) -> svint16_t;
+    }
+    unsafe { _svqdecp_u16(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_u32(op: svuint32_t, pg: svbool_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecp.nxv4i32")]
+        fn _svqdecp_u32(op: svint32_t, pg: svbool4_t) -> svint32_t;
+    }
+    unsafe { _svqdecp_u32(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating decrement by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdecp[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqdecp))]
+pub fn svqdecp_u64(op: svuint64_t, pg: svbool_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqdecp.nxv2i64")]
+        fn _svqdecp_u64(op: svint64_t, pg: svbool2_t) -> svint64_t;
+    }
+    unsafe { _svqdecp_u64(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating increment by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincb, IMM_FACTOR = 1))]
+pub fn svqincb_n_s32<const IMM_FACTOR: i32>(op: i32) -> i32 {
+    svqincb_pat_n_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqinch, IMM_FACTOR = 1))]
+pub fn svqinch_n_s32<const IMM_FACTOR: i32>(op: i32) -> i32 {
+    svqinch_pat_n_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincw, IMM_FACTOR = 1))]
+pub fn svqincw_n_s32<const IMM_FACTOR: i32>(op: i32) -> i32 {
+    svqincw_pat_n_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincd, IMM_FACTOR = 1))]
+pub fn svqincd_n_s32<const IMM_FACTOR: i32>(op: i32) -> i32 {
+    svqincd_pat_n_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincb, IMM_FACTOR = 1))]
+pub fn svqincb_n_s64<const IMM_FACTOR: i32>(op: i64) -> i64 {
+    svqincb_pat_n_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqinch, IMM_FACTOR = 1))]
+pub fn svqinch_n_s64<const IMM_FACTOR: i32>(op: i64) -> i64 {
+    svqinch_pat_n_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincw, IMM_FACTOR = 1))]
+pub fn svqincw_n_s64<const IMM_FACTOR: i32>(op: i64) -> i64 {
+    svqincw_pat_n_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincd, IMM_FACTOR = 1))]
+pub fn svqincd_n_s64<const IMM_FACTOR: i32>(op: i64) -> i64 {
+    svqincd_pat_n_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincb, IMM_FACTOR = 1))]
+pub fn svqincb_n_u32<const IMM_FACTOR: i32>(op: u32) -> u32 {
+    svqincb_pat_n_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqinch, IMM_FACTOR = 1))]
+pub fn svqinch_n_u32<const IMM_FACTOR: i32>(op: u32) -> u32 {
+    svqinch_pat_n_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincw, IMM_FACTOR = 1))]
+pub fn svqincw_n_u32<const IMM_FACTOR: i32>(op: u32) -> u32 {
+    svqincw_pat_n_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincd, IMM_FACTOR = 1))]
+pub fn svqincd_n_u32<const IMM_FACTOR: i32>(op: u32) -> u32 {
+    svqincd_pat_n_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincb, IMM_FACTOR = 1))]
+pub fn svqincb_n_u64<const IMM_FACTOR: i32>(op: u64) -> u64 {
+    svqincb_pat_n_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqinch, IMM_FACTOR = 1))]
+pub fn svqinch_n_u64<const IMM_FACTOR: i32>(op: u64) -> u64 {
+    svqinch_pat_n_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincw, IMM_FACTOR = 1))]
+pub fn svqincw_n_u64<const IMM_FACTOR: i32>(op: u64) -> u64 {
+    svqincw_pat_n_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincd, IMM_FACTOR = 1))]
+pub fn svqincd_n_u64<const IMM_FACTOR: i32>(op: u64) -> u64 {
+    svqincd_pat_n_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincb_pat[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqincb , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincb_pat_n_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i32) -> i32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincb.n32")]
+        fn _svqincb_pat_n_s32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqincb_pat_n_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch_pat[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqinch , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqinch_pat_n_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i32) -> i32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqinch.n32")]
+        fn _svqinch_pat_n_s32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqinch_pat_n_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw_pat[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqincw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincw_pat_n_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i32) -> i32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincw.n32")]
+        fn _svqincw_pat_n_s32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqincw_pat_n_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd_pat[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqincd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincd_pat_n_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i32) -> i32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincd.n32")]
+        fn _svqincd_pat_n_s32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqincd_pat_n_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincb_pat[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqincb , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincb_pat_n_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i64) -> i64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincb.n64")]
+        fn _svqincb_pat_n_s64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqincb_pat_n_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch_pat[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqinch , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqinch_pat_n_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i64) -> i64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqinch.n64")]
+        fn _svqinch_pat_n_s64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqinch_pat_n_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw_pat[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqincw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincw_pat_n_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i64) -> i64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincw.n64")]
+        fn _svqincw_pat_n_s64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqincw_pat_n_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd_pat[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqincd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincd_pat_n_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: i64) -> i64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincd.n64")]
+        fn _svqincd_pat_n_s64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqincd_pat_n_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincb_pat[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqincb , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincb_pat_n_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u32) -> u32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincb.n32")]
+        fn _svqincb_pat_n_u32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqincb_pat_n_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch_pat[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqinch , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqinch_pat_n_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u32) -> u32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqinch.n32")]
+        fn _svqinch_pat_n_u32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqinch_pat_n_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw_pat[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqincw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincw_pat_n_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u32) -> u32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincw.n32")]
+        fn _svqincw_pat_n_u32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqincw_pat_n_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd_pat[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqincd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincd_pat_n_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u32) -> u32 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincd.n32")]
+        fn _svqincd_pat_n_u32(op: i32, pattern: svpattern, imm_factor: i32) -> i32;
+    }
+    unsafe { _svqincd_pat_n_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of byte elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincb_pat[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqincb , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincb_pat_n_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u64) -> u64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincb.n64")]
+        fn _svqincb_pat_n_u64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqincb_pat_n_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch_pat[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqinch , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqinch_pat_n_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u64) -> u64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqinch.n64")]
+        fn _svqinch_pat_n_u64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqinch_pat_n_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw_pat[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqincw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincw_pat_n_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u64) -> u64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincw.n64")]
+        fn _svqincw_pat_n_u64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqincw_pat_n_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd_pat[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqincd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincd_pat_n_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(op: u64) -> u64 {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincd.n64")]
+        fn _svqincd_pat_n_u64(op: i64, pattern: svpattern, imm_factor: i32) -> i64;
+    }
+    unsafe { _svqincd_pat_n_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch_pat[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqinch , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqinch_pat_s16<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svint16_t,
+) -> svint16_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqinch.nxv8i16")]
+        fn _svqinch_pat_s16(op: svint16_t, pattern: svpattern, imm_factor: i32) -> svint16_t;
+    }
+    unsafe { _svqinch_pat_s16(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw_pat[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqincw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincw_pat_s32<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svint32_t,
+) -> svint32_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincw.nxv4i32")]
+        fn _svqincw_pat_s32(op: svint32_t, pattern: svpattern, imm_factor: i32) -> svint32_t;
+    }
+    unsafe { _svqincw_pat_s32(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd_pat[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (sqincd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincd_pat_s64<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svint64_t,
+) -> svint64_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincd.nxv2i64")]
+        fn _svqincd_pat_s64(op: svint64_t, pattern: svpattern, imm_factor: i32) -> svint64_t;
+    }
+    unsafe { _svqincd_pat_s64(op, PATTERN, IMM_FACTOR) }
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch_pat[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqinch , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqinch_pat_u16<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svuint16_t,
+) -> svuint16_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqinch.nxv8i16")]
+        fn _svqinch_pat_u16(op: svint16_t, pattern: svpattern, imm_factor: i32) -> svint16_t;
+    }
+    unsafe { _svqinch_pat_u16(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw_pat[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqincw , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincw_pat_u32<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svuint32_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincw.nxv4i32")]
+        fn _svqincw_pat_u32(op: svint32_t, pattern: svpattern, imm_factor: i32) -> svint32_t;
+    }
+    unsafe { _svqincw_pat_u32(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd_pat[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+# [cfg_attr (test , assert_instr (uqincd , PATTERN = { svpattern :: SV_ALL } , IMM_FACTOR = 1))]
+pub fn svqincd_pat_u64<const PATTERN: svpattern, const IMM_FACTOR: i32>(
+    op: svuint64_t,
+) -> svuint64_t {
+    static_assert_range!(IMM_FACTOR, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincd.nxv2i64")]
+        fn _svqincd_pat_u64(op: svint64_t, pattern: svpattern, imm_factor: i32) -> svint64_t;
+    }
+    unsafe { _svqincd_pat_u64(op.as_signed(), PATTERN, IMM_FACTOR).as_unsigned() }
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqinch, IMM_FACTOR = 1))]
+pub fn svqinch_s16<const IMM_FACTOR: i32>(op: svint16_t) -> svint16_t {
+    svqinch_pat_s16::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincw, IMM_FACTOR = 1))]
+pub fn svqincw_s32<const IMM_FACTOR: i32>(op: svint32_t) -> svint32_t {
+    svqincw_pat_s32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincd, IMM_FACTOR = 1))]
+pub fn svqincd_s64<const IMM_FACTOR: i32>(op: svint64_t) -> svint64_t {
+    svqincd_pat_s64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of halfword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqinch[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqinch, IMM_FACTOR = 1))]
+pub fn svqinch_u16<const IMM_FACTOR: i32>(op: svuint16_t) -> svuint16_t {
+    svqinch_pat_u16::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of word elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincw[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincw, IMM_FACTOR = 1))]
+pub fn svqincw_u32<const IMM_FACTOR: i32>(op: svuint32_t) -> svuint32_t {
+    svqincw_pat_u32::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by number of doubleword elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincd[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincd, IMM_FACTOR = 1))]
+pub fn svqincd_u64<const IMM_FACTOR: i32>(op: svuint64_t) -> svuint64_t {
+    svqincd_pat_u64::<{ svpattern::SV_ALL }, IMM_FACTOR>(op)
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_s32]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_n_s32_b8(op: i32, pg: svbool_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqincp.n32.nxv16i1"
+        )]
+        fn _svqincp_n_s32_b8(op: i32, pg: svbool_t) -> i32;
+    }
+    unsafe { _svqincp_n_s32_b8(op, pg) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_s32]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_n_s32_b16(op: i32, pg: svbool_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqincp.n32.nxv8i1"
+        )]
+        fn _svqincp_n_s32_b16(op: i32, pg: svbool8_t) -> i32;
+    }
+    unsafe { _svqincp_n_s32_b16(op, pg.sve_into()) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_s32]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_n_s32_b32(op: i32, pg: svbool_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqincp.n32.nxv4i1"
+        )]
+        fn _svqincp_n_s32_b32(op: i32, pg: svbool4_t) -> i32;
+    }
+    unsafe { _svqincp_n_s32_b32(op, pg.sve_into()) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_s32]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_n_s32_b64(op: i32, pg: svbool_t) -> i32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqincp.n32.nxv2i1"
+        )]
+        fn _svqincp_n_s32_b64(op: i32, pg: svbool2_t) -> i32;
+    }
+    unsafe { _svqincp_n_s32_b64(op, pg.sve_into()) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_s64]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_n_s64_b8(op: i64, pg: svbool_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqincp.n64.nxv16i1"
+        )]
+        fn _svqincp_n_s64_b8(op: i64, pg: svbool_t) -> i64;
+    }
+    unsafe { _svqincp_n_s64_b8(op, pg) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_s64]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_n_s64_b16(op: i64, pg: svbool_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqincp.n64.nxv8i1"
+        )]
+        fn _svqincp_n_s64_b16(op: i64, pg: svbool8_t) -> i64;
+    }
+    unsafe { _svqincp_n_s64_b16(op, pg.sve_into()) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_s64]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_n_s64_b32(op: i64, pg: svbool_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqincp.n64.nxv4i1"
+        )]
+        fn _svqincp_n_s64_b32(op: i64, pg: svbool4_t) -> i64;
+    }
+    unsafe { _svqincp_n_s64_b32(op, pg.sve_into()) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_s64]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_n_s64_b64(op: i64, pg: svbool_t) -> i64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqincp.n64.nxv2i1"
+        )]
+        fn _svqincp_n_s64_b64(op: i64, pg: svbool2_t) -> i64;
+    }
+    unsafe { _svqincp_n_s64_b64(op, pg.sve_into()) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_u32]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_n_u32_b8(op: u32, pg: svbool_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqincp.n32.nxv16i1"
+        )]
+        fn _svqincp_n_u32_b8(op: i32, pg: svbool_t) -> i32;
+    }
+    unsafe { _svqincp_n_u32_b8(op.as_signed(), pg).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_u32]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_n_u32_b16(op: u32, pg: svbool_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqincp.n32.nxv8i1"
+        )]
+        fn _svqincp_n_u32_b16(op: i32, pg: svbool8_t) -> i32;
+    }
+    unsafe { _svqincp_n_u32_b16(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_u32]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_n_u32_b32(op: u32, pg: svbool_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqincp.n32.nxv4i1"
+        )]
+        fn _svqincp_n_u32_b32(op: i32, pg: svbool4_t) -> i32;
+    }
+    unsafe { _svqincp_n_u32_b32(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_u32]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_n_u32_b64(op: u32, pg: svbool_t) -> u32 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqincp.n32.nxv2i1"
+        )]
+        fn _svqincp_n_u32_b64(op: i32, pg: svbool2_t) -> i32;
+    }
+    unsafe { _svqincp_n_u32_b64(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_u64]_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_n_u64_b8(op: u64, pg: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqincp.n64.nxv16i1"
+        )]
+        fn _svqincp_n_u64_b8(op: i64, pg: svbool_t) -> i64;
+    }
+    unsafe { _svqincp_n_u64_b8(op.as_signed(), pg).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_u64]_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_n_u64_b16(op: u64, pg: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqincp.n64.nxv8i1"
+        )]
+        fn _svqincp_n_u64_b16(op: i64, pg: svbool8_t) -> i64;
+    }
+    unsafe { _svqincp_n_u64_b16(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_u64]_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_n_u64_b32(op: u64, pg: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqincp.n64.nxv4i1"
+        )]
+        fn _svqincp_n_u64_b32(op: i64, pg: svbool4_t) -> i64;
+    }
+    unsafe { _svqincp_n_u64_b32(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_n_u64]_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_n_u64_b64(op: u64, pg: svbool_t) -> u64 {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqincp.n64.nxv2i1"
+        )]
+        fn _svqincp_n_u64_b64(op: i64, pg: svbool2_t) -> i64;
+    }
+    unsafe { _svqincp_n_u64_b64(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_s16(op: svint16_t, pg: svbool_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincp.nxv8i16")]
+        fn _svqincp_s16(op: svint16_t, pg: svbool8_t) -> svint16_t;
+    }
+    unsafe { _svqincp_s16(op, pg.sve_into()) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_s32(op: svint32_t, pg: svbool_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincp.nxv4i32")]
+        fn _svqincp_s32(op: svint32_t, pg: svbool4_t) -> svint32_t;
+    }
+    unsafe { _svqincp_s32(op, pg.sve_into()) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqincp))]
+pub fn svqincp_s64(op: svint64_t, pg: svbool_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqincp.nxv2i64")]
+        fn _svqincp_s64(op: svint64_t, pg: svbool2_t) -> svint64_t;
+    }
+    unsafe { _svqincp_s64(op, pg.sve_into()) }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_u16(op: svuint16_t, pg: svbool_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincp.nxv8i16")]
+        fn _svqincp_u16(op: svint16_t, pg: svbool8_t) -> svint16_t;
+    }
+    unsafe { _svqincp_u16(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_u32(op: svuint32_t, pg: svbool_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincp.nxv4i32")]
+        fn _svqincp_u32(op: svint32_t, pg: svbool4_t) -> svint32_t;
+    }
+    unsafe { _svqincp_u32(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating increment by active element count"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqincp[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqincp))]
+pub fn svqincp_u64(op: svuint64_t, pg: svbool_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqincp.nxv2i64")]
+        fn _svqincp_u64(op: svint64_t, pg: svbool2_t) -> svint64_t;
+    }
+    unsafe { _svqincp_u64(op.as_signed(), pg.sve_into()).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqsub.x.nxv16i8"
+        )]
+        fn _svqsub_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqsub_s8(op1, op2) }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s8(op1: svint8_t, op2: i8) -> svint8_t {
+    svqsub_s8(op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqsub.x.nxv8i16"
+        )]
+        fn _svqsub_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqsub_s16(op1, op2) }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s16(op1: svint16_t, op2: i16) -> svint16_t {
+    svqsub_s16(op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqsub.x.nxv4i32"
+        )]
+        fn _svqsub_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqsub_s32(op1, op2) }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s32(op1: svint32_t, op2: i32) -> svint32_t {
+    svqsub_s32(op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqsub.x.nxv2i64"
+        )]
+        fn _svqsub_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqsub_s64(op1, op2) }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s64(op1: svint64_t, op2: i64) -> svint64_t {
+    svqsub_s64(op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqsub.x.nxv16i8"
+        )]
+        fn _svqsub_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqsub_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u8(op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqsub_u8(op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqsub.x.nxv8i16"
+        )]
+        fn _svqsub_u16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqsub_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u16(op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqsub_u16(op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqsub.x.nxv4i32"
+        )]
+        fn _svqsub_u32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqsub_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u32(op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqsub_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqsub.x.nxv2i64"
+        )]
+        fn _svqsub_u64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqsub_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u64(op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqsub_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rbit.nxv16i8")]
+        fn _svrbit_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svrbit_s8_m(inactive, pg, op) }
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s8_x(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svrbit_s8_m(op, pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s8_z(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svrbit_s8_m(svdup_n_s8(0), pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s16_m(inactive: svint16_t, pg: svbool_t, op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rbit.nxv8i16")]
+        fn _svrbit_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svrbit_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s16_x(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svrbit_s16_m(op, pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s16_z(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svrbit_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rbit.nxv4i32")]
+        fn _svrbit_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrbit_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svrbit_s32_m(op, pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svrbit_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rbit.nxv2i64")]
+        fn _svrbit_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrbit_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svrbit_s64_m(op, pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svrbit_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u8_m(inactive: svuint8_t, pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    unsafe { svrbit_s8_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u8_x(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svrbit_u8_m(op, pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u8_z(pg: svbool_t, op: svuint8_t) -> svuint8_t {
+    svrbit_u8_m(svdup_n_u8(0), pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u16_m(inactive: svuint16_t, pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    unsafe { svrbit_s16_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u16_x(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svrbit_u16_m(op, pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u16_z(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svrbit_u16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe { svrbit_s32_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrbit_u32_m(op, pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrbit_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe { svrbit_s64_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svrbit_u64_m(op, pg, op)
+}
+#[doc = "Reverse bits"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrbit[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rbit))]
+pub fn svrbit_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svrbit_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Read FFR, returning predicate of succesfully loaded elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrdffr)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rdffr))]
+pub fn svrdffr() -> svbool_t {
+    svrdffr_z(svptrue_b8())
+}
+#[doc = "Read FFR, returning predicate of succesfully loaded elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrdffr_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rdffr))]
+pub fn svrdffr_z(pg: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rdffr.z")]
+        fn _svrdffr_z(pg: svbool_t) -> svbool_t;
+    }
+    unsafe { _svrdffr_z(pg) }
+}
+#[doc = "Reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpe[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecpe))]
+pub fn svrecpe_f32(op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frecpe.x.nxv4f32"
+        )]
+        fn _svrecpe_f32(op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrecpe_f32(op) }
+}
+#[doc = "Reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpe[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecpe))]
+pub fn svrecpe_f64(op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frecpe.x.nxv2f64"
+        )]
+        fn _svrecpe_f64(op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrecpe_f64(op) }
+}
+#[doc = "Reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecps[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecps))]
+pub fn svrecps_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frecps.x.nxv4f32"
+        )]
+        fn _svrecps_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrecps_f32(op1, op2) }
+}
+#[doc = "Reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecps[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecps))]
+pub fn svrecps_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frecps.x.nxv2f64"
+        )]
+        fn _svrecps_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrecps_f64(op1, op2) }
+}
+#[doc = "Reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpx[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecpx))]
+pub fn svrecpx_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frecpx.x.nxv4f32"
+        )]
+        fn _svrecpx_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrecpx_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpx[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecpx))]
+pub fn svrecpx_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrecpx_f32_m(op, pg, op)
+}
+#[doc = "Reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpx[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecpx))]
+pub fn svrecpx_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrecpx_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpx[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecpx))]
+pub fn svrecpx_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frecpx.x.nxv2f64"
+        )]
+        fn _svrecpx_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrecpx_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpx[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecpx))]
+pub fn svrecpx_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrecpx_f64_m(op, pg, op)
+}
+#[doc = "Reciprocal exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpx[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frecpx))]
+pub fn svrecpx_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrecpx_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_f32(op: svfloat32_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_f64(op: svfloat64_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_s8(op: svint8_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_s16(op: svint16_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_s32(op: svint32_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_s64(op: svint64_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_u8(op: svuint8_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_u16(op: svuint16_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_u32(op: svuint32_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f32[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f32_u64(op: svuint64_t) -> svfloat32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_f32(op: svfloat32_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_f64(op: svfloat64_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_s8(op: svint8_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_s16(op: svint16_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_s32(op: svint32_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_s64(op: svint64_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_u8(op: svuint8_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_u16(op: svuint16_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_u32(op: svuint32_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_f64[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_f64_u64(op: svuint64_t) -> svfloat64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_f32(op: svfloat32_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_f64(op: svfloat64_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_s8(op: svint8_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_s16(op: svint16_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_s32(op: svint32_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_s64(op: svint64_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_u8(op: svuint8_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_u16(op: svuint16_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_u32(op: svuint32_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s8[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s8_u64(op: svuint64_t) -> svint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_f32(op: svfloat32_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_f64(op: svfloat64_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_s8(op: svint8_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_s16(op: svint16_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_s32(op: svint32_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_s64(op: svint64_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_u8(op: svuint8_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_u16(op: svuint16_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_u32(op: svuint32_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s16[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s16_u64(op: svuint64_t) -> svint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_f32(op: svfloat32_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_f64(op: svfloat64_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_s8(op: svint8_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_s16(op: svint16_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_s32(op: svint32_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_s64(op: svint64_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_u8(op: svuint8_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_u16(op: svuint16_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_u32(op: svuint32_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s32[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s32_u64(op: svuint64_t) -> svint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_f32(op: svfloat32_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_f64(op: svfloat64_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_s8(op: svint8_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_s16(op: svint16_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_s32(op: svint32_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_s64(op: svint64_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_u8(op: svuint8_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_u16(op: svuint16_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_u32(op: svuint32_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_s64[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_s64_u64(op: svuint64_t) -> svint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_f32(op: svfloat32_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_f64(op: svfloat64_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_s8(op: svint8_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_s16(op: svint16_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_s32(op: svint32_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_s64(op: svint64_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_u8(op: svuint8_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_u16(op: svuint16_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_u32(op: svuint32_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u8[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u8_u64(op: svuint64_t) -> svuint8_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_f32(op: svfloat32_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_f64(op: svfloat64_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_s8(op: svint8_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_s16(op: svint16_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_s32(op: svint32_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_s64(op: svint64_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_u8(op: svuint8_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_u16(op: svuint16_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_u32(op: svuint32_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u16[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u16_u64(op: svuint64_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_f32(op: svfloat32_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_f64(op: svfloat64_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_s8(op: svint8_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_s16(op: svint16_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_s32(op: svint32_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_s64(op: svint64_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_u8(op: svuint8_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_u16(op: svuint16_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_u32(op: svuint32_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u32[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u32_u64(op: svuint64_t) -> svuint32_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_f32(op: svfloat32_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_f64(op: svfloat64_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_s8(op: svint8_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_s16(op: svint16_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_s32(op: svint32_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_s64(op: svint64_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_u8(op: svuint8_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_u16(op: svuint16_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_u32(op: svuint32_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reinterpret vector contents"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svreinterpret_u64[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svreinterpret_u64_u64(op: svuint64_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(op) }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_b8(op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i1")]
+        fn _svrev_b8(op: svbool_t) -> svbool_t;
+    }
+    unsafe { _svrev_b8(op) }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_b16(op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i1")]
+        fn _svrev_b16(op: svbool8_t) -> svbool8_t;
+    }
+    unsafe { _svrev_b16(op.sve_into()).sve_into() }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_b32(op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i1")]
+        fn _svrev_b32(op: svbool4_t) -> svbool4_t;
+    }
+    unsafe { _svrev_b32(op.sve_into()).sve_into() }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_b64(op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i1")]
+        fn _svrev_b64(op: svbool2_t) -> svbool2_t;
+    }
+    unsafe { _svrev_b64(op.sve_into()).sve_into() }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_f32(op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4f32")]
+        fn _svrev_f32(op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrev_f32(op) }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_f64(op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2f64")]
+        fn _svrev_f64(op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrev_f64(op) }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_s8(op: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i8")]
+        fn _svrev_s8(op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svrev_s8(op) }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_s16(op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i16")]
+        fn _svrev_s16(op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svrev_s16(op) }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_s32(op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i32")]
+        fn _svrev_s32(op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrev_s32(op) }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_s64(op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i64")]
+        fn _svrev_s64(op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrev_s64(op) }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_u8(op: svuint8_t) -> svuint8_t {
+    unsafe { svrev_s8(op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_u16(op: svuint16_t) -> svuint16_t {
+    unsafe { svrev_s16(op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_u32(op: svuint32_t) -> svuint32_t {
+    unsafe { svrev_s32(op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse all elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrev[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rev))]
+pub fn svrev_u64(op: svuint64_t) -> svuint64_t {
+    unsafe { svrev_s64(op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_s16_m(inactive: svint16_t, pg: svbool_t, op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.revb.nxv8i16")]
+        fn _svrevb_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svrevb_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_s16_x(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svrevb_s16_m(op, pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_s16_z(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svrevb_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.revb.nxv4i32")]
+        fn _svrevb_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrevb_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svrevb_s32_m(op, pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svrevb_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.revb.nxv2i64")]
+        fn _svrevb_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrevb_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svrevb_s64_m(op, pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svrevb_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_u16_m(inactive: svuint16_t, pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    unsafe { svrevb_s16_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_u16_x(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svrevb_u16_m(op, pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_u16_z(pg: svbool_t, op: svuint16_t) -> svuint16_t {
+    svrevb_u16_m(svdup_n_u16(0), pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe { svrevb_s32_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrevb_u32_m(op, pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrevb_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe { svrevb_s64_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svrevb_u64_m(op, pg, op)
+}
+#[doc = "Reverse bytes within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevb[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revb))]
+pub fn svrevb_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svrevb_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.revh.nxv4i32")]
+        fn _svrevh_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrevh_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svrevh_s32_m(op, pg, op)
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svrevh_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.revh.nxv2i64")]
+        fn _svrevh_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrevh_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svrevh_s64_m(op, pg, op)
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svrevh_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe { svrevh_s32_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrevh_u32_m(op, pg, op)
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrevh_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe { svrevh_s64_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svrevh_u64_m(op, pg, op)
+}
+#[doc = "Reverse halfwords within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevh[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revh))]
+pub fn svrevh_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svrevh_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Reverse words within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevw[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revw))]
+pub fn svrevw_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.revw.nxv2i64")]
+        fn _svrevw_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrevw_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Reverse words within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevw[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revw))]
+pub fn svrevw_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svrevw_s64_m(op, pg, op)
+}
+#[doc = "Reverse words within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevw[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revw))]
+pub fn svrevw_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svrevw_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Reverse words within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevw[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revw))]
+pub fn svrevw_u64_m(inactive: svuint64_t, pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    unsafe { svrevw_s64_m(inactive.as_signed(), pg, op.as_signed()).as_unsigned() }
+}
+#[doc = "Reverse words within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevw[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revw))]
+pub fn svrevw_u64_x(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svrevw_u64_m(op, pg, op)
+}
+#[doc = "Reverse words within elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrevw[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(revw))]
+pub fn svrevw_u64_z(pg: svbool_t, op: svuint64_t) -> svuint64_t {
+    svrevw_u64_m(svdup_n_u64(0), pg, op)
+}
+#[doc = "Round to nearest, ties away from zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinta[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn svrinta_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frinta.nxv4f32")]
+        fn _svrinta_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrinta_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round to nearest, ties away from zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinta[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn svrinta_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrinta_f32_m(op, pg, op)
+}
+#[doc = "Round to nearest, ties away from zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinta[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn svrinta_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrinta_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Round to nearest, ties away from zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinta[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn svrinta_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frinta.nxv2f64")]
+        fn _svrinta_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrinta_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round to nearest, ties away from zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinta[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn svrinta_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrinta_f64_m(op, pg, op)
+}
+#[doc = "Round to nearest, ties away from zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinta[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinta))]
+pub fn svrinta_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrinta_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Round using current rounding mode (inexact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinti[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn svrinti_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frinti.nxv4f32")]
+        fn _svrinti_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrinti_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round using current rounding mode (inexact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinti[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn svrinti_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrinti_f32_m(op, pg, op)
+}
+#[doc = "Round using current rounding mode (inexact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinti[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn svrinti_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrinti_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Round using current rounding mode (inexact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinti[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn svrinti_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frinti.nxv2f64")]
+        fn _svrinti_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrinti_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round using current rounding mode (inexact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinti[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn svrinti_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrinti_f64_m(op, pg, op)
+}
+#[doc = "Round using current rounding mode (inexact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrinti[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frinti))]
+pub fn svrinti_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrinti_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Round towards -∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintm[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn svrintm_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintm.nxv4f32")]
+        fn _svrintm_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrintm_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round towards -∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintm[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn svrintm_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintm_f32_m(op, pg, op)
+}
+#[doc = "Round towards -∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintm[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn svrintm_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintm_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Round towards -∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintm[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn svrintm_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintm.nxv2f64")]
+        fn _svrintm_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrintm_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round towards -∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintm[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn svrintm_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintm_f64_m(op, pg, op)
+}
+#[doc = "Round towards -∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintm[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintm))]
+pub fn svrintm_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintm_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Round to nearest, ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintn[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn svrintn_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintn.nxv4f32")]
+        fn _svrintn_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrintn_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round to nearest, ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintn[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn svrintn_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintn_f32_m(op, pg, op)
+}
+#[doc = "Round to nearest, ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintn[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn svrintn_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintn_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Round to nearest, ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintn[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn svrintn_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintn.nxv2f64")]
+        fn _svrintn_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrintn_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round to nearest, ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintn[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn svrintn_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintn_f64_m(op, pg, op)
+}
+#[doc = "Round to nearest, ties to even"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintn[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintn))]
+pub fn svrintn_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintn_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Round towards +∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintp[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn svrintp_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintp.nxv4f32")]
+        fn _svrintp_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrintp_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round towards +∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintp[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn svrintp_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintp_f32_m(op, pg, op)
+}
+#[doc = "Round towards +∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintp[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn svrintp_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintp_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Round towards +∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintp[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn svrintp_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintp.nxv2f64")]
+        fn _svrintp_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrintp_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round towards +∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintp[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn svrintp_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintp_f64_m(op, pg, op)
+}
+#[doc = "Round towards +∞"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintp[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintp))]
+pub fn svrintp_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintp_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Round using current rounding mode (exact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintx[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn svrintx_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintx.nxv4f32")]
+        fn _svrintx_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrintx_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round using current rounding mode (exact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintx[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn svrintx_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintx_f32_m(op, pg, op)
+}
+#[doc = "Round using current rounding mode (exact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintx[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn svrintx_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintx_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Round using current rounding mode (exact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintx[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn svrintx_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintx.nxv2f64")]
+        fn _svrintx_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrintx_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round using current rounding mode (exact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintx[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn svrintx_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintx_f64_m(op, pg, op)
+}
+#[doc = "Round using current rounding mode (exact)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintx[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintx))]
+pub fn svrintx_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintx_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Round towards zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintz[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn svrintz_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintz.nxv4f32")]
+        fn _svrintz_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrintz_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round towards zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintz[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn svrintz_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintz_f32_m(op, pg, op)
+}
+#[doc = "Round towards zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintz[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn svrintz_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svrintz_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Round towards zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintz[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn svrintz_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.frintz.nxv2f64")]
+        fn _svrintz_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrintz_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Round towards zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintz[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn svrintz_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintz_f64_m(op, pg, op)
+}
+#[doc = "Round towards zero"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrintz[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frintz))]
+pub fn svrintz_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svrintz_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Reciprocal square root estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsqrte[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frsqrte))]
+pub fn svrsqrte_f32(op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frsqrte.x.nxv4f32"
+        )]
+        fn _svrsqrte_f32(op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrsqrte_f32(op) }
+}
+#[doc = "Reciprocal square root estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsqrte[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frsqrte))]
+pub fn svrsqrte_f64(op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frsqrte.x.nxv2f64"
+        )]
+        fn _svrsqrte_f64(op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrsqrte_f64(op) }
+}
+#[doc = "Reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsqrts[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frsqrts))]
+pub fn svrsqrts_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frsqrts.x.nxv4f32"
+        )]
+        fn _svrsqrts_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svrsqrts_f32(op1, op2) }
+}
+#[doc = "Reciprocal square root step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsqrts[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(frsqrts))]
+pub fn svrsqrts_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.frsqrts.x.nxv2f64"
+        )]
+        fn _svrsqrts_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svrsqrts_f64(op1, op2) }
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svint32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fscale.nxv4f32")]
+        fn _svscale_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svint32_t) -> svfloat32_t;
+    }
+    unsafe { _svscale_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: i32) -> svfloat32_t {
+    svscale_f32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svint32_t) -> svfloat32_t {
+    svscale_f32_m(pg, op1, op2)
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: i32) -> svfloat32_t {
+    svscale_f32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svint32_t) -> svfloat32_t {
+    svscale_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: i32) -> svfloat32_t {
+    svscale_f32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svint64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fscale.nxv2f64")]
+        fn _svscale_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svint64_t) -> svfloat64_t;
+    }
+    unsafe { _svscale_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: i64) -> svfloat64_t {
+    svscale_f64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svint64_t) -> svfloat64_t {
+    svscale_f64_m(pg, op1, op2)
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: i64) -> svfloat64_t {
+    svscale_f64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svint64_t) -> svfloat64_t {
+    svscale_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Adjust exponent"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svscale[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fscale))]
+pub fn svscale_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: i64) -> svfloat64_t {
+    svscale_f64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_b])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_b(pg: svbool_t, op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe { simd_select(pg, op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe { simd_select::<svbool4_t, _>(pg.sve_into(), op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe { simd_select::<svbool2_t, _>(pg.sve_into(), op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe { simd_select::<svbool_t, _>(pg, op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe { simd_select::<svbool8_t, _>(pg.sve_into(), op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_s32(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe { simd_select::<svbool4_t, _>(pg.sve_into(), op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_s64(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe { simd_select::<svbool2_t, _>(pg.sve_into(), op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { simd_select::<svbool_t, _>(pg, op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { simd_select::<svbool8_t, _>(pg.sve_into(), op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_u32(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { simd_select::<svbool4_t, _>(pg.sve_into(), op1, op2) }
+}
+#[doc = "Conditionally select elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsel[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sel))]
+pub fn svsel_u64(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { simd_select::<svbool2_t, _>(pg.sve_into(), op1, op2) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_f32<const IMM_INDEX: i32>(tuple: svfloat32x2_t, x: svfloat32_t) -> svfloat32x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_f64<const IMM_INDEX: i32>(tuple: svfloat64x2_t, x: svfloat64_t) -> svfloat64x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_s8<const IMM_INDEX: i32>(tuple: svint8x2_t, x: svint8_t) -> svint8x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_s16<const IMM_INDEX: i32>(tuple: svint16x2_t, x: svint16_t) -> svint16x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_s32<const IMM_INDEX: i32>(tuple: svint32x2_t, x: svint32_t) -> svint32x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_s64<const IMM_INDEX: i32>(tuple: svint64x2_t, x: svint64_t) -> svint64x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_u8<const IMM_INDEX: i32>(tuple: svuint8x2_t, x: svuint8_t) -> svuint8x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_u16<const IMM_INDEX: i32>(tuple: svuint16x2_t, x: svuint16_t) -> svuint16x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_u32<const IMM_INDEX: i32>(tuple: svuint32x2_t, x: svuint32_t) -> svuint32x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset2[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset2_u64<const IMM_INDEX: i32>(tuple: svuint64x2_t, x: svuint64_t) -> svuint64x2_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_f32<const IMM_INDEX: i32>(tuple: svfloat32x3_t, x: svfloat32_t) -> svfloat32x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_f64<const IMM_INDEX: i32>(tuple: svfloat64x3_t, x: svfloat64_t) -> svfloat64x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_s8<const IMM_INDEX: i32>(tuple: svint8x3_t, x: svint8_t) -> svint8x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_s16<const IMM_INDEX: i32>(tuple: svint16x3_t, x: svint16_t) -> svint16x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_s32<const IMM_INDEX: i32>(tuple: svint32x3_t, x: svint32_t) -> svint32x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_s64<const IMM_INDEX: i32>(tuple: svint64x3_t, x: svint64_t) -> svint64x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_u8<const IMM_INDEX: i32>(tuple: svuint8x3_t, x: svuint8_t) -> svuint8x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_u16<const IMM_INDEX: i32>(tuple: svuint16x3_t, x: svuint16_t) -> svuint16x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_u32<const IMM_INDEX: i32>(tuple: svuint32x3_t, x: svuint32_t) -> svuint32x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset3[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset3_u64<const IMM_INDEX: i32>(tuple: svuint64x3_t, x: svuint64_t) -> svuint64x3_t {
+    static_assert_range!(IMM_INDEX, 0..=2);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_f32<const IMM_INDEX: i32>(tuple: svfloat32x4_t, x: svfloat32_t) -> svfloat32x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_f64<const IMM_INDEX: i32>(tuple: svfloat64x4_t, x: svfloat64_t) -> svfloat64x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_s8<const IMM_INDEX: i32>(tuple: svint8x4_t, x: svint8_t) -> svint8x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_s16<const IMM_INDEX: i32>(tuple: svint16x4_t, x: svint16_t) -> svint16x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_s32<const IMM_INDEX: i32>(tuple: svint32x4_t, x: svint32_t) -> svint32x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_s64<const IMM_INDEX: i32>(tuple: svint64x4_t, x: svint64_t) -> svint64x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_u8<const IMM_INDEX: i32>(tuple: svuint8x4_t, x: svuint8_t) -> svuint8x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_u16<const IMM_INDEX: i32>(tuple: svuint16x4_t, x: svuint16_t) -> svuint16x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_u32<const IMM_INDEX: i32>(tuple: svuint32x4_t, x: svuint32_t) -> svuint32x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Change one vector in a tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svset4[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub fn svset4_u64<const IMM_INDEX: i32>(tuple: svuint64x4_t, x: svuint64_t) -> svuint64x4_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { crate::intrinsics::simd::scalable::sve_tuple_set::<_, _, { IMM_INDEX }>(tuple, x) }
+}
+#[doc = "Initialize the first-fault register to all-true"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsetffr)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(setffr))]
+pub fn svsetffr() {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.setffr")]
+        fn _svsetffr();
+    }
+    unsafe { _svsetffr() }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_f32(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.splice.nxv4f32")]
+        fn _svsplice_f32(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svsplice_f32(pg.sve_into(), op1, op2) }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_f64(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.splice.nxv2f64")]
+        fn _svsplice_f64(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svsplice_f64(pg.sve_into(), op1, op2) }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.splice.nxv16i8")]
+        fn _svsplice_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svsplice_s8(pg, op1, op2) }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.splice.nxv8i16")]
+        fn _svsplice_s16(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svsplice_s16(pg.sve_into(), op1, op2) }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_s32(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.splice.nxv4i32")]
+        fn _svsplice_s32(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svsplice_s32(pg.sve_into(), op1, op2) }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_s64(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.splice.nxv2i64")]
+        fn _svsplice_s64(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svsplice_s64(pg.sve_into(), op1, op2) }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svsplice_s8(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svsplice_s16(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_u32(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svsplice_s32(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Splice two vectors under predicate control"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsplice[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(splice))]
+pub fn svsplice_u64(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svsplice_s64(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Square root"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqrt[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub fn svsqrt_f32_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fsqrt.nxv4f32")]
+        fn _svsqrt_f32_m(inactive: svfloat32_t, pg: svbool4_t, op: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svsqrt_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Square root"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqrt[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub fn svsqrt_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svsqrt_f32_m(op, pg, op)
+}
+#[doc = "Square root"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqrt[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub fn svsqrt_f32_z(pg: svbool_t, op: svfloat32_t) -> svfloat32_t {
+    svsqrt_f32_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Square root"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqrt[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub fn svsqrt_f64_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fsqrt.nxv2f64")]
+        fn _svsqrt_f64_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svsqrt_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Square root"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqrt[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub fn svsqrt_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svsqrt_f64_m(op, pg, op)
+}
+#[doc = "Square root"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqrt[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsqrt))]
+pub fn svsqrt_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat64_t {
+    svsqrt_f64_m(svdup_n_f64(0.0), pg, op)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_f32(pg: svbool_t, base: *mut f32, data: svfloat32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv4f32")]
+        fn _svst1_f32(data: svfloat32_t, pg: svbool4_t, ptr: *mut f32);
+    }
+    _svst1_f32(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_f64(pg: svbool_t, base: *mut f64, data: svfloat64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv2f64")]
+        fn _svst1_f64(data: svfloat64_t, pg: svbool2_t, ptr: *mut f64);
+    }
+    _svst1_f64(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1_s8(pg: svbool_t, base: *mut i8, data: svint8_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv16i8")]
+        fn _svst1_s8(data: svint8_t, pg: svbool_t, ptr: *mut i8);
+    }
+    _svst1_s8(data, pg, base)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1_s16(pg: svbool_t, base: *mut i16, data: svint16_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv8i16")]
+        fn _svst1_s16(data: svint16_t, pg: svbool8_t, ptr: *mut i16);
+    }
+    _svst1_s16(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_s32(pg: svbool_t, base: *mut i32, data: svint32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv4i32")]
+        fn _svst1_s32(data: svint32_t, pg: svbool4_t, ptr: *mut i32);
+    }
+    _svst1_s32(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_s64(pg: svbool_t, base: *mut i64, data: svint64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv2i64")]
+        fn _svst1_s64(data: svint64_t, pg: svbool2_t, ptr: *mut i64);
+    }
+    _svst1_s64(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1_u8(pg: svbool_t, base: *mut u8, data: svuint8_t) {
+    svst1_s8(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1_u16(pg: svbool_t, base: *mut u16, data: svuint16_t) {
+    svst1_s16(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_u32(pg: svbool_t, base: *mut u32, data: svuint32_t) {
+    svst1_s32(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_u64(pg: svbool_t, base: *mut u64, data: svuint64_t) {
+    svst1_s64(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s32]index[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_s32index_f32(
+    pg: svbool_t,
+    base: *mut f32,
+    indices: svint32_t,
+    data: svfloat32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4f32"
+        )]
+        fn _svst1_scatter_s32index_f32(
+            data: svfloat32_t,
+            pg: svbool4_t,
+            base: *mut f32,
+            indices: svint32_t,
+        );
+    }
+    _svst1_scatter_s32index_f32(data, pg.sve_into(), base, indices)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s32]index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_s32index_s32(
+    pg: svbool_t,
+    base: *mut i32,
+    indices: svint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i32"
+        )]
+        fn _svst1_scatter_s32index_s32(
+            data: svint32_t,
+            pg: svbool4_t,
+            base: *mut i32,
+            indices: svint32_t,
+        );
+    }
+    _svst1_scatter_s32index_s32(data, pg.sve_into(), base, indices)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s32]index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_s32index_u32(
+    pg: svbool_t,
+    base: *mut u32,
+    indices: svint32_t,
+    data: svuint32_t,
+) {
+    svst1_scatter_s32index_s32(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_s64index_f64(
+    pg: svbool_t,
+    base: *mut f64,
+    indices: svint64_t,
+    data: svfloat64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.index.nxv2f64"
+        )]
+        fn _svst1_scatter_s64index_f64(
+            data: svfloat64_t,
+            pg: svbool2_t,
+            base: *mut f64,
+            indices: svint64_t,
+        );
+    }
+    _svst1_scatter_s64index_f64(data, pg.sve_into(), base, indices)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_s64index_s64(
+    pg: svbool_t,
+    base: *mut i64,
+    indices: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.index.nxv2i64"
+        )]
+        fn _svst1_scatter_s64index_s64(
+            data: svint64_t,
+            pg: svbool2_t,
+            base: *mut i64,
+            indices: svint64_t,
+        );
+    }
+    _svst1_scatter_s64index_s64(data, pg.sve_into(), base, indices)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_s64index_u64(
+    pg: svbool_t,
+    base: *mut u64,
+    indices: svint64_t,
+    data: svuint64_t,
+) {
+    svst1_scatter_s64index_s64(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u32]index[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32index_f32(
+    pg: svbool_t,
+    base: *mut f32,
+    indices: svuint32_t,
+    data: svfloat32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4f32"
+        )]
+        fn _svst1_scatter_u32index_f32(
+            data: svfloat32_t,
+            pg: svbool4_t,
+            base: *mut f32,
+            indices: svint32_t,
+        );
+    }
+    _svst1_scatter_u32index_f32(data, pg.sve_into(), base, indices.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u32]index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32index_s32(
+    pg: svbool_t,
+    base: *mut i32,
+    indices: svuint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i32"
+        )]
+        fn _svst1_scatter_u32index_s32(
+            data: svint32_t,
+            pg: svbool4_t,
+            base: *mut i32,
+            indices: svint32_t,
+        );
+    }
+    _svst1_scatter_u32index_s32(data, pg.sve_into(), base, indices.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u32]index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32index_u32(
+    pg: svbool_t,
+    base: *mut u32,
+    indices: svuint32_t,
+    data: svuint32_t,
+) {
+    svst1_scatter_u32index_s32(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64index_f64(
+    pg: svbool_t,
+    base: *mut f64,
+    indices: svuint64_t,
+    data: svfloat64_t,
+) {
+    svst1_scatter_s64index_f64(pg, base, indices.as_signed(), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64index_s64(
+    pg: svbool_t,
+    base: *mut i64,
+    indices: svuint64_t,
+    data: svint64_t,
+) {
+    svst1_scatter_s64index_s64(pg, base, indices.as_signed(), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64index_u64(
+    pg: svbool_t,
+    base: *mut u64,
+    indices: svuint64_t,
+    data: svuint64_t,
+) {
+    svst1_scatter_s64index_s64(pg, base.as_signed(), indices.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s32]offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_s32offset_f32(
+    pg: svbool_t,
+    base: *mut f32,
+    offsets: svint32_t,
+    data: svfloat32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.sxtw.nxv4f32"
+        )]
+        fn _svst1_scatter_s32offset_f32(
+            data: svfloat32_t,
+            pg: svbool4_t,
+            base: *mut f32,
+            offsets: svint32_t,
+        );
+    }
+    _svst1_scatter_s32offset_f32(data, pg.sve_into(), base, offsets)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_s32offset_s32(
+    pg: svbool_t,
+    base: *mut i32,
+    offsets: svint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.sxtw.nxv4i32"
+        )]
+        fn _svst1_scatter_s32offset_s32(
+            data: svint32_t,
+            pg: svbool4_t,
+            base: *mut i32,
+            offsets: svint32_t,
+        );
+    }
+    _svst1_scatter_s32offset_s32(data, pg.sve_into(), base, offsets)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_s32offset_u32(
+    pg: svbool_t,
+    base: *mut u32,
+    offsets: svint32_t,
+    data: svuint32_t,
+) {
+    svst1_scatter_s32offset_s32(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_s64offset_f64(
+    pg: svbool_t,
+    base: *mut f64,
+    offsets: svint64_t,
+    data: svfloat64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.nxv2f64"
+        )]
+        fn _svst1_scatter_s64offset_f64(
+            data: svfloat64_t,
+            pg: svbool2_t,
+            base: *mut f64,
+            offsets: svint64_t,
+        );
+    }
+    _svst1_scatter_s64offset_f64(data, pg.sve_into(), base, offsets)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_s64offset_s64(
+    pg: svbool_t,
+    base: *mut i64,
+    offsets: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.nxv2i64"
+        )]
+        fn _svst1_scatter_s64offset_s64(
+            data: svint64_t,
+            pg: svbool2_t,
+            base: *mut i64,
+            offsets: svint64_t,
+        );
+    }
+    _svst1_scatter_s64offset_s64(data, pg.sve_into(), base, offsets)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_s64offset_u64(
+    pg: svbool_t,
+    base: *mut u64,
+    offsets: svint64_t,
+    data: svuint64_t,
+) {
+    svst1_scatter_s64offset_s64(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u32]offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32offset_f32(
+    pg: svbool_t,
+    base: *mut f32,
+    offsets: svuint32_t,
+    data: svfloat32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.uxtw.nxv4f32"
+        )]
+        fn _svst1_scatter_u32offset_f32(
+            data: svfloat32_t,
+            pg: svbool4_t,
+            base: *mut f32,
+            offsets: svint32_t,
+        );
+    }
+    _svst1_scatter_u32offset_f32(data, pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32offset_s32(
+    pg: svbool_t,
+    base: *mut i32,
+    offsets: svuint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.uxtw.nxv4i32"
+        )]
+        fn _svst1_scatter_u32offset_s32(
+            data: svint32_t,
+            pg: svbool4_t,
+            base: *mut i32,
+            offsets: svint32_t,
+        );
+    }
+    _svst1_scatter_u32offset_s32(data, pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32offset_u32(
+    pg: svbool_t,
+    base: *mut u32,
+    offsets: svuint32_t,
+    data: svuint32_t,
+) {
+    svst1_scatter_u32offset_s32(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64offset_f64(
+    pg: svbool_t,
+    base: *mut f64,
+    offsets: svuint64_t,
+    data: svfloat64_t,
+) {
+    svst1_scatter_s64offset_f64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64offset_s64(
+    pg: svbool_t,
+    base: *mut i64,
+    offsets: svuint64_t,
+    data: svint64_t,
+) {
+    svst1_scatter_s64offset_s64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64offset_u64(
+    pg: svbool_t,
+    base: *mut u64,
+    offsets: svuint64_t,
+    data: svuint64_t,
+) {
+    svst1_scatter_s64offset_s64(pg, base.as_signed(), offsets.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u32base_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32base_f32(pg: svbool_t, bases: svuint32_t, data: svfloat32_t) {
+    svst1_scatter_u32base_offset_f32(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u32base_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32base_s32(pg: svbool_t, bases: svuint32_t, data: svint32_t) {
+    svst1_scatter_u32base_offset_s32(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u32base_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32base_u32(pg: svbool_t, bases: svuint32_t, data: svuint32_t) {
+    svst1_scatter_u32base_offset_u32(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u64base_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64base_f64(pg: svbool_t, bases: svuint64_t, data: svfloat64_t) {
+    svst1_scatter_u64base_offset_f64(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u64base_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64base_s64(pg: svbool_t, bases: svuint64_t, data: svint64_t) {
+    svst1_scatter_u64base_offset_s64(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u64base_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64base_u64(pg: svbool_t, bases: svuint64_t, data: svuint64_t) {
+    svst1_scatter_u64base_offset_u64(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u32base]_index[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32base_index_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svfloat32_t,
+) {
+    svst1_scatter_u32base_offset_f32(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u32base]_index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svint32_t,
+) {
+    svst1_scatter_u32base_offset_s32(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u32base]_index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svuint32_t,
+) {
+    svst1_scatter_u32base_offset_u32(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u64base]_index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64base_index_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svfloat64_t,
+) {
+    svst1_scatter_u64base_offset_f64(pg, bases, index.unchecked_shl(3), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u64base]_index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svint64_t,
+) {
+    svst1_scatter_u64base_offset_s64(pg, bases, index.unchecked_shl(3), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u64base]_index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svuint64_t,
+) {
+    svst1_scatter_u64base_offset_u64(pg, bases, index.unchecked_shl(3), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u32base]_offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32base_offset_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svfloat32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4f32.nxv4i32"
+        )]
+        fn _svst1_scatter_u32base_offset_f32(
+            data: svfloat32_t,
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        );
+    }
+    _svst1_scatter_u32base_offset_f32(data, pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u32base]_offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i32.nxv4i32"
+        )]
+        fn _svst1_scatter_u32base_offset_s32(
+            data: svint32_t,
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        );
+    }
+    _svst1_scatter_u32base_offset_s32(data, pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u32base]_offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_scatter_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svuint32_t,
+) {
+    svst1_scatter_u32base_offset_s32(pg, bases, offset, data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u64base]_offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64base_offset_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svfloat64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2f64.nxv2i64"
+        )]
+        fn _svst1_scatter_u64base_offset_f64(
+            data: svfloat64_t,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svst1_scatter_u64base_offset_f64(data, pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u64base]_offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i64.nxv2i64"
+        )]
+        fn _svst1_scatter_u64base_offset_s64(
+            data: svint64_t,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svst1_scatter_u64base_offset_s64(data, pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_scatter[_u64base]_offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_scatter_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svuint64_t,
+) {
+    svst1_scatter_u64base_offset_s64(pg, bases, offset, data.as_signed())
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_vnum_f32(pg: svbool_t, base: *mut f32, vnum: i64, data: svfloat32_t) {
+    svst1_f32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_vnum_f64(pg: svbool_t, base: *mut f64, vnum: i64, data: svfloat64_t) {
+    svst1_f64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1_vnum_s8(pg: svbool_t, base: *mut i8, vnum: i64, data: svint8_t) {
+    svst1_s8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1_vnum_s16(pg: svbool_t, base: *mut i16, vnum: i64, data: svint16_t) {
+    svst1_s16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_vnum_s32(pg: svbool_t, base: *mut i32, vnum: i64, data: svint32_t) {
+    svst1_s32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_vnum_s64(pg: svbool_t, base: *mut i64, vnum: i64, data: svint64_t) {
+    svst1_s64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1_vnum_u8(pg: svbool_t, base: *mut u8, vnum: i64, data: svuint8_t) {
+    svst1_u8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1_vnum_u16(pg: svbool_t, base: *mut u16, vnum: i64, data: svuint16_t) {
+    svst1_u16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1_vnum_u32(pg: svbool_t, base: *mut u32, vnum: i64, data: svuint32_t) {
+    svst1_u32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1d))]
+pub unsafe fn svst1_vnum_u64(pg: svbool_t, base: *mut u64, vnum: i64, data: svuint64_t) {
+    svst1_u64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_s16(pg: svbool_t, base: *mut i8, data: svint16_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv8i8")]
+        fn _svst1b_s16(data: nxv8i8, pg: svbool8_t, ptr: *mut i8);
+    }
+    _svst1b_s16(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_s32(pg: svbool_t, base: *mut i8, data: svint32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv4i8")]
+        fn _svst1b_s32(data: nxv4i8, pg: svbool4_t, ptr: *mut i8);
+    }
+    _svst1b_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_s32(pg: svbool_t, base: *mut i16, data: svint32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv4i16")]
+        fn _svst1h_s32(data: nxv4i16, pg: svbool4_t, ptr: *mut i16);
+    }
+    _svst1h_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_s64(pg: svbool_t, base: *mut i8, data: svint64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv2i8")]
+        fn _svst1b_s64(data: nxv2i8, pg: svbool2_t, ptr: *mut i8);
+    }
+    _svst1b_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_s64(pg: svbool_t, base: *mut i16, data: svint64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv2i16")]
+        fn _svst1h_s64(data: nxv2i16, pg: svbool2_t, ptr: *mut i16);
+    }
+    _svst1h_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_s64(pg: svbool_t, base: *mut i32, data: svint64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st1.nxv2i32")]
+        fn _svst1w_s64(data: nxv2i32, pg: svbool2_t, ptr: *mut i32);
+    }
+    _svst1w_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_u16(pg: svbool_t, base: *mut u8, data: svuint16_t) {
+    svst1b_s16(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_u32(pg: svbool_t, base: *mut u8, data: svuint32_t) {
+    svst1b_s32(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_u32(pg: svbool_t, base: *mut u16, data: svuint32_t) {
+    svst1h_s32(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_u64(pg: svbool_t, base: *mut u8, data: svuint64_t) {
+    svst1b_s64(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_u64(pg: svbool_t, base: *mut u16, data: svuint64_t) {
+    svst1h_s64(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_u64(pg: svbool_t, base: *mut u32, data: svuint64_t) {
+    svst1w_s64(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter_[s32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_s32offset_s32(
+    pg: svbool_t,
+    base: *mut i8,
+    offsets: svint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8"
+        )]
+        fn _svst1b_scatter_s32offset_s32(
+            data: nxv4i8,
+            pg: svbool4_t,
+            base: *mut i8,
+            offsets: svint32_t,
+        );
+    }
+    _svst1b_scatter_s32offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets,
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[s32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_s32offset_s32(
+    pg: svbool_t,
+    base: *mut i16,
+    offsets: svint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.sxtw.nxv4i16"
+        )]
+        fn _svst1h_scatter_s32offset_s32(
+            data: nxv4i16,
+            pg: svbool4_t,
+            base: *mut i16,
+            offsets: svint32_t,
+        );
+    }
+    _svst1h_scatter_s32offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets,
+    )
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter_[s32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_s32offset_u32(
+    pg: svbool_t,
+    base: *mut u8,
+    offsets: svint32_t,
+    data: svuint32_t,
+) {
+    svst1b_scatter_s32offset_s32(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[s32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_s32offset_u32(
+    pg: svbool_t,
+    base: *mut u16,
+    offsets: svint32_t,
+    data: svuint32_t,
+) {
+    svst1h_scatter_s32offset_s32(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_s64offset_s64(
+    pg: svbool_t,
+    base: *mut i8,
+    offsets: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.nxv2i8"
+        )]
+        fn _svst1b_scatter_s64offset_s64(
+            data: nxv2i8,
+            pg: svbool2_t,
+            base: *mut i8,
+            offsets: svint64_t,
+        );
+    }
+    _svst1b_scatter_s64offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets,
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_s64offset_s64(
+    pg: svbool_t,
+    base: *mut i16,
+    offsets: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.nxv2i16"
+        )]
+        fn _svst1h_scatter_s64offset_s64(
+            data: nxv2i16,
+            pg: svbool2_t,
+            base: *mut i16,
+            offsets: svint64_t,
+        );
+    }
+    _svst1h_scatter_s64offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets,
+    )
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_s64offset_s64(
+    pg: svbool_t,
+    base: *mut i32,
+    offsets: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.nxv2i32"
+        )]
+        fn _svst1w_scatter_s64offset_s64(
+            data: nxv2i32,
+            pg: svbool2_t,
+            base: *mut i32,
+            offsets: svint64_t,
+        );
+    }
+    _svst1w_scatter_s64offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets,
+    )
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_s64offset_u64(
+    pg: svbool_t,
+    base: *mut u8,
+    offsets: svint64_t,
+    data: svuint64_t,
+) {
+    svst1b_scatter_s64offset_s64(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_s64offset_u64(
+    pg: svbool_t,
+    base: *mut u16,
+    offsets: svint64_t,
+    data: svuint64_t,
+) {
+    svst1h_scatter_s64offset_s64(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_s64offset_u64(
+    pg: svbool_t,
+    base: *mut u32,
+    offsets: svint64_t,
+    data: svuint64_t,
+) {
+    svst1w_scatter_s64offset_s64(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter_[u32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u32offset_s32(
+    pg: svbool_t,
+    base: *mut i8,
+    offsets: svuint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8"
+        )]
+        fn _svst1b_scatter_u32offset_s32(
+            data: nxv4i8,
+            pg: svbool4_t,
+            base: *mut i8,
+            offsets: svint32_t,
+        );
+    }
+    _svst1b_scatter_u32offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[u32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32offset_s32(
+    pg: svbool_t,
+    base: *mut i16,
+    offsets: svuint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.uxtw.nxv4i16"
+        )]
+        fn _svst1h_scatter_u32offset_s32(
+            data: nxv4i16,
+            pg: svbool4_t,
+            base: *mut i16,
+            offsets: svint32_t,
+        );
+    }
+    _svst1h_scatter_u32offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    )
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter_[u32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u32offset_u32(
+    pg: svbool_t,
+    base: *mut u8,
+    offsets: svuint32_t,
+    data: svuint32_t,
+) {
+    svst1b_scatter_u32offset_s32(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[u32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32offset_u32(
+    pg: svbool_t,
+    base: *mut u16,
+    offsets: svuint32_t,
+    data: svuint32_t,
+) {
+    svst1h_scatter_u32offset_s32(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u64offset_s64(
+    pg: svbool_t,
+    base: *mut i8,
+    offsets: svuint64_t,
+    data: svint64_t,
+) {
+    svst1b_scatter_s64offset_s64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64offset_s64(
+    pg: svbool_t,
+    base: *mut i16,
+    offsets: svuint64_t,
+    data: svint64_t,
+) {
+    svst1h_scatter_s64offset_s64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64offset_s64(
+    pg: svbool_t,
+    base: *mut i32,
+    offsets: svuint64_t,
+    data: svint64_t,
+) {
+    svst1w_scatter_s64offset_s64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u64offset_u64(
+    pg: svbool_t,
+    base: *mut u8,
+    offsets: svuint64_t,
+    data: svuint64_t,
+) {
+    svst1b_scatter_s64offset_s64(pg, base.as_signed(), offsets.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64offset_u64(
+    pg: svbool_t,
+    base: *mut u16,
+    offsets: svuint64_t,
+    data: svuint64_t,
+) {
+    svst1h_scatter_s64offset_s64(pg, base.as_signed(), offsets.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64offset_u64(
+    pg: svbool_t,
+    base: *mut u32,
+    offsets: svuint64_t,
+    data: svuint64_t,
+) {
+    svst1w_scatter_s64offset_s64(pg, base.as_signed(), offsets.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter[_u32base]_offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32"
+        )]
+        fn _svst1b_scatter_u32base_offset_s32(
+            data: nxv4i8,
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        );
+    }
+    _svst1b_scatter_u32base_offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u32base]_offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i16.nxv4i32"
+        )]
+        fn _svst1h_scatter_u32base_offset_s32(
+            data: nxv4i16,
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        );
+    }
+    _svst1h_scatter_u32base_offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter[_u32base]_offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svuint32_t,
+) {
+    svst1b_scatter_u32base_offset_s32(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u32base]_offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svuint32_t,
+) {
+    svst1h_scatter_u32base_offset_s32(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter[_u64base]_offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i8.nxv2i64"
+        )]
+        fn _svst1b_scatter_u64base_offset_s64(
+            data: nxv2i8,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svst1b_scatter_u64base_offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u64base]_offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i16.nxv2i64"
+        )]
+        fn _svst1h_scatter_u64base_offset_s64(
+            data: nxv2i16,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svst1h_scatter_u64base_offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter[_u64base]_offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.scalar.offset.nxv2i32.nxv2i64"
+        )]
+        fn _svst1w_scatter_u64base_offset_s64(
+            data: nxv2i32,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svst1w_scatter_u64base_offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter[_u64base]_offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svuint64_t,
+) {
+    svst1b_scatter_u64base_offset_s64(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u64base]_offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svuint64_t,
+) {
+    svst1h_scatter_u64base_offset_s64(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter[_u64base]_offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svuint64_t,
+) {
+    svst1w_scatter_u64base_offset_s64(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter[_u32base_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u32base_s32(pg: svbool_t, bases: svuint32_t, data: svint32_t) {
+    svst1b_scatter_u32base_offset_s32(pg, bases, 0, data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u32base_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32base_s32(pg: svbool_t, bases: svuint32_t, data: svint32_t) {
+    svst1h_scatter_u32base_offset_s32(pg, bases, 0, data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter[_u32base_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u32base_u32(pg: svbool_t, bases: svuint32_t, data: svuint32_t) {
+    svst1b_scatter_u32base_offset_u32(pg, bases, 0, data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u32base_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32base_u32(pg: svbool_t, bases: svuint32_t, data: svuint32_t) {
+    svst1h_scatter_u32base_offset_u32(pg, bases, 0, data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter[_u64base_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u64base_s64(pg: svbool_t, bases: svuint64_t, data: svint64_t) {
+    svst1b_scatter_u64base_offset_s64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u64base_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64base_s64(pg: svbool_t, bases: svuint64_t, data: svint64_t) {
+    svst1h_scatter_u64base_offset_s64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter[_u64base_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64base_s64(pg: svbool_t, bases: svuint64_t, data: svint64_t) {
+    svst1w_scatter_u64base_offset_s64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_scatter[_u64base_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_scatter_u64base_u64(pg: svbool_t, bases: svuint64_t, data: svuint64_t) {
+    svst1b_scatter_u64base_offset_u64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u64base_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64base_u64(pg: svbool_t, bases: svuint64_t, data: svuint64_t) {
+    svst1h_scatter_u64base_offset_u64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter[_u64base_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64base_u64(pg: svbool_t, bases: svuint64_t, data: svuint64_t) {
+    svst1w_scatter_u64base_offset_u64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_vnum_s16(pg: svbool_t, base: *mut i8, vnum: i64, data: svint16_t) {
+    svst1b_s16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_vnum_s32(pg: svbool_t, base: *mut i8, vnum: i64, data: svint32_t) {
+    svst1b_s32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_vnum_s32(pg: svbool_t, base: *mut i16, vnum: i64, data: svint32_t) {
+    svst1h_s32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_vnum_s64(pg: svbool_t, base: *mut i8, vnum: i64, data: svint64_t) {
+    svst1b_s64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_vnum_s64(pg: svbool_t, base: *mut i16, vnum: i64, data: svint64_t) {
+    svst1h_s64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_vnum_s64(pg: svbool_t, base: *mut i32, vnum: i64, data: svint64_t) {
+    svst1w_s64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_vnum_u16(pg: svbool_t, base: *mut u8, vnum: i64, data: svuint16_t) {
+    svst1b_u16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_vnum_u32(pg: svbool_t, base: *mut u8, vnum: i64, data: svuint32_t) {
+    svst1b_u32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_vnum_u32(pg: svbool_t, base: *mut u16, vnum: i64, data: svuint32_t) {
+    svst1h_u32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 8 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1b_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1b))]
+pub unsafe fn svst1b_vnum_u64(pg: svbool_t, base: *mut u8, vnum: i64, data: svuint64_t) {
+    svst1b_u64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_vnum_u64(pg: svbool_t, base: *mut u16, vnum: i64, data: svuint64_t) {
+    svst1h_u64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_vnum_u64(pg: svbool_t, base: *mut u32, vnum: i64, data: svuint64_t) {
+    svst1w_u64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[s32]index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_s32index_s32(
+    pg: svbool_t,
+    base: *mut i16,
+    indices: svint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16"
+        )]
+        fn _svst1h_scatter_s32index_s32(
+            data: nxv4i16,
+            pg: svbool4_t,
+            base: *mut i16,
+            indices: svint32_t,
+        );
+    }
+    _svst1h_scatter_s32index_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        indices,
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[s32]index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_s32index_u32(
+    pg: svbool_t,
+    base: *mut u16,
+    indices: svint32_t,
+    data: svuint32_t,
+) {
+    svst1h_scatter_s32index_s32(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[s64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_s64index_s64(
+    pg: svbool_t,
+    base: *mut i16,
+    indices: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.index.nxv2i16"
+        )]
+        fn _svst1h_scatter_s64index_s64(
+            data: nxv2i16,
+            pg: svbool2_t,
+            base: *mut i16,
+            indices: svint64_t,
+        );
+    }
+    _svst1h_scatter_s64index_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        indices,
+    )
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter_[s64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_s64index_s64(
+    pg: svbool_t,
+    base: *mut i32,
+    indices: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.index.nxv2i32"
+        )]
+        fn _svst1w_scatter_s64index_s64(
+            data: nxv2i32,
+            pg: svbool2_t,
+            base: *mut i32,
+            indices: svint64_t,
+        );
+    }
+    _svst1w_scatter_s64index_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        indices,
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[s64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_s64index_u64(
+    pg: svbool_t,
+    base: *mut u16,
+    indices: svint64_t,
+    data: svuint64_t,
+) {
+    svst1h_scatter_s64index_s64(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter_[s64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_s64index_u64(
+    pg: svbool_t,
+    base: *mut u32,
+    indices: svint64_t,
+    data: svuint64_t,
+) {
+    svst1w_scatter_s64index_s64(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[u32]index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32index_s32(
+    pg: svbool_t,
+    base: *mut i16,
+    indices: svuint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16"
+        )]
+        fn _svst1h_scatter_u32index_s32(
+            data: nxv4i16,
+            pg: svbool4_t,
+            base: *mut i16,
+            indices: svint32_t,
+        );
+    }
+    _svst1h_scatter_u32index_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        indices.as_signed(),
+    )
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[u32]index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32index_u32(
+    pg: svbool_t,
+    base: *mut u16,
+    indices: svuint32_t,
+    data: svuint32_t,
+) {
+    svst1h_scatter_u32index_s32(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[u64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64index_s64(
+    pg: svbool_t,
+    base: *mut i16,
+    indices: svuint64_t,
+    data: svint64_t,
+) {
+    svst1h_scatter_s64index_s64(pg, base, indices.as_signed(), data)
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter_[u64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64index_s64(
+    pg: svbool_t,
+    base: *mut i32,
+    indices: svuint64_t,
+    data: svint64_t,
+) {
+    svst1w_scatter_s64index_s64(pg, base, indices.as_signed(), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter_[u64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64index_u64(
+    pg: svbool_t,
+    base: *mut u16,
+    indices: svuint64_t,
+    data: svuint64_t,
+) {
+    svst1h_scatter_s64index_s64(pg, base.as_signed(), indices.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter_[u64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64index_u64(
+    pg: svbool_t,
+    base: *mut u32,
+    indices: svuint64_t,
+    data: svuint64_t,
+) {
+    svst1w_scatter_s64index_s64(pg, base.as_signed(), indices.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u32base]_index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svint32_t,
+) {
+    svst1h_scatter_u32base_offset_s32(pg, bases, index.unchecked_shl(1), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u32base]_index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svuint32_t,
+) {
+    svst1h_scatter_u32base_offset_u32(pg, bases, index.unchecked_shl(1), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u64base]_index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svint64_t,
+) {
+    svst1h_scatter_u64base_offset_s64(pg, bases, index.unchecked_shl(1), data)
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter[_u64base]_index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svint64_t,
+) {
+    svst1w_scatter_u64base_offset_s64(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Truncate to 16 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1h_scatter[_u64base]_index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1h))]
+pub unsafe fn svst1h_scatter_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svuint64_t,
+) {
+    svst1h_scatter_u64base_offset_u64(pg, bases, index.unchecked_shl(1), data)
+}
+#[doc = "Truncate to 32 bits and store"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst1w_scatter[_u64base]_index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st1w))]
+pub unsafe fn svst1w_scatter_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svuint64_t,
+) {
+    svst1w_scatter_u64base_offset_u64(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2w))]
+pub unsafe fn svst2_f32(pg: svbool_t, base: *mut f32, data: svfloat32x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st2.nxv4f32")]
+        fn _svst2_f32(data0: svfloat32_t, data1: svfloat32_t, pg: svbool4_t, ptr: *mut f32);
+    }
+    _svst2_f32(
+        svget2_f32::<0>(data),
+        svget2_f32::<1>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2d))]
+pub unsafe fn svst2_f64(pg: svbool_t, base: *mut f64, data: svfloat64x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st2.nxv2f64")]
+        fn _svst2_f64(data0: svfloat64_t, data1: svfloat64_t, pg: svbool2_t, ptr: *mut f64);
+    }
+    _svst2_f64(
+        svget2_f64::<0>(data),
+        svget2_f64::<1>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2b))]
+pub unsafe fn svst2_s8(pg: svbool_t, base: *mut i8, data: svint8x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st2.nxv16i8")]
+        fn _svst2_s8(data0: svint8_t, data1: svint8_t, pg: svbool_t, ptr: *mut i8);
+    }
+    _svst2_s8(svget2_s8::<0>(data), svget2_s8::<1>(data), pg, base)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2h))]
+pub unsafe fn svst2_s16(pg: svbool_t, base: *mut i16, data: svint16x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st2.nxv8i16")]
+        fn _svst2_s16(data0: svint16_t, data1: svint16_t, pg: svbool8_t, ptr: *mut i16);
+    }
+    _svst2_s16(
+        svget2_s16::<0>(data),
+        svget2_s16::<1>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2w))]
+pub unsafe fn svst2_s32(pg: svbool_t, base: *mut i32, data: svint32x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st2.nxv4i32")]
+        fn _svst2_s32(data0: svint32_t, data1: svint32_t, pg: svbool4_t, ptr: *mut i32);
+    }
+    _svst2_s32(
+        svget2_s32::<0>(data),
+        svget2_s32::<1>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2d))]
+pub unsafe fn svst2_s64(pg: svbool_t, base: *mut i64, data: svint64x2_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st2.nxv2i64")]
+        fn _svst2_s64(data0: svint64_t, data1: svint64_t, pg: svbool2_t, ptr: *mut i64);
+    }
+    _svst2_s64(
+        svget2_s64::<0>(data),
+        svget2_s64::<1>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2b))]
+pub unsafe fn svst2_u8(pg: svbool_t, base: *mut u8, data: svuint8x2_t) {
+    svst2_s8(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2h))]
+pub unsafe fn svst2_u16(pg: svbool_t, base: *mut u16, data: svuint16x2_t) {
+    svst2_s16(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2w))]
+pub unsafe fn svst2_u32(pg: svbool_t, base: *mut u32, data: svuint32x2_t) {
+    svst2_s32(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2d))]
+pub unsafe fn svst2_u64(pg: svbool_t, base: *mut u64, data: svuint64x2_t) {
+    svst2_s64(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2w))]
+pub unsafe fn svst2_vnum_f32(pg: svbool_t, base: *mut f32, vnum: i64, data: svfloat32x2_t) {
+    svst2_f32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2d))]
+pub unsafe fn svst2_vnum_f64(pg: svbool_t, base: *mut f64, vnum: i64, data: svfloat64x2_t) {
+    svst2_f64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2b))]
+pub unsafe fn svst2_vnum_s8(pg: svbool_t, base: *mut i8, vnum: i64, data: svint8x2_t) {
+    svst2_s8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2h))]
+pub unsafe fn svst2_vnum_s16(pg: svbool_t, base: *mut i16, vnum: i64, data: svint16x2_t) {
+    svst2_s16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2w))]
+pub unsafe fn svst2_vnum_s32(pg: svbool_t, base: *mut i32, vnum: i64, data: svint32x2_t) {
+    svst2_s32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2d))]
+pub unsafe fn svst2_vnum_s64(pg: svbool_t, base: *mut i64, vnum: i64, data: svint64x2_t) {
+    svst2_s64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2b))]
+pub unsafe fn svst2_vnum_u8(pg: svbool_t, base: *mut u8, vnum: i64, data: svuint8x2_t) {
+    svst2_u8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2h))]
+pub unsafe fn svst2_vnum_u16(pg: svbool_t, base: *mut u16, vnum: i64, data: svuint16x2_t) {
+    svst2_u16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2w))]
+pub unsafe fn svst2_vnum_u32(pg: svbool_t, base: *mut u32, vnum: i64, data: svuint32x2_t) {
+    svst2_u32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Store two vectors into two-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst2_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st2d))]
+pub unsafe fn svst2_vnum_u64(pg: svbool_t, base: *mut u64, vnum: i64, data: svuint64x2_t) {
+    svst2_u64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3w))]
+pub unsafe fn svst3_f32(pg: svbool_t, base: *mut f32, data: svfloat32x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st3.nxv4f32")]
+        fn _svst3_f32(
+            data0: svfloat32_t,
+            data1: svfloat32_t,
+            data2: svfloat32_t,
+            pg: svbool4_t,
+            ptr: *mut f32,
+        );
+    }
+    _svst3_f32(
+        svget3_f32::<0>(data),
+        svget3_f32::<1>(data),
+        svget3_f32::<2>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3d))]
+pub unsafe fn svst3_f64(pg: svbool_t, base: *mut f64, data: svfloat64x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st3.nxv2f64")]
+        fn _svst3_f64(
+            data0: svfloat64_t,
+            data1: svfloat64_t,
+            data2: svfloat64_t,
+            pg: svbool2_t,
+            ptr: *mut f64,
+        );
+    }
+    _svst3_f64(
+        svget3_f64::<0>(data),
+        svget3_f64::<1>(data),
+        svget3_f64::<2>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3b))]
+pub unsafe fn svst3_s8(pg: svbool_t, base: *mut i8, data: svint8x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st3.nxv16i8")]
+        fn _svst3_s8(data0: svint8_t, data1: svint8_t, data2: svint8_t, pg: svbool_t, ptr: *mut i8);
+    }
+    _svst3_s8(
+        svget3_s8::<0>(data),
+        svget3_s8::<1>(data),
+        svget3_s8::<2>(data),
+        pg,
+        base,
+    )
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3h))]
+pub unsafe fn svst3_s16(pg: svbool_t, base: *mut i16, data: svint16x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st3.nxv8i16")]
+        fn _svst3_s16(
+            data0: svint16_t,
+            data1: svint16_t,
+            data2: svint16_t,
+            pg: svbool8_t,
+            ptr: *mut i16,
+        );
+    }
+    _svst3_s16(
+        svget3_s16::<0>(data),
+        svget3_s16::<1>(data),
+        svget3_s16::<2>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3w))]
+pub unsafe fn svst3_s32(pg: svbool_t, base: *mut i32, data: svint32x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st3.nxv4i32")]
+        fn _svst3_s32(
+            data0: svint32_t,
+            data1: svint32_t,
+            data2: svint32_t,
+            pg: svbool4_t,
+            ptr: *mut i32,
+        );
+    }
+    _svst3_s32(
+        svget3_s32::<0>(data),
+        svget3_s32::<1>(data),
+        svget3_s32::<2>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3d))]
+pub unsafe fn svst3_s64(pg: svbool_t, base: *mut i64, data: svint64x3_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st3.nxv2i64")]
+        fn _svst3_s64(
+            data0: svint64_t,
+            data1: svint64_t,
+            data2: svint64_t,
+            pg: svbool2_t,
+            ptr: *mut i64,
+        );
+    }
+    _svst3_s64(
+        svget3_s64::<0>(data),
+        svget3_s64::<1>(data),
+        svget3_s64::<2>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3b))]
+pub unsafe fn svst3_u8(pg: svbool_t, base: *mut u8, data: svuint8x3_t) {
+    svst3_s8(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3h))]
+pub unsafe fn svst3_u16(pg: svbool_t, base: *mut u16, data: svuint16x3_t) {
+    svst3_s16(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3w))]
+pub unsafe fn svst3_u32(pg: svbool_t, base: *mut u32, data: svuint32x3_t) {
+    svst3_s32(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3d))]
+pub unsafe fn svst3_u64(pg: svbool_t, base: *mut u64, data: svuint64x3_t) {
+    svst3_s64(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3w))]
+pub unsafe fn svst3_vnum_f32(pg: svbool_t, base: *mut f32, vnum: i64, data: svfloat32x3_t) {
+    svst3_f32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3d))]
+pub unsafe fn svst3_vnum_f64(pg: svbool_t, base: *mut f64, vnum: i64, data: svfloat64x3_t) {
+    svst3_f64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3b))]
+pub unsafe fn svst3_vnum_s8(pg: svbool_t, base: *mut i8, vnum: i64, data: svint8x3_t) {
+    svst3_s8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3h))]
+pub unsafe fn svst3_vnum_s16(pg: svbool_t, base: *mut i16, vnum: i64, data: svint16x3_t) {
+    svst3_s16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3w))]
+pub unsafe fn svst3_vnum_s32(pg: svbool_t, base: *mut i32, vnum: i64, data: svint32x3_t) {
+    svst3_s32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3d))]
+pub unsafe fn svst3_vnum_s64(pg: svbool_t, base: *mut i64, vnum: i64, data: svint64x3_t) {
+    svst3_s64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3b))]
+pub unsafe fn svst3_vnum_u8(pg: svbool_t, base: *mut u8, vnum: i64, data: svuint8x3_t) {
+    svst3_u8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3h))]
+pub unsafe fn svst3_vnum_u16(pg: svbool_t, base: *mut u16, vnum: i64, data: svuint16x3_t) {
+    svst3_u16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3w))]
+pub unsafe fn svst3_vnum_u32(pg: svbool_t, base: *mut u32, vnum: i64, data: svuint32x3_t) {
+    svst3_u32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Store three vectors into three-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst3_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st3d))]
+pub unsafe fn svst3_vnum_u64(pg: svbool_t, base: *mut u64, vnum: i64, data: svuint64x3_t) {
+    svst3_u64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4w))]
+pub unsafe fn svst4_f32(pg: svbool_t, base: *mut f32, data: svfloat32x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st4.nxv4f32")]
+        fn _svst4_f32(
+            data0: svfloat32_t,
+            data1: svfloat32_t,
+            data2: svfloat32_t,
+            data3: svfloat32_t,
+            pg: svbool4_t,
+            ptr: *mut f32,
+        );
+    }
+    _svst4_f32(
+        svget4_f32::<0>(data),
+        svget4_f32::<1>(data),
+        svget4_f32::<2>(data),
+        svget4_f32::<3>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4d))]
+pub unsafe fn svst4_f64(pg: svbool_t, base: *mut f64, data: svfloat64x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st4.nxv2f64")]
+        fn _svst4_f64(
+            data0: svfloat64_t,
+            data1: svfloat64_t,
+            data2: svfloat64_t,
+            data3: svfloat64_t,
+            pg: svbool2_t,
+            ptr: *mut f64,
+        );
+    }
+    _svst4_f64(
+        svget4_f64::<0>(data),
+        svget4_f64::<1>(data),
+        svget4_f64::<2>(data),
+        svget4_f64::<3>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4b))]
+pub unsafe fn svst4_s8(pg: svbool_t, base: *mut i8, data: svint8x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st4.nxv16i8")]
+        fn _svst4_s8(
+            data0: svint8_t,
+            data1: svint8_t,
+            data2: svint8_t,
+            data3: svint8_t,
+            pg: svbool_t,
+            ptr: *mut i8,
+        );
+    }
+    _svst4_s8(
+        svget4_s8::<0>(data),
+        svget4_s8::<1>(data),
+        svget4_s8::<2>(data),
+        svget4_s8::<3>(data),
+        pg,
+        base,
+    )
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4h))]
+pub unsafe fn svst4_s16(pg: svbool_t, base: *mut i16, data: svint16x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st4.nxv8i16")]
+        fn _svst4_s16(
+            data0: svint16_t,
+            data1: svint16_t,
+            data2: svint16_t,
+            data3: svint16_t,
+            pg: svbool8_t,
+            ptr: *mut i16,
+        );
+    }
+    _svst4_s16(
+        svget4_s16::<0>(data),
+        svget4_s16::<1>(data),
+        svget4_s16::<2>(data),
+        svget4_s16::<3>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4w))]
+pub unsafe fn svst4_s32(pg: svbool_t, base: *mut i32, data: svint32x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st4.nxv4i32")]
+        fn _svst4_s32(
+            data0: svint32_t,
+            data1: svint32_t,
+            data2: svint32_t,
+            data3: svint32_t,
+            pg: svbool4_t,
+            ptr: *mut i32,
+        );
+    }
+    _svst4_s32(
+        svget4_s32::<0>(data),
+        svget4_s32::<1>(data),
+        svget4_s32::<2>(data),
+        svget4_s32::<3>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4d))]
+pub unsafe fn svst4_s64(pg: svbool_t, base: *mut i64, data: svint64x4_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.st4.nxv2i64")]
+        fn _svst4_s64(
+            data0: svint64_t,
+            data1: svint64_t,
+            data2: svint64_t,
+            data3: svint64_t,
+            pg: svbool2_t,
+            ptr: *mut i64,
+        );
+    }
+    _svst4_s64(
+        svget4_s64::<0>(data),
+        svget4_s64::<1>(data),
+        svget4_s64::<2>(data),
+        svget4_s64::<3>(data),
+        pg.sve_into(),
+        base,
+    )
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4b))]
+pub unsafe fn svst4_u8(pg: svbool_t, base: *mut u8, data: svuint8x4_t) {
+    svst4_s8(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4h))]
+pub unsafe fn svst4_u16(pg: svbool_t, base: *mut u16, data: svuint16x4_t) {
+    svst4_s16(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4w))]
+pub unsafe fn svst4_u32(pg: svbool_t, base: *mut u32, data: svuint32x4_t) {
+    svst4_s32(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4d))]
+pub unsafe fn svst4_u64(pg: svbool_t, base: *mut u64, data: svuint64x4_t) {
+    svst4_s64(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4w))]
+pub unsafe fn svst4_vnum_f32(pg: svbool_t, base: *mut f32, vnum: i64, data: svfloat32x4_t) {
+    svst4_f32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4d))]
+pub unsafe fn svst4_vnum_f64(pg: svbool_t, base: *mut f64, vnum: i64, data: svfloat64x4_t) {
+    svst4_f64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4b))]
+pub unsafe fn svst4_vnum_s8(pg: svbool_t, base: *mut i8, vnum: i64, data: svint8x4_t) {
+    svst4_s8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4h))]
+pub unsafe fn svst4_vnum_s16(pg: svbool_t, base: *mut i16, vnum: i64, data: svint16x4_t) {
+    svst4_s16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4w))]
+pub unsafe fn svst4_vnum_s32(pg: svbool_t, base: *mut i32, vnum: i64, data: svint32x4_t) {
+    svst4_s32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4d))]
+pub unsafe fn svst4_vnum_s64(pg: svbool_t, base: *mut i64, vnum: i64, data: svint64x4_t) {
+    svst4_s64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4b))]
+pub unsafe fn svst4_vnum_u8(pg: svbool_t, base: *mut u8, vnum: i64, data: svuint8x4_t) {
+    svst4_u8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4h))]
+pub unsafe fn svst4_vnum_u16(pg: svbool_t, base: *mut u16, vnum: i64, data: svuint16x4_t) {
+    svst4_u16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4w))]
+pub unsafe fn svst4_vnum_u32(pg: svbool_t, base: *mut u32, vnum: i64, data: svuint32x4_t) {
+    svst4_u32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Store four vectors into four-element tuples"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svst4_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`). In particular, note that `vnum` is scaled by the vector length, `VL`, which is not known at compile time."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(st4d))]
+pub unsafe fn svst4_vnum_u64(pg: svbool_t, base: *mut u64, vnum: i64, data: svuint64x4_t) {
+    svst4_u64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_f32(pg: svbool_t, base: *mut f32, data: svfloat32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.stnt1.nxv4f32")]
+        fn _svstnt1_f32(data: svfloat32_t, pg: svbool4_t, ptr: *mut f32);
+    }
+    _svstnt1_f32(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_f64(pg: svbool_t, base: *mut f64, data: svfloat64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.stnt1.nxv2f64")]
+        fn _svstnt1_f64(data: svfloat64_t, pg: svbool2_t, ptr: *mut f64);
+    }
+    _svstnt1_f64(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1_s8(pg: svbool_t, base: *mut i8, data: svint8_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.stnt1.nxv16i8")]
+        fn _svstnt1_s8(data: svint8_t, pg: svbool_t, ptr: *mut i8);
+    }
+    _svstnt1_s8(data, pg, base)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1_s16(pg: svbool_t, base: *mut i16, data: svint16_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.stnt1.nxv8i16")]
+        fn _svstnt1_s16(data: svint16_t, pg: svbool8_t, ptr: *mut i16);
+    }
+    _svstnt1_s16(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_s32(pg: svbool_t, base: *mut i32, data: svint32_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.stnt1.nxv4i32")]
+        fn _svstnt1_s32(data: svint32_t, pg: svbool4_t, ptr: *mut i32);
+    }
+    _svstnt1_s32(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_s64(pg: svbool_t, base: *mut i64, data: svint64_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.stnt1.nxv2i64")]
+        fn _svstnt1_s64(data: svint64_t, pg: svbool2_t, ptr: *mut i64);
+    }
+    _svstnt1_s64(data, pg.sve_into(), base)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1_u8(pg: svbool_t, base: *mut u8, data: svuint8_t) {
+    svstnt1_s8(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1_u16(pg: svbool_t, base: *mut u16, data: svuint16_t) {
+    svstnt1_s16(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_u32(pg: svbool_t, base: *mut u32, data: svuint32_t) {
+    svstnt1_s32(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_u64(pg: svbool_t, base: *mut u64, data: svuint64_t) {
+    svstnt1_s64(pg, base.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_vnum_f32(pg: svbool_t, base: *mut f32, vnum: i64, data: svfloat32_t) {
+    svstnt1_f32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_vnum_f64(pg: svbool_t, base: *mut f64, vnum: i64, data: svfloat64_t) {
+    svstnt1_f64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1_vnum_s8(pg: svbool_t, base: *mut i8, vnum: i64, data: svint8_t) {
+    svstnt1_s8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1_vnum_s16(pg: svbool_t, base: *mut i16, vnum: i64, data: svint16_t) {
+    svstnt1_s16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_vnum_s32(pg: svbool_t, base: *mut i32, vnum: i64, data: svint32_t) {
+    svstnt1_s32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_vnum_s64(pg: svbool_t, base: *mut i64, vnum: i64, data: svint64_t) {
+    svstnt1_s64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1_vnum_u8(pg: svbool_t, base: *mut u8, vnum: i64, data: svuint8_t) {
+    svstnt1_u8(pg, base.offset(svcntb() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1_vnum_u16(pg: svbool_t, base: *mut u16, vnum: i64, data: svuint16_t) {
+    svstnt1_u16(pg, base.offset(svcnth() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_vnum_u32(pg: svbool_t, base: *mut u32, vnum: i64, data: svuint32_t) {
+    svstnt1_u32(pg, base.offset(svcntw() as isize * vnum as isize), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_vnum[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_vnum_u64(pg: svbool_t, base: *mut u64, vnum: i64, data: svuint64_t) {
+    svstnt1_u64(pg, base.offset(svcntd() as isize * vnum as isize), data)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fsub.nxv4f32")]
+        fn _svsub_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svsub_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svsub_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svsub_f32_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svsub_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svsub_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svsub_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fsub.nxv2f64")]
+        fn _svsub_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svsub_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svsub_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svsub_f64_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svsub_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svsub_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsub))]
+pub fn svsub_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svsub_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sub.nxv16i8")]
+        fn _svsub_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svsub_s8_m(pg, op1, op2) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svsub_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svsub_s8_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svsub_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svsub_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svsub_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sub.nxv8i16")]
+        fn _svsub_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svsub_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svsub_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svsub_s16_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svsub_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svsub_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svsub_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sub.nxv4i32")]
+        fn _svsub_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svsub_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svsub_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svsub_s32_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svsub_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svsub_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svsub_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sub.nxv2i64")]
+        fn _svsub_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svsub_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svsub_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svsub_s64_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svsub_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svsub_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svsub_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svsub_s8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svsub_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svsub_u8_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svsub_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svsub_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svsub_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svsub_s16_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svsub_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svsub_u16_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svsub_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svsub_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svsub_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svsub_s32_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svsub_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svsub_u32_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svsub_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svsub_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svsub_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svsub_s64_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svsub_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svsub_u64_m(pg, op1, op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svsub_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svsub_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsub[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sub))]
+pub fn svsub_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svsub_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fsubr.nxv4f32")]
+        fn _svsubr_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svsubr_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_n_f32_m(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svsubr_f32_m(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svsubr_f32_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_n_f32_x(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svsubr_f32_x(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_f32_z(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svsubr_f32_m(pg, svsel_f32(pg, op1, svdup_n_f32(0.0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_n_f32_z(pg: svbool_t, op1: svfloat32_t, op2: f32) -> svfloat32_t {
+    svsubr_f32_z(pg, op1, svdup_n_f32(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fsubr.nxv2f64")]
+        fn _svsubr_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svsubr_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_n_f64_m(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svsubr_f64_m(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svsubr_f64_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_n_f64_x(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svsubr_f64_x(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_f64_z(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svsubr_f64_m(pg, svsel_f64(pg, op1, svdup_n_f64(0.0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fsubr))]
+pub fn svsubr_n_f64_z(pg: svbool_t, op1: svfloat64_t, op2: f64) -> svfloat64_t {
+    svsubr_f64_z(pg, op1, svdup_n_f64(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subr.nxv16i8")]
+        fn _svsubr_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svsubr_s8_m(pg, op1, op2) }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svsubr_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svsubr_s8_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svsubr_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svsubr_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svsubr_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subr.nxv8i16")]
+        fn _svsubr_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svsubr_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svsubr_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svsubr_s16_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svsubr_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svsubr_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svsubr_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subr.nxv4i32")]
+        fn _svsubr_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svsubr_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svsubr_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svsubr_s32_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svsubr_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svsubr_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svsubr_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subr.nxv2i64")]
+        fn _svsubr_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svsubr_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svsubr_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svsubr_s64_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svsubr_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svsubr_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svsubr_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svsubr_s8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svsubr_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svsubr_u8_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svsubr_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svsubr_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svsubr_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svsubr_s16_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svsubr_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svsubr_u16_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svsubr_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svsubr_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svsubr_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svsubr_s32_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svsubr_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svsubr_u32_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svsubr_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svsubr_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svsubr_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svsubr_s64_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svsubr_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svsubr_u64_m(pg, op1, op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svsubr_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svsubr_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubr[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subr))]
+pub fn svsubr_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svsubr_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Dot product (signed × unsigned)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsudot_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,i8mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sudot, IMM_INDEX = 0))]
+pub fn svsudot_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint8_t,
+    op3: svuint8_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sudot.lane.nxv4i32"
+        )]
+        fn _svsudot_lane_s32(
+            op1: svint32_t,
+            op2: svint8_t,
+            op3: svint8_t,
+            imm_index: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svsudot_lane_s32(op1, op2, op3.as_signed(), IMM_INDEX) }
+}
+#[doc = "Dot product (signed × unsigned)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsudot[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,i8mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usdot))]
+pub fn svsudot_s32(op1: svint32_t, op2: svint8_t, op3: svuint8_t) -> svint32_t {
+    svusdot_s32(op1, op3, op2)
+}
+#[doc = "Dot product (signed × unsigned)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsudot[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,i8mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usdot))]
+pub fn svsudot_n_s32(op1: svint32_t, op2: svint8_t, op3: u8) -> svint32_t {
+    svsudot_s32(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_f32(data: svfloat32_t, indices: svuint32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl.nxv4f32")]
+        fn _svtbl_f32(data: svfloat32_t, indices: svint32_t) -> svfloat32_t;
+    }
+    unsafe { _svtbl_f32(data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_f64(data: svfloat64_t, indices: svuint64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl.nxv2f64")]
+        fn _svtbl_f64(data: svfloat64_t, indices: svint64_t) -> svfloat64_t;
+    }
+    unsafe { _svtbl_f64(data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_s8(data: svint8_t, indices: svuint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl.nxv16i8")]
+        fn _svtbl_s8(data: svint8_t, indices: svint8_t) -> svint8_t;
+    }
+    unsafe { _svtbl_s8(data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_s16(data: svint16_t, indices: svuint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl.nxv8i16")]
+        fn _svtbl_s16(data: svint16_t, indices: svint16_t) -> svint16_t;
+    }
+    unsafe { _svtbl_s16(data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_s32(data: svint32_t, indices: svuint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl.nxv4i32")]
+        fn _svtbl_s32(data: svint32_t, indices: svint32_t) -> svint32_t;
+    }
+    unsafe { _svtbl_s32(data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_s64(data: svint64_t, indices: svuint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl.nxv2i64")]
+        fn _svtbl_s64(data: svint64_t, indices: svint64_t) -> svint64_t;
+    }
+    unsafe { _svtbl_s64(data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_u8(data: svuint8_t, indices: svuint8_t) -> svuint8_t {
+    unsafe { svtbl_s8(data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_u16(data: svuint16_t, indices: svuint16_t) -> svuint16_t {
+    unsafe { svtbl_s16(data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_u32(data: svuint32_t, indices: svuint32_t) -> svuint32_t {
+    unsafe { svtbl_s32(data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in single-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl_u64(data: svuint64_t, indices: svuint64_t) -> svuint64_t {
+    unsafe { svtbl_s64(data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Trigonometric multiply-add coefficient"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtmad[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ftmad, IMM3 = 0))]
+pub fn svtmad_f32<const IMM3: i32>(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    static_assert_range!(IMM3, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ftmad.x.nxv4f32"
+        )]
+        fn _svtmad_f32(op1: svfloat32_t, op2: svfloat32_t, imm3: i32) -> svfloat32_t;
+    }
+    unsafe { _svtmad_f32(op1, op2, IMM3) }
+}
+#[doc = "Trigonometric multiply-add coefficient"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtmad[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ftmad, IMM3 = 0))]
+pub fn svtmad_f64<const IMM3: i32>(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    static_assert_range!(IMM3, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ftmad.x.nxv2f64"
+        )]
+        fn _svtmad_f64(op1: svfloat64_t, op2: svfloat64_t, imm3: i32) -> svfloat64_t;
+    }
+    unsafe { _svtmad_f64(op1, op2, IMM3) }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv16i1")]
+        fn _svtrn1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svtrn1_b8(op1, op2) }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv8i1")]
+        fn _svtrn1_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t;
+    }
+    unsafe { _svtrn1_b16(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv4i1")]
+        fn _svtrn1_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t;
+    }
+    unsafe { _svtrn1_b32(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv2i1")]
+        fn _svtrn1_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t;
+    }
+    unsafe { _svtrn1_b64(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv4f32")]
+        fn _svtrn1_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svtrn1_f32(op1, op2) }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv2f64")]
+        fn _svtrn1_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svtrn1_f64(op1, op2) }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv16i8")]
+        fn _svtrn1_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svtrn1_s8(op1, op2) }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv8i16")]
+        fn _svtrn1_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svtrn1_s16(op1, op2) }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv4i32")]
+        fn _svtrn1_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svtrn1_s32(op1, op2) }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1.nxv2i64")]
+        fn _svtrn1_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svtrn1_s64(op1, op2) }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svtrn1_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svtrn1_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svtrn1_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svtrn1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1q.nxv4f32")]
+        fn _svtrn1q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svtrn1q_f32(op1, op2) }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1q.nxv2f64")]
+        fn _svtrn1q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svtrn1q_f64(op1, op2) }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1q.nxv16i8")]
+        fn _svtrn1q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svtrn1q_s8(op1, op2) }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1q.nxv8i16")]
+        fn _svtrn1q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svtrn1q_s16(op1, op2) }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1q.nxv4i32")]
+        fn _svtrn1q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svtrn1q_s32(op1, op2) }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn1q.nxv2i64")]
+        fn _svtrn1q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svtrn1q_s64(op1, op2) }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svtrn1q_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svtrn1q_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svtrn1q_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn1q[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn1))]
+pub fn svtrn1q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svtrn1q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv16i1")]
+        fn _svtrn2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svtrn2_b8(op1, op2) }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv8i1")]
+        fn _svtrn2_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t;
+    }
+    unsafe { _svtrn2_b16(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv4i1")]
+        fn _svtrn2_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t;
+    }
+    unsafe { _svtrn2_b32(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv2i1")]
+        fn _svtrn2_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t;
+    }
+    unsafe { _svtrn2_b64(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv4f32")]
+        fn _svtrn2_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svtrn2_f32(op1, op2) }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv2f64")]
+        fn _svtrn2_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svtrn2_f64(op1, op2) }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv16i8")]
+        fn _svtrn2_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svtrn2_s8(op1, op2) }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv8i16")]
+        fn _svtrn2_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svtrn2_s16(op1, op2) }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv4i32")]
+        fn _svtrn2_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svtrn2_s32(op1, op2) }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2.nxv2i64")]
+        fn _svtrn2_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svtrn2_s64(op1, op2) }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svtrn2_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svtrn2_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svtrn2_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svtrn2_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2q.nxv4f32")]
+        fn _svtrn2q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svtrn2q_f32(op1, op2) }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2q.nxv2f64")]
+        fn _svtrn2q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svtrn2q_f64(op1, op2) }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2q.nxv16i8")]
+        fn _svtrn2q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svtrn2q_s8(op1, op2) }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2q.nxv8i16")]
+        fn _svtrn2q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svtrn2q_s16(op1, op2) }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2q.nxv4i32")]
+        fn _svtrn2q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svtrn2q_s32(op1, op2) }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.trn2q.nxv2i64")]
+        fn _svtrn2q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svtrn2q_s64(op1, op2) }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svtrn2q_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svtrn2q_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svtrn2q_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtrn2q[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(trn2))]
+pub fn svtrn2q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svtrn2q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Trigonometric starting value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtsmul[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ftsmul))]
+pub fn svtsmul_f32(op1: svfloat32_t, op2: svuint32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ftsmul.x.nxv4f32"
+        )]
+        fn _svtsmul_f32(op1: svfloat32_t, op2: svint32_t) -> svfloat32_t;
+    }
+    unsafe { _svtsmul_f32(op1, op2.as_signed()) }
+}
+#[doc = "Trigonometric starting value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtsmul[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ftsmul))]
+pub fn svtsmul_f64(op1: svfloat64_t, op2: svuint64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ftsmul.x.nxv2f64"
+        )]
+        fn _svtsmul_f64(op1: svfloat64_t, op2: svint64_t) -> svfloat64_t;
+    }
+    unsafe { _svtsmul_f64(op1, op2.as_signed()) }
+}
+#[doc = "Trigonometric select coefficient"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtssel[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ftssel))]
+pub fn svtssel_f32(op1: svfloat32_t, op2: svuint32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ftssel.x.nxv4f32"
+        )]
+        fn _svtssel_f32(op1: svfloat32_t, op2: svint32_t) -> svfloat32_t;
+    }
+    unsafe { _svtssel_f32(op1, op2.as_signed()) }
+}
+#[doc = "Trigonometric select coefficient"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtssel[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ftssel))]
+pub fn svtssel_f64(op1: svfloat64_t, op2: svuint64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ftssel.x.nxv2f64"
+        )]
+        fn _svtssel_f64(op1: svfloat64_t, op2: svint64_t) -> svfloat64_t;
+    }
+    unsafe { _svtssel_f64(op1, op2.as_signed()) }
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_f32() -> svfloat32x2_t {
+    svcreate2_f32(svdup_n_f32(0f32), svdup_n_f32(0f32))
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_f64() -> svfloat64x2_t {
+    svcreate2_f64(svdup_n_f64(0f64), svdup_n_f64(0f64))
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_s8() -> svint8x2_t {
+    svcreate2_s8(svdup_n_s8(0), svdup_n_s8(0))
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_s16() -> svint16x2_t {
+    svcreate2_s16(svdup_n_s16(0), svdup_n_s16(0))
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_s32() -> svint32x2_t {
+    svcreate2_s32(svdup_n_s32(0), svdup_n_s32(0))
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_s64() -> svint64x2_t {
+    svcreate2_s64(svdup_n_s64(0), svdup_n_s64(0))
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_u8() -> svuint8x2_t {
+    svcreate2_u8(svdup_n_u8(0), svdup_n_u8(0))
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_u16() -> svuint16x2_t {
+    svcreate2_u16(svdup_n_u16(0), svdup_n_u16(0))
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_u32() -> svuint32x2_t {
+    svcreate2_u32(svdup_n_u32(0), svdup_n_u32(0))
+}
+#[doc = "Create an uninitialized tuple of two vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef2_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef2_u64() -> svuint64x2_t {
+    svcreate2_u64(svdup_n_u64(0), svdup_n_u64(0))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_f32() -> svfloat32x3_t {
+    svcreate3_f32(svdup_n_f32(0f32), svdup_n_f32(0f32), svdup_n_f32(0f32))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_f64() -> svfloat64x3_t {
+    svcreate3_f64(svdup_n_f64(0f64), svdup_n_f64(0f64), svdup_n_f64(0f64))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_s8() -> svint8x3_t {
+    svcreate3_s8(svdup_n_s8(0), svdup_n_s8(0), svdup_n_s8(0))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_s16() -> svint16x3_t {
+    svcreate3_s16(svdup_n_s16(0), svdup_n_s16(0), svdup_n_s16(0))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_s32() -> svint32x3_t {
+    svcreate3_s32(svdup_n_s32(0), svdup_n_s32(0), svdup_n_s32(0))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_s64() -> svint64x3_t {
+    svcreate3_s64(svdup_n_s64(0), svdup_n_s64(0), svdup_n_s64(0))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_u8() -> svuint8x3_t {
+    svcreate3_u8(svdup_n_u8(0), svdup_n_u8(0), svdup_n_u8(0))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_u16() -> svuint16x3_t {
+    svcreate3_u16(svdup_n_u16(0), svdup_n_u16(0), svdup_n_u16(0))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_u32() -> svuint32x3_t {
+    svcreate3_u32(svdup_n_u32(0), svdup_n_u32(0), svdup_n_u32(0))
+}
+#[doc = "Create an uninitialized tuple of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef3_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef3_u64() -> svuint64x3_t {
+    svcreate3_u64(svdup_n_u64(0), svdup_n_u64(0), svdup_n_u64(0))
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_f32() -> svfloat32x4_t {
+    svcreate4_f32(
+        svdup_n_f32(0f32),
+        svdup_n_f32(0f32),
+        svdup_n_f32(0f32),
+        svdup_n_f32(0f32),
+    )
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_f64() -> svfloat64x4_t {
+    svcreate4_f64(
+        svdup_n_f64(0f64),
+        svdup_n_f64(0f64),
+        svdup_n_f64(0f64),
+        svdup_n_f64(0f64),
+    )
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_s8() -> svint8x4_t {
+    svcreate4_s8(svdup_n_s8(0), svdup_n_s8(0), svdup_n_s8(0), svdup_n_s8(0))
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_s16() -> svint16x4_t {
+    svcreate4_s16(
+        svdup_n_s16(0),
+        svdup_n_s16(0),
+        svdup_n_s16(0),
+        svdup_n_s16(0),
+    )
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_s32() -> svint32x4_t {
+    svcreate4_s32(
+        svdup_n_s32(0),
+        svdup_n_s32(0),
+        svdup_n_s32(0),
+        svdup_n_s32(0),
+    )
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_s64() -> svint64x4_t {
+    svcreate4_s64(
+        svdup_n_s64(0),
+        svdup_n_s64(0),
+        svdup_n_s64(0),
+        svdup_n_s64(0),
+    )
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_u8() -> svuint8x4_t {
+    svcreate4_u8(svdup_n_u8(0), svdup_n_u8(0), svdup_n_u8(0), svdup_n_u8(0))
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_u16() -> svuint16x4_t {
+    svcreate4_u16(
+        svdup_n_u16(0),
+        svdup_n_u16(0),
+        svdup_n_u16(0),
+        svdup_n_u16(0),
+    )
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_u32() -> svuint32x4_t {
+    svcreate4_u32(
+        svdup_n_u32(0),
+        svdup_n_u32(0),
+        svdup_n_u32(0),
+        svdup_n_u32(0),
+    )
+}
+#[doc = "Create an uninitialized tuple of four vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef4_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef4_u64() -> svuint64x4_t {
+    svcreate4_u64(
+        svdup_n_u64(0),
+        svdup_n_u64(0),
+        svdup_n_u64(0),
+        svdup_n_u64(0),
+    )
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_f32() -> svfloat32_t {
+    svdup_n_f32(0f32)
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_f64() -> svfloat64_t {
+    svdup_n_f64(0f64)
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_s8() -> svint8_t {
+    svdup_n_s8(0)
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_s16() -> svint16_t {
+    svdup_n_s16(0)
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_s32() -> svint32_t {
+    svdup_n_s32(0)
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_s64() -> svint64_t {
+    svdup_n_s64(0)
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_u8)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_u8() -> svuint8_t {
+    svdup_n_u8(0)
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_u16)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_u16() -> svuint16_t {
+    svdup_n_u16(0)
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_u32() -> svuint32_t {
+    svdup_n_u32(0)
+}
+#[doc = "Create an uninitialized vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svundef_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * This creates an uninitialized value, and may be unsound (like [`core::mem::uninitialized`])."]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub unsafe fn svundef_u64() -> svuint64_t {
+    svdup_n_u64(0)
+}
+#[doc = "Dot product (unsigned × signed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svusdot_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,i8mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usdot, IMM_INDEX = 0))]
+pub fn svusdot_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svuint8_t,
+    op3: svint8_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.usdot.lane.nxv4i32"
+        )]
+        fn _svusdot_lane_s32(
+            op1: svint32_t,
+            op2: svint8_t,
+            op3: svint8_t,
+            imm_index: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svusdot_lane_s32(op1, op2.as_signed(), op3, IMM_INDEX) }
+}
+#[doc = "Dot product (unsigned × signed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svusdot[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,i8mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usdot))]
+pub fn svusdot_s32(op1: svint32_t, op2: svuint8_t, op3: svint8_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usdot.nxv4i32")]
+        fn _svusdot_s32(op1: svint32_t, op2: svint8_t, op3: svint8_t) -> svint32_t;
+    }
+    unsafe { _svusdot_s32(op1, op2.as_signed(), op3) }
+}
+#[doc = "Dot product (unsigned × signed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svusdot[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,i8mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usdot))]
+pub fn svusdot_n_s32(op1: svint32_t, op2: svuint8_t, op3: i8) -> svint32_t {
+    svusdot_s32(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Matrix multiply-accumulate (unsigned × signed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svusmmla[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,i8mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usmmla))]
+pub fn svusmmla_s32(op1: svint32_t, op2: svuint8_t, op3: svint8_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usmmla.nxv4i32")]
+        fn _svusmmla_s32(op1: svint32_t, op2: svint8_t, op3: svint8_t) -> svint32_t;
+    }
+    unsafe { _svusmmla_s32(op1, op2.as_signed(), op3) }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv16i1")]
+        fn _svuzp1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svuzp1_b8(op1, op2) }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv8i1")]
+        fn _svuzp1_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t;
+    }
+    unsafe { _svuzp1_b16(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv4i1")]
+        fn _svuzp1_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t;
+    }
+    unsafe { _svuzp1_b32(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv2i1")]
+        fn _svuzp1_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t;
+    }
+    unsafe { _svuzp1_b64(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv4f32")]
+        fn _svuzp1_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svuzp1_f32(op1, op2) }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv2f64")]
+        fn _svuzp1_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svuzp1_f64(op1, op2) }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv16i8")]
+        fn _svuzp1_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svuzp1_s8(op1, op2) }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv8i16")]
+        fn _svuzp1_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svuzp1_s16(op1, op2) }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv4i32")]
+        fn _svuzp1_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svuzp1_s32(op1, op2) }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1.nxv2i64")]
+        fn _svuzp1_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svuzp1_s64(op1, op2) }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svuzp1_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svuzp1_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svuzp1_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate even elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svuzp1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1q.nxv4f32")]
+        fn _svuzp1q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svuzp1q_f32(op1, op2) }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1q.nxv2f64")]
+        fn _svuzp1q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svuzp1q_f64(op1, op2) }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1q.nxv16i8")]
+        fn _svuzp1q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svuzp1q_s8(op1, op2) }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1q.nxv8i16")]
+        fn _svuzp1q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svuzp1q_s16(op1, op2) }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1q.nxv4i32")]
+        fn _svuzp1q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svuzp1q_s32(op1, op2) }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp1q.nxv2i64")]
+        fn _svuzp1q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svuzp1q_s64(op1, op2) }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svuzp1q_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svuzp1q_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svuzp1q_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate even quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp1q[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp1))]
+pub fn svuzp1q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svuzp1q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv16i1")]
+        fn _svuzp2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svuzp2_b8(op1, op2) }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv8i1")]
+        fn _svuzp2_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t;
+    }
+    unsafe { _svuzp2_b16(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv4i1")]
+        fn _svuzp2_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t;
+    }
+    unsafe { _svuzp2_b32(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv2i1")]
+        fn _svuzp2_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t;
+    }
+    unsafe { _svuzp2_b64(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv4f32")]
+        fn _svuzp2_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svuzp2_f32(op1, op2) }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv2f64")]
+        fn _svuzp2_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svuzp2_f64(op1, op2) }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv16i8")]
+        fn _svuzp2_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svuzp2_s8(op1, op2) }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv8i16")]
+        fn _svuzp2_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svuzp2_s16(op1, op2) }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv4i32")]
+        fn _svuzp2_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svuzp2_s32(op1, op2) }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2.nxv2i64")]
+        fn _svuzp2_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svuzp2_s64(op1, op2) }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svuzp2_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svuzp2_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svuzp2_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate odd elements from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svuzp2_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2q.nxv4f32")]
+        fn _svuzp2q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svuzp2q_f32(op1, op2) }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2q.nxv2f64")]
+        fn _svuzp2q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svuzp2q_f64(op1, op2) }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2q.nxv16i8")]
+        fn _svuzp2q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svuzp2q_s8(op1, op2) }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2q.nxv8i16")]
+        fn _svuzp2q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svuzp2q_s16(op1, op2) }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2q.nxv4i32")]
+        fn _svuzp2q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svuzp2q_s32(op1, op2) }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uzp2q.nxv2i64")]
+        fn _svuzp2q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svuzp2q_s64(op1, op2) }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svuzp2q_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svuzp2q_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svuzp2q_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Concatenate odd quadwords from two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuzp2q[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uzp2))]
+pub fn svuzp2q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svuzp2q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b8[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilele))]
+pub fn svwhilele_b8_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilele.nxv16i1.i32"
+        )]
+        fn _svwhilele_b8_s32(op1: i32, op2: i32) -> svbool_t;
+    }
+    unsafe { _svwhilele_b8_s32(op1, op2) }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b16[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilele))]
+pub fn svwhilele_b16_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilele.nxv8i1.i32"
+        )]
+        fn _svwhilele_b16_s32(op1: i32, op2: i32) -> svbool8_t;
+    }
+    unsafe { _svwhilele_b16_s32(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b32[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilele))]
+pub fn svwhilele_b32_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilele.nxv4i1.i32"
+        )]
+        fn _svwhilele_b32_s32(op1: i32, op2: i32) -> svbool4_t;
+    }
+    unsafe { _svwhilele_b32_s32(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b64[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilele))]
+pub fn svwhilele_b64_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilele.nxv2i1.i32"
+        )]
+        fn _svwhilele_b64_s32(op1: i32, op2: i32) -> svbool2_t;
+    }
+    unsafe { _svwhilele_b64_s32(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b8[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilele))]
+pub fn svwhilele_b8_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilele.nxv16i1.i64"
+        )]
+        fn _svwhilele_b8_s64(op1: i64, op2: i64) -> svbool_t;
+    }
+    unsafe { _svwhilele_b8_s64(op1, op2) }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b16[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilele))]
+pub fn svwhilele_b16_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilele.nxv8i1.i64"
+        )]
+        fn _svwhilele_b16_s64(op1: i64, op2: i64) -> svbool8_t;
+    }
+    unsafe { _svwhilele_b16_s64(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b32[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilele))]
+pub fn svwhilele_b32_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilele.nxv4i1.i64"
+        )]
+        fn _svwhilele_b32_s64(op1: i64, op2: i64) -> svbool4_t;
+    }
+    unsafe { _svwhilele_b32_s64(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b64[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilele))]
+pub fn svwhilele_b64_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilele.nxv2i1.i64"
+        )]
+        fn _svwhilele_b64_s64(op1: i64, op2: i64) -> svbool2_t;
+    }
+    unsafe { _svwhilele_b64_s64(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b8[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilels))]
+pub fn svwhilele_b8_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilels.nxv16i1.i32"
+        )]
+        fn _svwhilele_b8_u32(op1: i32, op2: i32) -> svbool_t;
+    }
+    unsafe { _svwhilele_b8_u32(op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b16[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilels))]
+pub fn svwhilele_b16_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilels.nxv8i1.i32"
+        )]
+        fn _svwhilele_b16_u32(op1: i32, op2: i32) -> svbool8_t;
+    }
+    unsafe { _svwhilele_b16_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b32[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilels))]
+pub fn svwhilele_b32_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilels.nxv4i1.i32"
+        )]
+        fn _svwhilele_b32_u32(op1: i32, op2: i32) -> svbool4_t;
+    }
+    unsafe { _svwhilele_b32_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b64[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilels))]
+pub fn svwhilele_b64_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilels.nxv2i1.i32"
+        )]
+        fn _svwhilele_b64_u32(op1: i32, op2: i32) -> svbool2_t;
+    }
+    unsafe { _svwhilele_b64_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b8[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilels))]
+pub fn svwhilele_b8_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilels.nxv16i1.i64"
+        )]
+        fn _svwhilele_b8_u64(op1: i64, op2: i64) -> svbool_t;
+    }
+    unsafe { _svwhilele_b8_u64(op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b16[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilels))]
+pub fn svwhilele_b16_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilels.nxv8i1.i64"
+        )]
+        fn _svwhilele_b16_u64(op1: i64, op2: i64) -> svbool8_t;
+    }
+    unsafe { _svwhilele_b16_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b32[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilels))]
+pub fn svwhilele_b32_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilels.nxv4i1.i64"
+        )]
+        fn _svwhilele_b32_u64(op1: i64, op2: i64) -> svbool4_t;
+    }
+    unsafe { _svwhilele_b32_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilele_b64[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilels))]
+pub fn svwhilele_b64_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilels.nxv2i1.i64"
+        )]
+        fn _svwhilele_b64_u64(op1: i64, op2: i64) -> svbool2_t;
+    }
+    unsafe { _svwhilele_b64_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b8[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelt))]
+pub fn svwhilelt_b8_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelt.nxv16i1.i32"
+        )]
+        fn _svwhilelt_b8_s32(op1: i32, op2: i32) -> svbool_t;
+    }
+    unsafe { _svwhilelt_b8_s32(op1, op2) }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b16[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelt))]
+pub fn svwhilelt_b16_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelt.nxv8i1.i32"
+        )]
+        fn _svwhilelt_b16_s32(op1: i32, op2: i32) -> svbool8_t;
+    }
+    unsafe { _svwhilelt_b16_s32(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b32[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelt))]
+pub fn svwhilelt_b32_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelt.nxv4i1.i32"
+        )]
+        fn _svwhilelt_b32_s32(op1: i32, op2: i32) -> svbool4_t;
+    }
+    unsafe { _svwhilelt_b32_s32(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b64[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelt))]
+pub fn svwhilelt_b64_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelt.nxv2i1.i32"
+        )]
+        fn _svwhilelt_b64_s32(op1: i32, op2: i32) -> svbool2_t;
+    }
+    unsafe { _svwhilelt_b64_s32(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b8[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelt))]
+pub fn svwhilelt_b8_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelt.nxv16i1.i64"
+        )]
+        fn _svwhilelt_b8_s64(op1: i64, op2: i64) -> svbool_t;
+    }
+    unsafe { _svwhilelt_b8_s64(op1, op2) }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b16[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelt))]
+pub fn svwhilelt_b16_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelt.nxv8i1.i64"
+        )]
+        fn _svwhilelt_b16_s64(op1: i64, op2: i64) -> svbool8_t;
+    }
+    unsafe { _svwhilelt_b16_s64(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b32[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelt))]
+pub fn svwhilelt_b32_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelt.nxv4i1.i64"
+        )]
+        fn _svwhilelt_b32_s64(op1: i64, op2: i64) -> svbool4_t;
+    }
+    unsafe { _svwhilelt_b32_s64(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b64[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelt))]
+pub fn svwhilelt_b64_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelt.nxv2i1.i64"
+        )]
+        fn _svwhilelt_b64_s64(op1: i64, op2: i64) -> svbool2_t;
+    }
+    unsafe { _svwhilelt_b64_s64(op1, op2).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b8[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svwhilelt_b8_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelo.nxv16i1.i32"
+        )]
+        fn _svwhilelt_b8_u32(op1: i32, op2: i32) -> svbool_t;
+    }
+    unsafe { _svwhilelt_b8_u32(op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b16[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svwhilelt_b16_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelo.nxv8i1.i32"
+        )]
+        fn _svwhilelt_b16_u32(op1: i32, op2: i32) -> svbool8_t;
+    }
+    unsafe { _svwhilelt_b16_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b32[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svwhilelt_b32_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelo.nxv4i1.i32"
+        )]
+        fn _svwhilelt_b32_u32(op1: i32, op2: i32) -> svbool4_t;
+    }
+    unsafe { _svwhilelt_b32_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b64[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svwhilelt_b64_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelo.nxv2i1.i32"
+        )]
+        fn _svwhilelt_b64_u32(op1: i32, op2: i32) -> svbool2_t;
+    }
+    unsafe { _svwhilelt_b64_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b8[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svwhilelt_b8_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelo.nxv16i1.i64"
+        )]
+        fn _svwhilelt_b8_u64(op1: i64, op2: i64) -> svbool_t;
+    }
+    unsafe { _svwhilelt_b8_u64(op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b16[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svwhilelt_b16_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelo.nxv8i1.i64"
+        )]
+        fn _svwhilelt_b16_u64(op1: i64, op2: i64) -> svbool8_t;
+    }
+    unsafe { _svwhilelt_b16_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b32[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svwhilelt_b32_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelo.nxv4i1.i64"
+        )]
+        fn _svwhilelt_b32_u64(op1: i64, op2: i64) -> svbool4_t;
+    }
+    unsafe { _svwhilelt_b32_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While incrementing scalar is less than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilelt_b64[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilelo))]
+pub fn svwhilelt_b64_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilelo.nxv2i1.i64"
+        )]
+        fn _svwhilelt_b64_u64(op1: i64, op2: i64) -> svbool2_t;
+    }
+    unsafe { _svwhilelt_b64_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "Write to the first-fault register"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwrffr)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(wrffr))]
+pub fn svwrffr(op: svbool_t) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.wrffr")]
+        fn _svwrffr(op: svbool_t);
+    }
+    unsafe { _svwrffr(op) }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv16i1")]
+        fn _svzip1_b8(op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svzip1_b8(op1, op2) }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_b16(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv8i1")]
+        fn _svzip1_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t;
+    }
+    unsafe { _svzip1_b16(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_b32(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv4i1")]
+        fn _svzip1_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t;
+    }
+    unsafe { _svzip1_b32(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_b64(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv2i1")]
+        fn _svzip1_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t;
+    }
+    unsafe { _svzip1_b64(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv4f32")]
+        fn _svzip1_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svzip1_f32(op1, op2) }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv2f64")]
+        fn _svzip1_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svzip1_f64(op1, op2) }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv16i8")]
+        fn _svzip1_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svzip1_s8(op1, op2) }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv8i16")]
+        fn _svzip1_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svzip1_s16(op1, op2) }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv4i32")]
+        fn _svzip1_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svzip1_s32(op1, op2) }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1.nxv2i64")]
+        fn _svzip1_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svzip1_s64(op1, op2) }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svzip1_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svzip1_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svzip1_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave elements from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svzip1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1q.nxv4f32")]
+        fn _svzip1q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svzip1q_f32(op1, op2) }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1q.nxv2f64")]
+        fn _svzip1q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svzip1q_f64(op1, op2) }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1q.nxv16i8")]
+        fn _svzip1q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svzip1q_s8(op1, op2) }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1q.nxv8i16")]
+        fn _svzip1q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svzip1q_s16(op1, op2) }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1q.nxv4i32")]
+        fn _svzip1q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svzip1q_s32(op1, op2) }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip1q.nxv2i64")]
+        fn _svzip1q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svzip1q_s64(op1, op2) }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svzip1q_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svzip1q_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svzip1q_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave quadwords from low halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip1q[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip1))]
+pub fn svzip1q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svzip1q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b8)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv16i1")]
+        fn _svzip2_b8(op1: svbool_t, op2: svbool_t) -> svbool_t;
+    }
+    unsafe { _svzip2_b8(op1, op2) }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b16)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_b16(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv8i1")]
+        fn _svzip2_b16(op1: svbool8_t, op2: svbool8_t) -> svbool8_t;
+    }
+    unsafe { _svzip2_b16(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b32)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_b32(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv4i1")]
+        fn _svzip2_b32(op1: svbool4_t, op2: svbool4_t) -> svbool4_t;
+    }
+    unsafe { _svzip2_b32(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2_b64)"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_b64(op1: svbool_t, op2: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv2i1")]
+        fn _svzip2_b64(op1: svbool2_t, op2: svbool2_t) -> svbool2_t;
+    }
+    unsafe { _svzip2_b64(op1.sve_into(), op2.sve_into()).sve_into() }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_f32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv4f32")]
+        fn _svzip2_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svzip2_f32(op1, op2) }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_f64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv2f64")]
+        fn _svzip2_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svzip2_f64(op1, op2) }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_s8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv16i8")]
+        fn _svzip2_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svzip2_s8(op1, op2) }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_s16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv8i16")]
+        fn _svzip2_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svzip2_s16(op1, op2) }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_s32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv4i32")]
+        fn _svzip2_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svzip2_s32(op1, op2) }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_s64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2.nxv2i64")]
+        fn _svzip2_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svzip2_s64(op1, op2) }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_u8])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svzip2_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_u16])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svzip2_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_u32])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svzip2_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave elements from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2[_u64])"]
+#[inline]
+#[target_feature(enable = "sve")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svzip2_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2q.nxv4f32")]
+        fn _svzip2q_f32(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svzip2q_f32(op1, op2) }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2q.nxv2f64")]
+        fn _svzip2q_f64(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svzip2q_f64(op1, op2) }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2q.nxv16i8")]
+        fn _svzip2q_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svzip2q_s8(op1, op2) }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2q.nxv8i16")]
+        fn _svzip2q_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svzip2q_s16(op1, op2) }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2q.nxv4i32")]
+        fn _svzip2q_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svzip2q_s32(op1, op2) }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.zip2q.nxv2i64")]
+        fn _svzip2q_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svzip2q_s64(op1, op2) }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svzip2q_s8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svzip2q_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svzip2q_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleave quadwords from high halves of two inputs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svzip2q[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,f64mm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(zip2))]
+pub fn svzip2q_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svzip2q_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
diff --git a/crates/core_arch/src/aarch64/sve/ld_st_tests_aarch64.rs b/crates/core_arch/src/aarch64/sve/ld_st_tests_aarch64.rs
new file mode 100644
index 0000000000..3007ba4ee6
--- /dev/null
+++ b/crates/core_arch/src/aarch64/sve/ld_st_tests_aarch64.rs
@@ -0,0 +1,11235 @@
+// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen-arm/spec/sve` and run the following command to re-generate
+// this file:
+//
+// ```
+// cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec
+// ```
+#![allow(unused)]
+use super::*;
+use std::boxed::Box;
+use std::convert::{TryFrom, TryInto};
+use std::sync::LazyLock;
+use std::vec::Vec;
+use stdarch_test::simd_test;
+static F32_DATA: LazyLock<[f32; 64 * 5]> = LazyLock::new(|| {
+    (0..64 * 5)
+        .map(|i| i as f32)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("f32 data incorrectly initialised")
+});
+static F64_DATA: LazyLock<[f64; 32 * 5]> = LazyLock::new(|| {
+    (0..32 * 5)
+        .map(|i| i as f64)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("f64 data incorrectly initialised")
+});
+static I8_DATA: LazyLock<[i8; 256 * 5]> = LazyLock::new(|| {
+    (0..256 * 5)
+        .map(|i| ((i + 128) % 256 - 128) as i8)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("i8 data incorrectly initialised")
+});
+static I16_DATA: LazyLock<[i16; 128 * 5]> = LazyLock::new(|| {
+    (0..128 * 5)
+        .map(|i| i as i16)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("i16 data incorrectly initialised")
+});
+static I32_DATA: LazyLock<[i32; 64 * 5]> = LazyLock::new(|| {
+    (0..64 * 5)
+        .map(|i| i as i32)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("i32 data incorrectly initialised")
+});
+static I64_DATA: LazyLock<[i64; 32 * 5]> = LazyLock::new(|| {
+    (0..32 * 5)
+        .map(|i| i as i64)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("i64 data incorrectly initialised")
+});
+static U8_DATA: LazyLock<[u8; 256 * 5]> = LazyLock::new(|| {
+    (0..256 * 5)
+        .map(|i| i as u8)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("u8 data incorrectly initialised")
+});
+static U16_DATA: LazyLock<[u16; 128 * 5]> = LazyLock::new(|| {
+    (0..128 * 5)
+        .map(|i| i as u16)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("u16 data incorrectly initialised")
+});
+static U32_DATA: LazyLock<[u32; 64 * 5]> = LazyLock::new(|| {
+    (0..64 * 5)
+        .map(|i| i as u32)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("u32 data incorrectly initialised")
+});
+static U64_DATA: LazyLock<[u64; 32 * 5]> = LazyLock::new(|| {
+    (0..32 * 5)
+        .map(|i| i as u64)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("u64 data incorrectly initialised")
+});
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_f32(vector: svfloat32_t, expected: svfloat32_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b32(), defined));
+    let cmp = svcmpne_f32(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_f64(vector: svfloat64_t, expected: svfloat64_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b64(), defined));
+    let cmp = svcmpne_f64(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_i8(vector: svint8_t, expected: svint8_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b8(), defined));
+    let cmp = svcmpne_s8(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_i16(vector: svint16_t, expected: svint16_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b16(), defined));
+    let cmp = svcmpne_s16(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_i32(vector: svint32_t, expected: svint32_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b32(), defined));
+    let cmp = svcmpne_s32(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_i64(vector: svint64_t, expected: svint64_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b64(), defined));
+    let cmp = svcmpne_s64(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_u8(vector: svuint8_t, expected: svuint8_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b8(), defined));
+    let cmp = svcmpne_u8(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_u16(vector: svuint16_t, expected: svuint16_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b16(), defined));
+    let cmp = svcmpne_u16(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_u32(vector: svuint32_t, expected: svuint32_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b32(), defined));
+    let cmp = svcmpne_u32(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_u64(vector: svuint64_t, expected: svuint64_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b64(), defined));
+    let cmp = svcmpne_u64(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_f32_with_svst1_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    svst1_f32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld1_f32(svptrue_b32(), storage.as_ptr() as *const f32);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_f64_with_svst1_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    svst1_f64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld1_f64(svptrue_b64(), storage.as_ptr() as *const f64);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_s8_with_svst1_s8() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s8((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1_s8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1_s8(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_i8(
+        loaded,
+        svindex_s8((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_s16_with_svst1_s16() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1_s16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1_s16(svptrue_b16(), storage.as_ptr() as *const i16);
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_s32_with_svst1_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1_s32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1_s32(svptrue_b32(), storage.as_ptr() as *const i32);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_s64_with_svst1_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1_s64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld1_s64(svptrue_b64(), storage.as_ptr() as *const i64);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_u8_with_svst1_u8() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u8((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1_u8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1_u8(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_u8(
+        loaded,
+        svindex_u8((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_u16_with_svst1_u16() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1_u16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1_u16(svptrue_b16(), storage.as_ptr() as *const u16);
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_u32_with_svst1_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1_u32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1_u32(svptrue_b32(), storage.as_ptr() as *const u32);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_u64_with_svst1_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1_u64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld1_u64(svptrue_b64(), storage.as_ptr() as *const u64);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s32index_f32_with_svst1_scatter_s32index_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let indices = svindex_s32(0, 1);
+    svst1_scatter_s32index_f32(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s32index_f32(svptrue_b32(), storage.as_ptr() as *const f32, indices);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s32index_s32_with_svst1_scatter_s32index_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s32(0, 1);
+    svst1_scatter_s32index_s32(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s32index_s32(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s32index_u32_with_svst1_scatter_s32index_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s32(0, 1);
+    svst1_scatter_s32index_u32(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s32index_u32(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s64index_f64_with_svst1_scatter_s64index_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let indices = svindex_s64(0, 1);
+    svst1_scatter_s64index_f64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s64index_f64(svptrue_b64(), storage.as_ptr() as *const f64, indices);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s64index_s64_with_svst1_scatter_s64index_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1_scatter_s64index_s64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s64index_s64(svptrue_b64(), storage.as_ptr() as *const i64, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s64index_u64_with_svst1_scatter_s64index_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1_scatter_s64index_u64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s64index_u64(svptrue_b64(), storage.as_ptr() as *const u64, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32index_f32_with_svst1_scatter_u32index_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let indices = svindex_u32(0, 1);
+    svst1_scatter_u32index_f32(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32index_f32(svptrue_b32(), storage.as_ptr() as *const f32, indices);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32index_s32_with_svst1_scatter_u32index_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u32(0, 1);
+    svst1_scatter_u32index_s32(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32index_s32(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32index_u32_with_svst1_scatter_u32index_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u32(0, 1);
+    svst1_scatter_u32index_u32(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32index_u32(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64index_f64_with_svst1_scatter_u64index_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let indices = svindex_u64(0, 1);
+    svst1_scatter_u64index_f64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64index_f64(svptrue_b64(), storage.as_ptr() as *const f64, indices);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64index_s64_with_svst1_scatter_u64index_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1_scatter_u64index_s64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64index_s64(svptrue_b64(), storage.as_ptr() as *const i64, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64index_u64_with_svst1_scatter_u64index_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1_scatter_u64index_u64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64index_u64(svptrue_b64(), storage.as_ptr() as *const u64, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s32offset_f32_with_svst1_scatter_s32offset_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let offsets = svindex_s32(0, 4u32.try_into().unwrap());
+    svst1_scatter_s32offset_f32(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s32offset_f32(svptrue_b32(), storage.as_ptr() as *const f32, offsets);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s32offset_s32_with_svst1_scatter_s32offset_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 4u32.try_into().unwrap());
+    svst1_scatter_s32offset_s32(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s32offset_s32(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s32offset_u32_with_svst1_scatter_s32offset_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 4u32.try_into().unwrap());
+    svst1_scatter_s32offset_u32(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s32offset_u32(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s64offset_f64_with_svst1_scatter_s64offset_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svst1_scatter_s64offset_f64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s64offset_f64(svptrue_b64(), storage.as_ptr() as *const f64, offsets);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s64offset_s64_with_svst1_scatter_s64offset_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svst1_scatter_s64offset_s64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s64offset_s64(svptrue_b64(), storage.as_ptr() as *const i64, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_s64offset_u64_with_svst1_scatter_s64offset_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svst1_scatter_s64offset_u64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_s64offset_u64(svptrue_b64(), storage.as_ptr() as *const u64, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32offset_f32_with_svst1_scatter_u32offset_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svst1_scatter_u32offset_f32(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32offset_f32(svptrue_b32(), storage.as_ptr() as *const f32, offsets);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32offset_s32_with_svst1_scatter_u32offset_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svst1_scatter_u32offset_s32(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32offset_s32(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32offset_u32_with_svst1_scatter_u32offset_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svst1_scatter_u32offset_u32(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32offset_u32(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64offset_f64_with_svst1_scatter_u64offset_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svst1_scatter_u64offset_f64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64offset_f64(svptrue_b64(), storage.as_ptr() as *const f64, offsets);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64offset_s64_with_svst1_scatter_u64offset_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svst1_scatter_u64offset_s64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64offset_s64(svptrue_b64(), storage.as_ptr() as *const i64, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64offset_u64_with_svst1_scatter_u64offset_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svst1_scatter_u64offset_u64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64offset_u64(svptrue_b64(), storage.as_ptr() as *const u64, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64base_f64_with_svst1_scatter_u64base_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svst1_scatter_u64base_f64(svptrue_b64(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64base_f64(svptrue_b64(), bases);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64base_s64_with_svst1_scatter_u64base_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svst1_scatter_u64base_s64(svptrue_b64(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64base_s64(svptrue_b64(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64base_u64_with_svst1_scatter_u64base_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svst1_scatter_u64base_u64(svptrue_b64(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64base_u64(svptrue_b64(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32base_index_f32_with_svst1_scatter_u32base_index_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svst1_scatter_u32base_index_f32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32base_index_f32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32base_index_s32_with_svst1_scatter_u32base_index_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svst1_scatter_u32base_index_s32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32base_index_s32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32base_index_u32_with_svst1_scatter_u32base_index_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svst1_scatter_u32base_index_u32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32base_index_u32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64base_index_f64_with_svst1_scatter_u64base_index_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svst1_scatter_u64base_index_f64(svptrue_b64(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64base_index_f64(svptrue_b64(), bases, 1.try_into().unwrap());
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64base_index_s64_with_svst1_scatter_u64base_index_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svst1_scatter_u64base_index_s64(svptrue_b64(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64base_index_s64(svptrue_b64(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64base_index_u64_with_svst1_scatter_u64base_index_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svst1_scatter_u64base_index_u64(svptrue_b64(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64base_index_u64(svptrue_b64(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32base_offset_f32_with_svst1_scatter_u32base_offset_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svst1_scatter_u32base_offset_f32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32base_offset_f32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+    );
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32base_offset_s32_with_svst1_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svst1_scatter_u32base_offset_s32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32base_offset_s32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u32base_offset_u32_with_svst1_scatter_u32base_offset_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svst1_scatter_u32base_offset_u32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u32base_offset_u32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64base_offset_f64_with_svst1_scatter_u64base_offset_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svst1_scatter_u64base_offset_f64(svptrue_b64(), bases, 8u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64base_offset_f64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64base_offset_s64_with_svst1_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svst1_scatter_u64base_offset_s64(svptrue_b64(), bases, 8u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64base_offset_s64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_gather_u64base_offset_u64_with_svst1_scatter_u64base_offset_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svst1_scatter_u64base_offset_u64(svptrue_b64(), bases, 8u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld1_gather_u64base_offset_u64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_f32_with_svst1_vnum_f32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+    svst1_vnum_f32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_f32(svptrue_b32(), storage.as_ptr() as *const f32, 1);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                1usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_f64_with_svst1_vnum_f64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+    svst1_vnum_f64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_f64(svptrue_b64(), storage.as_ptr() as *const f64, 1);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                1usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_s8_with_svst1_vnum_s8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s8(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1_vnum_s8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_s8(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_i8(
+        loaded,
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_s16_with_svst1_vnum_s16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s16(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1_vnum_s16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_s16(svptrue_b16(), storage.as_ptr() as *const i16, 1);
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_s32_with_svst1_vnum_s32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1_vnum_s32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_s32(svptrue_b32(), storage.as_ptr() as *const i32, 1);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_s64_with_svst1_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1_vnum_s64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_s64(svptrue_b64(), storage.as_ptr() as *const i64, 1);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_u8_with_svst1_vnum_u8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u8(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1_vnum_u8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_u8(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_u8(
+        loaded,
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_u16_with_svst1_vnum_u16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u16(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1_vnum_u16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_u16(svptrue_b16(), storage.as_ptr() as *const u16, 1);
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_u32_with_svst1_vnum_u32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1_vnum_u32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_u32(svptrue_b32(), storage.as_ptr() as *const u32, 1);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1_vnum_u64_with_svst1_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1_vnum_u64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld1_vnum_u64(svptrue_b64(), storage.as_ptr() as *const u64, 1);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_f32() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_f32 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_f32(svptrue_b32(), F32_DATA.as_ptr());
+    assert_vector_matches_f32(
+        loaded,
+        svtrn1q_f32(
+            svdupq_n_f32(0usize as f32, 1usize as f32, 2usize as f32, 3usize as f32),
+            svdupq_n_f32(4usize as f32, 5usize as f32, 6usize as f32, 7usize as f32),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_f64() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_f64 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_f64(svptrue_b64(), F64_DATA.as_ptr());
+    assert_vector_matches_f64(
+        loaded,
+        svtrn1q_f64(
+            svdupq_n_f64(0usize as f64, 1usize as f64),
+            svdupq_n_f64(2usize as f64, 3usize as f64),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_s8() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_s8 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_s8(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i8(
+        loaded,
+        svtrn1q_s8(
+            svdupq_n_s8(
+                0usize as i8,
+                1usize as i8,
+                2usize as i8,
+                3usize as i8,
+                4usize as i8,
+                5usize as i8,
+                6usize as i8,
+                7usize as i8,
+                8usize as i8,
+                9usize as i8,
+                10usize as i8,
+                11usize as i8,
+                12usize as i8,
+                13usize as i8,
+                14usize as i8,
+                15usize as i8,
+            ),
+            svdupq_n_s8(
+                16usize as i8,
+                17usize as i8,
+                18usize as i8,
+                19usize as i8,
+                20usize as i8,
+                21usize as i8,
+                22usize as i8,
+                23usize as i8,
+                24usize as i8,
+                25usize as i8,
+                26usize as i8,
+                27usize as i8,
+                28usize as i8,
+                29usize as i8,
+                30usize as i8,
+                31usize as i8,
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_s16() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_s16 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_s16(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_i16(
+        loaded,
+        svtrn1q_s16(
+            svdupq_n_s16(
+                0usize as i16,
+                1usize as i16,
+                2usize as i16,
+                3usize as i16,
+                4usize as i16,
+                5usize as i16,
+                6usize as i16,
+                7usize as i16,
+            ),
+            svdupq_n_s16(
+                8usize as i16,
+                9usize as i16,
+                10usize as i16,
+                11usize as i16,
+                12usize as i16,
+                13usize as i16,
+                14usize as i16,
+                15usize as i16,
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_s32() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_s32 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_s32(svptrue_b32(), I32_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svtrn1q_s32(
+            svdupq_n_s32(0usize as i32, 1usize as i32, 2usize as i32, 3usize as i32),
+            svdupq_n_s32(4usize as i32, 5usize as i32, 6usize as i32, 7usize as i32),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_s64() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_s64 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_s64(svptrue_b64(), I64_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svtrn1q_s64(
+            svdupq_n_s64(0usize as i64, 1usize as i64),
+            svdupq_n_s64(2usize as i64, 3usize as i64),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_u8() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_u8 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_u8(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u8(
+        loaded,
+        svtrn1q_u8(
+            svdupq_n_u8(
+                0usize as u8,
+                1usize as u8,
+                2usize as u8,
+                3usize as u8,
+                4usize as u8,
+                5usize as u8,
+                6usize as u8,
+                7usize as u8,
+                8usize as u8,
+                9usize as u8,
+                10usize as u8,
+                11usize as u8,
+                12usize as u8,
+                13usize as u8,
+                14usize as u8,
+                15usize as u8,
+            ),
+            svdupq_n_u8(
+                16usize as u8,
+                17usize as u8,
+                18usize as u8,
+                19usize as u8,
+                20usize as u8,
+                21usize as u8,
+                22usize as u8,
+                23usize as u8,
+                24usize as u8,
+                25usize as u8,
+                26usize as u8,
+                27usize as u8,
+                28usize as u8,
+                29usize as u8,
+                30usize as u8,
+                31usize as u8,
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_u16() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_u16 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_u16(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_u16(
+        loaded,
+        svtrn1q_u16(
+            svdupq_n_u16(
+                0usize as u16,
+                1usize as u16,
+                2usize as u16,
+                3usize as u16,
+                4usize as u16,
+                5usize as u16,
+                6usize as u16,
+                7usize as u16,
+            ),
+            svdupq_n_u16(
+                8usize as u16,
+                9usize as u16,
+                10usize as u16,
+                11usize as u16,
+                12usize as u16,
+                13usize as u16,
+                14usize as u16,
+                15usize as u16,
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_u32() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_u32 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_u32(svptrue_b32(), U32_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svtrn1q_u32(
+            svdupq_n_u32(0usize as u32, 1usize as u32, 2usize as u32, 3usize as u32),
+            svdupq_n_u32(4usize as u32, 5usize as u32, 6usize as u32, 7usize as u32),
+        ),
+    );
+}
+#[simd_test(enable = "sve,f64mm")]
+unsafe fn test_svld1ro_u64() {
+    if svcntb() < 32 {
+        println!("Skipping test_svld1ro_u64 due to SVE vector length");
+        return;
+    }
+    svsetffr();
+    let loaded = svld1ro_u64(svptrue_b64(), U64_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svtrn1q_u64(
+            svdupq_n_u64(0usize as u64, 1usize as u64),
+            svdupq_n_u64(2usize as u64, 3usize as u64),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_f32() {
+    svsetffr();
+    let loaded = svld1rq_f32(svptrue_b32(), F32_DATA.as_ptr());
+    assert_vector_matches_f32(
+        loaded,
+        svdupq_n_f32(0usize as f32, 1usize as f32, 2usize as f32, 3usize as f32),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_f64() {
+    svsetffr();
+    let loaded = svld1rq_f64(svptrue_b64(), F64_DATA.as_ptr());
+    assert_vector_matches_f64(loaded, svdupq_n_f64(0usize as f64, 1usize as f64));
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_s8() {
+    svsetffr();
+    let loaded = svld1rq_s8(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i8(
+        loaded,
+        svdupq_n_s8(
+            0usize as i8,
+            1usize as i8,
+            2usize as i8,
+            3usize as i8,
+            4usize as i8,
+            5usize as i8,
+            6usize as i8,
+            7usize as i8,
+            8usize as i8,
+            9usize as i8,
+            10usize as i8,
+            11usize as i8,
+            12usize as i8,
+            13usize as i8,
+            14usize as i8,
+            15usize as i8,
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_s16() {
+    svsetffr();
+    let loaded = svld1rq_s16(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_i16(
+        loaded,
+        svdupq_n_s16(
+            0usize as i16,
+            1usize as i16,
+            2usize as i16,
+            3usize as i16,
+            4usize as i16,
+            5usize as i16,
+            6usize as i16,
+            7usize as i16,
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_s32() {
+    svsetffr();
+    let loaded = svld1rq_s32(svptrue_b32(), I32_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svdupq_n_s32(0usize as i32, 1usize as i32, 2usize as i32, 3usize as i32),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_s64() {
+    svsetffr();
+    let loaded = svld1rq_s64(svptrue_b64(), I64_DATA.as_ptr());
+    assert_vector_matches_i64(loaded, svdupq_n_s64(0usize as i64, 1usize as i64));
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_u8() {
+    svsetffr();
+    let loaded = svld1rq_u8(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u8(
+        loaded,
+        svdupq_n_u8(
+            0usize as u8,
+            1usize as u8,
+            2usize as u8,
+            3usize as u8,
+            4usize as u8,
+            5usize as u8,
+            6usize as u8,
+            7usize as u8,
+            8usize as u8,
+            9usize as u8,
+            10usize as u8,
+            11usize as u8,
+            12usize as u8,
+            13usize as u8,
+            14usize as u8,
+            15usize as u8,
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_u16() {
+    svsetffr();
+    let loaded = svld1rq_u16(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_u16(
+        loaded,
+        svdupq_n_u16(
+            0usize as u16,
+            1usize as u16,
+            2usize as u16,
+            3usize as u16,
+            4usize as u16,
+            5usize as u16,
+            6usize as u16,
+            7usize as u16,
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_u32() {
+    svsetffr();
+    let loaded = svld1rq_u32(svptrue_b32(), U32_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svdupq_n_u32(0usize as u32, 1usize as u32, 2usize as u32, 3usize as u32),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1rq_u64() {
+    svsetffr();
+    let loaded = svld1rq_u64(svptrue_b64(), U64_DATA.as_ptr());
+    assert_vector_matches_u64(loaded, svdupq_n_u64(0usize as u64, 1usize as u64));
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_s32offset_s32_with_svst1b_scatter_s32offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_s32offset_s32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_s32offset_s32(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_s32offset_s32_with_svst1h_scatter_s32offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_s32offset_s32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_s32offset_s32(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_s32offset_u32_with_svst1b_scatter_s32offset_u32() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_s32offset_u32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_s32offset_u32(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_s32offset_u32_with_svst1h_scatter_s32offset_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_s32offset_u32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_s32offset_u32(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_s64offset_s64_with_svst1b_scatter_s64offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svst1b_scatter_s64offset_s64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_s64offset_s64(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_s64offset_s64_with_svst1h_scatter_s64offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svst1h_scatter_s64offset_s64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_s64offset_s64(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_s64offset_s64_with_svst1w_scatter_s64offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svst1w_scatter_s64offset_s64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svld1sw_gather_s64offset_s64(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_s64offset_u64_with_svst1b_scatter_s64offset_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svst1b_scatter_s64offset_u64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_s64offset_u64(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_s64offset_u64_with_svst1h_scatter_s64offset_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svst1h_scatter_s64offset_u64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_s64offset_u64(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_s64offset_u64_with_svst1w_scatter_s64offset_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svst1w_scatter_s64offset_u64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svld1sw_gather_s64offset_u64(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u32offset_s32_with_svst1b_scatter_u32offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u32offset_s32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u32offset_s32(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u32offset_s32_with_svst1h_scatter_u32offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32offset_s32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_u32offset_s32(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u32offset_u32_with_svst1b_scatter_u32offset_u32() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u32offset_u32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u32offset_u32(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u32offset_u32_with_svst1h_scatter_u32offset_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32offset_u32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_u32offset_u32(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u64offset_s64_with_svst1b_scatter_u64offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u64offset_s64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u64offset_s64(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64offset_s64_with_svst1h_scatter_u64offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u64offset_s64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_u64offset_s64(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64offset_s64_with_svst1w_scatter_u64offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svst1w_scatter_u64offset_s64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svld1sw_gather_u64offset_s64(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u64offset_u64_with_svst1b_scatter_u64offset_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u64offset_u64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u64offset_u64(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64offset_u64_with_svst1h_scatter_u64offset_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u64offset_u64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_u64offset_u64(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64offset_u64_with_svst1w_scatter_u64offset_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svst1w_scatter_u64offset_u64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svld1sw_gather_u64offset_u64(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u32base_offset_s32_with_svst1b_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u32base_offset_s32_with_svst1h_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u32base_offset_u32_with_svst1b_scatter_u32base_offset_u32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u32base_offset_u32_with_svst1h_scatter_u32base_offset_u32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u64base_offset_s64_with_svst1b_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svst1b_scatter_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64base_offset_s64_with_svst1h_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64base_offset_s64_with_svst1w_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1sw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u64base_offset_u64_with_svst1b_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svst1b_scatter_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64base_offset_u64_with_svst1h_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64base_offset_u64_with_svst1w_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1sw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u64base_s64_with_svst1b_scatter_u64base_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svst1b_scatter_u64base_s64(svptrue_b8(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u64base_s64(svptrue_b8(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64base_s64_with_svst1h_scatter_u64base_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_s64(svptrue_b16(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u64base_s64(svptrue_b16(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64base_s64_with_svst1w_scatter_u64base_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_s64(svptrue_b32(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1sw_gather_u64base_s64(svptrue_b32(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_gather_u64base_u64_with_svst1b_scatter_u64base_u64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svst1b_scatter_u64base_u64(svptrue_b8(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_gather_u64base_u64(svptrue_b8(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64base_u64_with_svst1h_scatter_u64base_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_u64(svptrue_b16(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u64base_u64(svptrue_b16(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64base_u64_with_svst1w_scatter_u64base_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_u64(svptrue_b32(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1sw_gather_u64base_u64(svptrue_b32(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_s16_with_svst1b_s16() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_s16(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_s16(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_s32_with_svst1b_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_s32(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_s32(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_s32_with_svst1h_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1h_s32(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_s32(svptrue_b16(), storage.as_ptr() as *const i16);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_s64_with_svst1b_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_s64(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_s64(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_s64_with_svst1h_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1h_s64(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_s64(svptrue_b16(), storage.as_ptr() as *const i16);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_s64_with_svst1w_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1w_s64(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1sw_s64(svptrue_b32(), storage.as_ptr() as *const i32);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_u16_with_svst1b_u16() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_u16(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_u16(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_u32_with_svst1b_u32() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_u32(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_u32(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_u32_with_svst1h_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1h_u32(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1sh_u32(svptrue_b16(), storage.as_ptr() as *const i16);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_u64_with_svst1b_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_u64(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_u64(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_u64_with_svst1h_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1h_u64(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1sh_u64(svptrue_b16(), storage.as_ptr() as *const i16);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_u64_with_svst1w_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1w_u64(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1sw_u64(svptrue_b32(), storage.as_ptr() as *const i32);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_vnum_s16_with_svst1b_vnum_s16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s16(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_s16(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_vnum_s16(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_vnum_s32_with_svst1b_vnum_s32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_s32(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_vnum_s32(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_vnum_s32_with_svst1h_vnum_s32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1h_vnum_s32(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_vnum_s32(svptrue_b16(), storage.as_ptr() as *const i16, 1);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_vnum_s64_with_svst1b_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_s64(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1sb_vnum_s64(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_vnum_s64_with_svst1h_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1h_vnum_s64(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_vnum_s64(svptrue_b16(), storage.as_ptr() as *const i16, 1);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_vnum_s64_with_svst1w_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1w_vnum_s64(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1sw_vnum_s64(svptrue_b32(), storage.as_ptr() as *const i32, 1);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_vnum_u16_with_svst1b_vnum_u16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u16(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_u16(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_vnum_u16(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_vnum_u32_with_svst1b_vnum_u32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_u32(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_vnum_u32(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_vnum_u32_with_svst1h_vnum_u32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1h_vnum_u32(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1sh_vnum_u32(svptrue_b16(), storage.as_ptr() as *const i16, 1);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sb_vnum_u64_with_svst1b_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_u64(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1sb_vnum_u64(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_vnum_u64_with_svst1h_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1h_vnum_u64(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1sh_vnum_u64(svptrue_b16(), storage.as_ptr() as *const i16, 1);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_vnum_u64_with_svst1w_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1w_vnum_u64(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1sw_vnum_u64(svptrue_b32(), storage.as_ptr() as *const i32, 1);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_s32index_s32_with_svst1h_scatter_s32index_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s32(0, 1);
+    svst1h_scatter_s32index_s32(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_s32index_s32(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_s32index_u32_with_svst1h_scatter_s32index_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s32(0, 1);
+    svst1h_scatter_s32index_u32(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_s32index_u32(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_s64index_s64_with_svst1h_scatter_s64index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1h_scatter_s64index_s64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_s64index_s64(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_s64index_s64_with_svst1w_scatter_s64index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1w_scatter_s64index_s64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svld1sw_gather_s64index_s64(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_s64index_u64_with_svst1h_scatter_s64index_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1h_scatter_s64index_u64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_s64index_u64(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_s64index_u64_with_svst1w_scatter_s64index_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1w_scatter_s64index_u64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svld1sw_gather_s64index_u64(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u32index_s32_with_svst1h_scatter_u32index_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u32(0, 1);
+    svst1h_scatter_u32index_s32(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_u32index_s32(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u32index_u32_with_svst1h_scatter_u32index_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u32(0, 1);
+    svst1h_scatter_u32index_u32(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_u32index_u32(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64index_s64_with_svst1h_scatter_u64index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1h_scatter_u64index_s64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_u64index_s64(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64index_s64_with_svst1w_scatter_u64index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1w_scatter_u64index_s64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svld1sw_gather_u64index_s64(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64index_u64_with_svst1h_scatter_u64index_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1h_scatter_u64index_u64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1sh_gather_u64index_u64(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64index_u64_with_svst1w_scatter_u64index_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1w_scatter_u64index_u64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svld1sw_gather_u64index_u64(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u32base_index_s32_with_svst1h_scatter_u32base_index_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u32base_index_u32_with_svst1h_scatter_u32base_index_u32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64base_index_s64_with_svst1h_scatter_u64base_index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64base_index_s64_with_svst1w_scatter_u64base_index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1sw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sh_gather_u64base_index_u64_with_svst1h_scatter_u64base_index_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1sh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1sw_gather_u64base_index_u64_with_svst1w_scatter_u64base_index_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1sw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_s32offset_s32_with_svst1b_scatter_s32offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_s32offset_s32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_s32offset_s32(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_s32offset_s32_with_svst1h_scatter_s32offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_s32offset_s32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_s32offset_s32(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_s32offset_u32_with_svst1b_scatter_s32offset_u32() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_s32offset_u32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_s32offset_u32(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_s32offset_u32_with_svst1h_scatter_s32offset_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_s32offset_u32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_s32offset_u32(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_s64offset_s64_with_svst1b_scatter_s64offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svst1b_scatter_s64offset_s64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_s64offset_s64(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_s64offset_s64_with_svst1h_scatter_s64offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svst1h_scatter_s64offset_s64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_s64offset_s64(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_s64offset_s64_with_svst1w_scatter_s64offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svst1w_scatter_s64offset_s64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svld1uw_gather_s64offset_s64(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_s64offset_u64_with_svst1b_scatter_s64offset_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svst1b_scatter_s64offset_u64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_s64offset_u64(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_s64offset_u64_with_svst1h_scatter_s64offset_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svst1h_scatter_s64offset_u64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_s64offset_u64(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_s64offset_u64_with_svst1w_scatter_s64offset_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svst1w_scatter_s64offset_u64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svld1uw_gather_s64offset_u64(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u32offset_s32_with_svst1b_scatter_u32offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u32offset_s32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u32offset_s32(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u32offset_s32_with_svst1h_scatter_u32offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32offset_s32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_u32offset_s32(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u32offset_u32_with_svst1b_scatter_u32offset_u32() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u32offset_u32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u32offset_u32(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u32offset_u32_with_svst1h_scatter_u32offset_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32offset_u32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_u32offset_u32(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u64offset_s64_with_svst1b_scatter_u64offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u64offset_s64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u64offset_s64(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64offset_s64_with_svst1h_scatter_u64offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u64offset_s64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_u64offset_s64(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64offset_s64_with_svst1w_scatter_u64offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svst1w_scatter_u64offset_s64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svld1uw_gather_u64offset_s64(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u64offset_u64_with_svst1b_scatter_u64offset_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u64offset_u64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u64offset_u64(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64offset_u64_with_svst1h_scatter_u64offset_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u64offset_u64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_u64offset_u64(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64offset_u64_with_svst1w_scatter_u64offset_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svst1w_scatter_u64offset_u64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svld1uw_gather_u64offset_u64(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u32base_offset_s32_with_svst1b_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u32base_offset_s32_with_svst1h_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u32base_offset_u32_with_svst1b_scatter_u32base_offset_u32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svst1b_scatter_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u32base_offset_u32_with_svst1h_scatter_u32base_offset_u32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u64base_offset_s64_with_svst1b_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svst1b_scatter_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64base_offset_s64_with_svst1h_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64base_offset_s64_with_svst1w_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1uw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u64base_offset_u64_with_svst1b_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svst1b_scatter_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64base_offset_u64_with_svst1h_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64base_offset_u64_with_svst1w_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1uw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u64base_s64_with_svst1b_scatter_u64base_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svst1b_scatter_u64base_s64(svptrue_b8(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u64base_s64(svptrue_b8(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64base_s64_with_svst1h_scatter_u64base_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_s64(svptrue_b16(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u64base_s64(svptrue_b16(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64base_s64_with_svst1w_scatter_u64base_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_s64(svptrue_b32(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1uw_gather_u64base_s64(svptrue_b32(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_gather_u64base_u64_with_svst1b_scatter_u64base_u64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svst1b_scatter_u64base_u64(svptrue_b8(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_gather_u64base_u64(svptrue_b8(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64base_u64_with_svst1h_scatter_u64base_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_u64(svptrue_b16(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u64base_u64(svptrue_b16(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64base_u64_with_svst1w_scatter_u64base_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_u64(svptrue_b32(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1uw_gather_u64base_u64(svptrue_b32(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_s16_with_svst1b_s16() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_s16(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_s16(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_s32_with_svst1b_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_s32(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_s32(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_s32_with_svst1h_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1h_s32(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_s32(svptrue_b16(), storage.as_ptr() as *const u16);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_s64_with_svst1b_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_s64(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_s64(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_s64_with_svst1h_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1h_s64(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_s64(svptrue_b16(), storage.as_ptr() as *const u16);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_s64_with_svst1w_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1w_s64(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1uw_s64(svptrue_b32(), storage.as_ptr() as *const u32);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_u16_with_svst1b_u16() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_u16(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_u16(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_u32_with_svst1b_u32() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_u32(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_u32(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_u32_with_svst1h_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1h_u32(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1uh_u32(svptrue_b16(), storage.as_ptr() as *const u16);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_u64_with_svst1b_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1b_u64(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_u64(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_u64_with_svst1h_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1h_u64(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1uh_u64(svptrue_b16(), storage.as_ptr() as *const u16);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_u64_with_svst1w_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svst1w_u64(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1uw_u64(svptrue_b32(), storage.as_ptr() as *const u32);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_vnum_s16_with_svst1b_vnum_s16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s16(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_s16(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_vnum_s16(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_vnum_s32_with_svst1b_vnum_s32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_s32(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_vnum_s32(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_vnum_s32_with_svst1h_vnum_s32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1h_vnum_s32(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_vnum_s32(svptrue_b16(), storage.as_ptr() as *const u16, 1);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_vnum_s64_with_svst1b_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_s64(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld1ub_vnum_s64(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_vnum_s64_with_svst1h_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1h_vnum_s64(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_vnum_s64(svptrue_b16(), storage.as_ptr() as *const u16, 1);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_vnum_s64_with_svst1w_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1w_vnum_s64(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1uw_vnum_s64(svptrue_b32(), storage.as_ptr() as *const u32, 1);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_vnum_u16_with_svst1b_vnum_u16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u16(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_u16(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_vnum_u16(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_vnum_u32_with_svst1b_vnum_u32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_u32(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_vnum_u32(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_vnum_u32_with_svst1h_vnum_u32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1h_vnum_u32(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1uh_vnum_u32(svptrue_b16(), storage.as_ptr() as *const u16, 1);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1ub_vnum_u64_with_svst1b_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1b_vnum_u64(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld1ub_vnum_u64(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_vnum_u64_with_svst1h_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1h_vnum_u64(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld1uh_vnum_u64(svptrue_b16(), storage.as_ptr() as *const u16, 1);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_vnum_u64_with_svst1w_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svst1w_vnum_u64(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld1uw_vnum_u64(svptrue_b32(), storage.as_ptr() as *const u32, 1);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_s32index_s32_with_svst1h_scatter_s32index_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s32(0, 1);
+    svst1h_scatter_s32index_s32(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_s32index_s32(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_s32index_u32_with_svst1h_scatter_s32index_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s32(0, 1);
+    svst1h_scatter_s32index_u32(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_s32index_u32(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_s64index_s64_with_svst1h_scatter_s64index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1h_scatter_s64index_s64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_s64index_s64(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_s64index_s64_with_svst1w_scatter_s64index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1w_scatter_s64index_s64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svld1uw_gather_s64index_s64(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_s64index_u64_with_svst1h_scatter_s64index_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1h_scatter_s64index_u64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_s64index_u64(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_s64index_u64_with_svst1w_scatter_s64index_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svst1w_scatter_s64index_u64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svld1uw_gather_s64index_u64(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u32index_s32_with_svst1h_scatter_u32index_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u32(0, 1);
+    svst1h_scatter_u32index_s32(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_u32index_s32(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u32index_u32_with_svst1h_scatter_u32index_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u32(0, 1);
+    svst1h_scatter_u32index_u32(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_u32index_u32(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64index_s64_with_svst1h_scatter_u64index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1h_scatter_u64index_s64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_u64index_s64(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64index_s64_with_svst1w_scatter_u64index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1w_scatter_u64index_s64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svld1uw_gather_u64index_s64(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64index_u64_with_svst1h_scatter_u64index_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1h_scatter_u64index_u64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svld1uh_gather_u64index_u64(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64index_u64_with_svst1w_scatter_u64index_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svst1w_scatter_u64index_u64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svld1uw_gather_u64index_u64(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u32base_index_s32_with_svst1h_scatter_u32base_index_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u32base_index_u32_with_svst1h_scatter_u32base_index_u32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svst1h_scatter_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64base_index_s64_with_svst1h_scatter_u64base_index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64base_index_s64_with_svst1w_scatter_u64base_index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1uw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uh_gather_u64base_index_u64_with_svst1h_scatter_u64base_index_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svst1h_scatter_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld1uh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld1uw_gather_u64base_index_u64_with_svst1w_scatter_u64base_index_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svst1w_scatter_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld1uw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_f32_with_svst2_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcreate2_f32(
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        ),
+    );
+    svst2_f32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld2_f32(svptrue_b32(), storage.as_ptr() as *const f32);
+    assert_vector_matches_f32(
+        svget2_f32::<{ 0usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget2_f32::<{ 1usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_f64_with_svst2_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcreate2_f64(
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        ),
+    );
+    svst2_f64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld2_f64(svptrue_b64(), storage.as_ptr() as *const f64);
+    assert_vector_matches_f64(
+        svget2_f64::<{ 0usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget2_f64::<{ 1usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_s8_with_svst2_s8() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svcreate2_s8(
+        svindex_s8((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        svindex_s8((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    svst2_s8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld2_s8(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_i8(
+        svget2_s8::<{ 0usize as i32 }>(loaded),
+        svindex_s8((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i8(
+        svget2_s8::<{ 1usize as i32 }>(loaded),
+        svindex_s8((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_s16_with_svst2_s16() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svcreate2_s16(
+        svindex_s16((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        svindex_s16((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    svst2_s16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld2_s16(svptrue_b16(), storage.as_ptr() as *const i16);
+    assert_vector_matches_i16(
+        svget2_s16::<{ 0usize as i32 }>(loaded),
+        svindex_s16((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i16(
+        svget2_s16::<{ 1usize as i32 }>(loaded),
+        svindex_s16((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_s32_with_svst2_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svcreate2_s32(
+        svindex_s32((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        svindex_s32((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    svst2_s32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld2_s32(svptrue_b32(), storage.as_ptr() as *const i32);
+    assert_vector_matches_i32(
+        svget2_s32::<{ 0usize as i32 }>(loaded),
+        svindex_s32((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i32(
+        svget2_s32::<{ 1usize as i32 }>(loaded),
+        svindex_s32((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_s64_with_svst2_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svcreate2_s64(
+        svindex_s64((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        svindex_s64((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    svst2_s64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld2_s64(svptrue_b64(), storage.as_ptr() as *const i64);
+    assert_vector_matches_i64(
+        svget2_s64::<{ 0usize as i32 }>(loaded),
+        svindex_s64((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i64(
+        svget2_s64::<{ 1usize as i32 }>(loaded),
+        svindex_s64((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_u8_with_svst2_u8() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svcreate2_u8(
+        svindex_u8((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        svindex_u8((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    svst2_u8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld2_u8(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_u8(
+        svget2_u8::<{ 0usize as i32 }>(loaded),
+        svindex_u8((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u8(
+        svget2_u8::<{ 1usize as i32 }>(loaded),
+        svindex_u8((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_u16_with_svst2_u16() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svcreate2_u16(
+        svindex_u16((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        svindex_u16((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    svst2_u16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld2_u16(svptrue_b16(), storage.as_ptr() as *const u16);
+    assert_vector_matches_u16(
+        svget2_u16::<{ 0usize as i32 }>(loaded),
+        svindex_u16((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u16(
+        svget2_u16::<{ 1usize as i32 }>(loaded),
+        svindex_u16((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_u32_with_svst2_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svcreate2_u32(
+        svindex_u32((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        svindex_u32((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    svst2_u32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld2_u32(svptrue_b32(), storage.as_ptr() as *const u32);
+    assert_vector_matches_u32(
+        svget2_u32::<{ 0usize as i32 }>(loaded),
+        svindex_u32((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u32(
+        svget2_u32::<{ 1usize as i32 }>(loaded),
+        svindex_u32((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_u64_with_svst2_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svcreate2_u64(
+        svindex_u64((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+        svindex_u64((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    svst2_u64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld2_u64(svptrue_b64(), storage.as_ptr() as *const u64);
+    assert_vector_matches_u64(
+        svget2_u64::<{ 0usize as i32 }>(loaded),
+        svindex_u64((0usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u64(
+        svget2_u64::<{ 1usize as i32 }>(loaded),
+        svindex_u64((1usize).try_into().unwrap(), 2usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_f32_with_svst2_vnum_f32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as f32; 320usize];
+    let data = svcreate2_f32(
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                2usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 1usize).try_into().unwrap(),
+                2usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    svst2_vnum_f32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_f32(svptrue_b32(), storage.as_ptr() as *const f32, 1);
+    assert_vector_matches_f32(
+        svget2_f32::<{ 0usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                2usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget2_f32::<{ 1usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 1usize).try_into().unwrap(),
+                2usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_f64_with_svst2_vnum_f64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as f64; 160usize];
+    let data = svcreate2_f64(
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                2usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 1usize).try_into().unwrap(),
+                2usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    svst2_vnum_f64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_f64(svptrue_b64(), storage.as_ptr() as *const f64, 1);
+    assert_vector_matches_f64(
+        svget2_f64::<{ 0usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                2usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget2_f64::<{ 1usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 1usize).try_into().unwrap(),
+                2usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_s8_with_svst2_vnum_s8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svcreate2_s8(
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+        svindex_s8(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    svst2_vnum_s8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_s8(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_i8(
+        svget2_s8::<{ 0usize as i32 }>(loaded),
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i8(
+        svget2_s8::<{ 1usize as i32 }>(loaded),
+        svindex_s8(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_s16_with_svst2_vnum_s16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as i16; 640usize];
+    let data = svcreate2_s16(
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+        svindex_s16(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    svst2_vnum_s16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_s16(svptrue_b16(), storage.as_ptr() as *const i16, 1);
+    assert_vector_matches_i16(
+        svget2_s16::<{ 0usize as i32 }>(loaded),
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i16(
+        svget2_s16::<{ 1usize as i32 }>(loaded),
+        svindex_s16(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_s32_with_svst2_vnum_s32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as i32; 320usize];
+    let data = svcreate2_s32(
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+        svindex_s32(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    svst2_vnum_s32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_s32(svptrue_b32(), storage.as_ptr() as *const i32, 1);
+    assert_vector_matches_i32(
+        svget2_s32::<{ 0usize as i32 }>(loaded),
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i32(
+        svget2_s32::<{ 1usize as i32 }>(loaded),
+        svindex_s32(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_s64_with_svst2_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i64; 160usize];
+    let data = svcreate2_s64(
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+        svindex_s64(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    svst2_vnum_s64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_s64(svptrue_b64(), storage.as_ptr() as *const i64, 1);
+    assert_vector_matches_i64(
+        svget2_s64::<{ 0usize as i32 }>(loaded),
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i64(
+        svget2_s64::<{ 1usize as i32 }>(loaded),
+        svindex_s64(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_u8_with_svst2_vnum_u8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svcreate2_u8(
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+        svindex_u8(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    svst2_vnum_u8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_u8(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_u8(
+        svget2_u8::<{ 0usize as i32 }>(loaded),
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u8(
+        svget2_u8::<{ 1usize as i32 }>(loaded),
+        svindex_u8(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_u16_with_svst2_vnum_u16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as u16; 640usize];
+    let data = svcreate2_u16(
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+        svindex_u16(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    svst2_vnum_u16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_u16(svptrue_b16(), storage.as_ptr() as *const u16, 1);
+    assert_vector_matches_u16(
+        svget2_u16::<{ 0usize as i32 }>(loaded),
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u16(
+        svget2_u16::<{ 1usize as i32 }>(loaded),
+        svindex_u16(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_u32_with_svst2_vnum_u32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as u32; 320usize];
+    let data = svcreate2_u32(
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+        svindex_u32(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    svst2_vnum_u32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_u32(svptrue_b32(), storage.as_ptr() as *const u32, 1);
+    assert_vector_matches_u32(
+        svget2_u32::<{ 0usize as i32 }>(loaded),
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u32(
+        svget2_u32::<{ 1usize as i32 }>(loaded),
+        svindex_u32(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld2_vnum_u64_with_svst2_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u64; 160usize];
+    let data = svcreate2_u64(
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+        svindex_u64(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    svst2_vnum_u64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld2_vnum_u64(svptrue_b64(), storage.as_ptr() as *const u64, 1);
+    assert_vector_matches_u64(
+        svget2_u64::<{ 0usize as i32 }>(loaded),
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u64(
+        svget2_u64::<{ 1usize as i32 }>(loaded),
+        svindex_u64(
+            (len + 1usize).try_into().unwrap(),
+            2usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_f32_with_svst3_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcreate3_f32(
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+    );
+    svst3_f32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld3_f32(svptrue_b32(), storage.as_ptr() as *const f32);
+    assert_vector_matches_f32(
+        svget3_f32::<{ 0usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget3_f32::<{ 1usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget3_f32::<{ 2usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_f64_with_svst3_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcreate3_f64(
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+    );
+    svst3_f64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld3_f64(svptrue_b64(), storage.as_ptr() as *const f64);
+    assert_vector_matches_f64(
+        svget3_f64::<{ 0usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget3_f64::<{ 1usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget3_f64::<{ 2usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_s8_with_svst3_s8() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svcreate3_s8(
+        svindex_s8((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_s8((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_s8((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    svst3_s8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld3_s8(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_i8(
+        svget3_s8::<{ 0usize as i32 }>(loaded),
+        svindex_s8((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i8(
+        svget3_s8::<{ 1usize as i32 }>(loaded),
+        svindex_s8((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i8(
+        svget3_s8::<{ 2usize as i32 }>(loaded),
+        svindex_s8((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_s16_with_svst3_s16() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svcreate3_s16(
+        svindex_s16((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_s16((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_s16((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    svst3_s16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld3_s16(svptrue_b16(), storage.as_ptr() as *const i16);
+    assert_vector_matches_i16(
+        svget3_s16::<{ 0usize as i32 }>(loaded),
+        svindex_s16((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i16(
+        svget3_s16::<{ 1usize as i32 }>(loaded),
+        svindex_s16((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i16(
+        svget3_s16::<{ 2usize as i32 }>(loaded),
+        svindex_s16((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_s32_with_svst3_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svcreate3_s32(
+        svindex_s32((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_s32((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_s32((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    svst3_s32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld3_s32(svptrue_b32(), storage.as_ptr() as *const i32);
+    assert_vector_matches_i32(
+        svget3_s32::<{ 0usize as i32 }>(loaded),
+        svindex_s32((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i32(
+        svget3_s32::<{ 1usize as i32 }>(loaded),
+        svindex_s32((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i32(
+        svget3_s32::<{ 2usize as i32 }>(loaded),
+        svindex_s32((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_s64_with_svst3_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svcreate3_s64(
+        svindex_s64((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_s64((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_s64((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    svst3_s64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld3_s64(svptrue_b64(), storage.as_ptr() as *const i64);
+    assert_vector_matches_i64(
+        svget3_s64::<{ 0usize as i32 }>(loaded),
+        svindex_s64((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i64(
+        svget3_s64::<{ 1usize as i32 }>(loaded),
+        svindex_s64((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i64(
+        svget3_s64::<{ 2usize as i32 }>(loaded),
+        svindex_s64((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_u8_with_svst3_u8() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svcreate3_u8(
+        svindex_u8((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_u8((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_u8((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    svst3_u8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld3_u8(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_u8(
+        svget3_u8::<{ 0usize as i32 }>(loaded),
+        svindex_u8((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u8(
+        svget3_u8::<{ 1usize as i32 }>(loaded),
+        svindex_u8((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u8(
+        svget3_u8::<{ 2usize as i32 }>(loaded),
+        svindex_u8((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_u16_with_svst3_u16() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svcreate3_u16(
+        svindex_u16((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_u16((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_u16((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    svst3_u16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld3_u16(svptrue_b16(), storage.as_ptr() as *const u16);
+    assert_vector_matches_u16(
+        svget3_u16::<{ 0usize as i32 }>(loaded),
+        svindex_u16((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u16(
+        svget3_u16::<{ 1usize as i32 }>(loaded),
+        svindex_u16((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u16(
+        svget3_u16::<{ 2usize as i32 }>(loaded),
+        svindex_u16((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_u32_with_svst3_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svcreate3_u32(
+        svindex_u32((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_u32((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_u32((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    svst3_u32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld3_u32(svptrue_b32(), storage.as_ptr() as *const u32);
+    assert_vector_matches_u32(
+        svget3_u32::<{ 0usize as i32 }>(loaded),
+        svindex_u32((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u32(
+        svget3_u32::<{ 1usize as i32 }>(loaded),
+        svindex_u32((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u32(
+        svget3_u32::<{ 2usize as i32 }>(loaded),
+        svindex_u32((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_u64_with_svst3_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svcreate3_u64(
+        svindex_u64((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_u64((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+        svindex_u64((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    svst3_u64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld3_u64(svptrue_b64(), storage.as_ptr() as *const u64);
+    assert_vector_matches_u64(
+        svget3_u64::<{ 0usize as i32 }>(loaded),
+        svindex_u64((0usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u64(
+        svget3_u64::<{ 1usize as i32 }>(loaded),
+        svindex_u64((1usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u64(
+        svget3_u64::<{ 2usize as i32 }>(loaded),
+        svindex_u64((2usize).try_into().unwrap(), 3usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_f32_with_svst3_vnum_f32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as f32; 320usize];
+    let data = svcreate3_f32(
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 1usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 2usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    svst3_vnum_f32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_f32(svptrue_b32(), storage.as_ptr() as *const f32, 1);
+    assert_vector_matches_f32(
+        svget3_f32::<{ 0usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget3_f32::<{ 1usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 1usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget3_f32::<{ 2usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 2usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_f64_with_svst3_vnum_f64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as f64; 160usize];
+    let data = svcreate3_f64(
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 1usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 2usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    svst3_vnum_f64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_f64(svptrue_b64(), storage.as_ptr() as *const f64, 1);
+    assert_vector_matches_f64(
+        svget3_f64::<{ 0usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget3_f64::<{ 1usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 1usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget3_f64::<{ 2usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 2usize).try_into().unwrap(),
+                3usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_s8_with_svst3_vnum_s8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svcreate3_s8(
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_s8(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_s8(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    svst3_vnum_s8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_s8(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_i8(
+        svget3_s8::<{ 0usize as i32 }>(loaded),
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i8(
+        svget3_s8::<{ 1usize as i32 }>(loaded),
+        svindex_s8(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i8(
+        svget3_s8::<{ 2usize as i32 }>(loaded),
+        svindex_s8(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_s16_with_svst3_vnum_s16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as i16; 640usize];
+    let data = svcreate3_s16(
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_s16(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_s16(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    svst3_vnum_s16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_s16(svptrue_b16(), storage.as_ptr() as *const i16, 1);
+    assert_vector_matches_i16(
+        svget3_s16::<{ 0usize as i32 }>(loaded),
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i16(
+        svget3_s16::<{ 1usize as i32 }>(loaded),
+        svindex_s16(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i16(
+        svget3_s16::<{ 2usize as i32 }>(loaded),
+        svindex_s16(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_s32_with_svst3_vnum_s32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as i32; 320usize];
+    let data = svcreate3_s32(
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_s32(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_s32(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    svst3_vnum_s32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_s32(svptrue_b32(), storage.as_ptr() as *const i32, 1);
+    assert_vector_matches_i32(
+        svget3_s32::<{ 0usize as i32 }>(loaded),
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i32(
+        svget3_s32::<{ 1usize as i32 }>(loaded),
+        svindex_s32(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i32(
+        svget3_s32::<{ 2usize as i32 }>(loaded),
+        svindex_s32(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_s64_with_svst3_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i64; 160usize];
+    let data = svcreate3_s64(
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_s64(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_s64(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    svst3_vnum_s64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_s64(svptrue_b64(), storage.as_ptr() as *const i64, 1);
+    assert_vector_matches_i64(
+        svget3_s64::<{ 0usize as i32 }>(loaded),
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i64(
+        svget3_s64::<{ 1usize as i32 }>(loaded),
+        svindex_s64(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i64(
+        svget3_s64::<{ 2usize as i32 }>(loaded),
+        svindex_s64(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_u8_with_svst3_vnum_u8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svcreate3_u8(
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_u8(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_u8(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    svst3_vnum_u8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_u8(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_u8(
+        svget3_u8::<{ 0usize as i32 }>(loaded),
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u8(
+        svget3_u8::<{ 1usize as i32 }>(loaded),
+        svindex_u8(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u8(
+        svget3_u8::<{ 2usize as i32 }>(loaded),
+        svindex_u8(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_u16_with_svst3_vnum_u16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as u16; 640usize];
+    let data = svcreate3_u16(
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_u16(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_u16(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    svst3_vnum_u16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_u16(svptrue_b16(), storage.as_ptr() as *const u16, 1);
+    assert_vector_matches_u16(
+        svget3_u16::<{ 0usize as i32 }>(loaded),
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u16(
+        svget3_u16::<{ 1usize as i32 }>(loaded),
+        svindex_u16(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u16(
+        svget3_u16::<{ 2usize as i32 }>(loaded),
+        svindex_u16(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_u32_with_svst3_vnum_u32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as u32; 320usize];
+    let data = svcreate3_u32(
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_u32(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_u32(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    svst3_vnum_u32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_u32(svptrue_b32(), storage.as_ptr() as *const u32, 1);
+    assert_vector_matches_u32(
+        svget3_u32::<{ 0usize as i32 }>(loaded),
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u32(
+        svget3_u32::<{ 1usize as i32 }>(loaded),
+        svindex_u32(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u32(
+        svget3_u32::<{ 2usize as i32 }>(loaded),
+        svindex_u32(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld3_vnum_u64_with_svst3_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u64; 160usize];
+    let data = svcreate3_u64(
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_u64(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+        svindex_u64(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    svst3_vnum_u64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld3_vnum_u64(svptrue_b64(), storage.as_ptr() as *const u64, 1);
+    assert_vector_matches_u64(
+        svget3_u64::<{ 0usize as i32 }>(loaded),
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u64(
+        svget3_u64::<{ 1usize as i32 }>(loaded),
+        svindex_u64(
+            (len + 1usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u64(
+        svget3_u64::<{ 2usize as i32 }>(loaded),
+        svindex_u64(
+            (len + 2usize).try_into().unwrap(),
+            3usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_f32_with_svst4_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcreate4_f32(
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+    svst4_f32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld4_f32(svptrue_b32(), storage.as_ptr() as *const f32);
+    assert_vector_matches_f32(
+        svget4_f32::<{ 0usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget4_f32::<{ 1usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget4_f32::<{ 2usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget4_f32::<{ 3usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_f64_with_svst4_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcreate4_f64(
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+    svst4_f64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld4_f64(svptrue_b64(), storage.as_ptr() as *const f64);
+    assert_vector_matches_f64(
+        svget4_f64::<{ 0usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget4_f64::<{ 1usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget4_f64::<{ 2usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget4_f64::<{ 3usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_s8_with_svst4_s8() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svcreate4_s8(
+        svindex_s8((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s8((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s8((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s8((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    svst4_s8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld4_s8(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_i8(
+        svget4_s8::<{ 0usize as i32 }>(loaded),
+        svindex_s8((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i8(
+        svget4_s8::<{ 1usize as i32 }>(loaded),
+        svindex_s8((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i8(
+        svget4_s8::<{ 2usize as i32 }>(loaded),
+        svindex_s8((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i8(
+        svget4_s8::<{ 3usize as i32 }>(loaded),
+        svindex_s8((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_s16_with_svst4_s16() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svcreate4_s16(
+        svindex_s16((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s16((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s16((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s16((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    svst4_s16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld4_s16(svptrue_b16(), storage.as_ptr() as *const i16);
+    assert_vector_matches_i16(
+        svget4_s16::<{ 0usize as i32 }>(loaded),
+        svindex_s16((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i16(
+        svget4_s16::<{ 1usize as i32 }>(loaded),
+        svindex_s16((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i16(
+        svget4_s16::<{ 2usize as i32 }>(loaded),
+        svindex_s16((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i16(
+        svget4_s16::<{ 3usize as i32 }>(loaded),
+        svindex_s16((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_s32_with_svst4_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svcreate4_s32(
+        svindex_s32((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s32((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s32((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s32((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    svst4_s32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld4_s32(svptrue_b32(), storage.as_ptr() as *const i32);
+    assert_vector_matches_i32(
+        svget4_s32::<{ 0usize as i32 }>(loaded),
+        svindex_s32((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i32(
+        svget4_s32::<{ 1usize as i32 }>(loaded),
+        svindex_s32((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i32(
+        svget4_s32::<{ 2usize as i32 }>(loaded),
+        svindex_s32((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i32(
+        svget4_s32::<{ 3usize as i32 }>(loaded),
+        svindex_s32((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_s64_with_svst4_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svcreate4_s64(
+        svindex_s64((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s64((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s64((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_s64((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    svst4_s64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld4_s64(svptrue_b64(), storage.as_ptr() as *const i64);
+    assert_vector_matches_i64(
+        svget4_s64::<{ 0usize as i32 }>(loaded),
+        svindex_s64((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i64(
+        svget4_s64::<{ 1usize as i32 }>(loaded),
+        svindex_s64((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i64(
+        svget4_s64::<{ 2usize as i32 }>(loaded),
+        svindex_s64((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_i64(
+        svget4_s64::<{ 3usize as i32 }>(loaded),
+        svindex_s64((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_u8_with_svst4_u8() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svcreate4_u8(
+        svindex_u8((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u8((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u8((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u8((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    svst4_u8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld4_u8(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_u8(
+        svget4_u8::<{ 0usize as i32 }>(loaded),
+        svindex_u8((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u8(
+        svget4_u8::<{ 1usize as i32 }>(loaded),
+        svindex_u8((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u8(
+        svget4_u8::<{ 2usize as i32 }>(loaded),
+        svindex_u8((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u8(
+        svget4_u8::<{ 3usize as i32 }>(loaded),
+        svindex_u8((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_u16_with_svst4_u16() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svcreate4_u16(
+        svindex_u16((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u16((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u16((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u16((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    svst4_u16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld4_u16(svptrue_b16(), storage.as_ptr() as *const u16);
+    assert_vector_matches_u16(
+        svget4_u16::<{ 0usize as i32 }>(loaded),
+        svindex_u16((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u16(
+        svget4_u16::<{ 1usize as i32 }>(loaded),
+        svindex_u16((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u16(
+        svget4_u16::<{ 2usize as i32 }>(loaded),
+        svindex_u16((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u16(
+        svget4_u16::<{ 3usize as i32 }>(loaded),
+        svindex_u16((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_u32_with_svst4_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svcreate4_u32(
+        svindex_u32((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u32((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u32((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u32((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    svst4_u32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld4_u32(svptrue_b32(), storage.as_ptr() as *const u32);
+    assert_vector_matches_u32(
+        svget4_u32::<{ 0usize as i32 }>(loaded),
+        svindex_u32((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u32(
+        svget4_u32::<{ 1usize as i32 }>(loaded),
+        svindex_u32((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u32(
+        svget4_u32::<{ 2usize as i32 }>(loaded),
+        svindex_u32((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u32(
+        svget4_u32::<{ 3usize as i32 }>(loaded),
+        svindex_u32((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_u64_with_svst4_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svcreate4_u64(
+        svindex_u64((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u64((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u64((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+        svindex_u64((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    svst4_u64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld4_u64(svptrue_b64(), storage.as_ptr() as *const u64);
+    assert_vector_matches_u64(
+        svget4_u64::<{ 0usize as i32 }>(loaded),
+        svindex_u64((0usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u64(
+        svget4_u64::<{ 1usize as i32 }>(loaded),
+        svindex_u64((1usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u64(
+        svget4_u64::<{ 2usize as i32 }>(loaded),
+        svindex_u64((2usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+    assert_vector_matches_u64(
+        svget4_u64::<{ 3usize as i32 }>(loaded),
+        svindex_u64((3usize).try_into().unwrap(), 4usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_f32_with_svst4_vnum_f32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as f32; 320usize];
+    let data = svcreate4_f32(
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 1usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 2usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 3usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    svst4_vnum_f32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_f32(svptrue_b32(), storage.as_ptr() as *const f32, 1);
+    assert_vector_matches_f32(
+        svget4_f32::<{ 0usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget4_f32::<{ 1usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 1usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget4_f32::<{ 2usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 2usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f32(
+        svget4_f32::<{ 3usize as i32 }>(loaded),
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 3usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_f64_with_svst4_vnum_f64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as f64; 160usize];
+    let data = svcreate4_f64(
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 1usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 2usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 3usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    svst4_vnum_f64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_f64(svptrue_b64(), storage.as_ptr() as *const f64, 1);
+    assert_vector_matches_f64(
+        svget4_f64::<{ 0usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget4_f64::<{ 1usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 1usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget4_f64::<{ 2usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 2usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+    assert_vector_matches_f64(
+        svget4_f64::<{ 3usize as i32 }>(loaded),
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 3usize).try_into().unwrap(),
+                4usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_s8_with_svst4_vnum_s8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svcreate4_s8(
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s8(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s8(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s8(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    svst4_vnum_s8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_s8(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_i8(
+        svget4_s8::<{ 0usize as i32 }>(loaded),
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i8(
+        svget4_s8::<{ 1usize as i32 }>(loaded),
+        svindex_s8(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i8(
+        svget4_s8::<{ 2usize as i32 }>(loaded),
+        svindex_s8(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i8(
+        svget4_s8::<{ 3usize as i32 }>(loaded),
+        svindex_s8(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_s16_with_svst4_vnum_s16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as i16; 640usize];
+    let data = svcreate4_s16(
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s16(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s16(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s16(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    svst4_vnum_s16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_s16(svptrue_b16(), storage.as_ptr() as *const i16, 1);
+    assert_vector_matches_i16(
+        svget4_s16::<{ 0usize as i32 }>(loaded),
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i16(
+        svget4_s16::<{ 1usize as i32 }>(loaded),
+        svindex_s16(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i16(
+        svget4_s16::<{ 2usize as i32 }>(loaded),
+        svindex_s16(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i16(
+        svget4_s16::<{ 3usize as i32 }>(loaded),
+        svindex_s16(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_s32_with_svst4_vnum_s32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as i32; 320usize];
+    let data = svcreate4_s32(
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s32(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s32(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s32(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    svst4_vnum_s32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_s32(svptrue_b32(), storage.as_ptr() as *const i32, 1);
+    assert_vector_matches_i32(
+        svget4_s32::<{ 0usize as i32 }>(loaded),
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i32(
+        svget4_s32::<{ 1usize as i32 }>(loaded),
+        svindex_s32(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i32(
+        svget4_s32::<{ 2usize as i32 }>(loaded),
+        svindex_s32(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i32(
+        svget4_s32::<{ 3usize as i32 }>(loaded),
+        svindex_s32(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_s64_with_svst4_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i64; 160usize];
+    let data = svcreate4_s64(
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s64(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s64(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_s64(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    svst4_vnum_s64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_s64(svptrue_b64(), storage.as_ptr() as *const i64, 1);
+    assert_vector_matches_i64(
+        svget4_s64::<{ 0usize as i32 }>(loaded),
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i64(
+        svget4_s64::<{ 1usize as i32 }>(loaded),
+        svindex_s64(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i64(
+        svget4_s64::<{ 2usize as i32 }>(loaded),
+        svindex_s64(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_i64(
+        svget4_s64::<{ 3usize as i32 }>(loaded),
+        svindex_s64(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_u8_with_svst4_vnum_u8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svcreate4_u8(
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u8(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u8(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u8(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    svst4_vnum_u8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_u8(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_u8(
+        svget4_u8::<{ 0usize as i32 }>(loaded),
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u8(
+        svget4_u8::<{ 1usize as i32 }>(loaded),
+        svindex_u8(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u8(
+        svget4_u8::<{ 2usize as i32 }>(loaded),
+        svindex_u8(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u8(
+        svget4_u8::<{ 3usize as i32 }>(loaded),
+        svindex_u8(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_u16_with_svst4_vnum_u16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as u16; 640usize];
+    let data = svcreate4_u16(
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u16(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u16(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u16(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    svst4_vnum_u16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_u16(svptrue_b16(), storage.as_ptr() as *const u16, 1);
+    assert_vector_matches_u16(
+        svget4_u16::<{ 0usize as i32 }>(loaded),
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u16(
+        svget4_u16::<{ 1usize as i32 }>(loaded),
+        svindex_u16(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u16(
+        svget4_u16::<{ 2usize as i32 }>(loaded),
+        svindex_u16(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u16(
+        svget4_u16::<{ 3usize as i32 }>(loaded),
+        svindex_u16(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_u32_with_svst4_vnum_u32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as u32; 320usize];
+    let data = svcreate4_u32(
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u32(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u32(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u32(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    svst4_vnum_u32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_u32(svptrue_b32(), storage.as_ptr() as *const u32, 1);
+    assert_vector_matches_u32(
+        svget4_u32::<{ 0usize as i32 }>(loaded),
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u32(
+        svget4_u32::<{ 1usize as i32 }>(loaded),
+        svindex_u32(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u32(
+        svget4_u32::<{ 2usize as i32 }>(loaded),
+        svindex_u32(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u32(
+        svget4_u32::<{ 3usize as i32 }>(loaded),
+        svindex_u32(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svld4_vnum_u64_with_svst4_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u64; 160usize];
+    let data = svcreate4_u64(
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u64(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u64(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+        svindex_u64(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    svst4_vnum_u64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svld4_vnum_u64(svptrue_b64(), storage.as_ptr() as *const u64, 1);
+    assert_vector_matches_u64(
+        svget4_u64::<{ 0usize as i32 }>(loaded),
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u64(
+        svget4_u64::<{ 1usize as i32 }>(loaded),
+        svindex_u64(
+            (len + 1usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u64(
+        svget4_u64::<{ 2usize as i32 }>(loaded),
+        svindex_u64(
+            (len + 2usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+    assert_vector_matches_u64(
+        svget4_u64::<{ 3usize as i32 }>(loaded),
+        svindex_u64(
+            (len + 3usize).try_into().unwrap(),
+            4usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_f32() {
+    svsetffr();
+    let _ = svld1_f32(svptrue_b32(), F32_DATA.as_ptr());
+    let loaded = svldff1_f32(svptrue_b32(), F32_DATA.as_ptr());
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_f64() {
+    svsetffr();
+    let _ = svld1_f64(svptrue_b64(), F64_DATA.as_ptr());
+    let loaded = svldff1_f64(svptrue_b64(), F64_DATA.as_ptr());
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_s8() {
+    svsetffr();
+    let _ = svld1_s8(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldff1_s8(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i8(
+        loaded,
+        svindex_s8((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_s16() {
+    svsetffr();
+    let _ = svld1_s16(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldff1_s16(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_s32() {
+    svsetffr();
+    let _ = svld1_s32(svptrue_b32(), I32_DATA.as_ptr());
+    let loaded = svldff1_s32(svptrue_b32(), I32_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_s64() {
+    svsetffr();
+    let _ = svld1_s64(svptrue_b64(), I64_DATA.as_ptr());
+    let loaded = svldff1_s64(svptrue_b64(), I64_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_u8() {
+    svsetffr();
+    let _ = svld1_u8(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldff1_u8(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u8(
+        loaded,
+        svindex_u8((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_u16() {
+    svsetffr();
+    let _ = svld1_u16(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldff1_u16(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_u32() {
+    svsetffr();
+    let _ = svld1_u32(svptrue_b32(), U32_DATA.as_ptr());
+    let loaded = svldff1_u32(svptrue_b32(), U32_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_u64() {
+    svsetffr();
+    let _ = svld1_u64(svptrue_b64(), U64_DATA.as_ptr());
+    let loaded = svldff1_u64(svptrue_b64(), U64_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s32index_f32() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let _ = svld1_gather_s32index_f32(svptrue_b32(), F32_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_s32index_f32(svptrue_b32(), F32_DATA.as_ptr(), indices);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s32index_s32() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let _ = svld1_gather_s32index_s32(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_s32index_s32(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s32index_u32() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let _ = svld1_gather_s32index_u32(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_s32index_u32(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s64index_f64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1_gather_s64index_f64(svptrue_b64(), F64_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_s64index_f64(svptrue_b64(), F64_DATA.as_ptr(), indices);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s64index_s64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1_gather_s64index_s64(svptrue_b64(), I64_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_s64index_s64(svptrue_b64(), I64_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s64index_u64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1_gather_s64index_u64(svptrue_b64(), U64_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_s64index_u64(svptrue_b64(), U64_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32index_f32() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let _ = svld1_gather_u32index_f32(svptrue_b32(), F32_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_u32index_f32(svptrue_b32(), F32_DATA.as_ptr(), indices);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32index_s32() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let _ = svld1_gather_u32index_s32(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_u32index_s32(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32index_u32() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let _ = svld1_gather_u32index_u32(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_u32index_u32(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64index_f64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1_gather_u64index_f64(svptrue_b64(), F64_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_u64index_f64(svptrue_b64(), F64_DATA.as_ptr(), indices);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64index_s64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1_gather_u64index_s64(svptrue_b64(), I64_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_u64index_s64(svptrue_b64(), I64_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64index_u64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1_gather_u64index_u64(svptrue_b64(), U64_DATA.as_ptr(), indices);
+    let loaded = svldff1_gather_u64index_u64(svptrue_b64(), U64_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s32offset_f32() {
+    let offsets = svindex_s32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_s32offset_f32(svptrue_b32(), F32_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_s32offset_f32(svptrue_b32(), F32_DATA.as_ptr(), offsets);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s32offset_s32() {
+    let offsets = svindex_s32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_s32offset_s32(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_s32offset_s32(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s32offset_u32() {
+    let offsets = svindex_s32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_s32offset_u32(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_s32offset_u32(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s64offset_f64() {
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_s64offset_f64(svptrue_b64(), F64_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_s64offset_f64(svptrue_b64(), F64_DATA.as_ptr(), offsets);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s64offset_s64() {
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_s64offset_s64(svptrue_b64(), I64_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_s64offset_s64(svptrue_b64(), I64_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_s64offset_u64() {
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_s64offset_u64(svptrue_b64(), U64_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_s64offset_u64(svptrue_b64(), U64_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32offset_f32() {
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u32offset_f32(svptrue_b32(), F32_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_u32offset_f32(svptrue_b32(), F32_DATA.as_ptr(), offsets);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32offset_s32() {
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u32offset_s32(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_u32offset_s32(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32offset_u32() {
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u32offset_u32(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_u32offset_u32(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64offset_f64() {
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u64offset_f64(svptrue_b64(), F64_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_u64offset_f64(svptrue_b64(), F64_DATA.as_ptr(), offsets);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64offset_s64() {
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u64offset_s64(svptrue_b64(), I64_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_u64offset_s64(svptrue_b64(), I64_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64offset_u64() {
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u64offset_u64(svptrue_b64(), U64_DATA.as_ptr(), offsets);
+    let loaded = svldff1_gather_u64offset_u64(svptrue_b64(), U64_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64base_f64() {
+    let bases = svdup_n_u64(F64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let _ = svld1_gather_u64base_f64(svptrue_b64(), bases);
+    let loaded = svldff1_gather_u64base_f64(svptrue_b64(), bases);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64base_s64() {
+    let bases = svdup_n_u64(I64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let _ = svld1_gather_u64base_s64(svptrue_b64(), bases);
+    let loaded = svldff1_gather_u64base_s64(svptrue_b64(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64base_u64() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let _ = svld1_gather_u64base_u64(svptrue_b64(), bases);
+    let loaded = svldff1_gather_u64base_u64(svptrue_b64(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32base_index_f32() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u32base_index_f32(
+        svptrue_b32(),
+        bases,
+        F32_DATA.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    let loaded = svldff1_gather_u32base_index_f32(
+        svptrue_b32(),
+        bases,
+        F32_DATA.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32base_index_s32() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u32base_index_s32(
+        svptrue_b32(),
+        bases,
+        I32_DATA.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    let loaded = svldff1_gather_u32base_index_s32(
+        svptrue_b32(),
+        bases,
+        I32_DATA.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32base_index_u32() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u32base_index_u32(
+        svptrue_b32(),
+        bases,
+        U32_DATA.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    let loaded = svldff1_gather_u32base_index_u32(
+        svptrue_b32(),
+        bases,
+        U32_DATA.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64base_index_f64() {
+    let bases = svdup_n_u64(F64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let _ = svld1_gather_u64base_index_f64(svptrue_b64(), bases, 1.try_into().unwrap());
+    let loaded = svldff1_gather_u64base_index_f64(svptrue_b64(), bases, 1.try_into().unwrap());
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64base_index_s64() {
+    let bases = svdup_n_u64(I64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let _ = svld1_gather_u64base_index_s64(svptrue_b64(), bases, 1.try_into().unwrap());
+    let loaded = svldff1_gather_u64base_index_s64(svptrue_b64(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64base_index_u64() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let _ = svld1_gather_u64base_index_u64(svptrue_b64(), bases, 1.try_into().unwrap());
+    let loaded = svldff1_gather_u64base_index_u64(svptrue_b64(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32base_offset_f32() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u32base_offset_f32(
+        svptrue_b32(),
+        bases,
+        F32_DATA.as_ptr() as i64 + 4u32 as i64,
+    );
+    let loaded = svldff1_gather_u32base_offset_f32(
+        svptrue_b32(),
+        bases,
+        F32_DATA.as_ptr() as i64 + 4u32 as i64,
+    );
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32base_offset_s32() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u32base_offset_s32(
+        svptrue_b32(),
+        bases,
+        I32_DATA.as_ptr() as i64 + 4u32 as i64,
+    );
+    let loaded = svldff1_gather_u32base_offset_s32(
+        svptrue_b32(),
+        bases,
+        I32_DATA.as_ptr() as i64 + 4u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u32base_offset_u32() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1_gather_u32base_offset_u32(
+        svptrue_b32(),
+        bases,
+        U32_DATA.as_ptr() as i64 + 4u32 as i64,
+    );
+    let loaded = svldff1_gather_u32base_offset_u32(
+        svptrue_b32(),
+        bases,
+        U32_DATA.as_ptr() as i64 + 4u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64base_offset_f64() {
+    let bases = svdup_n_u64(F64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let _ = svld1_gather_u64base_offset_f64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    let loaded = svldff1_gather_u64base_offset_f64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64base_offset_s64() {
+    let bases = svdup_n_u64(I64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let _ = svld1_gather_u64base_offset_s64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    let loaded = svldff1_gather_u64base_offset_s64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_gather_u64base_offset_u64() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let _ = svld1_gather_u64base_offset_u64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    let loaded = svldff1_gather_u64base_offset_u64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_f32() {
+    svsetffr();
+    let _ = svld1_vnum_f32(svptrue_b32(), F32_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_f32(svptrue_b32(), F32_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                1usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_f64() {
+    svsetffr();
+    let _ = svld1_vnum_f64(svptrue_b64(), F64_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_f64(svptrue_b64(), F64_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                1usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_s8() {
+    svsetffr();
+    let _ = svld1_vnum_s8(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_s8(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntb() as usize;
+    assert_vector_matches_i8(
+        loaded,
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_s16() {
+    svsetffr();
+    let _ = svld1_vnum_s16(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_s16(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_s32() {
+    svsetffr();
+    let _ = svld1_vnum_s32(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_s32(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_s64() {
+    svsetffr();
+    let _ = svld1_vnum_s64(svptrue_b64(), I64_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_s64(svptrue_b64(), I64_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_u8() {
+    svsetffr();
+    let _ = svld1_vnum_u8(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_u8(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntb() as usize;
+    assert_vector_matches_u8(
+        loaded,
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_u16() {
+    svsetffr();
+    let _ = svld1_vnum_u16(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_u16(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_u32() {
+    svsetffr();
+    let _ = svld1_vnum_u32(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_u32(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1_vnum_u64() {
+    svsetffr();
+    let _ = svld1_vnum_u64(svptrue_b64(), U64_DATA.as_ptr(), 1);
+    let loaded = svldff1_vnum_u64(svptrue_b64(), U64_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_s32offset_s32() {
+    let offsets = svindex_s32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_s32offset_s32(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    let loaded = svldff1sb_gather_s32offset_s32(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_s32offset_s32() {
+    let offsets = svindex_s32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_s32offset_s32(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    let loaded = svldff1sh_gather_s32offset_s32(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_s32offset_u32() {
+    let offsets = svindex_s32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_s32offset_u32(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    let loaded = svldff1sb_gather_s32offset_u32(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_s32offset_u32() {
+    let offsets = svindex_s32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_s32offset_u32(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    let loaded = svldff1sh_gather_s32offset_u32(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_s64offset_s64() {
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_s64offset_s64(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    let loaded = svldff1sb_gather_s64offset_s64(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_s64offset_s64() {
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_s64offset_s64(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    let loaded = svldff1sh_gather_s64offset_s64(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_s64offset_s64() {
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sw_gather_s64offset_s64(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    let loaded = svldff1sw_gather_s64offset_s64(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_s64offset_u64() {
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_s64offset_u64(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    let loaded = svldff1sb_gather_s64offset_u64(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_s64offset_u64() {
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_s64offset_u64(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    let loaded = svldff1sh_gather_s64offset_u64(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_s64offset_u64() {
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sw_gather_s64offset_u64(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    let loaded = svldff1sw_gather_s64offset_u64(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u32offset_s32() {
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_u32offset_s32(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    let loaded = svldff1sb_gather_u32offset_s32(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u32offset_s32() {
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_u32offset_s32(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    let loaded = svldff1sh_gather_u32offset_s32(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u32offset_u32() {
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_u32offset_u32(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    let loaded = svldff1sb_gather_u32offset_u32(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u32offset_u32() {
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_u32offset_u32(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    let loaded = svldff1sh_gather_u32offset_u32(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u64offset_s64() {
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_u64offset_s64(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    let loaded = svldff1sb_gather_u64offset_s64(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64offset_s64() {
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_u64offset_s64(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    let loaded = svldff1sh_gather_u64offset_s64(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64offset_s64() {
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sw_gather_u64offset_s64(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    let loaded = svldff1sw_gather_u64offset_s64(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u64offset_u64() {
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_u64offset_u64(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    let loaded = svldff1sb_gather_u64offset_u64(svptrue_b8(), I8_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64offset_u64() {
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_u64offset_u64(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    let loaded = svldff1sh_gather_u64offset_u64(svptrue_b16(), I16_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64offset_u64() {
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sw_gather_u64offset_u64(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    let loaded = svldff1sw_gather_u64offset_u64(svptrue_b32(), I32_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u32base_offset_s32() {
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        I8_DATA.as_ptr() as i64 + 1u32 as i64,
+    );
+    let loaded = svldff1sb_gather_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        I8_DATA.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u32base_offset_s32() {
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        I16_DATA.as_ptr() as i64 + 2u32 as i64,
+    );
+    let loaded = svldff1sh_gather_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        I16_DATA.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u32base_offset_u32() {
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sb_gather_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        I8_DATA.as_ptr() as i64 + 1u32 as i64,
+    );
+    let loaded = svldff1sb_gather_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        I8_DATA.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u32base_offset_u32() {
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        I16_DATA.as_ptr() as i64 + 2u32 as i64,
+    );
+    let loaded = svldff1sh_gather_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        I16_DATA.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u64base_offset_s64() {
+    let bases = svdup_n_u64(I8_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svsetffr();
+    let _ = svld1sb_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    let loaded = svldff1sb_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64base_offset_s64() {
+    let bases = svdup_n_u64(I16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1sh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    let loaded =
+        svldff1sh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64base_offset_s64() {
+    let bases = svdup_n_u64(I32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1sw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    let loaded =
+        svldff1sw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u64base_offset_u64() {
+    let bases = svdup_n_u64(I8_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svsetffr();
+    let _ = svld1sb_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    let loaded = svldff1sb_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64base_offset_u64() {
+    let bases = svdup_n_u64(I16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1sh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    let loaded =
+        svldff1sh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64base_offset_u64() {
+    let bases = svdup_n_u64(I32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1sw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    let loaded =
+        svldff1sw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u64base_s64() {
+    let bases = svdup_n_u64(I8_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svsetffr();
+    let _ = svld1sb_gather_u64base_s64(svptrue_b8(), bases);
+    let loaded = svldff1sb_gather_u64base_s64(svptrue_b8(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64base_s64() {
+    let bases = svdup_n_u64(I16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1sh_gather_u64base_s64(svptrue_b16(), bases);
+    let loaded = svldff1sh_gather_u64base_s64(svptrue_b16(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64base_s64() {
+    let bases = svdup_n_u64(I32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1sw_gather_u64base_s64(svptrue_b32(), bases);
+    let loaded = svldff1sw_gather_u64base_s64(svptrue_b32(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_gather_u64base_u64() {
+    let bases = svdup_n_u64(I8_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svsetffr();
+    let _ = svld1sb_gather_u64base_u64(svptrue_b8(), bases);
+    let loaded = svldff1sb_gather_u64base_u64(svptrue_b8(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64base_u64() {
+    let bases = svdup_n_u64(I16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1sh_gather_u64base_u64(svptrue_b16(), bases);
+    let loaded = svldff1sh_gather_u64base_u64(svptrue_b16(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64base_u64() {
+    let bases = svdup_n_u64(I32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1sw_gather_u64base_u64(svptrue_b32(), bases);
+    let loaded = svldff1sw_gather_u64base_u64(svptrue_b32(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_s16() {
+    svsetffr();
+    let _ = svld1sb_s16(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldff1sb_s16(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_s32() {
+    svsetffr();
+    let _ = svld1sb_s32(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldff1sb_s32(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_s32() {
+    svsetffr();
+    let _ = svld1sh_s32(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldff1sh_s32(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_s64() {
+    svsetffr();
+    let _ = svld1sb_s64(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldff1sb_s64(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_s64() {
+    svsetffr();
+    let _ = svld1sh_s64(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldff1sh_s64(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_s64() {
+    svsetffr();
+    let _ = svld1sw_s64(svptrue_b32(), I32_DATA.as_ptr());
+    let loaded = svldff1sw_s64(svptrue_b32(), I32_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_u16() {
+    svsetffr();
+    let _ = svld1sb_u16(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldff1sb_u16(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_u32() {
+    svsetffr();
+    let _ = svld1sb_u32(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldff1sb_u32(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_u32() {
+    svsetffr();
+    let _ = svld1sh_u32(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldff1sh_u32(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_u64() {
+    svsetffr();
+    let _ = svld1sb_u64(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldff1sb_u64(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_u64() {
+    svsetffr();
+    let _ = svld1sh_u64(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldff1sh_u64(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_u64() {
+    svsetffr();
+    let _ = svld1sw_u64(svptrue_b32(), I32_DATA.as_ptr());
+    let loaded = svldff1sw_u64(svptrue_b32(), I32_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_vnum_s16() {
+    svsetffr();
+    let _ = svld1sb_vnum_s16(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldff1sb_vnum_s16(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_vnum_s32() {
+    svsetffr();
+    let _ = svld1sb_vnum_s32(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldff1sb_vnum_s32(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_vnum_s32() {
+    svsetffr();
+    let _ = svld1sh_vnum_s32(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldff1sh_vnum_s32(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_vnum_s64() {
+    svsetffr();
+    let _ = svld1sb_vnum_s64(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldff1sb_vnum_s64(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_vnum_s64() {
+    svsetffr();
+    let _ = svld1sh_vnum_s64(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldff1sh_vnum_s64(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_vnum_s64() {
+    svsetffr();
+    let _ = svld1sw_vnum_s64(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let loaded = svldff1sw_vnum_s64(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_vnum_u16() {
+    svsetffr();
+    let _ = svld1sb_vnum_u16(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldff1sb_vnum_u16(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_vnum_u32() {
+    svsetffr();
+    let _ = svld1sb_vnum_u32(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldff1sb_vnum_u32(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_vnum_u32() {
+    svsetffr();
+    let _ = svld1sh_vnum_u32(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldff1sh_vnum_u32(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sb_vnum_u64() {
+    svsetffr();
+    let _ = svld1sb_vnum_u64(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldff1sb_vnum_u64(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_vnum_u64() {
+    svsetffr();
+    let _ = svld1sh_vnum_u64(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldff1sh_vnum_u64(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_vnum_u64() {
+    svsetffr();
+    let _ = svld1sw_vnum_u64(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let loaded = svldff1sw_vnum_u64(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_s32index_s32() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let _ = svld1sh_gather_s32index_s32(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    let loaded = svldff1sh_gather_s32index_s32(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_s32index_u32() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let _ = svld1sh_gather_s32index_u32(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    let loaded = svldff1sh_gather_s32index_u32(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_s64index_s64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1sh_gather_s64index_s64(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    let loaded = svldff1sh_gather_s64index_s64(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_s64index_s64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1sw_gather_s64index_s64(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    let loaded = svldff1sw_gather_s64index_s64(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_s64index_u64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1sh_gather_s64index_u64(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    let loaded = svldff1sh_gather_s64index_u64(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_s64index_u64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1sw_gather_s64index_u64(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    let loaded = svldff1sw_gather_s64index_u64(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u32index_s32() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let _ = svld1sh_gather_u32index_s32(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    let loaded = svldff1sh_gather_u32index_s32(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u32index_u32() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let _ = svld1sh_gather_u32index_u32(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    let loaded = svldff1sh_gather_u32index_u32(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64index_s64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1sh_gather_u64index_s64(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    let loaded = svldff1sh_gather_u64index_s64(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64index_s64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1sw_gather_u64index_s64(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    let loaded = svldff1sw_gather_u64index_s64(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64index_u64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1sh_gather_u64index_u64(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    let loaded = svldff1sh_gather_u64index_u64(svptrue_b16(), I16_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64index_u64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1sw_gather_u64index_u64(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    let loaded = svldff1sw_gather_u64index_u64(svptrue_b32(), I32_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u32base_index_s32() {
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        I16_DATA.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    let loaded = svldff1sh_gather_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        I16_DATA.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u32base_index_u32() {
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1sh_gather_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        I16_DATA.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    let loaded = svldff1sh_gather_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        I16_DATA.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64base_index_s64() {
+    let bases = svdup_n_u64(I16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1sh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap());
+    let loaded = svldff1sh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64base_index_s64() {
+    let bases = svdup_n_u64(I32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1sw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap());
+    let loaded = svldff1sw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sh_gather_u64base_index_u64() {
+    let bases = svdup_n_u64(I16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1sh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap());
+    let loaded = svldff1sh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1sw_gather_u64base_index_u64() {
+    let bases = svdup_n_u64(I32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1sw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap());
+    let loaded = svldff1sw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_s32offset_s32() {
+    let offsets = svindex_s32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_s32offset_s32(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    let loaded = svldff1ub_gather_s32offset_s32(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_s32offset_s32() {
+    let offsets = svindex_s32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_s32offset_s32(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    let loaded = svldff1uh_gather_s32offset_s32(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_s32offset_u32() {
+    let offsets = svindex_s32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_s32offset_u32(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    let loaded = svldff1ub_gather_s32offset_u32(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_s32offset_u32() {
+    let offsets = svindex_s32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_s32offset_u32(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    let loaded = svldff1uh_gather_s32offset_u32(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_s64offset_s64() {
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_s64offset_s64(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    let loaded = svldff1ub_gather_s64offset_s64(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_s64offset_s64() {
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_s64offset_s64(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    let loaded = svldff1uh_gather_s64offset_s64(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_s64offset_s64() {
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uw_gather_s64offset_s64(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    let loaded = svldff1uw_gather_s64offset_s64(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_s64offset_u64() {
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_s64offset_u64(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    let loaded = svldff1ub_gather_s64offset_u64(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_s64offset_u64() {
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_s64offset_u64(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    let loaded = svldff1uh_gather_s64offset_u64(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_s64offset_u64() {
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uw_gather_s64offset_u64(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    let loaded = svldff1uw_gather_s64offset_u64(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u32offset_s32() {
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_u32offset_s32(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    let loaded = svldff1ub_gather_u32offset_s32(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u32offset_s32() {
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_u32offset_s32(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    let loaded = svldff1uh_gather_u32offset_s32(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u32offset_u32() {
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_u32offset_u32(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    let loaded = svldff1ub_gather_u32offset_u32(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u32offset_u32() {
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_u32offset_u32(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    let loaded = svldff1uh_gather_u32offset_u32(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u64offset_s64() {
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_u64offset_s64(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    let loaded = svldff1ub_gather_u64offset_s64(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64offset_s64() {
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_u64offset_s64(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    let loaded = svldff1uh_gather_u64offset_s64(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64offset_s64() {
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uw_gather_u64offset_s64(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    let loaded = svldff1uw_gather_u64offset_s64(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u64offset_u64() {
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_u64offset_u64(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    let loaded = svldff1ub_gather_u64offset_u64(svptrue_b8(), U8_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64offset_u64() {
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_u64offset_u64(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    let loaded = svldff1uh_gather_u64offset_u64(svptrue_b16(), U16_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64offset_u64() {
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uw_gather_u64offset_u64(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    let loaded = svldff1uw_gather_u64offset_u64(svptrue_b32(), U32_DATA.as_ptr(), offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u32base_offset_s32() {
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        U8_DATA.as_ptr() as i64 + 1u32 as i64,
+    );
+    let loaded = svldff1ub_gather_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        U8_DATA.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u32base_offset_s32() {
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        U16_DATA.as_ptr() as i64 + 2u32 as i64,
+    );
+    let loaded = svldff1uh_gather_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        U16_DATA.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u32base_offset_u32() {
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1ub_gather_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        U8_DATA.as_ptr() as i64 + 1u32 as i64,
+    );
+    let loaded = svldff1ub_gather_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        U8_DATA.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u32base_offset_u32() {
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        U16_DATA.as_ptr() as i64 + 2u32 as i64,
+    );
+    let loaded = svldff1uh_gather_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        U16_DATA.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u64base_offset_s64() {
+    let bases = svdup_n_u64(U8_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svsetffr();
+    let _ = svld1ub_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    let loaded = svldff1ub_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64base_offset_s64() {
+    let bases = svdup_n_u64(U16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1uh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    let loaded =
+        svldff1uh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64base_offset_s64() {
+    let bases = svdup_n_u64(U32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1uw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    let loaded =
+        svldff1uw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u64base_offset_u64() {
+    let bases = svdup_n_u64(U8_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svsetffr();
+    let _ = svld1ub_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    let loaded = svldff1ub_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64base_offset_u64() {
+    let bases = svdup_n_u64(U16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1uh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    let loaded =
+        svldff1uh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64base_offset_u64() {
+    let bases = svdup_n_u64(U32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1uw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    let loaded =
+        svldff1uw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u64base_s64() {
+    let bases = svdup_n_u64(U8_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svsetffr();
+    let _ = svld1ub_gather_u64base_s64(svptrue_b8(), bases);
+    let loaded = svldff1ub_gather_u64base_s64(svptrue_b8(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64base_s64() {
+    let bases = svdup_n_u64(U16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1uh_gather_u64base_s64(svptrue_b16(), bases);
+    let loaded = svldff1uh_gather_u64base_s64(svptrue_b16(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64base_s64() {
+    let bases = svdup_n_u64(U32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1uw_gather_u64base_s64(svptrue_b32(), bases);
+    let loaded = svldff1uw_gather_u64base_s64(svptrue_b32(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_gather_u64base_u64() {
+    let bases = svdup_n_u64(U8_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svsetffr();
+    let _ = svld1ub_gather_u64base_u64(svptrue_b8(), bases);
+    let loaded = svldff1ub_gather_u64base_u64(svptrue_b8(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64base_u64() {
+    let bases = svdup_n_u64(U16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1uh_gather_u64base_u64(svptrue_b16(), bases);
+    let loaded = svldff1uh_gather_u64base_u64(svptrue_b16(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64base_u64() {
+    let bases = svdup_n_u64(U32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1uw_gather_u64base_u64(svptrue_b32(), bases);
+    let loaded = svldff1uw_gather_u64base_u64(svptrue_b32(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_s16() {
+    svsetffr();
+    let _ = svld1ub_s16(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldff1ub_s16(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_s32() {
+    svsetffr();
+    let _ = svld1ub_s32(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldff1ub_s32(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_s32() {
+    svsetffr();
+    let _ = svld1uh_s32(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldff1uh_s32(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_s64() {
+    svsetffr();
+    let _ = svld1ub_s64(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldff1ub_s64(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_s64() {
+    svsetffr();
+    let _ = svld1uh_s64(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldff1uh_s64(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_s64() {
+    svsetffr();
+    let _ = svld1uw_s64(svptrue_b32(), U32_DATA.as_ptr());
+    let loaded = svldff1uw_s64(svptrue_b32(), U32_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_u16() {
+    svsetffr();
+    let _ = svld1ub_u16(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldff1ub_u16(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_u32() {
+    svsetffr();
+    let _ = svld1ub_u32(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldff1ub_u32(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_u32() {
+    svsetffr();
+    let _ = svld1uh_u32(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldff1uh_u32(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_u64() {
+    svsetffr();
+    let _ = svld1ub_u64(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldff1ub_u64(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_u64() {
+    svsetffr();
+    let _ = svld1uh_u64(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldff1uh_u64(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_u64() {
+    svsetffr();
+    let _ = svld1uw_u64(svptrue_b32(), U32_DATA.as_ptr());
+    let loaded = svldff1uw_u64(svptrue_b32(), U32_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_vnum_s16() {
+    svsetffr();
+    let _ = svld1ub_vnum_s16(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldff1ub_vnum_s16(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_vnum_s32() {
+    svsetffr();
+    let _ = svld1ub_vnum_s32(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldff1ub_vnum_s32(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_vnum_s32() {
+    svsetffr();
+    let _ = svld1uh_vnum_s32(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldff1uh_vnum_s32(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_vnum_s64() {
+    svsetffr();
+    let _ = svld1ub_vnum_s64(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldff1ub_vnum_s64(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_vnum_s64() {
+    svsetffr();
+    let _ = svld1uh_vnum_s64(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldff1uh_vnum_s64(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_vnum_s64() {
+    svsetffr();
+    let _ = svld1uw_vnum_s64(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let loaded = svldff1uw_vnum_s64(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_vnum_u16() {
+    svsetffr();
+    let _ = svld1ub_vnum_u16(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldff1ub_vnum_u16(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_vnum_u32() {
+    svsetffr();
+    let _ = svld1ub_vnum_u32(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldff1ub_vnum_u32(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_vnum_u32() {
+    svsetffr();
+    let _ = svld1uh_vnum_u32(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldff1uh_vnum_u32(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1ub_vnum_u64() {
+    svsetffr();
+    let _ = svld1ub_vnum_u64(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldff1ub_vnum_u64(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_vnum_u64() {
+    svsetffr();
+    let _ = svld1uh_vnum_u64(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldff1uh_vnum_u64(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_vnum_u64() {
+    svsetffr();
+    let _ = svld1uw_vnum_u64(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let loaded = svldff1uw_vnum_u64(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_s32index_s32() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let _ = svld1uh_gather_s32index_s32(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    let loaded = svldff1uh_gather_s32index_s32(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_s32index_u32() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let _ = svld1uh_gather_s32index_u32(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    let loaded = svldff1uh_gather_s32index_u32(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_s64index_s64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1uh_gather_s64index_s64(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    let loaded = svldff1uh_gather_s64index_s64(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_s64index_s64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1uw_gather_s64index_s64(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    let loaded = svldff1uw_gather_s64index_s64(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_s64index_u64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1uh_gather_s64index_u64(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    let loaded = svldff1uh_gather_s64index_u64(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_s64index_u64() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let _ = svld1uw_gather_s64index_u64(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    let loaded = svldff1uw_gather_s64index_u64(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u32index_s32() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let _ = svld1uh_gather_u32index_s32(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    let loaded = svldff1uh_gather_u32index_s32(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u32index_u32() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let _ = svld1uh_gather_u32index_u32(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    let loaded = svldff1uh_gather_u32index_u32(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64index_s64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1uh_gather_u64index_s64(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    let loaded = svldff1uh_gather_u64index_s64(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64index_s64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1uw_gather_u64index_s64(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    let loaded = svldff1uw_gather_u64index_s64(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64index_u64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1uh_gather_u64index_u64(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    let loaded = svldff1uh_gather_u64index_u64(svptrue_b16(), U16_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64index_u64() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let _ = svld1uw_gather_u64index_u64(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    let loaded = svldff1uw_gather_u64index_u64(svptrue_b32(), U32_DATA.as_ptr(), indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u32base_index_s32() {
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        U16_DATA.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    let loaded = svldff1uh_gather_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        U16_DATA.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u32base_index_u32() {
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svsetffr();
+    let _ = svld1uh_gather_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        U16_DATA.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    let loaded = svldff1uh_gather_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        U16_DATA.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64base_index_s64() {
+    let bases = svdup_n_u64(U16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1uh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap());
+    let loaded = svldff1uh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64base_index_s64() {
+    let bases = svdup_n_u64(U32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1uw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap());
+    let loaded = svldff1uw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uh_gather_u64base_index_u64() {
+    let bases = svdup_n_u64(U16_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svsetffr();
+    let _ = svld1uh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap());
+    let loaded = svldff1uh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldff1uw_gather_u64base_index_u64() {
+    let bases = svdup_n_u64(U32_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svsetffr();
+    let _ = svld1uw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap());
+    let loaded = svldff1uw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_f32() {
+    svsetffr();
+    let _ = svld1_f32(svptrue_b32(), F32_DATA.as_ptr());
+    let loaded = svldnf1_f32(svptrue_b32(), F32_DATA.as_ptr());
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_f64() {
+    svsetffr();
+    let _ = svld1_f64(svptrue_b64(), F64_DATA.as_ptr());
+    let loaded = svldnf1_f64(svptrue_b64(), F64_DATA.as_ptr());
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_s8() {
+    svsetffr();
+    let _ = svld1_s8(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldnf1_s8(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i8(
+        loaded,
+        svindex_s8((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_s16() {
+    svsetffr();
+    let _ = svld1_s16(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldnf1_s16(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_s32() {
+    svsetffr();
+    let _ = svld1_s32(svptrue_b32(), I32_DATA.as_ptr());
+    let loaded = svldnf1_s32(svptrue_b32(), I32_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_s64() {
+    svsetffr();
+    let _ = svld1_s64(svptrue_b64(), I64_DATA.as_ptr());
+    let loaded = svldnf1_s64(svptrue_b64(), I64_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_u8() {
+    svsetffr();
+    let _ = svld1_u8(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldnf1_u8(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u8(
+        loaded,
+        svindex_u8((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_u16() {
+    svsetffr();
+    let _ = svld1_u16(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldnf1_u16(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_u32() {
+    svsetffr();
+    let _ = svld1_u32(svptrue_b32(), U32_DATA.as_ptr());
+    let loaded = svldnf1_u32(svptrue_b32(), U32_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_u64() {
+    svsetffr();
+    let _ = svld1_u64(svptrue_b64(), U64_DATA.as_ptr());
+    let loaded = svldnf1_u64(svptrue_b64(), U64_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_f32() {
+    svsetffr();
+    let _ = svld1_vnum_f32(svptrue_b32(), F32_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_f32(svptrue_b32(), F32_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                1usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_f64() {
+    svsetffr();
+    let _ = svld1_vnum_f64(svptrue_b64(), F64_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_f64(svptrue_b64(), F64_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                1usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_s8() {
+    svsetffr();
+    let _ = svld1_vnum_s8(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_s8(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntb() as usize;
+    assert_vector_matches_i8(
+        loaded,
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_s16() {
+    svsetffr();
+    let _ = svld1_vnum_s16(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_s16(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_s32() {
+    svsetffr();
+    let _ = svld1_vnum_s32(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_s32(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_s64() {
+    svsetffr();
+    let _ = svld1_vnum_s64(svptrue_b64(), I64_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_s64(svptrue_b64(), I64_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_u8() {
+    svsetffr();
+    let _ = svld1_vnum_u8(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_u8(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntb() as usize;
+    assert_vector_matches_u8(
+        loaded,
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_u16() {
+    svsetffr();
+    let _ = svld1_vnum_u16(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_u16(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_u32() {
+    svsetffr();
+    let _ = svld1_vnum_u32(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_u32(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1_vnum_u64() {
+    svsetffr();
+    let _ = svld1_vnum_u64(svptrue_b64(), U64_DATA.as_ptr(), 1);
+    let loaded = svldnf1_vnum_u64(svptrue_b64(), U64_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_s16() {
+    svsetffr();
+    let _ = svld1sb_s16(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldnf1sb_s16(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_s32() {
+    svsetffr();
+    let _ = svld1sb_s32(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldnf1sb_s32(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sh_s32() {
+    svsetffr();
+    let _ = svld1sh_s32(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldnf1sh_s32(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_s64() {
+    svsetffr();
+    let _ = svld1sb_s64(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldnf1sb_s64(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sh_s64() {
+    svsetffr();
+    let _ = svld1sh_s64(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldnf1sh_s64(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sw_s64() {
+    svsetffr();
+    let _ = svld1sw_s64(svptrue_b32(), I32_DATA.as_ptr());
+    let loaded = svldnf1sw_s64(svptrue_b32(), I32_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_u16() {
+    svsetffr();
+    let _ = svld1sb_u16(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldnf1sb_u16(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_u32() {
+    svsetffr();
+    let _ = svld1sb_u32(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldnf1sb_u32(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sh_u32() {
+    svsetffr();
+    let _ = svld1sh_u32(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldnf1sh_u32(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_u64() {
+    svsetffr();
+    let _ = svld1sb_u64(svptrue_b8(), I8_DATA.as_ptr());
+    let loaded = svldnf1sb_u64(svptrue_b8(), I8_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sh_u64() {
+    svsetffr();
+    let _ = svld1sh_u64(svptrue_b16(), I16_DATA.as_ptr());
+    let loaded = svldnf1sh_u64(svptrue_b16(), I16_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sw_u64() {
+    svsetffr();
+    let _ = svld1sw_u64(svptrue_b32(), I32_DATA.as_ptr());
+    let loaded = svldnf1sw_u64(svptrue_b32(), I32_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_vnum_s16() {
+    svsetffr();
+    let _ = svld1sb_vnum_s16(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldnf1sb_vnum_s16(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_vnum_s32() {
+    svsetffr();
+    let _ = svld1sb_vnum_s32(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldnf1sb_vnum_s32(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sh_vnum_s32() {
+    svsetffr();
+    let _ = svld1sh_vnum_s32(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldnf1sh_vnum_s32(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_vnum_s64() {
+    svsetffr();
+    let _ = svld1sb_vnum_s64(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldnf1sb_vnum_s64(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sh_vnum_s64() {
+    svsetffr();
+    let _ = svld1sh_vnum_s64(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldnf1sh_vnum_s64(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sw_vnum_s64() {
+    svsetffr();
+    let _ = svld1sw_vnum_s64(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let loaded = svldnf1sw_vnum_s64(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_vnum_u16() {
+    svsetffr();
+    let _ = svld1sb_vnum_u16(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldnf1sb_vnum_u16(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_vnum_u32() {
+    svsetffr();
+    let _ = svld1sb_vnum_u32(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldnf1sb_vnum_u32(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sh_vnum_u32() {
+    svsetffr();
+    let _ = svld1sh_vnum_u32(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldnf1sh_vnum_u32(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sb_vnum_u64() {
+    svsetffr();
+    let _ = svld1sb_vnum_u64(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let loaded = svldnf1sb_vnum_u64(svptrue_b8(), I8_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sh_vnum_u64() {
+    svsetffr();
+    let _ = svld1sh_vnum_u64(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let loaded = svldnf1sh_vnum_u64(svptrue_b16(), I16_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1sw_vnum_u64() {
+    svsetffr();
+    let _ = svld1sw_vnum_u64(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let loaded = svldnf1sw_vnum_u64(svptrue_b32(), I32_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_s16() {
+    svsetffr();
+    let _ = svld1ub_s16(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldnf1ub_s16(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_s32() {
+    svsetffr();
+    let _ = svld1ub_s32(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldnf1ub_s32(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uh_s32() {
+    svsetffr();
+    let _ = svld1uh_s32(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldnf1uh_s32(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_s64() {
+    svsetffr();
+    let _ = svld1ub_s64(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldnf1ub_s64(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uh_s64() {
+    svsetffr();
+    let _ = svld1uh_s64(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldnf1uh_s64(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uw_s64() {
+    svsetffr();
+    let _ = svld1uw_s64(svptrue_b32(), U32_DATA.as_ptr());
+    let loaded = svldnf1uw_s64(svptrue_b32(), U32_DATA.as_ptr());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_u16() {
+    svsetffr();
+    let _ = svld1ub_u16(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldnf1ub_u16(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_u32() {
+    svsetffr();
+    let _ = svld1ub_u32(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldnf1ub_u32(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uh_u32() {
+    svsetffr();
+    let _ = svld1uh_u32(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldnf1uh_u32(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_u64() {
+    svsetffr();
+    let _ = svld1ub_u64(svptrue_b8(), U8_DATA.as_ptr());
+    let loaded = svldnf1ub_u64(svptrue_b8(), U8_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uh_u64() {
+    svsetffr();
+    let _ = svld1uh_u64(svptrue_b16(), U16_DATA.as_ptr());
+    let loaded = svldnf1uh_u64(svptrue_b16(), U16_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uw_u64() {
+    svsetffr();
+    let _ = svld1uw_u64(svptrue_b32(), U32_DATA.as_ptr());
+    let loaded = svldnf1uw_u64(svptrue_b32(), U32_DATA.as_ptr());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_vnum_s16() {
+    svsetffr();
+    let _ = svld1ub_vnum_s16(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldnf1ub_vnum_s16(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_vnum_s32() {
+    svsetffr();
+    let _ = svld1ub_vnum_s32(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldnf1ub_vnum_s32(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uh_vnum_s32() {
+    svsetffr();
+    let _ = svld1uh_vnum_s32(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldnf1uh_vnum_s32(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_vnum_s64() {
+    svsetffr();
+    let _ = svld1ub_vnum_s64(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldnf1ub_vnum_s64(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uh_vnum_s64() {
+    svsetffr();
+    let _ = svld1uh_vnum_s64(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldnf1uh_vnum_s64(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uw_vnum_s64() {
+    svsetffr();
+    let _ = svld1uw_vnum_s64(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let loaded = svldnf1uw_vnum_s64(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_vnum_u16() {
+    svsetffr();
+    let _ = svld1ub_vnum_u16(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldnf1ub_vnum_u16(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcnth() as usize;
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_vnum_u32() {
+    svsetffr();
+    let _ = svld1ub_vnum_u32(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldnf1ub_vnum_u32(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uh_vnum_u32() {
+    svsetffr();
+    let _ = svld1uh_vnum_u32(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldnf1uh_vnum_u32(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcntw() as usize;
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1ub_vnum_u64() {
+    svsetffr();
+    let _ = svld1ub_vnum_u64(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let loaded = svldnf1ub_vnum_u64(svptrue_b8(), U8_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uh_vnum_u64() {
+    svsetffr();
+    let _ = svld1uh_vnum_u64(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let loaded = svldnf1uh_vnum_u64(svptrue_b16(), U16_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnf1uw_vnum_u64() {
+    svsetffr();
+    let _ = svld1uw_vnum_u64(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let loaded = svldnf1uw_vnum_u64(svptrue_b32(), U32_DATA.as_ptr(), 1);
+    let len = svcntd() as usize;
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_f32_with_svstnt1_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    svstnt1_f32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svldnt1_f32(svptrue_b32(), storage.as_ptr() as *const f32);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_f64_with_svstnt1_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    svstnt1_f64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svldnt1_f64(svptrue_b64(), storage.as_ptr() as *const f64);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_s8_with_svstnt1_s8() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s8((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svstnt1_s8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1_s8(svptrue_b8(), storage.as_ptr() as *const i8);
+    assert_vector_matches_i8(
+        loaded,
+        svindex_s8((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_s16_with_svstnt1_s16() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svstnt1_s16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1_s16(svptrue_b16(), storage.as_ptr() as *const i16);
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_s32_with_svstnt1_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svstnt1_s32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1_s32(svptrue_b32(), storage.as_ptr() as *const i32);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_s64_with_svstnt1_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svstnt1_s64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svldnt1_s64(svptrue_b64(), storage.as_ptr() as *const i64);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_u8_with_svstnt1_u8() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u8((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svstnt1_u8(svptrue_b8(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svldnt1_u8(svptrue_b8(), storage.as_ptr() as *const u8);
+    assert_vector_matches_u8(
+        loaded,
+        svindex_u8((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_u16_with_svstnt1_u16() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svstnt1_u16(svptrue_b16(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svldnt1_u16(svptrue_b16(), storage.as_ptr() as *const u16);
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_u32_with_svstnt1_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svstnt1_u32(svptrue_b32(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svldnt1_u32(svptrue_b32(), storage.as_ptr() as *const u32);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_u64_with_svstnt1_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    svstnt1_u64(svptrue_b64(), storage.as_mut_ptr(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svldnt1_u64(svptrue_b64(), storage.as_ptr() as *const u64);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_f32_with_svstnt1_vnum_f32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+    svstnt1_vnum_f32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_f32(svptrue_b32(), storage.as_ptr() as *const f32, 1);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32(
+                (len + 0usize).try_into().unwrap(),
+                1usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_f64_with_svstnt1_vnum_f64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+    svstnt1_vnum_f64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_f64(svptrue_b64(), storage.as_ptr() as *const f64, 1);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64(
+                (len + 0usize).try_into().unwrap(),
+                1usize.try_into().unwrap(),
+            ),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_s8_with_svstnt1_vnum_s8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s8(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svstnt1_vnum_s8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_s8(svptrue_b8(), storage.as_ptr() as *const i8, 1);
+    assert_vector_matches_i8(
+        loaded,
+        svindex_s8(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_s16_with_svstnt1_vnum_s16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s16(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svstnt1_vnum_s16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_s16(svptrue_b16(), storage.as_ptr() as *const i16, 1);
+    assert_vector_matches_i16(
+        loaded,
+        svindex_s16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_s32_with_svstnt1_vnum_s32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svstnt1_vnum_s32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_s32(svptrue_b32(), storage.as_ptr() as *const i32, 1);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_s64_with_svstnt1_vnum_s64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svstnt1_vnum_s64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_s64(svptrue_b64(), storage.as_ptr() as *const i64, 1);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_u8_with_svstnt1_vnum_u8() {
+    let len = svcntb() as usize;
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u8(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svstnt1_vnum_u8(svptrue_b8(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_u8(svptrue_b8(), storage.as_ptr() as *const u8, 1);
+    assert_vector_matches_u8(
+        loaded,
+        svindex_u8(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_u16_with_svstnt1_vnum_u16() {
+    let len = svcnth() as usize;
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u16(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svstnt1_vnum_u16(svptrue_b16(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_u16(svptrue_b16(), storage.as_ptr() as *const u16, 1);
+    assert_vector_matches_u16(
+        loaded,
+        svindex_u16(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_u32_with_svstnt1_vnum_u32() {
+    let len = svcntw() as usize;
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svstnt1_vnum_u32(svptrue_b32(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_u32(svptrue_b32(), storage.as_ptr() as *const u32, 1);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svldnt1_vnum_u64_with_svstnt1_vnum_u64() {
+    let len = svcntd() as usize;
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64(
+        (len + 0usize).try_into().unwrap(),
+        1usize.try_into().unwrap(),
+    );
+    svstnt1_vnum_u64(svptrue_b64(), storage.as_mut_ptr(), 1, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svldnt1_vnum_u64(svptrue_b64(), storage.as_ptr() as *const u64, 1);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64(
+            (len + 0usize).try_into().unwrap(),
+            1usize.try_into().unwrap(),
+        ),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfb() {
+    svsetffr();
+    let loaded = svprfb::<{ svprfop::SV_PLDL1KEEP }, i64>(svptrue_b8(), I64_DATA.as_ptr());
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfh() {
+    svsetffr();
+    let loaded = svprfh::<{ svprfop::SV_PLDL1KEEP }, i64>(svptrue_b16(), I64_DATA.as_ptr());
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfw() {
+    svsetffr();
+    let loaded = svprfw::<{ svprfop::SV_PLDL1KEEP }, i64>(svptrue_b32(), I64_DATA.as_ptr());
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfd() {
+    svsetffr();
+    let loaded = svprfd::<{ svprfop::SV_PLDL1KEEP }, i64>(svptrue_b64(), I64_DATA.as_ptr());
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfb_gather_s32offset() {
+    let offsets = svindex_s32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let loaded = svprfb_gather_s32offset::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b32(),
+        I64_DATA.as_ptr(),
+        offsets,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfh_gather_s32index() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let loaded = svprfh_gather_s32index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b32(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfw_gather_s32index() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let loaded = svprfw_gather_s32index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b32(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfd_gather_s32index() {
+    let indices = svindex_s32(0, 1);
+    svsetffr();
+    let loaded = svprfd_gather_s32index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b32(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfb_gather_s64offset() {
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svsetffr();
+    let loaded = svprfb_gather_s64offset::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b64(),
+        I64_DATA.as_ptr(),
+        offsets,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfh_gather_s64index() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let loaded = svprfh_gather_s64index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b64(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfw_gather_s64index() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let loaded = svprfw_gather_s64index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b64(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfd_gather_s64index() {
+    let indices = svindex_s64(0, 1);
+    svsetffr();
+    let loaded = svprfd_gather_s64index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b64(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfb_gather_u32offset() {
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let loaded = svprfb_gather_u32offset::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b32(),
+        I64_DATA.as_ptr(),
+        offsets,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfh_gather_u32index() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let loaded = svprfh_gather_u32index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b32(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfw_gather_u32index() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let loaded = svprfw_gather_u32index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b32(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfd_gather_u32index() {
+    let indices = svindex_u32(0, 1);
+    svsetffr();
+    let loaded = svprfd_gather_u32index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b32(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfb_gather_u64offset() {
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svsetffr();
+    let loaded = svprfb_gather_u64offset::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b64(),
+        I64_DATA.as_ptr(),
+        offsets,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfh_gather_u64index() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let loaded = svprfh_gather_u64index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b64(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfw_gather_u64index() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let loaded = svprfw_gather_u64index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b64(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfd_gather_u64index() {
+    let indices = svindex_u64(0, 1);
+    svsetffr();
+    let loaded = svprfd_gather_u64index::<{ svprfop::SV_PLDL1KEEP }, i64>(
+        svptrue_b64(),
+        I64_DATA.as_ptr(),
+        indices,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfb_gather_u64base() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let loaded = svprfb_gather_u64base::<{ svprfop::SV_PLDL1KEEP }>(svptrue_b64(), bases);
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfh_gather_u64base() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let loaded = svprfh_gather_u64base::<{ svprfop::SV_PLDL1KEEP }>(svptrue_b64(), bases);
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfw_gather_u64base() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let loaded = svprfw_gather_u64base::<{ svprfop::SV_PLDL1KEEP }>(svptrue_b64(), bases);
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfd_gather_u64base() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let loaded = svprfd_gather_u64base::<{ svprfop::SV_PLDL1KEEP }>(svptrue_b64(), bases);
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfb_gather_u32base_offset() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let loaded = svprfb_gather_u32base_offset::<{ svprfop::SV_PLDL1KEEP }>(
+        svptrue_b32(),
+        bases,
+        U32_DATA.as_ptr() as i64 + 4u32 as i64,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfh_gather_u32base_index() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let loaded = svprfh_gather_u32base_index::<{ svprfop::SV_PLDL1KEEP }>(
+        svptrue_b32(),
+        bases,
+        U32_DATA.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfw_gather_u32base_index() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let loaded = svprfw_gather_u32base_index::<{ svprfop::SV_PLDL1KEEP }>(
+        svptrue_b32(),
+        bases,
+        U32_DATA.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfd_gather_u32base_index() {
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svsetffr();
+    let loaded = svprfd_gather_u32base_index::<{ svprfop::SV_PLDL1KEEP }>(
+        svptrue_b32(),
+        bases,
+        U32_DATA.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfb_gather_u64base_offset() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let loaded = svprfb_gather_u64base_offset::<{ svprfop::SV_PLDL1KEEP }>(
+        svptrue_b64(),
+        bases,
+        8u32.try_into().unwrap(),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfh_gather_u64base_index() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let loaded = svprfh_gather_u64base_index::<{ svprfop::SV_PLDL1KEEP }>(
+        svptrue_b64(),
+        bases,
+        1.try_into().unwrap(),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfw_gather_u64base_index() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let loaded = svprfw_gather_u64base_index::<{ svprfop::SV_PLDL1KEEP }>(
+        svptrue_b64(),
+        bases,
+        1.try_into().unwrap(),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfd_gather_u64base_index() {
+    let bases = svdup_n_u64(U64_DATA.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svsetffr();
+    let loaded = svprfd_gather_u64base_index::<{ svprfop::SV_PLDL1KEEP }>(
+        svptrue_b64(),
+        bases,
+        1.try_into().unwrap(),
+    );
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfb_vnum() {
+    svsetffr();
+    let loaded = svprfb_vnum::<{ svprfop::SV_PLDL1KEEP }, i64>(svptrue_b8(), I64_DATA.as_ptr(), 1);
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfh_vnum() {
+    svsetffr();
+    let loaded = svprfh_vnum::<{ svprfop::SV_PLDL1KEEP }, i64>(svptrue_b16(), I64_DATA.as_ptr(), 1);
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfw_vnum() {
+    svsetffr();
+    let loaded = svprfw_vnum::<{ svprfop::SV_PLDL1KEEP }, i64>(svptrue_b32(), I64_DATA.as_ptr(), 1);
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_svprfd_vnum() {
+    svsetffr();
+    let loaded = svprfd_vnum::<{ svprfop::SV_PLDL1KEEP }, i64>(svptrue_b64(), I64_DATA.as_ptr(), 1);
+}
+#[simd_test(enable = "sve")]
+unsafe fn test_ffr() {
+    svsetffr();
+    let ffr = svrdffr();
+    assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svindex_u8(1, 0));
+    let pred = svdupq_n_b8(
+        true, false, true, false, true, false, true, false, true, false, true, false, true, false,
+        true, false,
+    );
+    svwrffr(pred);
+    let ffr = svrdffr_z(svptrue_b8());
+    assert_vector_matches_u8(svdup_n_u8_z(ffr, 1), svdup_n_u8_z(pred, 1));
+}
diff --git a/crates/core_arch/src/aarch64/sve/mod.rs b/crates/core_arch/src/aarch64/sve/mod.rs
new file mode 100644
index 0000000000..a3f70ab61c
--- /dev/null
+++ b/crates/core_arch/src/aarch64/sve/mod.rs
@@ -0,0 +1,379 @@
+//! SVE intrinsics
+
+#![allow(non_camel_case_types)]
+
+// `generated.rs` has a `super::*` and this import is for that
+use crate::intrinsics::{simd::*, *};
+
+#[rustfmt::skip]
+mod generated;
+#[rustfmt::skip]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub use self::generated::*;
+
+use crate::{marker::ConstParamTy, mem::transmute};
+
+pub(super) trait AsUnsigned {
+    type Unsigned;
+    unsafe fn as_unsigned(self) -> Self::Unsigned;
+}
+
+pub(super) trait AsSigned {
+    type Signed;
+    unsafe fn as_signed(self) -> Self::Signed;
+}
+
+/// Same as `Into` but with into being unsafe so that it can have the required `target_feature`
+pub(super) trait SveInto<T>: Sized {
+    unsafe fn sve_into(self) -> T;
+}
+
+macro_rules! impl_sve_type {
+    ($(($v:vis, $elem_type:ty, $name:ident, $elt:literal))*) => ($(
+        #[doc = concat!("Scalable vector of type ", stringify!($elem_type))]
+        #[derive(Clone, Copy, Debug)]
+        #[rustc_scalable_vector($elt)]
+        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+        $v struct $name($elem_type);
+    )*)
+}
+
+macro_rules! impl_sve_tuple_type {
+    ($(($v:vis, $vec_type:ty, $elt:tt, $name:ident))*) => ($(
+        impl_sve_tuple_type!(@ ($v, $vec_type, $elt, $name));
+    )*);
+    (@ ($v:vis, $vec_type:ty, 2, $name:ident)) => (
+        #[doc = concat!("Two-element tuple of scalable vectors of type ", stringify!($vec_type))]
+        #[derive(Clone, Copy, Debug)]
+        #[rustc_scalable_vector]
+        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+        $v struct $name($vec_type, $vec_type);
+    );
+    (@ ($v:vis, $vec_type:ty, 3, $name:ident)) => (
+        #[doc = concat!("Three-element tuple of scalable vectors of type ", stringify!($vec_type))]
+        #[derive(Clone, Copy, Debug)]
+        #[rustc_scalable_vector]
+        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+        $v struct $name($vec_type, $vec_type, $vec_type);
+    );
+    (@ ($v:vis, $vec_type:ty, 4, $name:ident)) => (
+        #[doc = concat!("Four-element tuple of scalable vectors of type ", stringify!($vec_type))]
+        #[derive(Clone, Copy, Debug)]
+        #[rustc_scalable_vector]
+        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+        $v struct $name($vec_type, $vec_type, $vec_type, $vec_type);
+    );
+}
+
+macro_rules! impl_sign_conversions_sv {
+    ($(($signed:ty, $unsigned:ty))*) => ($(
+        impl AsUnsigned for $signed {
+            type Unsigned = $unsigned;
+
+            #[inline]
+            #[target_feature(enable = "sve")]
+            unsafe fn as_unsigned(self) -> $unsigned {
+                transmute_unchecked(self)
+            }
+        }
+
+        impl AsSigned for $unsigned {
+            type Signed = $signed;
+
+            #[inline]
+            #[target_feature(enable = "sve")]
+            unsafe fn as_signed(self) -> $signed {
+                transmute_unchecked(self)
+            }
+        }
+    )*)
+}
+
+macro_rules! impl_sign_conversions {
+    ($(($signed:ty, $unsigned:ty))*) => ($(
+        impl AsUnsigned for $signed {
+            type Unsigned = $unsigned;
+
+            #[inline]
+            #[target_feature(enable = "sve")]
+            unsafe fn as_unsigned(self) -> $unsigned {
+                transmute(self)
+            }
+        }
+
+        impl AsSigned for $unsigned {
+            type Signed = $signed;
+
+            #[inline]
+            #[target_feature(enable = "sve")]
+            unsafe fn as_signed(self) -> $signed {
+                transmute(self)
+            }
+        }
+    )*)
+}
+
+/// LLVM requires the predicate lane count to be the same as the lane count
+/// it's working with. However the ACLE only defines one bool type and the
+/// instruction set doesn't have this distinction. As a result we have to
+/// create these internal types so we can match the LLVM signature. Each of
+/// these internal types can be converted to the public `svbool_t` type and
+/// the `svbool_t` type can be converted into these.
+macro_rules! impl_internal_sve_predicate {
+    ($(($name:ident, $elt:literal))*) => ($(
+        impl_sve_type! {
+            (pub(super), bool, $name, $elt)
+        }
+
+        impl SveInto<svbool_t> for $name {
+            #[inline]
+            #[target_feature(enable = "sve")]
+            unsafe fn sve_into(self) -> svbool_t {
+                #[allow(improper_ctypes)]
+                unsafe extern "C" {
+                    #[cfg_attr(
+                        target_arch = "aarch64",
+                        link_name = concat!("llvm.aarch64.sve.convert.to.svbool.nxv", $elt, "i1")
+                    )]
+                    fn convert_to_svbool(b: $name) -> svbool_t;
+                }
+                unsafe { convert_to_svbool(self) }
+            }
+        }
+
+        #[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+        impl SveInto<$name> for svbool_t {
+            #[inline]
+            #[target_feature(enable = "sve")]
+            unsafe fn sve_into(self) -> $name {
+                #[allow(improper_ctypes)]
+                unsafe extern "C" {
+                    #[cfg_attr(
+                        target_arch = "aarch64",
+                        link_name = concat!("llvm.aarch64.sve.convert.from.svbool.nxv", $elt, "i1")
+                    )]
+                    fn convert_from_svbool(b: svbool_t) -> $name;
+                }
+                unsafe { convert_from_svbool(self) }
+            }
+        }
+    )*)
+}
+
+impl_sve_type! {
+    (pub, bool, svbool_t, 16)
+
+    (pub, i8, svint8_t, 16)
+    (pub, u8, svuint8_t, 16)
+
+    (pub, i16, svint16_t, 8)
+    (pub, u16, svuint16_t, 8)
+    (pub, f32, svfloat32_t, 4)
+    (pub, i32, svint32_t, 4)
+    (pub, u32, svuint32_t, 4)
+    (pub, f64, svfloat64_t, 2)
+    (pub, i64, svint64_t, 2)
+    (pub, u64, svuint64_t, 2)
+
+    // Internal types:
+    (pub(super), i8, nxv2i8, 2)
+    (pub(super), i8, nxv4i8, 4)
+    (pub(super), i8, nxv8i8, 8)
+
+    (pub(super), i16, nxv2i16, 2)
+    (pub(super), i16, nxv4i16, 4)
+
+    (pub(super), i32, nxv2i32, 2)
+
+    (pub(super), u8, nxv2u8, 2)
+    (pub(super), u8, nxv4u8, 4)
+    (pub(super), u8, nxv8u8, 8)
+
+    (pub(super), u16, nxv2u16, 2)
+    (pub(super), u16, nxv4u16, 4)
+
+    (pub(super), u32, nxv2u32, 2)
+}
+
+impl_sve_tuple_type! {
+    (pub, svint8_t, 2, svint8x2_t)
+    (pub, svuint8_t, 2, svuint8x2_t)
+    (pub, svint16_t, 2, svint16x2_t)
+    (pub, svuint16_t, 2, svuint16x2_t)
+    (pub, svfloat32_t, 2, svfloat32x2_t)
+    (pub, svint32_t, 2, svint32x2_t)
+    (pub, svuint32_t, 2, svuint32x2_t)
+    (pub, svfloat64_t, 2, svfloat64x2_t)
+    (pub, svint64_t, 2, svint64x2_t)
+    (pub, svuint64_t, 2, svuint64x2_t)
+
+    (pub, svint8_t, 3, svint8x3_t)
+    (pub, svuint8_t, 3, svuint8x3_t)
+    (pub, svint16_t, 3, svint16x3_t)
+    (pub, svuint16_t, 3, svuint16x3_t)
+    (pub, svfloat32_t, 3, svfloat32x3_t)
+    (pub, svint32_t, 3, svint32x3_t)
+    (pub, svuint32_t, 3, svuint32x3_t)
+    (pub, svfloat64_t, 3, svfloat64x3_t)
+    (pub, svint64_t, 3, svint64x3_t)
+    (pub, svuint64_t, 3, svuint64x3_t)
+
+    (pub, svint8_t, 4, svint8x4_t)
+    (pub, svuint8_t, 4, svuint8x4_t)
+    (pub, svint16_t, 4, svint16x4_t)
+    (pub, svuint16_t, 4, svuint16x4_t)
+    (pub, svfloat32_t, 4, svfloat32x4_t)
+    (pub, svint32_t, 4, svint32x4_t)
+    (pub, svuint32_t, 4, svuint32x4_t)
+    (pub, svfloat64_t, 4, svfloat64x4_t)
+    (pub, svint64_t, 4, svint64x4_t)
+    (pub, svuint64_t, 4, svuint64x4_t)
+}
+
+impl_sign_conversions! {
+    (i8, u8)
+    (i16, u16)
+    (i32, u32)
+    (i64, u64)
+    (*const i8, *const u8)
+    (*const i16, *const u16)
+    (*const i32, *const u32)
+    (*const i64, *const u64)
+    (*mut i8, *mut u8)
+    (*mut i16, *mut u16)
+    (*mut i32, *mut u32)
+    (*mut i64, *mut u64)
+}
+
+impl_sign_conversions_sv! {
+    (svint8_t, svuint8_t)
+    (svint16_t, svuint16_t)
+    (svint32_t, svuint32_t)
+    (svint64_t, svuint64_t)
+
+    (svint8x2_t, svuint8x2_t)
+    (svint16x2_t, svuint16x2_t)
+    (svint32x2_t, svuint32x2_t)
+    (svint64x2_t, svuint64x2_t)
+
+    (svint8x3_t, svuint8x3_t)
+    (svint16x3_t, svuint16x3_t)
+    (svint32x3_t, svuint32x3_t)
+    (svint64x3_t, svuint64x3_t)
+
+    (svint8x4_t, svuint8x4_t)
+    (svint16x4_t, svuint16x4_t)
+    (svint32x4_t, svuint32x4_t)
+    (svint64x4_t, svuint64x4_t)
+
+    // Internal types:
+    (nxv2i8, nxv2u8)
+    (nxv4i8, nxv4u8)
+    (nxv8i8, nxv8u8)
+
+    (nxv2i16, nxv2u16)
+    (nxv4i16, nxv4u16)
+
+    (nxv2i32, nxv2u32)
+}
+
+impl_internal_sve_predicate! {
+    (svbool2_t, 2)
+    (svbool4_t, 4)
+    (svbool8_t, 8)
+}
+
+/// Patterns returned by a `PTRUE`
+#[repr(i32)]
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, ConstParamTy)]
+#[non_exhaustive]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub enum svpattern {
+    /// Activate the largest power-of-two number of elements that is less than the vector length
+    SV_POW2 = 0,
+    /// Activate the first element
+    SV_VL1 = 1,
+    /// Activate the first two elements
+    SV_VL2 = 2,
+    /// Activate the first three elements
+    SV_VL3 = 3,
+    /// Activate the first four elements
+    SV_VL4 = 4,
+    /// Activate the first five elements
+    SV_VL5 = 5,
+    /// Activate the first six elements
+    SV_VL6 = 6,
+    /// Activate the first seven elements
+    SV_VL7 = 7,
+    /// Activate the first eight elements
+    SV_VL8 = 8,
+    /// Activate the first sixteen elements
+    SV_VL16 = 9,
+    /// Activate the first thirty-two elements
+    SV_VL32 = 10,
+    /// Activate the first sixty-four elements
+    SV_VL64 = 11,
+    /// Activate the first one-hundred-and-twenty-eight elements
+    SV_VL128 = 12,
+    /// Activate the first two-hundred-and-fifty-six elements
+    SV_VL256 = 13,
+    /// Activate the largest multiple-of-four number of elements that is less than the vector length
+    SV_MUL4 = 29,
+    /// Activate the largest multiple-of-three number of elements that is less than the vector
+    /// length
+    SV_MUL3 = 30,
+    /// Activate all elements
+    SV_ALL = 31,
+}
+
+/// Addressing mode for prefetch intrinsics - allows the specification of the expected access
+/// kind (read or write), the cache level to load the data, the data retention policy
+/// (temporal or streaming)
+#[repr(i32)]
+#[allow(non_camel_case_types)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, ConstParamTy)]
+#[non_exhaustive]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub enum svprfop {
+    /// Temporal fetch of the addressed location for reading to the L1 cache (i.e. allocate in
+    /// cache normally)
+    SV_PLDL1KEEP = 0,
+    /// Streaming fetch of the addressed location for reading to the L1 cache (i.e. memory only
+    /// used once)
+    SV_PLDL1STRM = 1,
+    /// Temporal fetch of the addressed location for reading to the L2 cache (i.e. allocate in
+    /// cache normally)
+    SV_PLDL2KEEP = 2,
+    /// Streaming fetch of the addressed location for reading to the L2 cache (i.e. memory only
+    /// used once)
+    SV_PLDL2STRM = 3,
+    /// Temporal fetch of the addressed location for reading to the L3 cache (i.e. allocate in
+    /// cache normally)
+    SV_PLDL3KEEP = 4,
+    /// Streaming fetch of the addressed location for reading to the L3 cache (i.e. memory only
+    /// used once)
+    SV_PLDL3STRM = 5,
+    /// Temporal fetch of the addressed location for writing to the L1 cache (i.e. allocate in
+    /// cache normally)
+    SV_PSTL1KEEP = 8,
+    /// Temporal fetch of the addressed location for writing to the L1 cache (i.e. memory only
+    /// used once)
+    SV_PSTL1STRM = 9,
+    /// Temporal fetch of the addressed location for writing to the L2 cache (i.e. allocate in
+    /// cache normally)
+    SV_PSTL2KEEP = 10,
+    /// Temporal fetch of the addressed location for writing to the L2 cache (i.e. memory only
+    /// used once)
+    SV_PSTL2STRM = 11,
+    /// Temporal fetch of the addressed location for writing to the L3 cache (i.e. allocate in
+    /// cache normally)
+    SV_PSTL3KEEP = 12,
+    /// Temporal fetch of the addressed location for writing to the L3 cache (i.e. memory only
+    /// used once)
+    SV_PSTL3STRM = 13,
+}
+
+#[cfg(test)]
+#[path = "ld_st_tests_aarch64.rs"]
+mod ld_st_tests;
diff --git a/crates/core_arch/src/aarch64/sve2/generated.rs b/crates/core_arch/src/aarch64/sve2/generated.rs
new file mode 100644
index 0000000000..e11c20e5dd
--- /dev/null
+++ b/crates/core_arch/src/aarch64/sve2/generated.rs
@@ -0,0 +1,23881 @@
+// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen-arm/spec/` and run the following command to re-generate this file:
+//
+// ```
+// cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec
+// ```
+#![allow(improper_ctypes)]
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+use super::*;
+
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saba))]
+pub fn svaba_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saba.nxv16i8")]
+        fn _svaba_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svaba_s8(op1, op2, op3) }
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saba))]
+pub fn svaba_n_s8(op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svaba_s8(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saba))]
+pub fn svaba_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saba.nxv8i16")]
+        fn _svaba_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _svaba_s16(op1, op2, op3) }
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saba))]
+pub fn svaba_n_s16(op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svaba_s16(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saba))]
+pub fn svaba_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saba.nxv4i32")]
+        fn _svaba_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svaba_s32(op1, op2, op3) }
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saba))]
+pub fn svaba_n_s32(op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svaba_s32(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saba))]
+pub fn svaba_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saba.nxv2i64")]
+        fn _svaba_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svaba_s64(op1, op2, op3) }
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saba))]
+pub fn svaba_n_s64(op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svaba_s64(op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaba))]
+pub fn svaba_u8(op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaba.nxv16i8")]
+        fn _svaba_u8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svaba_u8(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaba))]
+pub fn svaba_n_u8(op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svaba_u8(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaba))]
+pub fn svaba_u16(op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaba.nxv8i16")]
+        fn _svaba_u16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _svaba_u16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaba))]
+pub fn svaba_n_u16(op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svaba_u16(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaba))]
+pub fn svaba_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaba.nxv4i32")]
+        fn _svaba_u32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svaba_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaba))]
+pub fn svaba_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svaba_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaba))]
+pub fn svaba_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaba.nxv2i64")]
+        fn _svaba_u64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svaba_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaba[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaba))]
+pub fn svaba_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svaba_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalb))]
+pub fn svabalb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabalb.nxv8i16")]
+        fn _svabalb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svabalb_s16(op1, op2, op3) }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalb))]
+pub fn svabalb_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svabalb_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalb))]
+pub fn svabalb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabalb.nxv4i32")]
+        fn _svabalb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svabalb_s32(op1, op2, op3) }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalb))]
+pub fn svabalb_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svabalb_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalb))]
+pub fn svabalb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabalb.nxv2i64")]
+        fn _svabalb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svabalb_s64(op1, op2, op3) }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalb))]
+pub fn svabalb_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svabalb_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalb))]
+pub fn svabalb_u16(op1: svuint16_t, op2: svuint8_t, op3: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabalb.nxv8i16")]
+        fn _svabalb_u16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svabalb_u16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalb))]
+pub fn svabalb_n_u16(op1: svuint16_t, op2: svuint8_t, op3: u8) -> svuint16_t {
+    svabalb_u16(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalb))]
+pub fn svabalb_u32(op1: svuint32_t, op2: svuint16_t, op3: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabalb.nxv4i32")]
+        fn _svabalb_u32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svabalb_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalb))]
+pub fn svabalb_n_u32(op1: svuint32_t, op2: svuint16_t, op3: u16) -> svuint32_t {
+    svabalb_u32(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalb))]
+pub fn svabalb_u64(op1: svuint64_t, op2: svuint32_t, op3: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabalb.nxv2i64")]
+        fn _svabalb_u64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svabalb_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalb))]
+pub fn svabalb_n_u64(op1: svuint64_t, op2: svuint32_t, op3: u32) -> svuint64_t {
+    svabalb_u64(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalt))]
+pub fn svabalt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabalt.nxv8i16")]
+        fn _svabalt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svabalt_s16(op1, op2, op3) }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalt))]
+pub fn svabalt_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svabalt_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalt))]
+pub fn svabalt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabalt.nxv4i32")]
+        fn _svabalt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svabalt_s32(op1, op2, op3) }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalt))]
+pub fn svabalt_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svabalt_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalt))]
+pub fn svabalt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabalt.nxv2i64")]
+        fn _svabalt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svabalt_s64(op1, op2, op3) }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabalt))]
+pub fn svabalt_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svabalt_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalt))]
+pub fn svabalt_u16(op1: svuint16_t, op2: svuint8_t, op3: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabalt.nxv8i16")]
+        fn _svabalt_u16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svabalt_u16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalt))]
+pub fn svabalt_n_u16(op1: svuint16_t, op2: svuint8_t, op3: u8) -> svuint16_t {
+    svabalt_u16(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalt))]
+pub fn svabalt_u32(op1: svuint32_t, op2: svuint16_t, op3: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabalt.nxv4i32")]
+        fn _svabalt_u32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svabalt_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalt))]
+pub fn svabalt_n_u32(op1: svuint32_t, op2: svuint16_t, op3: u16) -> svuint32_t {
+    svabalt_u32(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalt))]
+pub fn svabalt_u64(op1: svuint64_t, op2: svuint32_t, op3: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabalt.nxv2i64")]
+        fn _svabalt_u64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svabalt_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabalt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabalt))]
+pub fn svabalt_n_u64(op1: svuint64_t, op2: svuint32_t, op3: u32) -> svuint64_t {
+    svabalt_u64(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlb))]
+pub fn svabdlb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabdlb.nxv8i16")]
+        fn _svabdlb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svabdlb_s16(op1, op2) }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlb))]
+pub fn svabdlb_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svabdlb_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlb))]
+pub fn svabdlb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabdlb.nxv4i32")]
+        fn _svabdlb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svabdlb_s32(op1, op2) }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlb))]
+pub fn svabdlb_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svabdlb_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlb))]
+pub fn svabdlb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabdlb.nxv2i64")]
+        fn _svabdlb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svabdlb_s64(op1, op2) }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlb))]
+pub fn svabdlb_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svabdlb_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlb))]
+pub fn svabdlb_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabdlb.nxv8i16")]
+        fn _svabdlb_u16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svabdlb_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlb))]
+pub fn svabdlb_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svabdlb_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlb))]
+pub fn svabdlb_u32(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabdlb.nxv4i32")]
+        fn _svabdlb_u32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svabdlb_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlb))]
+pub fn svabdlb_n_u32(op1: svuint16_t, op2: u16) -> svuint32_t {
+    svabdlb_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlb))]
+pub fn svabdlb_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabdlb.nxv2i64")]
+        fn _svabdlb_u64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svabdlb_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlb))]
+pub fn svabdlb_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svabdlb_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlt))]
+pub fn svabdlt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabdlt.nxv8i16")]
+        fn _svabdlt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svabdlt_s16(op1, op2) }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlt))]
+pub fn svabdlt_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svabdlt_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlt))]
+pub fn svabdlt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabdlt.nxv4i32")]
+        fn _svabdlt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svabdlt_s32(op1, op2) }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlt))]
+pub fn svabdlt_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svabdlt_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlt))]
+pub fn svabdlt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sabdlt.nxv2i64")]
+        fn _svabdlt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svabdlt_s64(op1, op2) }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sabdlt))]
+pub fn svabdlt_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svabdlt_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlt))]
+pub fn svabdlt_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabdlt.nxv8i16")]
+        fn _svabdlt_u16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svabdlt_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlt))]
+pub fn svabdlt_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svabdlt_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlt))]
+pub fn svabdlt_u32(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabdlt.nxv4i32")]
+        fn _svabdlt_u32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svabdlt_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlt))]
+pub fn svabdlt_n_u32(op1: svuint16_t, op2: u16) -> svuint32_t {
+    svabdlt_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlt))]
+pub fn svabdlt_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uabdlt.nxv2i64")]
+        fn _svabdlt_u64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svabdlt_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Absolute difference long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svabdlt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uabdlt))]
+pub fn svabdlt_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svabdlt_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sadalp))]
+pub fn svadalp_s16_m(pg: svbool_t, op1: svint16_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sadalp.nxv8i16")]
+        fn _svadalp_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svadalp_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sadalp))]
+pub fn svadalp_s16_x(pg: svbool_t, op1: svint16_t, op2: svint8_t) -> svint16_t {
+    svadalp_s16_m(pg, op1, op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sadalp))]
+pub fn svadalp_s16_z(pg: svbool_t, op1: svint16_t, op2: svint8_t) -> svint16_t {
+    svadalp_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sadalp))]
+pub fn svadalp_s32_m(pg: svbool_t, op1: svint32_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sadalp.nxv4i32")]
+        fn _svadalp_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svadalp_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sadalp))]
+pub fn svadalp_s32_x(pg: svbool_t, op1: svint32_t, op2: svint16_t) -> svint32_t {
+    svadalp_s32_m(pg, op1, op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sadalp))]
+pub fn svadalp_s32_z(pg: svbool_t, op1: svint32_t, op2: svint16_t) -> svint32_t {
+    svadalp_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sadalp))]
+pub fn svadalp_s64_m(pg: svbool_t, op1: svint64_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sadalp.nxv2i64")]
+        fn _svadalp_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svadalp_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sadalp))]
+pub fn svadalp_s64_x(pg: svbool_t, op1: svint64_t, op2: svint32_t) -> svint64_t {
+    svadalp_s64_m(pg, op1, op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sadalp))]
+pub fn svadalp_s64_z(pg: svbool_t, op1: svint64_t, op2: svint32_t) -> svint64_t {
+    svadalp_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uadalp))]
+pub fn svadalp_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uadalp.nxv8i16")]
+        fn _svadalp_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svadalp_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uadalp))]
+pub fn svadalp_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint8_t) -> svuint16_t {
+    svadalp_u16_m(pg, op1, op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uadalp))]
+pub fn svadalp_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint8_t) -> svuint16_t {
+    svadalp_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uadalp))]
+pub fn svadalp_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uadalp.nxv4i32")]
+        fn _svadalp_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svadalp_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uadalp))]
+pub fn svadalp_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint16_t) -> svuint32_t {
+    svadalp_u32_m(pg, op1, op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uadalp))]
+pub fn svadalp_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint16_t) -> svuint32_t {
+    svadalp_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uadalp))]
+pub fn svadalp_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uadalp.nxv2i64")]
+        fn _svadalp_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svadalp_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uadalp))]
+pub fn svadalp_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint32_t) -> svuint64_t {
+    svadalp_u64_m(pg, op1, op2)
+}
+#[doc = "Add and accumulate long pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadalp[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uadalp))]
+pub fn svadalp_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint32_t) -> svuint64_t {
+    svadalp_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Add with carry long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadclb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adclb))]
+pub fn svadclb_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adclb.nxv4i32")]
+        fn _svadclb_u32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svadclb_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Add with carry long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadclb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adclb))]
+pub fn svadclb_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svadclb_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Add with carry long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadclb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adclb))]
+pub fn svadclb_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adclb.nxv2i64")]
+        fn _svadclb_u64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svadclb_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Add with carry long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadclb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adclb))]
+pub fn svadclb_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svadclb_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Add with carry long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadclt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adclt))]
+pub fn svadclt_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adclt.nxv4i32")]
+        fn _svadclt_u32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svadclt_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Add with carry long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadclt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adclt))]
+pub fn svadclt_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svadclt_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Add with carry long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadclt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adclt))]
+pub fn svadclt_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.adclt.nxv2i64")]
+        fn _svadclt_u64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svadclt_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Add with carry long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svadclt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(adclt))]
+pub fn svadclt_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svadclt_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_s16(op1: svint16_t, op2: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addhnb.nxv8i16")]
+        fn _svaddhnb_s16(op1: svint16_t, op2: svint16_t) -> svint8_t;
+    }
+    unsafe { _svaddhnb_s16(op1, op2) }
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_n_s16(op1: svint16_t, op2: i16) -> svint8_t {
+    svaddhnb_s16(op1, svdup_n_s16(op2))
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_s32(op1: svint32_t, op2: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addhnb.nxv4i32")]
+        fn _svaddhnb_s32(op1: svint32_t, op2: svint32_t) -> svint16_t;
+    }
+    unsafe { _svaddhnb_s32(op1, op2) }
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_n_s32(op1: svint32_t, op2: i32) -> svint16_t {
+    svaddhnb_s32(op1, svdup_n_s32(op2))
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_s64(op1: svint64_t, op2: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addhnb.nxv2i64")]
+        fn _svaddhnb_s64(op1: svint64_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svaddhnb_s64(op1, op2) }
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_n_s64(op1: svint64_t, op2: i64) -> svint32_t {
+    svaddhnb_s64(op1, svdup_n_s64(op2))
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_u16(op1: svuint16_t, op2: svuint16_t) -> svuint8_t {
+    unsafe { svaddhnb_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_n_u16(op1: svuint16_t, op2: u16) -> svuint8_t {
+    svaddhnb_u16(op1, svdup_n_u16(op2))
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_u32(op1: svuint32_t, op2: svuint32_t) -> svuint16_t {
+    unsafe { svaddhnb_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_n_u32(op1: svuint32_t, op2: u32) -> svuint16_t {
+    svaddhnb_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_u64(op1: svuint64_t, op2: svuint64_t) -> svuint32_t {
+    unsafe { svaddhnb_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnb))]
+pub fn svaddhnb_n_u64(op1: svuint64_t, op2: u64) -> svuint32_t {
+    svaddhnb_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_s16(even: svint8_t, op1: svint16_t, op2: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addhnt.nxv8i16")]
+        fn _svaddhnt_s16(even: svint8_t, op1: svint16_t, op2: svint16_t) -> svint8_t;
+    }
+    unsafe { _svaddhnt_s16(even, op1, op2) }
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_n_s16(even: svint8_t, op1: svint16_t, op2: i16) -> svint8_t {
+    svaddhnt_s16(even, op1, svdup_n_s16(op2))
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_s32(even: svint16_t, op1: svint32_t, op2: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addhnt.nxv4i32")]
+        fn _svaddhnt_s32(even: svint16_t, op1: svint32_t, op2: svint32_t) -> svint16_t;
+    }
+    unsafe { _svaddhnt_s32(even, op1, op2) }
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_n_s32(even: svint16_t, op1: svint32_t, op2: i32) -> svint16_t {
+    svaddhnt_s32(even, op1, svdup_n_s32(op2))
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_s64(even: svint32_t, op1: svint64_t, op2: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addhnt.nxv2i64")]
+        fn _svaddhnt_s64(even: svint32_t, op1: svint64_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svaddhnt_s64(even, op1, op2) }
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_n_s64(even: svint32_t, op1: svint64_t, op2: i64) -> svint32_t {
+    svaddhnt_s64(even, op1, svdup_n_s64(op2))
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_u16(even: svuint8_t, op1: svuint16_t, op2: svuint16_t) -> svuint8_t {
+    unsafe { svaddhnt_s16(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_n_u16(even: svuint8_t, op1: svuint16_t, op2: u16) -> svuint8_t {
+    svaddhnt_u16(even, op1, svdup_n_u16(op2))
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_u32(even: svuint16_t, op1: svuint32_t, op2: svuint32_t) -> svuint16_t {
+    unsafe { svaddhnt_s32(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_n_u32(even: svuint16_t, op1: svuint32_t, op2: u32) -> svuint16_t {
+    svaddhnt_u32(even, op1, svdup_n_u32(op2))
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_u64(even: svuint32_t, op1: svuint64_t, op2: svuint64_t) -> svuint32_t {
+    unsafe { svaddhnt_s64(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddhnt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addhnt))]
+pub fn svaddhnt_n_u64(even: svuint32_t, op1: svuint64_t, op2: u64) -> svuint32_t {
+    svaddhnt_u64(even, op1, svdup_n_u64(op2))
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlb))]
+pub fn svaddlb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddlb.nxv8i16")]
+        fn _svaddlb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svaddlb_s16(op1, op2) }
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlb))]
+pub fn svaddlb_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svaddlb_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlb))]
+pub fn svaddlb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddlb.nxv4i32")]
+        fn _svaddlb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svaddlb_s32(op1, op2) }
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlb))]
+pub fn svaddlb_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svaddlb_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlb))]
+pub fn svaddlb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddlb.nxv2i64")]
+        fn _svaddlb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svaddlb_s64(op1, op2) }
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlb))]
+pub fn svaddlb_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svaddlb_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlb))]
+pub fn svaddlb_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddlb.nxv8i16")]
+        fn _svaddlb_u16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svaddlb_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlb))]
+pub fn svaddlb_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svaddlb_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlb))]
+pub fn svaddlb_u32(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddlb.nxv4i32")]
+        fn _svaddlb_u32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svaddlb_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlb))]
+pub fn svaddlb_n_u32(op1: svuint16_t, op2: u16) -> svuint32_t {
+    svaddlb_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlb))]
+pub fn svaddlb_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddlb.nxv2i64")]
+        fn _svaddlb_u64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svaddlb_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlb))]
+pub fn svaddlb_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svaddlb_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Add long (bottom + top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlbt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlbt))]
+pub fn svaddlbt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.saddlbt.nxv8i16"
+        )]
+        fn _svaddlbt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svaddlbt_s16(op1, op2) }
+}
+#[doc = "Add long (bottom + top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlbt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlbt))]
+pub fn svaddlbt_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svaddlbt_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Add long (bottom + top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlbt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlbt))]
+pub fn svaddlbt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.saddlbt.nxv4i32"
+        )]
+        fn _svaddlbt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svaddlbt_s32(op1, op2) }
+}
+#[doc = "Add long (bottom + top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlbt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlbt))]
+pub fn svaddlbt_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svaddlbt_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Add long (bottom + top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlbt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlbt))]
+pub fn svaddlbt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.saddlbt.nxv2i64"
+        )]
+        fn _svaddlbt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svaddlbt_s64(op1, op2) }
+}
+#[doc = "Add long (bottom + top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlbt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlbt))]
+pub fn svaddlbt_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svaddlbt_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlt))]
+pub fn svaddlt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddlt.nxv8i16")]
+        fn _svaddlt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svaddlt_s16(op1, op2) }
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlt))]
+pub fn svaddlt_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svaddlt_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlt))]
+pub fn svaddlt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddlt.nxv4i32")]
+        fn _svaddlt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svaddlt_s32(op1, op2) }
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlt))]
+pub fn svaddlt_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svaddlt_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlt))]
+pub fn svaddlt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddlt.nxv2i64")]
+        fn _svaddlt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svaddlt_s64(op1, op2) }
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddlt))]
+pub fn svaddlt_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svaddlt_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlt))]
+pub fn svaddlt_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddlt.nxv8i16")]
+        fn _svaddlt_u16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svaddlt_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlt))]
+pub fn svaddlt_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svaddlt_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlt))]
+pub fn svaddlt_u32(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddlt.nxv4i32")]
+        fn _svaddlt_u32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svaddlt_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlt))]
+pub fn svaddlt_n_u32(op1: svuint16_t, op2: u16) -> svuint32_t {
+    svaddlt_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlt))]
+pub fn svaddlt_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddlt.nxv2i64")]
+        fn _svaddlt_u64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svaddlt_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddlt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddlt))]
+pub fn svaddlt_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svaddlt_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(faddp))]
+pub fn svaddp_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.faddp.nxv4f32")]
+        fn _svaddp_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svaddp_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(faddp))]
+pub fn svaddp_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svaddp_f32_m(pg, op1, op2)
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(faddp))]
+pub fn svaddp_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.faddp.nxv2f64")]
+        fn _svaddp_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svaddp_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(faddp))]
+pub fn svaddp_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svaddp_f64_m(pg, op1, op2)
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addp.nxv16i8")]
+        fn _svaddp_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svaddp_s8_m(pg, op1, op2) }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svaddp_s8_m(pg, op1, op2)
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addp.nxv8i16")]
+        fn _svaddp_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svaddp_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svaddp_s16_m(pg, op1, op2)
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addp.nxv4i32")]
+        fn _svaddp_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svaddp_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svaddp_s32_m(pg, op1, op2)
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.addp.nxv2i64")]
+        fn _svaddp_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svaddp_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svaddp_s64_m(pg, op1, op2)
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svaddp_s8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svaddp_u8_m(pg, op1, op2)
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { svaddp_s16_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svaddp_u16_m(pg, op1, op2)
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svaddp_s32_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svaddp_u32_m(pg, op1, op2)
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svaddp_s64_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddp[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(addp))]
+pub fn svaddp_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svaddp_u64_m(pg, op1, op2)
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwb))]
+pub fn svaddwb_s16(op1: svint16_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddwb.nxv8i16")]
+        fn _svaddwb_s16(op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svaddwb_s16(op1, op2) }
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwb))]
+pub fn svaddwb_n_s16(op1: svint16_t, op2: i8) -> svint16_t {
+    svaddwb_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwb))]
+pub fn svaddwb_s32(op1: svint32_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddwb.nxv4i32")]
+        fn _svaddwb_s32(op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svaddwb_s32(op1, op2) }
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwb))]
+pub fn svaddwb_n_s32(op1: svint32_t, op2: i16) -> svint32_t {
+    svaddwb_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwb))]
+pub fn svaddwb_s64(op1: svint64_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddwb.nxv2i64")]
+        fn _svaddwb_s64(op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svaddwb_s64(op1, op2) }
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwb))]
+pub fn svaddwb_n_s64(op1: svint64_t, op2: i32) -> svint64_t {
+    svaddwb_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwb))]
+pub fn svaddwb_u16(op1: svuint16_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddwb.nxv8i16")]
+        fn _svaddwb_u16(op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svaddwb_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwb))]
+pub fn svaddwb_n_u16(op1: svuint16_t, op2: u8) -> svuint16_t {
+    svaddwb_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwb))]
+pub fn svaddwb_u32(op1: svuint32_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddwb.nxv4i32")]
+        fn _svaddwb_u32(op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svaddwb_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwb))]
+pub fn svaddwb_n_u32(op1: svuint32_t, op2: u16) -> svuint32_t {
+    svaddwb_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwb))]
+pub fn svaddwb_u64(op1: svuint64_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddwb.nxv2i64")]
+        fn _svaddwb_u64(op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svaddwb_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwb))]
+pub fn svaddwb_n_u64(op1: svuint64_t, op2: u32) -> svuint64_t {
+    svaddwb_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwt))]
+pub fn svaddwt_s16(op1: svint16_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddwt.nxv8i16")]
+        fn _svaddwt_s16(op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svaddwt_s16(op1, op2) }
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwt))]
+pub fn svaddwt_n_s16(op1: svint16_t, op2: i8) -> svint16_t {
+    svaddwt_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwt))]
+pub fn svaddwt_s32(op1: svint32_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddwt.nxv4i32")]
+        fn _svaddwt_s32(op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svaddwt_s32(op1, op2) }
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwt))]
+pub fn svaddwt_n_s32(op1: svint32_t, op2: i16) -> svint32_t {
+    svaddwt_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwt))]
+pub fn svaddwt_s64(op1: svint64_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.saddwt.nxv2i64")]
+        fn _svaddwt_s64(op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svaddwt_s64(op1, op2) }
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(saddwt))]
+pub fn svaddwt_n_s64(op1: svint64_t, op2: i32) -> svint64_t {
+    svaddwt_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwt))]
+pub fn svaddwt_u16(op1: svuint16_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddwt.nxv8i16")]
+        fn _svaddwt_u16(op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svaddwt_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwt))]
+pub fn svaddwt_n_u16(op1: svuint16_t, op2: u8) -> svuint16_t {
+    svaddwt_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwt))]
+pub fn svaddwt_u32(op1: svuint32_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddwt.nxv4i32")]
+        fn _svaddwt_u32(op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svaddwt_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwt))]
+pub fn svaddwt_n_u32(op1: svuint32_t, op2: u16) -> svuint32_t {
+    svaddwt_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwt))]
+pub fn svaddwt_u64(op1: svuint64_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uaddwt.nxv2i64")]
+        fn _svaddwt_u64(op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svaddwt_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Add wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaddwt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uaddwt))]
+pub fn svaddwt_n_u64(op1: svuint64_t, op2: u32) -> svuint64_t {
+    svaddwt_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "AES single round decryption"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaesd[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(aesd))]
+pub fn svaesd_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.aesd")]
+        fn _svaesd_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svaesd_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "AES single round encryption"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaese[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(aese))]
+pub fn svaese_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.aese")]
+        fn _svaese_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svaese_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "AES inverse mix columns"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaesimc[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(aesimc))]
+pub fn svaesimc_u8(op: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.aesimc")]
+        fn _svaesimc_u8(op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svaesimc_u8(op.as_signed()).as_unsigned() }
+}
+#[doc = "AES mix columns"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svaesmc[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(aesmc))]
+pub fn svaesmc_u8(op: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.aesmc")]
+        fn _svaesmc_u8(op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svaesmc_u8(op.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bcax.nxv16i8")]
+        fn _svbcax_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svbcax_s8(op1, op2, op3) }
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_n_s8(op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svbcax_s8(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bcax.nxv8i16")]
+        fn _svbcax_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _svbcax_s16(op1, op2, op3) }
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_n_s16(op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svbcax_s16(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bcax.nxv4i32")]
+        fn _svbcax_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svbcax_s32(op1, op2, op3) }
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_n_s32(op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svbcax_s32(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bcax.nxv2i64")]
+        fn _svbcax_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svbcax_s64(op1, op2, op3) }
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_n_s64(op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svbcax_s64(op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_u8(op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { svbcax_s8(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_n_u8(op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svbcax_u8(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_u16(op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { svbcax_s16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_n_u16(op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svbcax_u16(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { svbcax_s32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svbcax_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { svbcax_s64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise clear and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbcax[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bcax))]
+pub fn svbcax_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svbcax_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Scatter lower bits into positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbdep[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bdep))]
+pub fn svbdep_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bdep.x.nxv16i8")]
+        fn _svbdep_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svbdep_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Scatter lower bits into positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbdep[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bdep))]
+pub fn svbdep_n_u8(op1: svuint8_t, op2: u8) -> svuint8_t {
+    svbdep_u8(op1, svdup_n_u8(op2))
+}
+#[doc = "Scatter lower bits into positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbdep[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bdep))]
+pub fn svbdep_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bdep.x.nxv8i16")]
+        fn _svbdep_u16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svbdep_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Scatter lower bits into positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbdep[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bdep))]
+pub fn svbdep_n_u16(op1: svuint16_t, op2: u16) -> svuint16_t {
+    svbdep_u16(op1, svdup_n_u16(op2))
+}
+#[doc = "Scatter lower bits into positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbdep[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bdep))]
+pub fn svbdep_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bdep.x.nxv4i32")]
+        fn _svbdep_u32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svbdep_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Scatter lower bits into positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbdep[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bdep))]
+pub fn svbdep_n_u32(op1: svuint32_t, op2: u32) -> svuint32_t {
+    svbdep_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Scatter lower bits into positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbdep[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bdep))]
+pub fn svbdep_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bdep.x.nxv2i64")]
+        fn _svbdep_u64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svbdep_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Scatter lower bits into positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbdep[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bdep))]
+pub fn svbdep_n_u64(op1: svuint64_t, op2: u64) -> svuint64_t {
+    svbdep_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Gather lower bits from positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbext[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bext))]
+pub fn svbext_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bext.x.nxv16i8")]
+        fn _svbext_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svbext_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Gather lower bits from positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbext[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bext))]
+pub fn svbext_n_u8(op1: svuint8_t, op2: u8) -> svuint8_t {
+    svbext_u8(op1, svdup_n_u8(op2))
+}
+#[doc = "Gather lower bits from positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbext[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bext))]
+pub fn svbext_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bext.x.nxv8i16")]
+        fn _svbext_u16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svbext_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Gather lower bits from positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbext[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bext))]
+pub fn svbext_n_u16(op1: svuint16_t, op2: u16) -> svuint16_t {
+    svbext_u16(op1, svdup_n_u16(op2))
+}
+#[doc = "Gather lower bits from positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbext[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bext))]
+pub fn svbext_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bext.x.nxv4i32")]
+        fn _svbext_u32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svbext_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Gather lower bits from positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbext[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bext))]
+pub fn svbext_n_u32(op1: svuint32_t, op2: u32) -> svuint32_t {
+    svbext_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Gather lower bits from positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbext[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bext))]
+pub fn svbext_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bext.x.nxv2i64")]
+        fn _svbext_u64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svbext_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Gather lower bits from positions selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbext[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bext))]
+pub fn svbext_n_u64(op1: svuint64_t, op2: u64) -> svuint64_t {
+    svbext_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Group bits to right or left as selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbgrp[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bgrp))]
+pub fn svbgrp_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bgrp.x.nxv16i8")]
+        fn _svbgrp_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svbgrp_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Group bits to right or left as selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbgrp[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bgrp))]
+pub fn svbgrp_n_u8(op1: svuint8_t, op2: u8) -> svuint8_t {
+    svbgrp_u8(op1, svdup_n_u8(op2))
+}
+#[doc = "Group bits to right or left as selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbgrp[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bgrp))]
+pub fn svbgrp_u16(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bgrp.x.nxv8i16")]
+        fn _svbgrp_u16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svbgrp_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Group bits to right or left as selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbgrp[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bgrp))]
+pub fn svbgrp_n_u16(op1: svuint16_t, op2: u16) -> svuint16_t {
+    svbgrp_u16(op1, svdup_n_u16(op2))
+}
+#[doc = "Group bits to right or left as selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbgrp[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bgrp))]
+pub fn svbgrp_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bgrp.x.nxv4i32")]
+        fn _svbgrp_u32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svbgrp_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Group bits to right or left as selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbgrp[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bgrp))]
+pub fn svbgrp_n_u32(op1: svuint32_t, op2: u32) -> svuint32_t {
+    svbgrp_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Group bits to right or left as selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbgrp[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bgrp))]
+pub fn svbgrp_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bgrp.x.nxv2i64")]
+        fn _svbgrp_u64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svbgrp_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Group bits to right or left as selected by bitmask"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbgrp[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-bitperm")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bgrp))]
+pub fn svbgrp_n_u64(op1: svuint64_t, op2: u64) -> svuint64_t {
+    svbgrp_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl1n.nxv16i8")]
+        fn _svbsl1n_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svbsl1n_s8(op1, op2, op3) }
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_n_s8(op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svbsl1n_s8(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl1n.nxv8i16")]
+        fn _svbsl1n_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _svbsl1n_s16(op1, op2, op3) }
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_n_s16(op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svbsl1n_s16(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl1n.nxv4i32")]
+        fn _svbsl1n_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svbsl1n_s32(op1, op2, op3) }
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_n_s32(op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svbsl1n_s32(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl1n.nxv2i64")]
+        fn _svbsl1n_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svbsl1n_s64(op1, op2, op3) }
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_n_s64(op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svbsl1n_s64(op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_u8(op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { svbsl1n_s8(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_n_u8(op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svbsl1n_u8(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_u16(op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { svbsl1n_s16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_n_u16(op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svbsl1n_u16(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { svbsl1n_s32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svbsl1n_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { svbsl1n_s64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select with first input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl1n[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl1n))]
+pub fn svbsl1n_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svbsl1n_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl2n.nxv16i8")]
+        fn _svbsl2n_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svbsl2n_s8(op1, op2, op3) }
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_n_s8(op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svbsl2n_s8(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl2n.nxv8i16")]
+        fn _svbsl2n_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _svbsl2n_s16(op1, op2, op3) }
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_n_s16(op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svbsl2n_s16(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl2n.nxv4i32")]
+        fn _svbsl2n_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svbsl2n_s32(op1, op2, op3) }
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_n_s32(op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svbsl2n_s32(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl2n.nxv2i64")]
+        fn _svbsl2n_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svbsl2n_s64(op1, op2, op3) }
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_n_s64(op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svbsl2n_s64(op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_u8(op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { svbsl2n_s8(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_n_u8(op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svbsl2n_u8(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_u16(op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { svbsl2n_s16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_n_u16(op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svbsl2n_u16(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { svbsl2n_s32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svbsl2n_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { svbsl2n_s64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select with second input inverted"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl2n[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl2n))]
+pub fn svbsl2n_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svbsl2n_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl.nxv16i8")]
+        fn _svbsl_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svbsl_s8(op1, op2, op3) }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_n_s8(op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svbsl_s8(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl.nxv8i16")]
+        fn _svbsl_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _svbsl_s16(op1, op2, op3) }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_n_s16(op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svbsl_s16(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl.nxv4i32")]
+        fn _svbsl_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svbsl_s32(op1, op2, op3) }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_n_s32(op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svbsl_s32(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.bsl.nxv2i64")]
+        fn _svbsl_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svbsl_s64(op1, op2, op3) }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_n_s64(op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svbsl_s64(op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_u8(op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { svbsl_s8(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_n_u8(op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svbsl_u8(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_u16(op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { svbsl_s16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_n_u16(op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svbsl_u16(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { svbsl_s32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svbsl_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { svbsl_s64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svbsl[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(bsl))]
+pub fn svbsl_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svbsl_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cadd, IMM_ROTATION = 90))]
+pub fn svcadd_s8<const IMM_ROTATION: i32>(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cadd.x.nxv16i8")]
+        fn _svcadd_s8(op1: svint8_t, op2: svint8_t, imm_rotation: i32) -> svint8_t;
+    }
+    unsafe { _svcadd_s8(op1, op2, IMM_ROTATION) }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cadd, IMM_ROTATION = 90))]
+pub fn svcadd_s16<const IMM_ROTATION: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cadd.x.nxv8i16")]
+        fn _svcadd_s16(op1: svint16_t, op2: svint16_t, imm_rotation: i32) -> svint16_t;
+    }
+    unsafe { _svcadd_s16(op1, op2, IMM_ROTATION) }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cadd, IMM_ROTATION = 90))]
+pub fn svcadd_s32<const IMM_ROTATION: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cadd.x.nxv4i32")]
+        fn _svcadd_s32(op1: svint32_t, op2: svint32_t, imm_rotation: i32) -> svint32_t;
+    }
+    unsafe { _svcadd_s32(op1, op2, IMM_ROTATION) }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cadd, IMM_ROTATION = 90))]
+pub fn svcadd_s64<const IMM_ROTATION: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cadd.x.nxv2i64")]
+        fn _svcadd_s64(op1: svint64_t, op2: svint64_t, imm_rotation: i32) -> svint64_t;
+    }
+    unsafe { _svcadd_s64(op1, op2, IMM_ROTATION) }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cadd, IMM_ROTATION = 90))]
+pub fn svcadd_u8<const IMM_ROTATION: i32>(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe { svcadd_s8::<IMM_ROTATION>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cadd, IMM_ROTATION = 90))]
+pub fn svcadd_u16<const IMM_ROTATION: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe { svcadd_s16::<IMM_ROTATION>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cadd, IMM_ROTATION = 90))]
+pub fn svcadd_u32<const IMM_ROTATION: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe { svcadd_s32::<IMM_ROTATION>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcadd[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cadd, IMM_ROTATION = 90))]
+pub fn svcadd_u64<const IMM_ROTATION: i32>(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe { svcadd_s64::<IMM_ROTATION>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Complex dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcdot_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cdot, IMM_INDEX = 0, IMM_ROTATION = 90))]
+pub fn svcdot_lane_s32<const IMM_INDEX: i32, const IMM_ROTATION: i32>(
+    op1: svint32_t,
+    op2: svint8_t,
+    op3: svint8_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cdot.lane.nxv4i32"
+        )]
+        fn _svcdot_lane_s32(
+            op1: svint32_t,
+            op2: svint8_t,
+            op3: svint8_t,
+            imm_index: i32,
+            imm_rotation: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svcdot_lane_s32(op1, op2, op3, IMM_INDEX, IMM_ROTATION) }
+}
+#[doc = "Complex dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcdot_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cdot, IMM_INDEX = 0, IMM_ROTATION = 90))]
+pub fn svcdot_lane_s64<const IMM_INDEX: i32, const IMM_ROTATION: i32>(
+    op1: svint64_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cdot.lane.nxv2i64"
+        )]
+        fn _svcdot_lane_s64(
+            op1: svint64_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            imm_index: i32,
+            imm_rotation: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svcdot_lane_s64(op1, op2, op3, IMM_INDEX, IMM_ROTATION) }
+}
+#[doc = "Complex dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcdot[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cdot, IMM_ROTATION = 90))]
+pub fn svcdot_s32<const IMM_ROTATION: i32>(
+    op1: svint32_t,
+    op2: svint8_t,
+    op3: svint8_t,
+) -> svint32_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cdot.nxv4i32")]
+        fn _svcdot_s32(
+            op1: svint32_t,
+            op2: svint8_t,
+            op3: svint8_t,
+            imm_rotation: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svcdot_s32(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Complex dot product"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcdot[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cdot, IMM_ROTATION = 90))]
+pub fn svcdot_s64<const IMM_ROTATION: i32>(
+    op1: svint64_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint64_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cdot.nxv2i64")]
+        fn _svcdot_s64(
+            op1: svint64_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            imm_rotation: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svcdot_s64(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_INDEX = 0, IMM_ROTATION = 90))]
+pub fn svcmla_lane_s16<const IMM_INDEX: i32, const IMM_ROTATION: i32>(
+    op1: svint16_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmla.lane.x.nxv8i16"
+        )]
+        fn _svcmla_lane_s16(
+            op1: svint16_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            imm_index: i32,
+            imm_rotation: i32,
+        ) -> svint16_t;
+    }
+    unsafe { _svcmla_lane_s16(op1, op2, op3, IMM_INDEX, IMM_ROTATION) }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_INDEX = 0, IMM_ROTATION = 90))]
+pub fn svcmla_lane_s32<const IMM_INDEX: i32, const IMM_ROTATION: i32>(
+    op1: svint32_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.cmla.lane.x.nxv4i32"
+        )]
+        fn _svcmla_lane_s32(
+            op1: svint32_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            imm_index: i32,
+            imm_rotation: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svcmla_lane_s32(op1, op2, op3, IMM_INDEX, IMM_ROTATION) }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla_lane[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_INDEX = 0, IMM_ROTATION = 90))]
+pub fn svcmla_lane_u16<const IMM_INDEX: i32, const IMM_ROTATION: i32>(
+    op1: svuint16_t,
+    op2: svuint16_t,
+    op3: svuint16_t,
+) -> svuint16_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe {
+        svcmla_lane_s16::<IMM_INDEX, IMM_ROTATION>(
+            op1.as_signed(),
+            op2.as_signed(),
+            op3.as_signed(),
+        )
+        .as_unsigned()
+    }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_INDEX = 0, IMM_ROTATION = 90))]
+pub fn svcmla_lane_u32<const IMM_INDEX: i32, const IMM_ROTATION: i32>(
+    op1: svuint32_t,
+    op2: svuint32_t,
+    op3: svuint32_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe {
+        svcmla_lane_s32::<IMM_INDEX, IMM_ROTATION>(
+            op1.as_signed(),
+            op2.as_signed(),
+            op3.as_signed(),
+        )
+        .as_unsigned()
+    }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_ROTATION = 90))]
+pub fn svcmla_s8<const IMM_ROTATION: i32>(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmla.x.nxv16i8")]
+        fn _svcmla_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t, imm_rotation: i32) -> svint8_t;
+    }
+    unsafe { _svcmla_s8(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_ROTATION = 90))]
+pub fn svcmla_s16<const IMM_ROTATION: i32>(
+    op1: svint16_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint16_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmla.x.nxv8i16")]
+        fn _svcmla_s16(
+            op1: svint16_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            imm_rotation: i32,
+        ) -> svint16_t;
+    }
+    unsafe { _svcmla_s16(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_ROTATION = 90))]
+pub fn svcmla_s32<const IMM_ROTATION: i32>(
+    op1: svint32_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint32_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmla.x.nxv4i32")]
+        fn _svcmla_s32(
+            op1: svint32_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            imm_rotation: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svcmla_s32(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_ROTATION = 90))]
+pub fn svcmla_s64<const IMM_ROTATION: i32>(
+    op1: svint64_t,
+    op2: svint64_t,
+    op3: svint64_t,
+) -> svint64_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.cmla.x.nxv2i64")]
+        fn _svcmla_s64(
+            op1: svint64_t,
+            op2: svint64_t,
+            op3: svint64_t,
+            imm_rotation: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svcmla_s64(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_ROTATION = 90))]
+pub fn svcmla_u8<const IMM_ROTATION: i32>(
+    op1: svuint8_t,
+    op2: svuint8_t,
+    op3: svuint8_t,
+) -> svuint8_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe {
+        svcmla_s8::<IMM_ROTATION>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_ROTATION = 90))]
+pub fn svcmla_u16<const IMM_ROTATION: i32>(
+    op1: svuint16_t,
+    op2: svuint16_t,
+    op3: svuint16_t,
+) -> svuint16_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe {
+        svcmla_s16::<IMM_ROTATION>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_ROTATION = 90))]
+pub fn svcmla_u32<const IMM_ROTATION: i32>(
+    op1: svuint32_t,
+    op2: svuint32_t,
+    op3: svuint32_t,
+) -> svuint32_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe {
+        svcmla_s32::<IMM_ROTATION>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Complex multiply-add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcmla[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(cmla, IMM_ROTATION = 90))]
+pub fn svcmla_u64<const IMM_ROTATION: i32>(
+    op1: svuint64_t,
+    op2: svuint64_t,
+    op3: svuint64_t,
+) -> svuint64_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe {
+        svcmla_s64::<IMM_ROTATION>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Up convert long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvtlt_f64[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtlt))]
+pub fn svcvtlt_f64_f32_m(inactive: svfloat64_t, pg: svbool_t, op: svfloat32_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtlt.f64f32")]
+        fn _svcvtlt_f64_f32_m(inactive: svfloat64_t, pg: svbool2_t, op: svfloat32_t)
+            -> svfloat64_t;
+    }
+    unsafe { _svcvtlt_f64_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Up convert long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvtlt_f64[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtlt))]
+pub fn svcvtlt_f64_f32_x(pg: svbool_t, op: svfloat32_t) -> svfloat64_t {
+    unsafe { svcvtlt_f64_f32_m(crate::intrinsics::transmute_unchecked(op), pg, op) }
+}
+#[doc = "Down convert and narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvtnt_f32[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtnt))]
+pub fn svcvtnt_f32_f64_m(even: svfloat32_t, pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtnt.f32f64")]
+        fn _svcvtnt_f32_f64_m(even: svfloat32_t, pg: svbool2_t, op: svfloat64_t) -> svfloat32_t;
+    }
+    unsafe { _svcvtnt_f32_f64_m(even, pg.sve_into(), op) }
+}
+#[doc = "Down convert and narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvtnt_f32[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtnt))]
+pub fn svcvtnt_f32_f64_x(even: svfloat32_t, pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    svcvtnt_f32_f64_m(even, pg, op)
+}
+#[doc = "Down convert, rounding to odd"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvtx_f32[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtx))]
+pub fn svcvtx_f32_f64_m(inactive: svfloat32_t, pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtx.f32f64")]
+        fn _svcvtx_f32_f64_m(inactive: svfloat32_t, pg: svbool2_t, op: svfloat64_t) -> svfloat32_t;
+    }
+    unsafe { _svcvtx_f32_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Down convert, rounding to odd"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvtx_f32[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtx))]
+pub fn svcvtx_f32_f64_x(pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    unsafe { svcvtx_f32_f64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Down convert, rounding to odd"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvtx_f32[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtx))]
+pub fn svcvtx_f32_f64_z(pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    svcvtx_f32_f64_m(svdup_n_f32(0.0), pg, op)
+}
+#[doc = "Down convert, rounding to odd (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvtxnt_f32[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtxnt))]
+pub fn svcvtxnt_f32_f64_m(even: svfloat32_t, pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fcvtxnt.f32f64")]
+        fn _svcvtxnt_f32_f64_m(even: svfloat32_t, pg: svbool2_t, op: svfloat64_t) -> svfloat32_t;
+    }
+    unsafe { _svcvtxnt_f32_f64_m(even, pg.sve_into(), op) }
+}
+#[doc = "Down convert, rounding to odd (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svcvtxnt_f32[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fcvtxnt))]
+pub fn svcvtxnt_f32_f64_x(even: svfloat32_t, pg: svbool_t, op: svfloat64_t) -> svfloat32_t {
+    svcvtxnt_f32_f64_m(even, pg, op)
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eor3.nxv16i8")]
+        fn _sveor3_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _sveor3_s8(op1, op2, op3) }
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_n_s8(op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    sveor3_s8(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eor3.nxv8i16")]
+        fn _sveor3_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _sveor3_s16(op1, op2, op3) }
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_n_s16(op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    sveor3_s16(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eor3.nxv4i32")]
+        fn _sveor3_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _sveor3_s32(op1, op2, op3) }
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_n_s32(op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    sveor3_s32(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eor3.nxv2i64")]
+        fn _sveor3_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _sveor3_s64(op1, op2, op3) }
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_n_s64(op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    sveor3_s64(op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_u8(op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { sveor3_s8(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_n_u8(op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    sveor3_u8(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_u16(op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { sveor3_s16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_n_u16(op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    sveor3_u16(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { sveor3_s32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    sveor3_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { sveor3_s64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR of three vectors"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveor3[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eor3))]
+pub fn sveor3_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    sveor3_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_s8(odd: svint8_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eorbt.nxv16i8")]
+        fn _sveorbt_s8(odd: svint8_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _sveorbt_s8(odd, op1, op2) }
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_n_s8(odd: svint8_t, op1: svint8_t, op2: i8) -> svint8_t {
+    sveorbt_s8(odd, op1, svdup_n_s8(op2))
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_s16(odd: svint16_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eorbt.nxv8i16")]
+        fn _sveorbt_s16(odd: svint16_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _sveorbt_s16(odd, op1, op2) }
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_n_s16(odd: svint16_t, op1: svint16_t, op2: i16) -> svint16_t {
+    sveorbt_s16(odd, op1, svdup_n_s16(op2))
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_s32(odd: svint32_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eorbt.nxv4i32")]
+        fn _sveorbt_s32(odd: svint32_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _sveorbt_s32(odd, op1, op2) }
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_n_s32(odd: svint32_t, op1: svint32_t, op2: i32) -> svint32_t {
+    sveorbt_s32(odd, op1, svdup_n_s32(op2))
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_s64(odd: svint64_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eorbt.nxv2i64")]
+        fn _sveorbt_s64(odd: svint64_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _sveorbt_s64(odd, op1, op2) }
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_n_s64(odd: svint64_t, op1: svint64_t, op2: i64) -> svint64_t {
+    sveorbt_s64(odd, op1, svdup_n_s64(op2))
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_u8(odd: svuint8_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { sveorbt_s8(odd.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_n_u8(odd: svuint8_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    sveorbt_u8(odd, op1, svdup_n_u8(op2))
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_u16(odd: svuint16_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { sveorbt_s16(odd.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_n_u16(odd: svuint16_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    sveorbt_u16(odd, op1, svdup_n_u16(op2))
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_u32(odd: svuint32_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { sveorbt_s32(odd.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_n_u32(odd: svuint32_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    sveorbt_u32(odd, op1, svdup_n_u32(op2))
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_u64(odd: svuint64_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { sveorbt_s64(odd.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleaving exclusive OR (bottom, top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveorbt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eorbt))]
+pub fn sveorbt_n_u64(odd: svuint64_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    sveorbt_u64(odd, op1, svdup_n_u64(op2))
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_s8(even: svint8_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eortb.nxv16i8")]
+        fn _sveortb_s8(even: svint8_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _sveortb_s8(even, op1, op2) }
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_n_s8(even: svint8_t, op1: svint8_t, op2: i8) -> svint8_t {
+    sveortb_s8(even, op1, svdup_n_s8(op2))
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_s16(even: svint16_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eortb.nxv8i16")]
+        fn _sveortb_s16(even: svint16_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _sveortb_s16(even, op1, op2) }
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_n_s16(even: svint16_t, op1: svint16_t, op2: i16) -> svint16_t {
+    sveortb_s16(even, op1, svdup_n_s16(op2))
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_s32(even: svint32_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eortb.nxv4i32")]
+        fn _sveortb_s32(even: svint32_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _sveortb_s32(even, op1, op2) }
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_n_s32(even: svint32_t, op1: svint32_t, op2: i32) -> svint32_t {
+    sveortb_s32(even, op1, svdup_n_s32(op2))
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_s64(even: svint64_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.eortb.nxv2i64")]
+        fn _sveortb_s64(even: svint64_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _sveortb_s64(even, op1, op2) }
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_n_s64(even: svint64_t, op1: svint64_t, op2: i64) -> svint64_t {
+    sveortb_s64(even, op1, svdup_n_s64(op2))
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_u8(even: svuint8_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { sveortb_s8(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_n_u8(even: svuint8_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    sveortb_u8(even, op1, svdup_n_u8(op2))
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_u16(even: svuint16_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe { sveortb_s16(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_n_u16(even: svuint16_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    sveortb_u16(even, op1, svdup_n_u16(op2))
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_u32(even: svuint32_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { sveortb_s32(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_n_u32(even: svuint32_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    sveortb_u32(even, op1, svdup_n_u32(op2))
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_u64(even: svuint64_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { sveortb_s64(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Interleaving exclusive OR (top, bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/sveortb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(eortb))]
+pub fn sveortb_n_u64(even: svuint64_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    sveortb_u64(even, op1, svdup_n_u64(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shadd.nxv16i8")]
+        fn _svhadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svhadd_s8_m(pg, op1, op2) }
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svhadd_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svhadd_s8_m(pg, op1, op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svhadd_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svhadd_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svhadd_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shadd.nxv8i16")]
+        fn _svhadd_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svhadd_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svhadd_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svhadd_s16_m(pg, op1, op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svhadd_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svhadd_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svhadd_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shadd.nxv4i32")]
+        fn _svhadd_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svhadd_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svhadd_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svhadd_s32_m(pg, op1, op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svhadd_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svhadd_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svhadd_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shadd.nxv2i64")]
+        fn _svhadd_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svhadd_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svhadd_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svhadd_s64_m(pg, op1, op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svhadd_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svhadd_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shadd))]
+pub fn svhadd_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svhadd_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhadd.nxv16i8")]
+        fn _svhadd_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svhadd_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svhadd_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svhadd_u8_m(pg, op1, op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svhadd_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svhadd_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svhadd_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhadd.nxv8i16")]
+        fn _svhadd_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svhadd_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svhadd_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svhadd_u16_m(pg, op1, op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svhadd_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svhadd_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svhadd_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhadd.nxv4i32")]
+        fn _svhadd_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svhadd_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svhadd_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svhadd_u32_m(pg, op1, op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svhadd_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svhadd_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svhadd_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhadd.nxv2i64")]
+        fn _svhadd_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svhadd_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svhadd_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svhadd_u64_m(pg, op1, op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svhadd_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svhadd_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhadd[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhadd))]
+pub fn svhadd_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svhadd_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Count matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhistcnt[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(histcnt))]
+pub fn svhistcnt_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.histcnt.nxv4i32"
+        )]
+        fn _svhistcnt_s32_z(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svhistcnt_s32_z(pg.sve_into(), op1, op2).as_unsigned() }
+}
+#[doc = "Count matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhistcnt[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(histcnt))]
+pub fn svhistcnt_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.histcnt.nxv2i64"
+        )]
+        fn _svhistcnt_s64_z(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svhistcnt_s64_z(pg.sve_into(), op1, op2).as_unsigned() }
+}
+#[doc = "Count matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhistcnt[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(histcnt))]
+pub fn svhistcnt_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe { svhistcnt_s32_z(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Count matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhistcnt[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(histcnt))]
+pub fn svhistcnt_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svhistcnt_s64_z(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Count matching elements in 128-bit segments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhistseg[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(histseg))]
+pub fn svhistseg_s8(op1: svint8_t, op2: svint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.histseg.nxv16i8"
+        )]
+        fn _svhistseg_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svhistseg_s8(op1, op2).as_unsigned() }
+}
+#[doc = "Count matching elements in 128-bit segments"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhistseg[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(histseg))]
+pub fn svhistseg_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe { svhistseg_s8(op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shsub.nxv16i8")]
+        fn _svhsub_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svhsub_s8_m(pg, op1, op2) }
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svhsub_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svhsub_s8_m(pg, op1, op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svhsub_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svhsub_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svhsub_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shsub.nxv8i16")]
+        fn _svhsub_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svhsub_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svhsub_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svhsub_s16_m(pg, op1, op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svhsub_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svhsub_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svhsub_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shsub.nxv4i32")]
+        fn _svhsub_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svhsub_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svhsub_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svhsub_s32_m(pg, op1, op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svhsub_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svhsub_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svhsub_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shsub.nxv2i64")]
+        fn _svhsub_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svhsub_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svhsub_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svhsub_s64_m(pg, op1, op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svhsub_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svhsub_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsub_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svhsub_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhsub.nxv16i8")]
+        fn _svhsub_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svhsub_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svhsub_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svhsub_u8_m(pg, op1, op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svhsub_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svhsub_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svhsub_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhsub.nxv8i16")]
+        fn _svhsub_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svhsub_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svhsub_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svhsub_u16_m(pg, op1, op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svhsub_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svhsub_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svhsub_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhsub.nxv4i32")]
+        fn _svhsub_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svhsub_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svhsub_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svhsub_u32_m(pg, op1, op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svhsub_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svhsub_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svhsub_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhsub.nxv2i64")]
+        fn _svhsub_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svhsub_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svhsub_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svhsub_u64_m(pg, op1, op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svhsub_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svhsub_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Halving subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsub[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsub_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svhsub_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shsubr.nxv16i8")]
+        fn _svhsubr_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svhsubr_s8_m(pg, op1, op2) }
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svhsubr_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svhsubr_s8_m(pg, op1, op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svhsubr_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svhsubr_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svhsubr_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shsubr.nxv8i16")]
+        fn _svhsubr_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svhsubr_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svhsubr_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svhsubr_s16_m(pg, op1, op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svhsubr_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svhsubr_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svhsubr_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shsubr.nxv4i32")]
+        fn _svhsubr_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svhsubr_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svhsubr_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svhsubr_s32_m(pg, op1, op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svhsubr_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svhsubr_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svhsubr_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shsubr.nxv2i64")]
+        fn _svhsubr_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svhsubr_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svhsubr_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svhsubr_s64_m(pg, op1, op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svhsubr_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svhsubr_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shsub))]
+pub fn svhsubr_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svhsubr_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhsubr.nxv16i8")]
+        fn _svhsubr_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svhsubr_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svhsubr_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svhsubr_u8_m(pg, op1, op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svhsubr_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svhsubr_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svhsubr_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhsubr.nxv8i16")]
+        fn _svhsubr_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svhsubr_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svhsubr_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svhsubr_u16_m(pg, op1, op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svhsubr_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svhsubr_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svhsubr_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhsubr.nxv4i32")]
+        fn _svhsubr_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svhsubr_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svhsubr_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svhsubr_u32_m(pg, op1, op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svhsubr_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svhsubr_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svhsubr_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uhsubr.nxv2i64")]
+        fn _svhsubr_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svhsubr_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svhsubr_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svhsubr_u64_m(pg, op1, op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svhsubr_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svhsubr_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Halving subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svhsubr[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uhsub))]
+pub fn svhsubr_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svhsubr_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[s64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_s64index_f64(
+    pg: svbool_t,
+    base: *const f64,
+    indices: svint64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.index.nxv2f64"
+        )]
+        fn _svldnt1_gather_s64index_f64(
+            pg: svbool2_t,
+            base: *const f64,
+            indices: svint64_t,
+        ) -> svfloat64_t;
+    }
+    _svldnt1_gather_s64index_f64(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[s64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const i64,
+    indices: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.index.nxv2i64"
+        )]
+        fn _svldnt1_gather_s64index_s64(
+            pg: svbool2_t,
+            base: *const i64,
+            indices: svint64_t,
+        ) -> svint64_t;
+    }
+    _svldnt1_gather_s64index_s64(pg.sve_into(), base, indices)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[s64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const u64,
+    indices: svint64_t,
+) -> svuint64_t {
+    svldnt1_gather_s64index_s64(pg, base.as_signed(), indices).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[u64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64index_f64(
+    pg: svbool_t,
+    base: *const f64,
+    indices: svuint64_t,
+) -> svfloat64_t {
+    svldnt1_gather_s64index_f64(pg, base, indices.as_signed())
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[u64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const i64,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldnt1_gather_s64index_s64(pg, base, indices.as_signed())
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[u64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const u64,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldnt1_gather_s64index_s64(pg, base.as_signed(), indices.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[s64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_s64offset_f64(
+    pg: svbool_t,
+    base: *const f64,
+    offsets: svint64_t,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.nxv2f64"
+        )]
+        fn _svldnt1_gather_s64offset_f64(
+            pg: svbool2_t,
+            base: *const f64,
+            offsets: svint64_t,
+        ) -> svfloat64_t;
+    }
+    _svldnt1_gather_s64offset_f64(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i64,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.nxv2i64"
+        )]
+        fn _svldnt1_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i64,
+            offsets: svint64_t,
+        ) -> svint64_t;
+    }
+    _svldnt1_gather_s64offset_s64(pg.sve_into(), base, offsets)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u64,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svldnt1_gather_s64offset_s64(pg, base.as_signed(), offsets).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[u32]offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32offset_f32(
+    pg: svbool_t,
+    base: *const f32,
+    offsets: svuint32_t,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32"
+        )]
+        fn _svldnt1_gather_u32offset_f32(
+            pg: svbool4_t,
+            base: *const f32,
+            offsets: svint32_t,
+        ) -> svfloat32_t;
+    }
+    _svldnt1_gather_u32offset_f32(pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[u32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32"
+        )]
+        fn _svldnt1_gather_u32offset_s32(
+            pg: svbool4_t,
+            base: *const i32,
+            offsets: svint32_t,
+        ) -> svint32_t;
+    }
+    _svldnt1_gather_u32offset_s32(pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[u32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    svldnt1_gather_u32offset_s32(pg, base.as_signed(), offsets).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[u64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64offset_f64(
+    pg: svbool_t,
+    base: *const f64,
+    offsets: svuint64_t,
+) -> svfloat64_t {
+    svldnt1_gather_s64offset_f64(pg, base, offsets.as_signed())
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i64,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldnt1_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u64,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldnt1_gather_s64offset_s64(pg, base.as_signed(), offsets.as_signed()).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u32base]_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32base_f32(pg: svbool_t, bases: svuint32_t) -> svfloat32_t {
+    svldnt1_gather_u32base_offset_f32(pg, bases, 0)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldnt1_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldnt1_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u64base]_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64base_f64(pg: svbool_t, bases: svuint64_t) -> svfloat64_t {
+    svldnt1_gather_u64base_offset_f64(pg, bases, 0)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldnt1_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldnt1_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u32base]_index_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32base_index_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svfloat32_t {
+    svldnt1_gather_u32base_offset_f32(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u32base]_index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svint32_t {
+    svldnt1_gather_u32base_offset_s32(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u32base]_index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svuint32_t {
+    svldnt1_gather_u32base_offset_u32(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u64base]_index_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64base_index_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svfloat64_t {
+    svldnt1_gather_u64base_offset_f64(pg, bases, index.unchecked_shl(3))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldnt1_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(3))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldnt1_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(3))
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u32base]_offset_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32base_offset_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4f32.nxv4i32"
+        )]
+        fn _svldnt1_gather_u32base_offset_f32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> svfloat32_t;
+    }
+    _svldnt1_gather_u32base_offset_f32(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i32.nxv4i32"
+        )]
+        fn _svldnt1_gather_u32base_offset_s32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> svint32_t;
+    }
+    _svldnt1_gather_u32base_offset_s32(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    svldnt1_gather_u32base_offset_s32(pg, bases, offset).as_unsigned()
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u64base]_offset_f64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64base_offset_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2f64.nxv2i64"
+        )]
+        fn _svldnt1_gather_u64base_offset_f64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> svfloat64_t;
+    }
+    _svldnt1_gather_u64base_offset_f64(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i64.nxv2i64"
+        )]
+        fn _svldnt1_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> svint64_t;
+    }
+    _svldnt1_gather_u64base_offset_s64(pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Unextended load, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1d))]
+pub unsafe fn svldnt1_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svldnt1_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.nxv2i8"
+        )]
+        fn _svldnt1sb_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i8,
+            offsets: svint64_t,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sb_gather_s64offset_s64(
+        pg.sve_into(),
+        base,
+        offsets,
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.nxv2i16"
+        )]
+        fn _svldnt1sh_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i16,
+            offsets: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sh_gather_s64offset_s64(
+        pg.sve_into(),
+        base,
+        offsets,
+    ))
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.nxv2i32"
+        )]
+        fn _svldnt1sw_gather_s64offset_s64(
+            pg: svbool2_t,
+            base: *const i32,
+            offsets: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sw_gather_s64offset_s64(
+        pg.sve_into(),
+        base,
+        offsets,
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svldnt1sb_gather_s64offset_s64(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svldnt1sh_gather_s64offset_s64(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svint64_t,
+) -> svuint64_t {
+    svldnt1sw_gather_s64offset_s64(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8"
+        )]
+        fn _svldnt1sb_gather_u32offset_s32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sb_gather_u32offset_s32(
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint32_t,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16"
+        )]
+        fn _svldnt1sh_gather_u32offset_s32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sh_gather_u32offset_s32(
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    svldnt1sb_gather_u32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    svldnt1sh_gather_u32offset_s32(pg, base, offsets).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldnt1sb_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldnt1sh_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldnt1sw_gather_s64offset_s64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const i8,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldnt1sb_gather_s64offset_s64(pg, base, offsets.as_signed()).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const i16,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldnt1sh_gather_s64offset_s64(pg, base, offsets.as_signed()).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const i32,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldnt1sw_gather_s64offset_s64(pg, base, offsets.as_signed()).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32"
+        )]
+        fn _svldnt1sb_gather_u32base_offset_s32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sb_gather_u32base_offset_s32(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32"
+        )]
+        fn _svldnt1sh_gather_u32base_offset_s32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sh_gather_u32base_offset_s32(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    svldnt1sb_gather_u32base_offset_s32(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    svldnt1sh_gather_u32base_offset_s32(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64"
+        )]
+        fn _svldnt1sb_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sb_gather_u64base_offset_s64(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64"
+        )]
+        fn _svldnt1sh_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sh_gather_u64base_offset_s64(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64"
+        )]
+        fn _svldnt1sw_gather_u64base_offset_s64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sw_gather_u64base_offset_s64(
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    ))
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svldnt1sb_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svldnt1sh_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    svldnt1sw_gather_u64base_offset_s64(pg, bases, offset).as_unsigned()
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldnt1sb_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldnt1sh_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldnt1sb_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldnt1sh_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldnt1sb_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldnt1sh_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldnt1sw_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sb_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sb))]
+pub unsafe fn svldnt1sb_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldnt1sb_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldnt1sh_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldnt1sw_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.index.nxv2i16"
+        )]
+        fn _svldnt1sh_gather_s64index_s64(
+            pg: svbool2_t,
+            base: *const i16,
+            indices: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sh_gather_s64index_s64(pg.sve_into(), base, indices))
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svint64_t,
+) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.index.nxv2i32"
+        )]
+        fn _svldnt1sw_gather_s64index_s64(
+            pg: svbool2_t,
+            base: *const i32,
+            indices: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast(_svldnt1sw_gather_s64index_s64(pg.sve_into(), base, indices))
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svint64_t,
+) -> svuint64_t {
+    svldnt1sh_gather_s64index_s64(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svint64_t,
+) -> svuint64_t {
+    svldnt1sw_gather_s64index_s64(pg, base, indices).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldnt1sh_gather_s64index_s64(pg, base, indices.as_signed())
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldnt1sw_gather_s64index_s64(pg, base, indices.as_signed())
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const i16,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldnt1sh_gather_s64index_s64(pg, base, indices.as_signed()).as_unsigned()
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const i32,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldnt1sw_gather_s64index_s64(pg, base, indices.as_signed()).as_unsigned()
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u32base]_index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svint32_t {
+    svldnt1sh_gather_u32base_offset_s32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u32base]_index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svuint32_t {
+    svldnt1sh_gather_u32base_offset_u32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldnt1sh_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldnt1sw_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load 16-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sh_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sh))]
+pub unsafe fn svldnt1sh_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldnt1sh_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and sign-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1sw_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1sw))]
+pub unsafe fn svldnt1sw_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldnt1sw_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint64_t,
+) -> svint64_t {
+    svldnt1ub_gather_s64offset_u64(pg, base, offsets).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint64_t,
+) -> svint64_t {
+    svldnt1uh_gather_s64offset_u64(pg, base, offsets).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather_[s64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_s64offset_s64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svint64_t,
+) -> svint64_t {
+    svldnt1uw_gather_s64offset_u64(pg, base, offsets).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.nxv2i8"
+        )]
+        fn _svldnt1ub_gather_s64offset_u64(
+            pg: svbool2_t,
+            base: *const i8,
+            offsets: svint64_t,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u8, _>(
+        _svldnt1ub_gather_s64offset_u64(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.nxv2i16"
+        )]
+        fn _svldnt1uh_gather_s64offset_u64(
+            pg: svbool2_t,
+            base: *const i16,
+            offsets: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svldnt1uh_gather_s64offset_u64(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather_[s64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_s64offset_u64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.nxv2i32"
+        )]
+        fn _svldnt1uw_gather_s64offset_u64(
+            pg: svbool2_t,
+            base: *const i32,
+            offsets: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svldnt1uw_gather_s64offset_u64(pg.sve_into(), base.as_signed(), offsets).as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint32_t,
+) -> svint32_t {
+    svldnt1ub_gather_u32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[u32]offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u32offset_s32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint32_t,
+) -> svint32_t {
+    svldnt1uh_gather_u32offset_u32(pg, base, offsets).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8"
+        )]
+        fn _svldnt1ub_gather_u32offset_u32(
+            pg: svbool4_t,
+            base: *const i8,
+            offsets: svint32_t,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svldnt1ub_gather_u32offset_u32(pg.sve_into(), base.as_signed(), offsets.as_signed())
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[u32]offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u32offset_u32(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint32_t,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16"
+        )]
+        fn _svldnt1uh_gather_u32offset_u32(
+            pg: svbool4_t,
+            base: *const i16,
+            offsets: svint32_t,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svldnt1uh_gather_u32offset_u32(pg.sve_into(), base.as_signed(), offsets.as_signed())
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldnt1ub_gather_s64offset_u64(pg, base, offsets.as_signed()).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldnt1uh_gather_s64offset_u64(pg, base, offsets.as_signed()).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather_[u64]offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64offset_s64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svuint64_t,
+) -> svint64_t {
+    svldnt1uw_gather_s64offset_u64(pg, base, offsets.as_signed()).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u8,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldnt1ub_gather_s64offset_u64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u16,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldnt1uh_gather_s64offset_u64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather_[u64]offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64offset_u64(
+    pg: svbool_t,
+    base: *const u32,
+    offsets: svuint64_t,
+) -> svuint64_t {
+    svldnt1uw_gather_s64offset_u64(pg, base, offsets.as_signed())
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    svldnt1ub_gather_u32base_offset_u32(pg, bases, offset).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u32base]_offset_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svint32_t {
+    svldnt1uh_gather_u32base_offset_u32(pg, bases, offset).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i8.nxv4i32"
+        )]
+        fn _svldnt1ub_gather_u32base_offset_u32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u8, _>(
+        _svldnt1ub_gather_u32base_offset_u32(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u32base]_offset_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv4i16.nxv4i32"
+        )]
+        fn _svldnt1uh_gather_u32base_offset_u32(
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        ) -> nxv4i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv4u16, _>(
+        _svldnt1uh_gather_u32base_offset_u32(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    svldnt1ub_gather_u64base_offset_u64(pg, bases, offset).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    svldnt1uh_gather_u64base_offset_u64(pg, bases, offset).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather[_u64base]_offset_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svint64_t {
+    svldnt1uw_gather_u64base_offset_u64(pg, bases, offset).as_signed()
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i8.nxv2i64"
+        )]
+        fn _svldnt1ub_gather_u64base_offset_u64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i8;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u8, _>(
+        _svldnt1ub_gather_u64base_offset_u64(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i16.nxv2i64"
+        )]
+        fn _svldnt1uh_gather_u64base_offset_u64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svldnt1uh_gather_u64base_offset_u64(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather[_u64base]_offset_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.scalar.offset.nxv2i32.nxv2i64"
+        )]
+        fn _svldnt1uw_gather_u64base_offset_u64(
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svldnt1uw_gather_u64base_offset_u64(pg.sve_into(), bases.as_signed(), offset)
+            .as_unsigned(),
+    )
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldnt1ub_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u32base]_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u32base_s32(pg: svbool_t, bases: svuint32_t) -> svint32_t {
+    svldnt1uh_gather_u32base_offset_s32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldnt1ub_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u32base]_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u32base_u32(pg: svbool_t, bases: svuint32_t) -> svuint32_t {
+    svldnt1uh_gather_u32base_offset_u32(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldnt1ub_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldnt1uh_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather[_u64base]_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64base_s64(pg: svbool_t, bases: svuint64_t) -> svint64_t {
+    svldnt1uw_gather_u64base_offset_s64(pg, bases, 0)
+}
+#[doc = "Load 8-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1ub_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1b))]
+pub unsafe fn svldnt1ub_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldnt1ub_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldnt1uh_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather[_u64base]_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64base_u64(pg: svbool_t, bases: svuint64_t) -> svuint64_t {
+    svldnt1uw_gather_u64base_offset_u64(pg, bases, 0)
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint64_t,
+) -> svint64_t {
+    svldnt1uh_gather_s64index_u64(pg, base, indices).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather_[s64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_s64index_s64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svint64_t,
+) -> svint64_t {
+    svldnt1uw_gather_s64index_u64(pg, base, indices).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.index.nxv2i16"
+        )]
+        fn _svldnt1uh_gather_s64index_u64(
+            pg: svbool2_t,
+            base: *const i16,
+            indices: svint64_t,
+        ) -> nxv2i16;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u16, _>(
+        _svldnt1uh_gather_s64index_u64(pg.sve_into(), base.as_signed(), indices).as_unsigned(),
+    )
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather_[s64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_s64index_u64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svint64_t,
+) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ldnt1.gather.index.nxv2i32"
+        )]
+        fn _svldnt1uw_gather_s64index_u64(
+            pg: svbool2_t,
+            base: *const i32,
+            indices: svint64_t,
+        ) -> nxv2i32;
+    }
+    crate::intrinsics::simd::simd_cast::<nxv2u32, _>(
+        _svldnt1uw_gather_s64index_u64(pg.sve_into(), base.as_signed(), indices).as_unsigned(),
+    )
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldnt1uh_gather_s64index_u64(pg, base, indices.as_signed()).as_signed()
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather_[u64]index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64index_s64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svuint64_t,
+) -> svint64_t {
+    svldnt1uw_gather_s64index_u64(pg, base, indices.as_signed()).as_signed()
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const u16,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldnt1uh_gather_s64index_u64(pg, base, indices.as_signed())
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather_[u64]index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64index_u64(
+    pg: svbool_t,
+    base: *const u32,
+    indices: svuint64_t,
+) -> svuint64_t {
+    svldnt1uw_gather_s64index_u64(pg, base, indices.as_signed())
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u32base]_index_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svint32_t {
+    svldnt1uh_gather_u32base_offset_s32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u32base]_index_u32)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+) -> svuint32_t {
+    svldnt1uh_gather_u32base_offset_u32(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldnt1uh_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather[_u64base]_index_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svint64_t {
+    svldnt1uw_gather_u64base_offset_s64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Load 16-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uh_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1h))]
+pub unsafe fn svldnt1uh_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldnt1uh_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(1))
+}
+#[doc = "Load 32-bit data and zero-extend, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svldnt1uw_gather[_u64base]_index_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ldnt1w))]
+pub unsafe fn svldnt1uw_gather_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+) -> svuint64_t {
+    svldnt1uw_gather_u64base_offset_u64(pg, bases, index.unchecked_shl(2))
+}
+#[doc = "Base 2 logarithm as integer"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlogb[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(flogb))]
+pub fn svlogb_f32_m(inactive: svint32_t, pg: svbool_t, op: svfloat32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.flogb.nxv4f32")]
+        fn _svlogb_f32_m(inactive: svint32_t, pg: svbool4_t, op: svfloat32_t) -> svint32_t;
+    }
+    unsafe { _svlogb_f32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Base 2 logarithm as integer"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlogb[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(flogb))]
+pub fn svlogb_f32_x(pg: svbool_t, op: svfloat32_t) -> svint32_t {
+    unsafe { svlogb_f32_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Base 2 logarithm as integer"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlogb[_f32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(flogb))]
+pub fn svlogb_f32_z(pg: svbool_t, op: svfloat32_t) -> svint32_t {
+    svlogb_f32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Base 2 logarithm as integer"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlogb[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(flogb))]
+pub fn svlogb_f64_m(inactive: svint64_t, pg: svbool_t, op: svfloat64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.flogb.nxv2f64")]
+        fn _svlogb_f64_m(inactive: svint64_t, pg: svbool2_t, op: svfloat64_t) -> svint64_t;
+    }
+    unsafe { _svlogb_f64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Base 2 logarithm as integer"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlogb[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(flogb))]
+pub fn svlogb_f64_x(pg: svbool_t, op: svfloat64_t) -> svint64_t {
+    unsafe { svlogb_f64_m(transmute_unchecked(op), pg, op) }
+}
+#[doc = "Base 2 logarithm as integer"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svlogb[_f64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(flogb))]
+pub fn svlogb_f64_z(pg: svbool_t, op: svfloat64_t) -> svint64_t {
+    svlogb_f64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Detect any matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmatch[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(match))]
+pub fn svmatch_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.match.nxv16i8")]
+        fn _svmatch_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t;
+    }
+    unsafe { _svmatch_s8(pg, op1, op2) }
+}
+#[doc = "Detect any matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmatch[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(match))]
+pub fn svmatch_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.match.nxv8i16")]
+        fn _svmatch_s16(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svbool8_t;
+    }
+    unsafe { _svmatch_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Detect any matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmatch[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(match))]
+pub fn svmatch_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svbool_t {
+    unsafe { svmatch_s8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Detect any matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmatch[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(match))]
+pub fn svmatch_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svbool_t {
+    unsafe { svmatch_s16(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Maximum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnmp[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub fn svmaxnmp_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmaxnmp.nxv4f32"
+        )]
+        fn _svmaxnmp_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svmaxnmp_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnmp[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub fn svmaxnmp_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmaxnmp_f32_m(pg, op1, op2)
+}
+#[doc = "Maximum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnmp[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub fn svmaxnmp_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmaxnmp.nxv2f64"
+        )]
+        fn _svmaxnmp_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svmaxnmp_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxnmp[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxnmp))]
+pub fn svmaxnmp_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmaxnmp_f64_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub fn svmaxp_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmaxp.nxv4f32")]
+        fn _svmaxp_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svmaxp_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub fn svmaxp_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svmaxp_f32_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub fn svmaxp_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fmaxp.nxv2f64")]
+        fn _svmaxp_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svmaxp_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmaxp))]
+pub fn svmaxp_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svmaxp_f64_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn svmaxp_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smaxp.nxv16i8")]
+        fn _svmaxp_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmaxp_s8_m(pg, op1, op2) }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn svmaxp_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svmaxp_s8_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn svmaxp_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smaxp.nxv8i16")]
+        fn _svmaxp_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svmaxp_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn svmaxp_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svmaxp_s16_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn svmaxp_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smaxp.nxv4i32")]
+        fn _svmaxp_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svmaxp_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn svmaxp_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svmaxp_s32_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn svmaxp_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smaxp.nxv2i64")]
+        fn _svmaxp_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svmaxp_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smaxp))]
+pub fn svmaxp_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svmaxp_s64_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn svmaxp_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umaxp.nxv16i8")]
+        fn _svmaxp_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svmaxp_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn svmaxp_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svmaxp_u8_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn svmaxp_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umaxp.nxv8i16")]
+        fn _svmaxp_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svmaxp_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn svmaxp_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svmaxp_u16_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn svmaxp_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umaxp.nxv4i32")]
+        fn _svmaxp_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svmaxp_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn svmaxp_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svmaxp_u32_m(pg, op1, op2)
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn svmaxp_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umaxp.nxv2i64")]
+        fn _svmaxp_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svmaxp_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Maximum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmaxp[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umaxp))]
+pub fn svmaxp_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svmaxp_u64_m(pg, op1, op2)
+}
+#[doc = "Minimum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnmp[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub fn svminnmp_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fminnmp.nxv4f32"
+        )]
+        fn _svminnmp_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svminnmp_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnmp[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub fn svminnmp_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svminnmp_f32_m(pg, op1, op2)
+}
+#[doc = "Minimum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnmp[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub fn svminnmp_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fminnmp.nxv2f64"
+        )]
+        fn _svminnmp_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svminnmp_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum number pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminnmp[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminnmp))]
+pub fn svminnmp_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svminnmp_f64_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_f32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn svminp_f32_m(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fminp.nxv4f32")]
+        fn _svminp_f32_m(pg: svbool4_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t;
+    }
+    unsafe { _svminp_f32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_f32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn svminp_f32_x(pg: svbool_t, op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    svminp_f32_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_f64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn svminp_f64_m(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.fminp.nxv2f64")]
+        fn _svminp_f64_m(pg: svbool2_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t;
+    }
+    unsafe { _svminp_f64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_f64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fminp))]
+pub fn svminp_f64_x(pg: svbool_t, op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    svminp_f64_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn svminp_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sminp.nxv16i8")]
+        fn _svminp_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svminp_s8_m(pg, op1, op2) }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn svminp_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svminp_s8_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn svminp_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sminp.nxv8i16")]
+        fn _svminp_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svminp_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn svminp_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svminp_s16_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn svminp_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sminp.nxv4i32")]
+        fn _svminp_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svminp_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn svminp_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svminp_s32_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn svminp_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sminp.nxv2i64")]
+        fn _svminp_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svminp_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sminp))]
+pub fn svminp_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svminp_s64_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn svminp_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uminp.nxv16i8")]
+        fn _svminp_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svminp_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn svminp_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svminp_u8_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn svminp_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uminp.nxv8i16")]
+        fn _svminp_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svminp_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn svminp_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svminp_u16_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn svminp_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uminp.nxv4i32")]
+        fn _svminp_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svminp_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn svminp_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svminp_u32_m(pg, op1, op2)
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn svminp_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uminp.nxv2i64")]
+        fn _svminp_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svminp_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Minimum pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svminp[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uminp))]
+pub fn svminp_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svminp_u64_m(pg, op1, op2)
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla, IMM_INDEX = 0))]
+pub fn svmla_lane_s16<const IMM_INDEX: i32>(
+    op1: svint16_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.mla.lane.nxv8i16"
+        )]
+        fn _svmla_lane_s16(
+            op1: svint16_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint16_t;
+    }
+    unsafe { _svmla_lane_s16(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla, IMM_INDEX = 0))]
+pub fn svmla_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.mla.lane.nxv4i32"
+        )]
+        fn _svmla_lane_s32(
+            op1: svint32_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svmla_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla, IMM_INDEX = 0))]
+pub fn svmla_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint64_t,
+    op3: svint64_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.mla.lane.nxv2i64"
+        )]
+        fn _svmla_lane_s64(
+            op1: svint64_t,
+            op2: svint64_t,
+            op3: svint64_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svmla_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla_lane[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla, IMM_INDEX = 0))]
+pub fn svmla_lane_u16<const IMM_INDEX: i32>(
+    op1: svuint16_t,
+    op2: svuint16_t,
+    op3: svuint16_t,
+) -> svuint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe {
+        svmla_lane_s16::<IMM_INDEX>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla, IMM_INDEX = 0))]
+pub fn svmla_lane_u32<const IMM_INDEX: i32>(
+    op1: svuint32_t,
+    op2: svuint32_t,
+    op3: svuint32_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe {
+        svmla_lane_s32::<IMM_INDEX>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Multiply-add, addend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmla_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mla, IMM_INDEX = 0))]
+pub fn svmla_lane_u64<const IMM_INDEX: i32>(
+    op1: svuint64_t,
+    op2: svuint64_t,
+    op3: svuint64_t,
+) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe {
+        svmla_lane_s64::<IMM_INDEX>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalb, IMM_INDEX = 0))]
+pub fn svmlalb_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smlalb.lane.nxv4i32"
+        )]
+        fn _svmlalb_lane_s32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svmlalb_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalb, IMM_INDEX = 0))]
+pub fn svmlalb_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smlalb.lane.nxv2i64"
+        )]
+        fn _svmlalb_lane_s64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svmlalb_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalb, IMM_INDEX = 0))]
+pub fn svmlalb_lane_u32<const IMM_INDEX: i32>(
+    op1: svuint32_t,
+    op2: svuint16_t,
+    op3: svuint16_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umlalb.lane.nxv4i32"
+        )]
+        fn _svmlalb_lane_u32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe {
+        _svmlalb_lane_u32(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX)
+            .as_unsigned()
+    }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalb, IMM_INDEX = 0))]
+pub fn svmlalb_lane_u64<const IMM_INDEX: i32>(
+    op1: svuint64_t,
+    op2: svuint32_t,
+    op3: svuint32_t,
+) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umlalb.lane.nxv2i64"
+        )]
+        fn _svmlalb_lane_u64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe {
+        _svmlalb_lane_u64(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX)
+            .as_unsigned()
+    }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalb))]
+pub fn svmlalb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlalb.nxv8i16")]
+        fn _svmlalb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmlalb_s16(op1, op2, op3) }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalb))]
+pub fn svmlalb_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svmlalb_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalb))]
+pub fn svmlalb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlalb.nxv4i32")]
+        fn _svmlalb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmlalb_s32(op1, op2, op3) }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalb))]
+pub fn svmlalb_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svmlalb_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalb))]
+pub fn svmlalb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlalb.nxv2i64")]
+        fn _svmlalb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmlalb_s64(op1, op2, op3) }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalb))]
+pub fn svmlalb_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svmlalb_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalb))]
+pub fn svmlalb_u16(op1: svuint16_t, op2: svuint8_t, op3: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlalb.nxv8i16")]
+        fn _svmlalb_u16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmlalb_u16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalb))]
+pub fn svmlalb_n_u16(op1: svuint16_t, op2: svuint8_t, op3: u8) -> svuint16_t {
+    svmlalb_u16(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalb))]
+pub fn svmlalb_u32(op1: svuint32_t, op2: svuint16_t, op3: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlalb.nxv4i32")]
+        fn _svmlalb_u32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmlalb_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalb))]
+pub fn svmlalb_n_u32(op1: svuint32_t, op2: svuint16_t, op3: u16) -> svuint32_t {
+    svmlalb_u32(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalb))]
+pub fn svmlalb_u64(op1: svuint64_t, op2: svuint32_t, op3: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlalb.nxv2i64")]
+        fn _svmlalb_u64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmlalb_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalb))]
+pub fn svmlalb_n_u64(op1: svuint64_t, op2: svuint32_t, op3: u32) -> svuint64_t {
+    svmlalb_u64(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalt, IMM_INDEX = 0))]
+pub fn svmlalt_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smlalt.lane.nxv4i32"
+        )]
+        fn _svmlalt_lane_s32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svmlalt_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalt, IMM_INDEX = 0))]
+pub fn svmlalt_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smlalt.lane.nxv2i64"
+        )]
+        fn _svmlalt_lane_s64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svmlalt_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalt, IMM_INDEX = 0))]
+pub fn svmlalt_lane_u32<const IMM_INDEX: i32>(
+    op1: svuint32_t,
+    op2: svuint16_t,
+    op3: svuint16_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umlalt.lane.nxv4i32"
+        )]
+        fn _svmlalt_lane_u32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe {
+        _svmlalt_lane_u32(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX)
+            .as_unsigned()
+    }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalt, IMM_INDEX = 0))]
+pub fn svmlalt_lane_u64<const IMM_INDEX: i32>(
+    op1: svuint64_t,
+    op2: svuint32_t,
+    op3: svuint32_t,
+) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umlalt.lane.nxv2i64"
+        )]
+        fn _svmlalt_lane_u64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe {
+        _svmlalt_lane_u64(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX)
+            .as_unsigned()
+    }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalt))]
+pub fn svmlalt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlalt.nxv8i16")]
+        fn _svmlalt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmlalt_s16(op1, op2, op3) }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalt))]
+pub fn svmlalt_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svmlalt_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalt))]
+pub fn svmlalt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlalt.nxv4i32")]
+        fn _svmlalt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmlalt_s32(op1, op2, op3) }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalt))]
+pub fn svmlalt_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svmlalt_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalt))]
+pub fn svmlalt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlalt.nxv2i64")]
+        fn _svmlalt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmlalt_s64(op1, op2, op3) }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlalt))]
+pub fn svmlalt_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svmlalt_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalt))]
+pub fn svmlalt_u16(op1: svuint16_t, op2: svuint8_t, op3: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlalt.nxv8i16")]
+        fn _svmlalt_u16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmlalt_u16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalt))]
+pub fn svmlalt_n_u16(op1: svuint16_t, op2: svuint8_t, op3: u8) -> svuint16_t {
+    svmlalt_u16(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalt))]
+pub fn svmlalt_u32(op1: svuint32_t, op2: svuint16_t, op3: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlalt.nxv4i32")]
+        fn _svmlalt_u32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmlalt_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalt))]
+pub fn svmlalt_n_u32(op1: svuint32_t, op2: svuint16_t, op3: u16) -> svuint32_t {
+    svmlalt_u32(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalt))]
+pub fn svmlalt_u64(op1: svuint64_t, op2: svuint32_t, op3: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlalt.nxv2i64")]
+        fn _svmlalt_u64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmlalt_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlalt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlalt))]
+pub fn svmlalt_n_u64(op1: svuint64_t, op2: svuint32_t, op3: u32) -> svuint64_t {
+    svmlalt_u64(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls, IMM_INDEX = 0))]
+pub fn svmls_lane_s16<const IMM_INDEX: i32>(
+    op1: svint16_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.mls.lane.nxv8i16"
+        )]
+        fn _svmls_lane_s16(
+            op1: svint16_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint16_t;
+    }
+    unsafe { _svmls_lane_s16(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls, IMM_INDEX = 0))]
+pub fn svmls_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.mls.lane.nxv4i32"
+        )]
+        fn _svmls_lane_s32(
+            op1: svint32_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svmls_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls, IMM_INDEX = 0))]
+pub fn svmls_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint64_t,
+    op3: svint64_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.mls.lane.nxv2i64"
+        )]
+        fn _svmls_lane_s64(
+            op1: svint64_t,
+            op2: svint64_t,
+            op3: svint64_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svmls_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls_lane[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls, IMM_INDEX = 0))]
+pub fn svmls_lane_u16<const IMM_INDEX: i32>(
+    op1: svuint16_t,
+    op2: svuint16_t,
+    op3: svuint16_t,
+) -> svuint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe {
+        svmls_lane_s16::<IMM_INDEX>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls, IMM_INDEX = 0))]
+pub fn svmls_lane_u32<const IMM_INDEX: i32>(
+    op1: svuint32_t,
+    op2: svuint32_t,
+    op3: svuint32_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe {
+        svmls_lane_s32::<IMM_INDEX>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Multiply-subtract, minuend first"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmls_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mls, IMM_INDEX = 0))]
+pub fn svmls_lane_u64<const IMM_INDEX: i32>(
+    op1: svuint64_t,
+    op2: svuint64_t,
+    op3: svuint64_t,
+) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe {
+        svmls_lane_s64::<IMM_INDEX>(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned()
+    }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslb, IMM_INDEX = 0))]
+pub fn svmlslb_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smlslb.lane.nxv4i32"
+        )]
+        fn _svmlslb_lane_s32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svmlslb_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslb, IMM_INDEX = 0))]
+pub fn svmlslb_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smlslb.lane.nxv2i64"
+        )]
+        fn _svmlslb_lane_s64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svmlslb_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslb, IMM_INDEX = 0))]
+pub fn svmlslb_lane_u32<const IMM_INDEX: i32>(
+    op1: svuint32_t,
+    op2: svuint16_t,
+    op3: svuint16_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umlslb.lane.nxv4i32"
+        )]
+        fn _svmlslb_lane_u32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe {
+        _svmlslb_lane_u32(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX)
+            .as_unsigned()
+    }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslb, IMM_INDEX = 0))]
+pub fn svmlslb_lane_u64<const IMM_INDEX: i32>(
+    op1: svuint64_t,
+    op2: svuint32_t,
+    op3: svuint32_t,
+) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umlslb.lane.nxv2i64"
+        )]
+        fn _svmlslb_lane_u64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe {
+        _svmlslb_lane_u64(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX)
+            .as_unsigned()
+    }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslb))]
+pub fn svmlslb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlslb.nxv8i16")]
+        fn _svmlslb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmlslb_s16(op1, op2, op3) }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslb))]
+pub fn svmlslb_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svmlslb_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslb))]
+pub fn svmlslb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlslb.nxv4i32")]
+        fn _svmlslb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmlslb_s32(op1, op2, op3) }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslb))]
+pub fn svmlslb_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svmlslb_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslb))]
+pub fn svmlslb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlslb.nxv2i64")]
+        fn _svmlslb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmlslb_s64(op1, op2, op3) }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslb))]
+pub fn svmlslb_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svmlslb_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslb))]
+pub fn svmlslb_u16(op1: svuint16_t, op2: svuint8_t, op3: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlslb.nxv8i16")]
+        fn _svmlslb_u16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmlslb_u16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslb))]
+pub fn svmlslb_n_u16(op1: svuint16_t, op2: svuint8_t, op3: u8) -> svuint16_t {
+    svmlslb_u16(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslb))]
+pub fn svmlslb_u32(op1: svuint32_t, op2: svuint16_t, op3: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlslb.nxv4i32")]
+        fn _svmlslb_u32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmlslb_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslb))]
+pub fn svmlslb_n_u32(op1: svuint32_t, op2: svuint16_t, op3: u16) -> svuint32_t {
+    svmlslb_u32(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslb))]
+pub fn svmlslb_u64(op1: svuint64_t, op2: svuint32_t, op3: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlslb.nxv2i64")]
+        fn _svmlslb_u64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmlslb_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslb))]
+pub fn svmlslb_n_u64(op1: svuint64_t, op2: svuint32_t, op3: u32) -> svuint64_t {
+    svmlslb_u64(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslt, IMM_INDEX = 0))]
+pub fn svmlslt_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smlslt.lane.nxv4i32"
+        )]
+        fn _svmlslt_lane_s32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svmlslt_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslt, IMM_INDEX = 0))]
+pub fn svmlslt_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smlslt.lane.nxv2i64"
+        )]
+        fn _svmlslt_lane_s64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svmlslt_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslt, IMM_INDEX = 0))]
+pub fn svmlslt_lane_u32<const IMM_INDEX: i32>(
+    op1: svuint32_t,
+    op2: svuint16_t,
+    op3: svuint16_t,
+) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umlslt.lane.nxv4i32"
+        )]
+        fn _svmlslt_lane_u32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe {
+        _svmlslt_lane_u32(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX)
+            .as_unsigned()
+    }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslt, IMM_INDEX = 0))]
+pub fn svmlslt_lane_u64<const IMM_INDEX: i32>(
+    op1: svuint64_t,
+    op2: svuint32_t,
+    op3: svuint32_t,
+) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umlslt.lane.nxv2i64"
+        )]
+        fn _svmlslt_lane_u64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe {
+        _svmlslt_lane_u64(op1.as_signed(), op2.as_signed(), op3.as_signed(), IMM_INDEX)
+            .as_unsigned()
+    }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslt))]
+pub fn svmlslt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlslt.nxv8i16")]
+        fn _svmlslt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmlslt_s16(op1, op2, op3) }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslt))]
+pub fn svmlslt_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svmlslt_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslt))]
+pub fn svmlslt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlslt.nxv4i32")]
+        fn _svmlslt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmlslt_s32(op1, op2, op3) }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslt))]
+pub fn svmlslt_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svmlslt_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslt))]
+pub fn svmlslt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smlslt.nxv2i64")]
+        fn _svmlslt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmlslt_s64(op1, op2, op3) }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smlslt))]
+pub fn svmlslt_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svmlslt_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslt))]
+pub fn svmlslt_u16(op1: svuint16_t, op2: svuint8_t, op3: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlslt.nxv8i16")]
+        fn _svmlslt_u16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmlslt_u16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslt))]
+pub fn svmlslt_n_u16(op1: svuint16_t, op2: svuint8_t, op3: u8) -> svuint16_t {
+    svmlslt_u16(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslt))]
+pub fn svmlslt_u32(op1: svuint32_t, op2: svuint16_t, op3: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlslt.nxv4i32")]
+        fn _svmlslt_u32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmlslt_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslt))]
+pub fn svmlslt_n_u32(op1: svuint32_t, op2: svuint16_t, op3: u16) -> svuint32_t {
+    svmlslt_u32(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslt))]
+pub fn svmlslt_u64(op1: svuint64_t, op2: svuint32_t, op3: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umlslt.nxv2i64")]
+        fn _svmlslt_u64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmlslt_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmlslt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umlslt))]
+pub fn svmlslt_n_u64(op1: svuint64_t, op2: svuint32_t, op3: u32) -> svuint64_t {
+    svmlslt_u64(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Move long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllb))]
+pub fn svmovlb_s16(op: svint8_t) -> svint16_t {
+    svshllb_n_s16::<0>(op)
+}
+#[doc = "Move long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllb))]
+pub fn svmovlb_s32(op: svint16_t) -> svint32_t {
+    svshllb_n_s32::<0>(op)
+}
+#[doc = "Move long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllb))]
+pub fn svmovlb_s64(op: svint32_t) -> svint64_t {
+    svshllb_n_s64::<0>(op)
+}
+#[doc = "Move long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllb))]
+pub fn svmovlb_u16(op: svuint8_t) -> svuint16_t {
+    svshllb_n_u16::<0>(op)
+}
+#[doc = "Move long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllb))]
+pub fn svmovlb_u32(op: svuint16_t) -> svuint32_t {
+    svshllb_n_u32::<0>(op)
+}
+#[doc = "Move long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllb))]
+pub fn svmovlb_u64(op: svuint32_t) -> svuint64_t {
+    svshllb_n_u64::<0>(op)
+}
+#[doc = "Move long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllt))]
+pub fn svmovlt_s16(op: svint8_t) -> svint16_t {
+    svshllt_n_s16::<0>(op)
+}
+#[doc = "Move long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllt))]
+pub fn svmovlt_s32(op: svint16_t) -> svint32_t {
+    svshllt_n_s32::<0>(op)
+}
+#[doc = "Move long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllt))]
+pub fn svmovlt_s64(op: svint32_t) -> svint64_t {
+    svshllt_n_s64::<0>(op)
+}
+#[doc = "Move long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllt))]
+pub fn svmovlt_u16(op: svuint8_t) -> svuint16_t {
+    svshllt_n_u16::<0>(op)
+}
+#[doc = "Move long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllt))]
+pub fn svmovlt_u32(op: svuint16_t) -> svuint32_t {
+    svshllt_n_u32::<0>(op)
+}
+#[doc = "Move long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmovlt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllt))]
+pub fn svmovlt_u64(op: svuint32_t) -> svuint64_t {
+    svshllt_n_u64::<0>(op)
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul_lane[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul, IMM_INDEX = 0))]
+pub fn svmul_lane_f32<const IMM_INDEX: i32>(op1: svfloat32_t, op2: svfloat32_t) -> svfloat32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmul.lane.nxv4f32"
+        )]
+        fn _svmul_lane_f32(op1: svfloat32_t, op2: svfloat32_t, imm_index: i32) -> svfloat32_t;
+    }
+    unsafe { _svmul_lane_f32(op1, op2, IMM_INDEX) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul_lane[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(fmul, IMM_INDEX = 0))]
+pub fn svmul_lane_f64<const IMM_INDEX: i32>(op1: svfloat64_t, op2: svfloat64_t) -> svfloat64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.fmul.lane.nxv2f64"
+        )]
+        fn _svmul_lane_f64(op1: svfloat64_t, op2: svfloat64_t, imm_index: i32) -> svfloat64_t;
+    }
+    unsafe { _svmul_lane_f64(op1, op2, IMM_INDEX) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul, IMM_INDEX = 0))]
+pub fn svmul_lane_s16<const IMM_INDEX: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.mul.lane.nxv8i16"
+        )]
+        fn _svmul_lane_s16(op1: svint16_t, op2: svint16_t, imm_index: i32) -> svint16_t;
+    }
+    unsafe { _svmul_lane_s16(op1, op2, IMM_INDEX) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul, IMM_INDEX = 0))]
+pub fn svmul_lane_s32<const IMM_INDEX: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.mul.lane.nxv4i32"
+        )]
+        fn _svmul_lane_s32(op1: svint32_t, op2: svint32_t, imm_index: i32) -> svint32_t;
+    }
+    unsafe { _svmul_lane_s32(op1, op2, IMM_INDEX) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul, IMM_INDEX = 0))]
+pub fn svmul_lane_s64<const IMM_INDEX: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.mul.lane.nxv2i64"
+        )]
+        fn _svmul_lane_s64(op1: svint64_t, op2: svint64_t, imm_index: i32) -> svint64_t;
+    }
+    unsafe { _svmul_lane_s64(op1, op2, IMM_INDEX) }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul_lane[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul, IMM_INDEX = 0))]
+pub fn svmul_lane_u16<const IMM_INDEX: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe { svmul_lane_s16::<IMM_INDEX>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul, IMM_INDEX = 0))]
+pub fn svmul_lane_u32<const IMM_INDEX: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe { svmul_lane_s32::<IMM_INDEX>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmul_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(mul, IMM_INDEX = 0))]
+pub fn svmul_lane_u64<const IMM_INDEX: i32>(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe { svmul_lane_s64::<IMM_INDEX>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc")),
+    assert_instr(smullb, IMM_INDEX = 0)
+)]
+pub fn svmullb_lane_s32<const IMM_INDEX: i32>(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smullb.lane.nxv4i32"
+        )]
+        fn _svmullb_lane_s32(op1: svint16_t, op2: svint16_t, imm_index: i32) -> svint32_t;
+    }
+    unsafe { _svmullb_lane_s32(op1, op2, IMM_INDEX) }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc")),
+    assert_instr(smullb, IMM_INDEX = 0)
+)]
+pub fn svmullb_lane_s64<const IMM_INDEX: i32>(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smullb.lane.nxv2i64"
+        )]
+        fn _svmullb_lane_s64(op1: svint32_t, op2: svint32_t, imm_index: i32) -> svint64_t;
+    }
+    unsafe { _svmullb_lane_s64(op1, op2, IMM_INDEX) }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc")),
+    assert_instr(umullb, IMM_INDEX = 0)
+)]
+pub fn svmullb_lane_u32<const IMM_INDEX: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umullb.lane.nxv4i32"
+        )]
+        fn _svmullb_lane_u32(op1: svint16_t, op2: svint16_t, imm_index: i32) -> svint32_t;
+    }
+    unsafe { _svmullb_lane_u32(op1.as_signed(), op2.as_signed(), IMM_INDEX).as_unsigned() }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc")),
+    assert_instr(umullb, IMM_INDEX = 0)
+)]
+pub fn svmullb_lane_u64<const IMM_INDEX: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umullb.lane.nxv2i64"
+        )]
+        fn _svmullb_lane_u64(op1: svint32_t, op2: svint32_t, imm_index: i32) -> svint64_t;
+    }
+    unsafe { _svmullb_lane_u64(op1.as_signed(), op2.as_signed(), IMM_INDEX).as_unsigned() }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smullb))]
+pub fn svmullb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smullb.nxv8i16")]
+        fn _svmullb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmullb_s16(op1, op2) }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smullb))]
+pub fn svmullb_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svmullb_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smullb))]
+pub fn svmullb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smullb.nxv4i32")]
+        fn _svmullb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmullb_s32(op1, op2) }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smullb))]
+pub fn svmullb_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svmullb_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smullb))]
+pub fn svmullb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smullb.nxv2i64")]
+        fn _svmullb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmullb_s64(op1, op2) }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(smullb))]
+pub fn svmullb_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svmullb_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umullb))]
+pub fn svmullb_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umullb.nxv8i16")]
+        fn _svmullb_u16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmullb_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umullb))]
+pub fn svmullb_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svmullb_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umullb))]
+pub fn svmullb_u32(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umullb.nxv4i32")]
+        fn _svmullb_u32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmullb_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umullb))]
+pub fn svmullb_n_u32(op1: svuint16_t, op2: u16) -> svuint32_t {
+    svmullb_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umullb))]
+pub fn svmullb_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umullb.nxv2i64")]
+        fn _svmullb_u64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmullb_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(umullb))]
+pub fn svmullb_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svmullb_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc")),
+    assert_instr(smullt, IMM_INDEX = 0)
+)]
+pub fn svmullt_lane_s32<const IMM_INDEX: i32>(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smullt.lane.nxv4i32"
+        )]
+        fn _svmullt_lane_s32(op1: svint16_t, op2: svint16_t, imm_index: i32) -> svint32_t;
+    }
+    unsafe { _svmullt_lane_s32(op1, op2, IMM_INDEX) }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc")),
+    assert_instr(smullt, IMM_INDEX = 0)
+)]
+pub fn svmullt_lane_s64<const IMM_INDEX: i32>(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.smullt.lane.nxv2i64"
+        )]
+        fn _svmullt_lane_s64(op1: svint32_t, op2: svint32_t, imm_index: i32) -> svint64_t;
+    }
+    unsafe { _svmullt_lane_s64(op1, op2, IMM_INDEX) }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt_lane[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc")),
+    assert_instr(umullt, IMM_INDEX = 0)
+)]
+pub fn svmullt_lane_u32<const IMM_INDEX: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umullt.lane.nxv4i32"
+        )]
+        fn _svmullt_lane_u32(op1: svint16_t, op2: svint16_t, imm_index: i32) -> svint32_t;
+    }
+    unsafe { _svmullt_lane_u32(op1.as_signed(), op2.as_signed(), IMM_INDEX).as_unsigned() }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt_lane[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(
+    all(test, not(target_env = "msvc")),
+    assert_instr(umullt, IMM_INDEX = 0)
+)]
+pub fn svmullt_lane_u64<const IMM_INDEX: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.umullt.lane.nxv2i64"
+        )]
+        fn _svmullt_lane_u64(op1: svint32_t, op2: svint32_t, imm_index: i32) -> svint64_t;
+    }
+    unsafe { _svmullt_lane_u64(op1.as_signed(), op2.as_signed(), IMM_INDEX).as_unsigned() }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(smullt))]
+pub fn svmullt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smullt.nxv8i16")]
+        fn _svmullt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmullt_s16(op1, op2) }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(smullt))]
+pub fn svmullt_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svmullt_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(smullt))]
+pub fn svmullt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smullt.nxv4i32")]
+        fn _svmullt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmullt_s32(op1, op2) }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(smullt))]
+pub fn svmullt_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svmullt_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(smullt))]
+pub fn svmullt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.smullt.nxv2i64")]
+        fn _svmullt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmullt_s64(op1, op2) }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(smullt))]
+pub fn svmullt_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svmullt_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(umullt))]
+pub fn svmullt_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umullt.nxv8i16")]
+        fn _svmullt_u16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svmullt_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(umullt))]
+pub fn svmullt_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svmullt_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(umullt))]
+pub fn svmullt_u32(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umullt.nxv4i32")]
+        fn _svmullt_u32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svmullt_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(umullt))]
+pub fn svmullt_n_u32(op1: svuint16_t, op2: u16) -> svuint32_t {
+    svmullt_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(umullt))]
+pub fn svmullt_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.umullt.nxv2i64")]
+        fn _svmullt_u64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svmullt_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svmullt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(umullt))]
+pub fn svmullt_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svmullt_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.nbsl.nxv16i8")]
+        fn _svnbsl_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svnbsl_s8(op1, op2, op3) }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_n_s8(op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svnbsl_s8(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.nbsl.nxv8i16")]
+        fn _svnbsl_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _svnbsl_s16(op1, op2, op3) }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_n_s16(op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svnbsl_s16(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.nbsl.nxv4i32")]
+        fn _svnbsl_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svnbsl_s32(op1, op2, op3) }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_n_s32(op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svnbsl_s32(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.nbsl.nxv2i64")]
+        fn _svnbsl_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svnbsl_s64(op1, op2, op3) }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_n_s64(op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svnbsl_s64(op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_u8(op1: svuint8_t, op2: svuint8_t, op3: svuint8_t) -> svuint8_t {
+    unsafe { svnbsl_s8(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_n_u8(op1: svuint8_t, op2: svuint8_t, op3: u8) -> svuint8_t {
+    svnbsl_u8(op1, op2, svdup_n_u8(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_u16(op1: svuint16_t, op2: svuint16_t, op3: svuint16_t) -> svuint16_t {
+    unsafe { svnbsl_s16(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_n_u16(op1: svuint16_t, op2: svuint16_t, op3: u16) -> svuint16_t {
+    svnbsl_u16(op1, op2, svdup_n_u16(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe { svnbsl_s32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svnbsl_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe { svnbsl_s64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise select"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnbsl[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nbsl))]
+pub fn svnbsl_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svnbsl_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Detect no matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmatch[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nmatch))]
+pub fn svnmatch_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.nmatch.nxv16i8")]
+        fn _svnmatch_s8(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svbool_t;
+    }
+    unsafe { _svnmatch_s8(pg, op1, op2) }
+}
+#[doc = "Detect no matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmatch[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nmatch))]
+pub fn svnmatch_s16(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.nmatch.nxv8i16")]
+        fn _svnmatch_s16(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svbool8_t;
+    }
+    unsafe { _svnmatch_s16(pg.sve_into(), op1, op2).sve_into() }
+}
+#[doc = "Detect no matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmatch[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nmatch))]
+pub fn svnmatch_u8(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svbool_t {
+    unsafe { svnmatch_s8(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Detect no matching elements"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svnmatch[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(nmatch))]
+pub fn svnmatch_u16(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svbool_t {
+    unsafe { svnmatch_s16(pg, op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "Polynomial multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmul[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmul))]
+pub fn svpmul_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.pmul.nxv16i8")]
+        fn _svpmul_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svpmul_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Polynomial multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmul[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmul))]
+pub fn svpmul_n_u8(op1: svuint8_t, op2: u8) -> svuint8_t {
+    svpmul_u8(op1, svdup_n_u8(op2))
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb_pair[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_pair_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.pmullb.pair.nxv16i8"
+        )]
+        fn _svpmullb_pair_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svpmullb_pair_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb_pair[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_pair_n_u8(op1: svuint8_t, op2: u8) -> svuint8_t {
+    svpmullb_pair_u8(op1, svdup_n_u8(op2))
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb_pair[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_pair_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.pmullb.pair.nxv4i32"
+        )]
+        fn _svpmullb_pair_u32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svpmullb_pair_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb_pair[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_pair_n_u32(op1: svuint32_t, op2: u32) -> svuint32_t {
+    svpmullb_pair_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb_pair[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_pair_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.pmullb.pair.nxv2i64"
+        )]
+        fn _svpmullb_pair_u64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svpmullb_pair_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb_pair[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_pair_n_u64(op1: svuint64_t, op2: u64) -> svuint64_t {
+    svpmullb_pair_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(svpmullb_pair_u8(op1, op2)) }
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svpmullb_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(svpmullb_pair_u32(op1, op2)) }
+}
+#[doc = "Polynomial multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullb))]
+pub fn svpmullb_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svpmullb_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt_pair[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_pair_u8(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.pmullt.pair.nxv16i8"
+        )]
+        fn _svpmullt_pair_u8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svpmullt_pair_u8(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt_pair[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_pair_n_u8(op1: svuint8_t, op2: u8) -> svuint8_t {
+    svpmullt_pair_u8(op1, svdup_n_u8(op2))
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt_pair[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_pair_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.pmullt.pair.nxv4i32"
+        )]
+        fn _svpmullt_pair_u32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svpmullt_pair_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt_pair[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_pair_n_u32(op1: svuint32_t, op2: u32) -> svuint32_t {
+    svpmullt_pair_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt_pair[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_pair_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.pmullt.pair.nxv2i64"
+        )]
+        fn _svpmullt_pair_u64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svpmullt_pair_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt_pair[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_pair_n_u64(op1: svuint64_t, op2: u64) -> svuint64_t {
+    svpmullt_pair_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe { crate::intrinsics::transmute_unchecked(svpmullt_pair_u8(op1, op2)) }
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svpmullt_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe { crate::intrinsics::transmute_unchecked(svpmullt_pair_u32(op1, op2)) }
+}
+#[doc = "Polynomial multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svpmullt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-aes")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(pmullt))]
+pub fn svpmullt_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svpmullt_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqabs.nxv16i8")]
+        fn _svqabs_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqabs_s8_m(inactive, pg, op) }
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s8_x(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svqabs_s8_m(op, pg, op)
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s8_z(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svqabs_s8_m(svdup_n_s8(0), pg, op)
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s16_m(inactive: svint16_t, pg: svbool_t, op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqabs.nxv8i16")]
+        fn _svqabs_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqabs_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s16_x(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svqabs_s16_m(op, pg, op)
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s16_z(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svqabs_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqabs.nxv4i32")]
+        fn _svqabs_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqabs_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svqabs_s32_m(op, pg, op)
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svqabs_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqabs.nxv2i64")]
+        fn _svqabs_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqabs_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svqabs_s64_m(op, pg, op)
+}
+#[doc = "Saturating absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqabs[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqabs))]
+pub fn svqabs_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svqabs_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqadd.nxv16i8")]
+        fn _svqadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqadd_s8_m(pg, op1, op2) }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqadd_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqadd_s8_m(pg, op1, op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqadd_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqadd_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqadd_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqadd.nxv8i16")]
+        fn _svqadd_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqadd_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqadd_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqadd_s16_m(pg, op1, op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqadd_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqadd_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqadd_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqadd.nxv4i32")]
+        fn _svqadd_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqadd_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqadd_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqadd_s32_m(pg, op1, op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqadd_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqadd_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqadd_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqadd.nxv2i64")]
+        fn _svqadd_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqadd_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqadd_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqadd_s64_m(pg, op1, op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqadd_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqadd_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqadd))]
+pub fn svqadd_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqadd_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqadd.nxv16i8")]
+        fn _svqadd_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqadd_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqadd_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svqadd_u8_m(pg, op1, op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqadd_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svqadd_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqadd_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqadd.nxv8i16")]
+        fn _svqadd_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqadd_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqadd_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svqadd_u16_m(pg, op1, op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqadd_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svqadd_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqadd_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqadd.nxv4i32")]
+        fn _svqadd_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqadd_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqadd_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svqadd_u32_m(pg, op1, op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqadd_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svqadd_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqadd_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqadd.nxv2i64")]
+        fn _svqadd_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqadd_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqadd_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svqadd_u64_m(pg, op1, op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqadd_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svqadd_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqadd[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqadd))]
+pub fn svqadd_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqadd_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqcadd[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqcadd, IMM_ROTATION = 90))]
+pub fn svqcadd_s8<const IMM_ROTATION: i32>(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqcadd.x.nxv16i8"
+        )]
+        fn _svqcadd_s8(op1: svint8_t, op2: svint8_t, imm_rotation: i32) -> svint8_t;
+    }
+    unsafe { _svqcadd_s8(op1, op2, IMM_ROTATION) }
+}
+#[doc = "Saturating complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqcadd[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqcadd, IMM_ROTATION = 90))]
+pub fn svqcadd_s16<const IMM_ROTATION: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqcadd.x.nxv8i16"
+        )]
+        fn _svqcadd_s16(op1: svint16_t, op2: svint16_t, imm_rotation: i32) -> svint16_t;
+    }
+    unsafe { _svqcadd_s16(op1, op2, IMM_ROTATION) }
+}
+#[doc = "Saturating complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqcadd[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqcadd, IMM_ROTATION = 90))]
+pub fn svqcadd_s32<const IMM_ROTATION: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqcadd.x.nxv4i32"
+        )]
+        fn _svqcadd_s32(op1: svint32_t, op2: svint32_t, imm_rotation: i32) -> svint32_t;
+    }
+    unsafe { _svqcadd_s32(op1, op2, IMM_ROTATION) }
+}
+#[doc = "Saturating complex add with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqcadd[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqcadd, IMM_ROTATION = 90))]
+pub fn svqcadd_s64<const IMM_ROTATION: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert!(IMM_ROTATION == 90 || IMM_ROTATION == 270);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqcadd.x.nxv2i64"
+        )]
+        fn _svqcadd_s64(op1: svint64_t, op2: svint64_t, imm_rotation: i32) -> svint64_t;
+    }
+    unsafe { _svqcadd_s64(op1, op2, IMM_ROTATION) }
+}
+#[doc = "Saturating doubling multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalb_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalb, IMM_INDEX = 0))]
+pub fn svqdmlalb_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalb.lane.nxv4i32"
+        )]
+        fn _svqdmlalb_lane_s32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svqdmlalb_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalb_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalb, IMM_INDEX = 0))]
+pub fn svqdmlalb_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalb.lane.nxv2i64"
+        )]
+        fn _svqdmlalb_lane_s64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svqdmlalb_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalb))]
+pub fn svqdmlalb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalb.nxv8i16"
+        )]
+        fn _svqdmlalb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svqdmlalb_s16(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalb))]
+pub fn svqdmlalb_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svqdmlalb_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Saturating doubling multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalb))]
+pub fn svqdmlalb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalb.nxv4i32"
+        )]
+        fn _svqdmlalb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svqdmlalb_s32(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalb))]
+pub fn svqdmlalb_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svqdmlalb_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Saturating doubling multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalb))]
+pub fn svqdmlalb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalb.nxv2i64"
+        )]
+        fn _svqdmlalb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svqdmlalb_s64(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-add long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalb))]
+pub fn svqdmlalb_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svqdmlalb_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Saturating doubling multiply-add long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalbt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalbt))]
+pub fn svqdmlalbt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalbt.nxv8i16"
+        )]
+        fn _svqdmlalbt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svqdmlalbt_s16(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-add long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalbt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalbt))]
+pub fn svqdmlalbt_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svqdmlalbt_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Saturating doubling multiply-add long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalbt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalbt))]
+pub fn svqdmlalbt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalbt.nxv4i32"
+        )]
+        fn _svqdmlalbt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svqdmlalbt_s32(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-add long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalbt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalbt))]
+pub fn svqdmlalbt_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svqdmlalbt_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Saturating doubling multiply-add long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalbt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalbt))]
+pub fn svqdmlalbt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalbt.nxv2i64"
+        )]
+        fn _svqdmlalbt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svqdmlalbt_s64(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-add long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalbt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalbt))]
+pub fn svqdmlalbt_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svqdmlalbt_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Saturating doubling multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalt_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalt, IMM_INDEX = 0))]
+pub fn svqdmlalt_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalt.lane.nxv4i32"
+        )]
+        fn _svqdmlalt_lane_s32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svqdmlalt_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalt_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalt, IMM_INDEX = 0))]
+pub fn svqdmlalt_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalt.lane.nxv2i64"
+        )]
+        fn _svqdmlalt_lane_s64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svqdmlalt_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalt))]
+pub fn svqdmlalt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalt.nxv8i16"
+        )]
+        fn _svqdmlalt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svqdmlalt_s16(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalt))]
+pub fn svqdmlalt_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svqdmlalt_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Saturating doubling multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalt))]
+pub fn svqdmlalt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalt.nxv4i32"
+        )]
+        fn _svqdmlalt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svqdmlalt_s32(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalt))]
+pub fn svqdmlalt_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svqdmlalt_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Saturating doubling multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalt))]
+pub fn svqdmlalt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlalt.nxv2i64"
+        )]
+        fn _svqdmlalt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svqdmlalt_s64(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-add long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlalt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlalt))]
+pub fn svqdmlalt_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svqdmlalt_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslb_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslb, IMM_INDEX = 0))]
+pub fn svqdmlslb_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslb.lane.nxv4i32"
+        )]
+        fn _svqdmlslb_lane_s32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svqdmlslb_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslb_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslb, IMM_INDEX = 0))]
+pub fn svqdmlslb_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslb.lane.nxv2i64"
+        )]
+        fn _svqdmlslb_lane_s64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svqdmlslb_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslb))]
+pub fn svqdmlslb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslb.nxv8i16"
+        )]
+        fn _svqdmlslb_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svqdmlslb_s16(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslb))]
+pub fn svqdmlslb_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svqdmlslb_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslb))]
+pub fn svqdmlslb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslb.nxv4i32"
+        )]
+        fn _svqdmlslb_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svqdmlslb_s32(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslb))]
+pub fn svqdmlslb_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svqdmlslb_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslb))]
+pub fn svqdmlslb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslb.nxv2i64"
+        )]
+        fn _svqdmlslb_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svqdmlslb_s64(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslb))]
+pub fn svqdmlslb_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svqdmlslb_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslbt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslbt))]
+pub fn svqdmlslbt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslbt.nxv8i16"
+        )]
+        fn _svqdmlslbt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svqdmlslbt_s16(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslbt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslbt))]
+pub fn svqdmlslbt_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svqdmlslbt_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslbt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslbt))]
+pub fn svqdmlslbt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslbt.nxv4i32"
+        )]
+        fn _svqdmlslbt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svqdmlslbt_s32(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslbt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslbt))]
+pub fn svqdmlslbt_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svqdmlslbt_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslbt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslbt))]
+pub fn svqdmlslbt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslbt.nxv2i64"
+        )]
+        fn _svqdmlslbt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svqdmlslbt_s64(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-subtract long (bottom × top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslbt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslbt))]
+pub fn svqdmlslbt_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svqdmlslbt_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Saturating doubling multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslt_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslt, IMM_INDEX = 0))]
+pub fn svqdmlslt_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslt.lane.nxv4i32"
+        )]
+        fn _svqdmlslt_lane_s32(
+            op1: svint32_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svqdmlslt_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslt_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslt, IMM_INDEX = 0))]
+pub fn svqdmlslt_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslt.lane.nxv2i64"
+        )]
+        fn _svqdmlslt_lane_s64(
+            op1: svint64_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svqdmlslt_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslt))]
+pub fn svqdmlslt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslt.nxv8i16"
+        )]
+        fn _svqdmlslt_s16(op1: svint16_t, op2: svint8_t, op3: svint8_t) -> svint16_t;
+    }
+    unsafe { _svqdmlslt_s16(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslt))]
+pub fn svqdmlslt_n_s16(op1: svint16_t, op2: svint8_t, op3: i8) -> svint16_t {
+    svqdmlslt_s16(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Saturating doubling multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslt))]
+pub fn svqdmlslt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslt.nxv4i32"
+        )]
+        fn _svqdmlslt_s32(op1: svint32_t, op2: svint16_t, op3: svint16_t) -> svint32_t;
+    }
+    unsafe { _svqdmlslt_s32(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslt))]
+pub fn svqdmlslt_n_s32(op1: svint32_t, op2: svint16_t, op3: i16) -> svint32_t {
+    svqdmlslt_s32(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Saturating doubling multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslt))]
+pub fn svqdmlslt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmlslt.nxv2i64"
+        )]
+        fn _svqdmlslt_s64(op1: svint64_t, op2: svint32_t, op3: svint32_t) -> svint64_t;
+    }
+    unsafe { _svqdmlslt_s64(op1, op2, op3) }
+}
+#[doc = "Saturating doubling multiply-subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmlslt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmlslt))]
+pub fn svqdmlslt_n_s64(op1: svint64_t, op2: svint32_t, op3: i32) -> svint64_t {
+    svqdmlslt_s64(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh, IMM_INDEX = 0))]
+pub fn svqdmulh_lane_s16<const IMM_INDEX: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmulh.lane.nxv8i16"
+        )]
+        fn _svqdmulh_lane_s16(op1: svint16_t, op2: svint16_t, imm_index: i32) -> svint16_t;
+    }
+    unsafe { _svqdmulh_lane_s16(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh, IMM_INDEX = 0))]
+pub fn svqdmulh_lane_s32<const IMM_INDEX: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmulh.lane.nxv4i32"
+        )]
+        fn _svqdmulh_lane_s32(op1: svint32_t, op2: svint32_t, imm_index: i32) -> svint32_t;
+    }
+    unsafe { _svqdmulh_lane_s32(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh, IMM_INDEX = 0))]
+pub fn svqdmulh_lane_s64<const IMM_INDEX: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmulh.lane.nxv2i64"
+        )]
+        fn _svqdmulh_lane_s64(op1: svint64_t, op2: svint64_t, imm_index: i32) -> svint64_t;
+    }
+    unsafe { _svqdmulh_lane_s64(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub fn svqdmulh_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmulh.nxv16i8"
+        )]
+        fn _svqdmulh_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqdmulh_s8(op1, op2) }
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub fn svqdmulh_n_s8(op1: svint8_t, op2: i8) -> svint8_t {
+    svqdmulh_s8(op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub fn svqdmulh_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmulh.nxv8i16"
+        )]
+        fn _svqdmulh_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqdmulh_s16(op1, op2) }
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub fn svqdmulh_n_s16(op1: svint16_t, op2: i16) -> svint16_t {
+    svqdmulh_s16(op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub fn svqdmulh_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmulh.nxv4i32"
+        )]
+        fn _svqdmulh_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqdmulh_s32(op1, op2) }
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub fn svqdmulh_n_s32(op1: svint32_t, op2: i32) -> svint32_t {
+    svqdmulh_s32(op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub fn svqdmulh_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmulh.nxv2i64"
+        )]
+        fn _svqdmulh_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqdmulh_s64(op1, op2) }
+}
+#[doc = "Saturating doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmulh[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmulh))]
+pub fn svqdmulh_n_s64(op1: svint64_t, op2: i64) -> svint64_t {
+    svqdmulh_s64(op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating doubling multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullb_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullb, IMM_INDEX = 0))]
+pub fn svqdmullb_lane_s32<const IMM_INDEX: i32>(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullb.lane.nxv4i32"
+        )]
+        fn _svqdmullb_lane_s32(op1: svint16_t, op2: svint16_t, imm_index: i32) -> svint32_t;
+    }
+    unsafe { _svqdmullb_lane_s32(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullb_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullb, IMM_INDEX = 0))]
+pub fn svqdmullb_lane_s64<const IMM_INDEX: i32>(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullb.lane.nxv2i64"
+        )]
+        fn _svqdmullb_lane_s64(op1: svint32_t, op2: svint32_t, imm_index: i32) -> svint64_t;
+    }
+    unsafe { _svqdmullb_lane_s64(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullb))]
+pub fn svqdmullb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullb.nxv8i16"
+        )]
+        fn _svqdmullb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svqdmullb_s16(op1, op2) }
+}
+#[doc = "Saturating doubling multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullb))]
+pub fn svqdmullb_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svqdmullb_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating doubling multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullb))]
+pub fn svqdmullb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullb.nxv4i32"
+        )]
+        fn _svqdmullb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svqdmullb_s32(op1, op2) }
+}
+#[doc = "Saturating doubling multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullb))]
+pub fn svqdmullb_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svqdmullb_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating doubling multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullb))]
+pub fn svqdmullb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullb.nxv2i64"
+        )]
+        fn _svqdmullb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svqdmullb_s64(op1, op2) }
+}
+#[doc = "Saturating doubling multiply long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullb))]
+pub fn svqdmullb_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svqdmullb_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating doubling multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullt_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullt, IMM_INDEX = 0))]
+pub fn svqdmullt_lane_s32<const IMM_INDEX: i32>(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullt.lane.nxv4i32"
+        )]
+        fn _svqdmullt_lane_s32(op1: svint16_t, op2: svint16_t, imm_index: i32) -> svint32_t;
+    }
+    unsafe { _svqdmullt_lane_s32(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullt_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullt, IMM_INDEX = 0))]
+pub fn svqdmullt_lane_s64<const IMM_INDEX: i32>(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullt.lane.nxv2i64"
+        )]
+        fn _svqdmullt_lane_s64(op1: svint32_t, op2: svint32_t, imm_index: i32) -> svint64_t;
+    }
+    unsafe { _svqdmullt_lane_s64(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating doubling multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullt))]
+pub fn svqdmullt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullt.nxv8i16"
+        )]
+        fn _svqdmullt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svqdmullt_s16(op1, op2) }
+}
+#[doc = "Saturating doubling multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullt))]
+pub fn svqdmullt_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svqdmullt_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating doubling multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullt))]
+pub fn svqdmullt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullt.nxv4i32"
+        )]
+        fn _svqdmullt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svqdmullt_s32(op1, op2) }
+}
+#[doc = "Saturating doubling multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullt))]
+pub fn svqdmullt_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svqdmullt_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating doubling multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullt))]
+pub fn svqdmullt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqdmullt.nxv2i64"
+        )]
+        fn _svqdmullt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svqdmullt_s64(op1, op2) }
+}
+#[doc = "Saturating doubling multiply long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqdmullt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqdmullt))]
+pub fn svqdmullt_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svqdmullt_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqneg.nxv16i8")]
+        fn _svqneg_s8_m(inactive: svint8_t, pg: svbool_t, op: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqneg_s8_m(inactive, pg, op) }
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s8_x(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svqneg_s8_m(op, pg, op)
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s8_z(pg: svbool_t, op: svint8_t) -> svint8_t {
+    svqneg_s8_m(svdup_n_s8(0), pg, op)
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s16_m(inactive: svint16_t, pg: svbool_t, op: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqneg.nxv8i16")]
+        fn _svqneg_s16_m(inactive: svint16_t, pg: svbool8_t, op: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqneg_s16_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s16_x(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svqneg_s16_m(op, pg, op)
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s16_z(pg: svbool_t, op: svint16_t) -> svint16_t {
+    svqneg_s16_m(svdup_n_s16(0), pg, op)
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s32_m(inactive: svint32_t, pg: svbool_t, op: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqneg.nxv4i32")]
+        fn _svqneg_s32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqneg_s32_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s32_x(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svqneg_s32_m(op, pg, op)
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s32_z(pg: svbool_t, op: svint32_t) -> svint32_t {
+    svqneg_s32_m(svdup_n_s32(0), pg, op)
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s64_m(inactive: svint64_t, pg: svbool_t, op: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqneg.nxv2i64")]
+        fn _svqneg_s64_m(inactive: svint64_t, pg: svbool2_t, op: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqneg_s64_m(inactive, pg.sve_into(), op) }
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s64_x(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svqneg_s64_m(op, pg, op)
+}
+#[doc = "Saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqneg[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqneg))]
+pub fn svqneg_s64_z(pg: svbool_t, op: svint64_t) -> svint64_t {
+    svqneg_s64_m(svdup_n_s64(0), pg, op)
+}
+#[doc = "Saturating rounding doubling complex multiply-add high with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdcmlah_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdcmlah, IMM_INDEX = 0, IMM_ROTATION = 90))]
+pub fn svqrdcmlah_lane_s16<const IMM_INDEX: i32, const IMM_ROTATION: i32>(
+    op1: svint16_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16"
+        )]
+        fn _svqrdcmlah_lane_s16(
+            op1: svint16_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            imm_index: i32,
+            imm_rotation: i32,
+        ) -> svint16_t;
+    }
+    unsafe { _svqrdcmlah_lane_s16(op1, op2, op3, IMM_INDEX, IMM_ROTATION) }
+}
+#[doc = "Saturating rounding doubling complex multiply-add high with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdcmlah_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdcmlah, IMM_INDEX = 0, IMM_ROTATION = 90))]
+pub fn svqrdcmlah_lane_s32<const IMM_INDEX: i32, const IMM_ROTATION: i32>(
+    op1: svint32_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32"
+        )]
+        fn _svqrdcmlah_lane_s32(
+            op1: svint32_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            imm_index: i32,
+            imm_rotation: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svqrdcmlah_lane_s32(op1, op2, op3, IMM_INDEX, IMM_ROTATION) }
+}
+#[doc = "Saturating rounding doubling complex multiply-add high with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdcmlah[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdcmlah, IMM_ROTATION = 90))]
+pub fn svqrdcmlah_s8<const IMM_ROTATION: i32>(
+    op1: svint8_t,
+    op2: svint8_t,
+    op3: svint8_t,
+) -> svint8_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdcmlah.x.nxv16i8"
+        )]
+        fn _svqrdcmlah_s8(
+            op1: svint8_t,
+            op2: svint8_t,
+            op3: svint8_t,
+            imm_rotation: i32,
+        ) -> svint8_t;
+    }
+    unsafe { _svqrdcmlah_s8(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Saturating rounding doubling complex multiply-add high with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdcmlah[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdcmlah, IMM_ROTATION = 90))]
+pub fn svqrdcmlah_s16<const IMM_ROTATION: i32>(
+    op1: svint16_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint16_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdcmlah.x.nxv8i16"
+        )]
+        fn _svqrdcmlah_s16(
+            op1: svint16_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            imm_rotation: i32,
+        ) -> svint16_t;
+    }
+    unsafe { _svqrdcmlah_s16(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Saturating rounding doubling complex multiply-add high with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdcmlah[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdcmlah, IMM_ROTATION = 90))]
+pub fn svqrdcmlah_s32<const IMM_ROTATION: i32>(
+    op1: svint32_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint32_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdcmlah.x.nxv4i32"
+        )]
+        fn _svqrdcmlah_s32(
+            op1: svint32_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            imm_rotation: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svqrdcmlah_s32(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Saturating rounding doubling complex multiply-add high with rotate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdcmlah[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdcmlah, IMM_ROTATION = 90))]
+pub fn svqrdcmlah_s64<const IMM_ROTATION: i32>(
+    op1: svint64_t,
+    op2: svint64_t,
+    op3: svint64_t,
+) -> svint64_t {
+    static_assert!(
+        IMM_ROTATION == 0 || IMM_ROTATION == 90 || IMM_ROTATION == 180 || IMM_ROTATION == 270
+    );
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdcmlah.x.nxv2i64"
+        )]
+        fn _svqrdcmlah_s64(
+            op1: svint64_t,
+            op2: svint64_t,
+            op3: svint64_t,
+            imm_rotation: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svqrdcmlah_s64(op1, op2, op3, IMM_ROTATION) }
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah, IMM_INDEX = 0))]
+pub fn svqrdmlah_lane_s16<const IMM_INDEX: i32>(
+    op1: svint16_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlah.lane.nxv8i16"
+        )]
+        fn _svqrdmlah_lane_s16(
+            op1: svint16_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint16_t;
+    }
+    unsafe { _svqrdmlah_lane_s16(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah, IMM_INDEX = 0))]
+pub fn svqrdmlah_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlah.lane.nxv4i32"
+        )]
+        fn _svqrdmlah_lane_s32(
+            op1: svint32_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svqrdmlah_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah, IMM_INDEX = 0))]
+pub fn svqrdmlah_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint64_t,
+    op3: svint64_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlah.lane.nxv2i64"
+        )]
+        fn _svqrdmlah_lane_s64(
+            op1: svint64_t,
+            op2: svint64_t,
+            op3: svint64_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svqrdmlah_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+pub fn svqrdmlah_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlah.nxv16i8"
+        )]
+        fn _svqrdmlah_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqrdmlah_s8(op1, op2, op3) }
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+pub fn svqrdmlah_n_s8(op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svqrdmlah_s8(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+pub fn svqrdmlah_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlah.nxv8i16"
+        )]
+        fn _svqrdmlah_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqrdmlah_s16(op1, op2, op3) }
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+pub fn svqrdmlah_n_s16(op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svqrdmlah_s16(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+pub fn svqrdmlah_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlah.nxv4i32"
+        )]
+        fn _svqrdmlah_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqrdmlah_s32(op1, op2, op3) }
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+pub fn svqrdmlah_n_s32(op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svqrdmlah_s32(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+pub fn svqrdmlah_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlah.nxv2i64"
+        )]
+        fn _svqrdmlah_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqrdmlah_s64(op1, op2, op3) }
+}
+#[doc = "Saturating rounding doubling multiply-add high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlah[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlah))]
+pub fn svqrdmlah_n_s64(op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svqrdmlah_s64(op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, IMM_INDEX = 0))]
+pub fn svqrdmlsh_lane_s16<const IMM_INDEX: i32>(
+    op1: svint16_t,
+    op2: svint16_t,
+    op3: svint16_t,
+) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlsh.lane.nxv8i16"
+        )]
+        fn _svqrdmlsh_lane_s16(
+            op1: svint16_t,
+            op2: svint16_t,
+            op3: svint16_t,
+            IMM_INDEX: i32,
+        ) -> svint16_t;
+    }
+    unsafe { _svqrdmlsh_lane_s16(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, IMM_INDEX = 0))]
+pub fn svqrdmlsh_lane_s32<const IMM_INDEX: i32>(
+    op1: svint32_t,
+    op2: svint32_t,
+    op3: svint32_t,
+) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlsh.lane.nxv4i32"
+        )]
+        fn _svqrdmlsh_lane_s32(
+            op1: svint32_t,
+            op2: svint32_t,
+            op3: svint32_t,
+            IMM_INDEX: i32,
+        ) -> svint32_t;
+    }
+    unsafe { _svqrdmlsh_lane_s32(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh, IMM_INDEX = 0))]
+pub fn svqrdmlsh_lane_s64<const IMM_INDEX: i32>(
+    op1: svint64_t,
+    op2: svint64_t,
+    op3: svint64_t,
+) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlsh.lane.nxv2i64"
+        )]
+        fn _svqrdmlsh_lane_s64(
+            op1: svint64_t,
+            op2: svint64_t,
+            op3: svint64_t,
+            IMM_INDEX: i32,
+        ) -> svint64_t;
+    }
+    unsafe { _svqrdmlsh_lane_s64(op1, op2, op3, IMM_INDEX) }
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+pub fn svqrdmlsh_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlsh.nxv16i8"
+        )]
+        fn _svqrdmlsh_s8(op1: svint8_t, op2: svint8_t, op3: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqrdmlsh_s8(op1, op2, op3) }
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+pub fn svqrdmlsh_n_s8(op1: svint8_t, op2: svint8_t, op3: i8) -> svint8_t {
+    svqrdmlsh_s8(op1, op2, svdup_n_s8(op3))
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+pub fn svqrdmlsh_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlsh.nxv8i16"
+        )]
+        fn _svqrdmlsh_s16(op1: svint16_t, op2: svint16_t, op3: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqrdmlsh_s16(op1, op2, op3) }
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+pub fn svqrdmlsh_n_s16(op1: svint16_t, op2: svint16_t, op3: i16) -> svint16_t {
+    svqrdmlsh_s16(op1, op2, svdup_n_s16(op3))
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+pub fn svqrdmlsh_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlsh.nxv4i32"
+        )]
+        fn _svqrdmlsh_s32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqrdmlsh_s32(op1, op2, op3) }
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+pub fn svqrdmlsh_n_s32(op1: svint32_t, op2: svint32_t, op3: i32) -> svint32_t {
+    svqrdmlsh_s32(op1, op2, svdup_n_s32(op3))
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+pub fn svqrdmlsh_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmlsh.nxv2i64"
+        )]
+        fn _svqrdmlsh_s64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqrdmlsh_s64(op1, op2, op3) }
+}
+#[doc = "Saturating rounding doubling multiply-subtract high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmlsh[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmlsh))]
+pub fn svqrdmlsh_n_s64(op1: svint64_t, op2: svint64_t, op3: i64) -> svint64_t {
+    svqrdmlsh_s64(op1, op2, svdup_n_s64(op3))
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh_lane[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh, IMM_INDEX = 0))]
+pub fn svqrdmulh_lane_s16<const IMM_INDEX: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert_range!(IMM_INDEX, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmulh.lane.nxv8i16"
+        )]
+        fn _svqrdmulh_lane_s16(op1: svint16_t, op2: svint16_t, imm_index: i32) -> svint16_t;
+    }
+    unsafe { _svqrdmulh_lane_s16(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh_lane[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh, IMM_INDEX = 0))]
+pub fn svqrdmulh_lane_s32<const IMM_INDEX: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert_range!(IMM_INDEX, 0..=3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmulh.lane.nxv4i32"
+        )]
+        fn _svqrdmulh_lane_s32(op1: svint32_t, op2: svint32_t, imm_index: i32) -> svint32_t;
+    }
+    unsafe { _svqrdmulh_lane_s32(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh_lane[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh, IMM_INDEX = 0))]
+pub fn svqrdmulh_lane_s64<const IMM_INDEX: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert_range!(IMM_INDEX, 0..=1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmulh.lane.nxv2i64"
+        )]
+        fn _svqrdmulh_lane_s64(op1: svint64_t, op2: svint64_t, imm_index: i32) -> svint64_t;
+    }
+    unsafe { _svqrdmulh_lane_s64(op1, op2, IMM_INDEX) }
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub fn svqrdmulh_s8(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmulh.nxv16i8"
+        )]
+        fn _svqrdmulh_s8(op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqrdmulh_s8(op1, op2) }
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub fn svqrdmulh_n_s8(op1: svint8_t, op2: i8) -> svint8_t {
+    svqrdmulh_s8(op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub fn svqrdmulh_s16(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmulh.nxv8i16"
+        )]
+        fn _svqrdmulh_s16(op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqrdmulh_s16(op1, op2) }
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub fn svqrdmulh_n_s16(op1: svint16_t, op2: i16) -> svint16_t {
+    svqrdmulh_s16(op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub fn svqrdmulh_s32(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmulh.nxv4i32"
+        )]
+        fn _svqrdmulh_s32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqrdmulh_s32(op1, op2) }
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub fn svqrdmulh_n_s32(op1: svint32_t, op2: i32) -> svint32_t {
+    svqrdmulh_s32(op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub fn svqrdmulh_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrdmulh.nxv2i64"
+        )]
+        fn _svqrdmulh_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqrdmulh_s64(op1, op2) }
+}
+#[doc = "Saturating rounding doubling multiply high"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrdmulh[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrdmulh))]
+pub fn svqrdmulh_n_s64(op1: svint64_t, op2: i64) -> svint64_t {
+    svqrdmulh_s64(op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqrshl.nxv16i8")]
+        fn _svqrshl_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqrshl_s8_m(pg, op1, op2) }
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqrshl_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqrshl_s8_m(pg, op1, op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqrshl_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqrshl_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqrshl_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqrshl.nxv8i16")]
+        fn _svqrshl_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqrshl_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqrshl_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqrshl_s16_m(pg, op1, op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqrshl_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqrshl_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqrshl_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqrshl.nxv4i32")]
+        fn _svqrshl_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqrshl_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqrshl_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqrshl_s32_m(pg, op1, op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqrshl_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqrshl_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqrshl_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqrshl.nxv2i64")]
+        fn _svqrshl_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqrshl_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqrshl_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqrshl_s64_m(pg, op1, op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqrshl_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqrshl_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshl))]
+pub fn svqrshl_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqrshl_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u8_m(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqrshl.nxv16i8")]
+        fn _svqrshl_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqrshl_u8_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svqrshl_u8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u8_x(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    svqrshl_u8_m(pg, op1, op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svqrshl_u8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u8_z(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    svqrshl_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svqrshl_u8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u16_m(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqrshl.nxv8i16")]
+        fn _svqrshl_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqrshl_u16_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svqrshl_u16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u16_x(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    svqrshl_u16_m(pg, op1, op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svqrshl_u16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u16_z(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    svqrshl_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svqrshl_u16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u32_m(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqrshl.nxv4i32")]
+        fn _svqrshl_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqrshl_u32_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svqrshl_u32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u32_x(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    svqrshl_u32_m(pg, op1, op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svqrshl_u32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u32_z(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    svqrshl_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svqrshl_u32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u64_m(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqrshl.nxv2i64")]
+        fn _svqrshl_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqrshl_u64_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svqrshl_u64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u64_x(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    svqrshl_u64_m(pg, op1, op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svqrshl_u64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_u64_z(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    svqrshl_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshl[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshl))]
+pub fn svqrshl_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svqrshl_u64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrnb, IMM2 = 1))]
+pub fn svqrshrnb_n_s16<const IMM2: i32>(op1: svint16_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrnb.nxv8i16"
+        )]
+        fn _svqrshrnb_n_s16(op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqrshrnb_n_s16(op1, IMM2) }
+}
+#[doc = "Saturating rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrnb, IMM2 = 1))]
+pub fn svqrshrnb_n_s32<const IMM2: i32>(op1: svint32_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrnb.nxv4i32"
+        )]
+        fn _svqrshrnb_n_s32(op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqrshrnb_n_s32(op1, IMM2) }
+}
+#[doc = "Saturating rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrnb, IMM2 = 1))]
+pub fn svqrshrnb_n_s64<const IMM2: i32>(op1: svint64_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrnb.nxv2i64"
+        )]
+        fn _svqrshrnb_n_s64(op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqrshrnb_n_s64(op1, IMM2) }
+}
+#[doc = "Saturating rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshrnb, IMM2 = 1))]
+pub fn svqrshrnb_n_u16<const IMM2: i32>(op1: svuint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqrshrnb.nxv8i16"
+        )]
+        fn _svqrshrnb_n_u16(op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqrshrnb_n_u16(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshrnb, IMM2 = 1))]
+pub fn svqrshrnb_n_u32<const IMM2: i32>(op1: svuint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqrshrnb.nxv4i32"
+        )]
+        fn _svqrshrnb_n_u32(op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqrshrnb_n_u32(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshrnb, IMM2 = 1))]
+pub fn svqrshrnb_n_u64<const IMM2: i32>(op1: svuint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqrshrnb.nxv2i64"
+        )]
+        fn _svqrshrnb_n_u64(op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqrshrnb_n_u64(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrnt, IMM2 = 1))]
+pub fn svqrshrnt_n_s16<const IMM2: i32>(even: svint8_t, op1: svint16_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrnt.nxv8i16"
+        )]
+        fn _svqrshrnt_n_s16(even: svint8_t, op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqrshrnt_n_s16(even, op1, IMM2) }
+}
+#[doc = "Saturating rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrnt, IMM2 = 1))]
+pub fn svqrshrnt_n_s32<const IMM2: i32>(even: svint16_t, op1: svint32_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrnt.nxv4i32"
+        )]
+        fn _svqrshrnt_n_s32(even: svint16_t, op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqrshrnt_n_s32(even, op1, IMM2) }
+}
+#[doc = "Saturating rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrnt, IMM2 = 1))]
+pub fn svqrshrnt_n_s64<const IMM2: i32>(even: svint32_t, op1: svint64_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrnt.nxv2i64"
+        )]
+        fn _svqrshrnt_n_s64(even: svint32_t, op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqrshrnt_n_s64(even, op1, IMM2) }
+}
+#[doc = "Saturating rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshrnt, IMM2 = 1))]
+pub fn svqrshrnt_n_u16<const IMM2: i32>(even: svuint8_t, op1: svuint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqrshrnt.nxv8i16"
+        )]
+        fn _svqrshrnt_n_u16(even: svint8_t, op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqrshrnt_n_u16(even.as_signed(), op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshrnt, IMM2 = 1))]
+pub fn svqrshrnt_n_u32<const IMM2: i32>(even: svuint16_t, op1: svuint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqrshrnt.nxv4i32"
+        )]
+        fn _svqrshrnt_n_u32(even: svint16_t, op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqrshrnt_n_u32(even.as_signed(), op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrnt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqrshrnt, IMM2 = 1))]
+pub fn svqrshrnt_n_u64<const IMM2: i32>(even: svuint32_t, op1: svuint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqrshrnt.nxv2i64"
+        )]
+        fn _svqrshrnt_n_u64(even: svint32_t, op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqrshrnt_n_u64(even.as_signed(), op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right unsigned narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrunb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrunb, IMM2 = 1))]
+pub fn svqrshrunb_n_s16<const IMM2: i32>(op1: svint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrunb.nxv8i16"
+        )]
+        fn _svqrshrunb_n_s16(op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqrshrunb_n_s16(op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right unsigned narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrunb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrunb, IMM2 = 1))]
+pub fn svqrshrunb_n_s32<const IMM2: i32>(op1: svint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrunb.nxv4i32"
+        )]
+        fn _svqrshrunb_n_s32(op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqrshrunb_n_s32(op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right unsigned narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrunb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrunb, IMM2 = 1))]
+pub fn svqrshrunb_n_s64<const IMM2: i32>(op1: svint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrunb.nxv2i64"
+        )]
+        fn _svqrshrunb_n_s64(op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqrshrunb_n_s64(op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right unsigned narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrunt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrunt, IMM2 = 1))]
+pub fn svqrshrunt_n_s16<const IMM2: i32>(even: svuint8_t, op1: svint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrunt.nxv8i16"
+        )]
+        fn _svqrshrunt_n_s16(even: svint8_t, op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqrshrunt_n_s16(even.as_signed(), op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right unsigned narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrunt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrunt, IMM2 = 1))]
+pub fn svqrshrunt_n_s32<const IMM2: i32>(even: svuint16_t, op1: svint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrunt.nxv4i32"
+        )]
+        fn _svqrshrunt_n_s32(even: svint16_t, op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqrshrunt_n_s32(even.as_signed(), op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating rounding shift right unsigned narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqrshrunt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqrshrunt, IMM2 = 1))]
+pub fn svqrshrunt_n_s64<const IMM2: i32>(even: svuint32_t, op1: svint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqrshrunt.nxv2i64"
+        )]
+        fn _svqrshrunt_n_s64(even: svint32_t, op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqrshrunt_n_s64(even.as_signed(), op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqshl.nxv16i8")]
+        fn _svqshl_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqshl_s8_m(pg, op1, op2) }
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqshl_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqshl_s8_m(pg, op1, op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqshl_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqshl_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqshl_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqshl.nxv8i16")]
+        fn _svqshl_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqshl_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqshl_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqshl_s16_m(pg, op1, op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqshl_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqshl_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqshl_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqshl.nxv4i32")]
+        fn _svqshl_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqshl_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqshl_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqshl_s32_m(pg, op1, op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqshl_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqshl_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqshl_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqshl.nxv2i64")]
+        fn _svqshl_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqshl_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqshl_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqshl_s64_m(pg, op1, op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqshl_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqshl_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshl))]
+pub fn svqshl_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqshl_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u8_m(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqshl.nxv16i8")]
+        fn _svqshl_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqshl_u8_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svqshl_u8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u8_x(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    svqshl_u8_m(pg, op1, op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svqshl_u8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u8_z(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    svqshl_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svqshl_u8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u16_m(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqshl.nxv8i16")]
+        fn _svqshl_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqshl_u16_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svqshl_u16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u16_x(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    svqshl_u16_m(pg, op1, op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svqshl_u16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u16_z(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    svqshl_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svqshl_u16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u32_m(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqshl.nxv4i32")]
+        fn _svqshl_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqshl_u32_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svqshl_u32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u32_x(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    svqshl_u32_m(pg, op1, op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svqshl_u32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u32_z(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    svqshl_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svqshl_u32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u64_m(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqshl.nxv2i64")]
+        fn _svqshl_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqshl_u64_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svqshl_u64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u64_x(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    svqshl_u64_m(pg, op1, op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svqshl_u64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_u64_z(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    svqshl_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshl[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshl))]
+pub fn svqshl_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svqshl_u64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s8_m<const IMM2: i32>(pg: svbool_t, op1: svint8_t) -> svuint8_t {
+    static_assert_range!(IMM2, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqshlu.nxv16i8")]
+        fn _svqshlu_n_s8_m(pg: svbool_t, op1: svint8_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqshlu_n_s8_m(pg, op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s8_x<const IMM2: i32>(pg: svbool_t, op1: svint8_t) -> svuint8_t {
+    svqshlu_n_s8_m::<IMM2>(pg, op1)
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s8_z<const IMM2: i32>(pg: svbool_t, op1: svint8_t) -> svuint8_t {
+    svqshlu_n_s8_m::<IMM2>(pg, svsel_s8(pg, op1, svdup_n_s8(0)))
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s16_m<const IMM2: i32>(pg: svbool_t, op1: svint16_t) -> svuint16_t {
+    static_assert_range!(IMM2, 0..=15);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqshlu.nxv8i16")]
+        fn _svqshlu_n_s16_m(pg: svbool8_t, op1: svint16_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqshlu_n_s16_m(pg.sve_into(), op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s16_x<const IMM2: i32>(pg: svbool_t, op1: svint16_t) -> svuint16_t {
+    svqshlu_n_s16_m::<IMM2>(pg, op1)
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s16_z<const IMM2: i32>(pg: svbool_t, op1: svint16_t) -> svuint16_t {
+    svqshlu_n_s16_m::<IMM2>(pg, svsel_s16(pg, op1, svdup_n_s16(0)))
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s32_m<const IMM2: i32>(pg: svbool_t, op1: svint32_t) -> svuint32_t {
+    static_assert_range!(IMM2, 0..=31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqshlu.nxv4i32")]
+        fn _svqshlu_n_s32_m(pg: svbool4_t, op1: svint32_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqshlu_n_s32_m(pg.sve_into(), op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s32_x<const IMM2: i32>(pg: svbool_t, op1: svint32_t) -> svuint32_t {
+    svqshlu_n_s32_m::<IMM2>(pg, op1)
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s32_z<const IMM2: i32>(pg: svbool_t, op1: svint32_t) -> svuint32_t {
+    svqshlu_n_s32_m::<IMM2>(pg, svsel_s32(pg, op1, svdup_n_s32(0)))
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s64_m<const IMM2: i32>(pg: svbool_t, op1: svint64_t) -> svuint64_t {
+    static_assert_range!(IMM2, 0..=63);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqshlu.nxv2i64")]
+        fn _svqshlu_n_s64_m(pg: svbool2_t, op1: svint64_t, imm2: i32) -> svint64_t;
+    }
+    unsafe { _svqshlu_n_s64_m(pg.sve_into(), op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s64_x<const IMM2: i32>(pg: svbool_t, op1: svint64_t) -> svuint64_t {
+    svqshlu_n_s64_m::<IMM2>(pg, op1)
+}
+#[doc = "Saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshlu[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshlu, IMM2 = 0))]
+pub fn svqshlu_n_s64_z<const IMM2: i32>(pg: svbool_t, op1: svint64_t) -> svuint64_t {
+    svqshlu_n_s64_m::<IMM2>(pg, svsel_s64(pg, op1, svdup_n_s64(0)))
+}
+#[doc = "Saturating shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrnb, IMM2 = 1))]
+pub fn svqshrnb_n_s16<const IMM2: i32>(op1: svint16_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrnb.nxv8i16"
+        )]
+        fn _svqshrnb_n_s16(op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqshrnb_n_s16(op1, IMM2) }
+}
+#[doc = "Saturating shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrnb, IMM2 = 1))]
+pub fn svqshrnb_n_s32<const IMM2: i32>(op1: svint32_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrnb.nxv4i32"
+        )]
+        fn _svqshrnb_n_s32(op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqshrnb_n_s32(op1, IMM2) }
+}
+#[doc = "Saturating shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrnb, IMM2 = 1))]
+pub fn svqshrnb_n_s64<const IMM2: i32>(op1: svint64_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrnb.nxv2i64"
+        )]
+        fn _svqshrnb_n_s64(op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqshrnb_n_s64(op1, IMM2) }
+}
+#[doc = "Saturating shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshrnb, IMM2 = 1))]
+pub fn svqshrnb_n_u16<const IMM2: i32>(op1: svuint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqshrnb.nxv8i16"
+        )]
+        fn _svqshrnb_n_u16(op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqshrnb_n_u16(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshrnb, IMM2 = 1))]
+pub fn svqshrnb_n_u32<const IMM2: i32>(op1: svuint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqshrnb.nxv4i32"
+        )]
+        fn _svqshrnb_n_u32(op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqshrnb_n_u32(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshrnb, IMM2 = 1))]
+pub fn svqshrnb_n_u64<const IMM2: i32>(op1: svuint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqshrnb.nxv2i64"
+        )]
+        fn _svqshrnb_n_u64(op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqshrnb_n_u64(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrnt, IMM2 = 1))]
+pub fn svqshrnt_n_s16<const IMM2: i32>(even: svint8_t, op1: svint16_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrnt.nxv8i16"
+        )]
+        fn _svqshrnt_n_s16(even: svint8_t, op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqshrnt_n_s16(even, op1, IMM2) }
+}
+#[doc = "Saturating shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrnt, IMM2 = 1))]
+pub fn svqshrnt_n_s32<const IMM2: i32>(even: svint16_t, op1: svint32_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrnt.nxv4i32"
+        )]
+        fn _svqshrnt_n_s32(even: svint16_t, op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqshrnt_n_s32(even, op1, IMM2) }
+}
+#[doc = "Saturating shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrnt, IMM2 = 1))]
+pub fn svqshrnt_n_s64<const IMM2: i32>(even: svint32_t, op1: svint64_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrnt.nxv2i64"
+        )]
+        fn _svqshrnt_n_s64(even: svint32_t, op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqshrnt_n_s64(even, op1, IMM2) }
+}
+#[doc = "Saturating shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshrnt, IMM2 = 1))]
+pub fn svqshrnt_n_u16<const IMM2: i32>(even: svuint8_t, op1: svuint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqshrnt.nxv8i16"
+        )]
+        fn _svqshrnt_n_u16(even: svint8_t, op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqshrnt_n_u16(even.as_signed(), op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshrnt, IMM2 = 1))]
+pub fn svqshrnt_n_u32<const IMM2: i32>(even: svuint16_t, op1: svuint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqshrnt.nxv4i32"
+        )]
+        fn _svqshrnt_n_u32(even: svint16_t, op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqshrnt_n_u32(even.as_signed(), op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrnt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqshrnt, IMM2 = 1))]
+pub fn svqshrnt_n_u64<const IMM2: i32>(even: svuint32_t, op1: svuint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uqshrnt.nxv2i64"
+        )]
+        fn _svqshrnt_n_u64(even: svint32_t, op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqshrnt_n_u64(even.as_signed(), op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right unsigned narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrunb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrunb, IMM2 = 1))]
+pub fn svqshrunb_n_s16<const IMM2: i32>(op1: svint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrunb.nxv8i16"
+        )]
+        fn _svqshrunb_n_s16(op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqshrunb_n_s16(op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right unsigned narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrunb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrunb, IMM2 = 1))]
+pub fn svqshrunb_n_s32<const IMM2: i32>(op1: svint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrunb.nxv4i32"
+        )]
+        fn _svqshrunb_n_s32(op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqshrunb_n_s32(op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right unsigned narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrunb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrunb, IMM2 = 1))]
+pub fn svqshrunb_n_s64<const IMM2: i32>(op1: svint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrunb.nxv2i64"
+        )]
+        fn _svqshrunb_n_s64(op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqshrunb_n_s64(op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right unsigned narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrunt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrunt, IMM2 = 1))]
+pub fn svqshrunt_n_s16<const IMM2: i32>(even: svuint8_t, op1: svint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrunt.nxv8i16"
+        )]
+        fn _svqshrunt_n_s16(even: svint8_t, op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svqshrunt_n_s16(even.as_signed(), op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right unsigned narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrunt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrunt, IMM2 = 1))]
+pub fn svqshrunt_n_s32<const IMM2: i32>(even: svuint16_t, op1: svint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrunt.nxv4i32"
+        )]
+        fn _svqshrunt_n_s32(even: svint16_t, op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svqshrunt_n_s32(even.as_signed(), op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating shift right unsigned narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqshrunt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqshrunt, IMM2 = 1))]
+pub fn svqshrunt_n_s64<const IMM2: i32>(even: svuint32_t, op1: svint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqshrunt.nxv2i64"
+        )]
+        fn _svqshrunt_n_s64(even: svint32_t, op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svqshrunt_n_s64(even.as_signed(), op1, IMM2).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqsub.nxv16i8")]
+        fn _svqsub_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqsub_s8_m(pg, op1, op2) }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqsub_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqsub_s8_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqsub_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqsub_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqsub_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqsub.nxv8i16")]
+        fn _svqsub_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqsub_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqsub_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqsub_s16_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqsub_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqsub_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqsub_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqsub.nxv4i32")]
+        fn _svqsub_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqsub_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqsub_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqsub_s32_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqsub_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqsub_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqsub_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqsub.nxv2i64")]
+        fn _svqsub_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqsub_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqsub_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqsub_s64_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqsub_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqsub_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsub))]
+pub fn svqsub_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqsub_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqsub.nxv16i8")]
+        fn _svqsub_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqsub_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqsub_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svqsub_u8_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqsub_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svqsub_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqsub_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqsub.nxv8i16")]
+        fn _svqsub_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqsub_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqsub_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svqsub_u16_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqsub_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svqsub_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqsub_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqsub.nxv4i32")]
+        fn _svqsub_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqsub_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqsub_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svqsub_u32_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqsub_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svqsub_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqsub_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqsub.nxv2i64")]
+        fn _svqsub_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqsub_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqsub_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svqsub_u64_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqsub_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svqsub_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsub[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsub))]
+pub fn svqsub_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqsub_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqsubr.nxv16i8")]
+        fn _svqsubr_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqsubr_s8_m(pg, op1, op2) }
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqsubr_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqsubr_s8_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqsubr_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svqsubr_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svqsubr_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqsubr.nxv8i16")]
+        fn _svqsubr_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqsubr_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqsubr_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqsubr_s16_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqsubr_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svqsubr_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svqsubr_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqsubr.nxv4i32")]
+        fn _svqsubr_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqsubr_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqsubr_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqsubr_s32_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqsubr_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svqsubr_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svqsubr_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqsubr.nxv2i64")]
+        fn _svqsubr_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqsubr_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqsubr_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqsubr_s64_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqsubr_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svqsubr_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqsubr))]
+pub fn svqsubr_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svqsubr_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqsubr.nxv16i8")]
+        fn _svqsubr_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svqsubr_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqsubr_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svqsubr_u8_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqsubr_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svqsubr_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svqsubr_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqsubr.nxv8i16")]
+        fn _svqsubr_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svqsubr_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqsubr_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svqsubr_u16_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqsubr_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svqsubr_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svqsubr_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqsubr.nxv4i32")]
+        fn _svqsubr_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svqsubr_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqsubr_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svqsubr_u32_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqsubr_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svqsubr_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svqsubr_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqsubr.nxv2i64")]
+        fn _svqsubr_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svqsubr_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqsubr_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svqsubr_u64_m(pg, op1, op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqsubr_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svqsubr_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Saturating subtract reversed"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqsubr[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqsubr))]
+pub fn svqsubr_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svqsubr_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating extract narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtnb))]
+pub fn svqxtnb_s16(op: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqxtnb.nxv8i16")]
+        fn _svqxtnb_s16(op: svint16_t) -> svint8_t;
+    }
+    unsafe { _svqxtnb_s16(op) }
+}
+#[doc = "Saturating extract narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtnb))]
+pub fn svqxtnb_s32(op: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqxtnb.nxv4i32")]
+        fn _svqxtnb_s32(op: svint32_t) -> svint16_t;
+    }
+    unsafe { _svqxtnb_s32(op) }
+}
+#[doc = "Saturating extract narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtnb))]
+pub fn svqxtnb_s64(op: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqxtnb.nxv2i64")]
+        fn _svqxtnb_s64(op: svint64_t) -> svint32_t;
+    }
+    unsafe { _svqxtnb_s64(op) }
+}
+#[doc = "Saturating extract narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqxtnb))]
+pub fn svqxtnb_u16(op: svuint16_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqxtnb.nxv8i16")]
+        fn _svqxtnb_u16(op: svint16_t) -> svint8_t;
+    }
+    unsafe { _svqxtnb_u16(op.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating extract narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqxtnb))]
+pub fn svqxtnb_u32(op: svuint32_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqxtnb.nxv4i32")]
+        fn _svqxtnb_u32(op: svint32_t) -> svint16_t;
+    }
+    unsafe { _svqxtnb_u32(op.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating extract narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqxtnb))]
+pub fn svqxtnb_u64(op: svuint64_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqxtnb.nxv2i64")]
+        fn _svqxtnb_u64(op: svint64_t) -> svint32_t;
+    }
+    unsafe { _svqxtnb_u64(op.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating extract narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtnt))]
+pub fn svqxtnt_s16(even: svint8_t, op: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqxtnt.nxv8i16")]
+        fn _svqxtnt_s16(even: svint8_t, op: svint16_t) -> svint8_t;
+    }
+    unsafe { _svqxtnt_s16(even, op) }
+}
+#[doc = "Saturating extract narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtnt))]
+pub fn svqxtnt_s32(even: svint16_t, op: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqxtnt.nxv4i32")]
+        fn _svqxtnt_s32(even: svint16_t, op: svint32_t) -> svint16_t;
+    }
+    unsafe { _svqxtnt_s32(even, op) }
+}
+#[doc = "Saturating extract narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtnt))]
+pub fn svqxtnt_s64(even: svint32_t, op: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sqxtnt.nxv2i64")]
+        fn _svqxtnt_s64(even: svint32_t, op: svint64_t) -> svint32_t;
+    }
+    unsafe { _svqxtnt_s64(even, op) }
+}
+#[doc = "Saturating extract narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqxtnt))]
+pub fn svqxtnt_u16(even: svuint8_t, op: svuint16_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqxtnt.nxv8i16")]
+        fn _svqxtnt_u16(even: svint8_t, op: svint16_t) -> svint8_t;
+    }
+    unsafe { _svqxtnt_u16(even.as_signed(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating extract narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqxtnt))]
+pub fn svqxtnt_u32(even: svuint16_t, op: svuint32_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqxtnt.nxv4i32")]
+        fn _svqxtnt_u32(even: svint16_t, op: svint32_t) -> svint16_t;
+    }
+    unsafe { _svqxtnt_u32(even.as_signed(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating extract narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtnt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uqxtnt))]
+pub fn svqxtnt_u64(even: svuint32_t, op: svuint64_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.uqxtnt.nxv2i64")]
+        fn _svqxtnt_u64(even: svint32_t, op: svint64_t) -> svint32_t;
+    }
+    unsafe { _svqxtnt_u64(even.as_signed(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating extract unsigned narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtunb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtunb))]
+pub fn svqxtunb_s16(op: svint16_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqxtunb.nxv8i16"
+        )]
+        fn _svqxtunb_s16(op: svint16_t) -> svint8_t;
+    }
+    unsafe { _svqxtunb_s16(op).as_unsigned() }
+}
+#[doc = "Saturating extract unsigned narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtunb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtunb))]
+pub fn svqxtunb_s32(op: svint32_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqxtunb.nxv4i32"
+        )]
+        fn _svqxtunb_s32(op: svint32_t) -> svint16_t;
+    }
+    unsafe { _svqxtunb_s32(op).as_unsigned() }
+}
+#[doc = "Saturating extract unsigned narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtunb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtunb))]
+pub fn svqxtunb_s64(op: svint64_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqxtunb.nxv2i64"
+        )]
+        fn _svqxtunb_s64(op: svint64_t) -> svint32_t;
+    }
+    unsafe { _svqxtunb_s64(op).as_unsigned() }
+}
+#[doc = "Saturating extract unsigned narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtunt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtunt))]
+pub fn svqxtunt_s16(even: svuint8_t, op: svint16_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqxtunt.nxv8i16"
+        )]
+        fn _svqxtunt_s16(even: svint8_t, op: svint16_t) -> svint8_t;
+    }
+    unsafe { _svqxtunt_s16(even.as_signed(), op).as_unsigned() }
+}
+#[doc = "Saturating extract unsigned narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtunt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtunt))]
+pub fn svqxtunt_s32(even: svuint16_t, op: svint32_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqxtunt.nxv4i32"
+        )]
+        fn _svqxtunt_s32(even: svint16_t, op: svint32_t) -> svint16_t;
+    }
+    unsafe { _svqxtunt_s32(even.as_signed(), op).as_unsigned() }
+}
+#[doc = "Saturating extract unsigned narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svqxtunt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sqxtunt))]
+pub fn svqxtunt_s64(even: svuint32_t, op: svint64_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sqxtunt.nxv2i64"
+        )]
+        fn _svqxtunt_s64(even: svint32_t, op: svint64_t) -> svint32_t;
+    }
+    unsafe { _svqxtunt_s64(even.as_signed(), op).as_unsigned() }
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_s16(op1: svint16_t, op2: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.raddhnb.nxv8i16"
+        )]
+        fn _svraddhnb_s16(op1: svint16_t, op2: svint16_t) -> svint8_t;
+    }
+    unsafe { _svraddhnb_s16(op1, op2) }
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_n_s16(op1: svint16_t, op2: i16) -> svint8_t {
+    svraddhnb_s16(op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_s32(op1: svint32_t, op2: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.raddhnb.nxv4i32"
+        )]
+        fn _svraddhnb_s32(op1: svint32_t, op2: svint32_t) -> svint16_t;
+    }
+    unsafe { _svraddhnb_s32(op1, op2) }
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_n_s32(op1: svint32_t, op2: i32) -> svint16_t {
+    svraddhnb_s32(op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_s64(op1: svint64_t, op2: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.raddhnb.nxv2i64"
+        )]
+        fn _svraddhnb_s64(op1: svint64_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svraddhnb_s64(op1, op2) }
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_n_s64(op1: svint64_t, op2: i64) -> svint32_t {
+    svraddhnb_s64(op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_u16(op1: svuint16_t, op2: svuint16_t) -> svuint8_t {
+    unsafe { svraddhnb_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_n_u16(op1: svuint16_t, op2: u16) -> svuint8_t {
+    svraddhnb_u16(op1, svdup_n_u16(op2))
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_u32(op1: svuint32_t, op2: svuint32_t) -> svuint16_t {
+    unsafe { svraddhnb_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_n_u32(op1: svuint32_t, op2: u32) -> svuint16_t {
+    svraddhnb_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_u64(op1: svuint64_t, op2: svuint64_t) -> svuint32_t {
+    unsafe { svraddhnb_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding add narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnb))]
+pub fn svraddhnb_n_u64(op1: svuint64_t, op2: u64) -> svuint32_t {
+    svraddhnb_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_s16(even: svint8_t, op1: svint16_t, op2: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.raddhnt.nxv8i16"
+        )]
+        fn _svraddhnt_s16(even: svint8_t, op1: svint16_t, op2: svint16_t) -> svint8_t;
+    }
+    unsafe { _svraddhnt_s16(even, op1, op2) }
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_n_s16(even: svint8_t, op1: svint16_t, op2: i16) -> svint8_t {
+    svraddhnt_s16(even, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_s32(even: svint16_t, op1: svint32_t, op2: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.raddhnt.nxv4i32"
+        )]
+        fn _svraddhnt_s32(even: svint16_t, op1: svint32_t, op2: svint32_t) -> svint16_t;
+    }
+    unsafe { _svraddhnt_s32(even, op1, op2) }
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_n_s32(even: svint16_t, op1: svint32_t, op2: i32) -> svint16_t {
+    svraddhnt_s32(even, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_s64(even: svint32_t, op1: svint64_t, op2: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.raddhnt.nxv2i64"
+        )]
+        fn _svraddhnt_s64(even: svint32_t, op1: svint64_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svraddhnt_s64(even, op1, op2) }
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_n_s64(even: svint32_t, op1: svint64_t, op2: i64) -> svint32_t {
+    svraddhnt_s64(even, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_u16(even: svuint8_t, op1: svuint16_t, op2: svuint16_t) -> svuint8_t {
+    unsafe { svraddhnt_s16(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_n_u16(even: svuint8_t, op1: svuint16_t, op2: u16) -> svuint8_t {
+    svraddhnt_u16(even, op1, svdup_n_u16(op2))
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_u32(even: svuint16_t, op1: svuint32_t, op2: svuint32_t) -> svuint16_t {
+    unsafe { svraddhnt_s32(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_n_u32(even: svuint16_t, op1: svuint32_t, op2: u32) -> svuint16_t {
+    svraddhnt_u32(even, op1, svdup_n_u32(op2))
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_u64(even: svuint32_t, op1: svuint64_t, op2: svuint64_t) -> svuint32_t {
+    unsafe { svraddhnt_s64(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding add narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svraddhnt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(raddhnt))]
+pub fn svraddhnt_n_u64(even: svuint32_t, op1: svuint64_t, op2: u64) -> svuint32_t {
+    svraddhnt_u64(even, op1, svdup_n_u64(op2))
+}
+#[doc = "Bitwise rotate left by 1 and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrax1[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-sha3")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rax1))]
+pub fn svrax1_s64(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rax1")]
+        fn _svrax1_s64(op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrax1_s64(op1, op2) }
+}
+#[doc = "Bitwise rotate left by 1 and exclusive OR"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrax1[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-sha3")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rax1))]
+pub fn svrax1_u64(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe { svrax1_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpe[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urecpe))]
+pub fn svrecpe_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urecpe.nxv4i32")]
+        fn _svrecpe_u32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrecpe_u32_m(inactive.as_signed(), pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpe[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urecpe))]
+pub fn svrecpe_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrecpe_u32_m(op, pg, op)
+}
+#[doc = "Reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrecpe[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urecpe))]
+pub fn svrecpe_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrecpe_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srhadd.nxv16i8")]
+        fn _svrhadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svrhadd_s8_m(pg, op1, op2) }
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svrhadd_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svrhadd_s8_m(pg, op1, op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svrhadd_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svrhadd_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svrhadd_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srhadd.nxv8i16")]
+        fn _svrhadd_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svrhadd_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svrhadd_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svrhadd_s16_m(pg, op1, op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svrhadd_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svrhadd_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svrhadd_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srhadd.nxv4i32")]
+        fn _svrhadd_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrhadd_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svrhadd_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svrhadd_s32_m(pg, op1, op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svrhadd_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svrhadd_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svrhadd_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srhadd.nxv2i64")]
+        fn _svrhadd_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrhadd_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svrhadd_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svrhadd_s64_m(pg, op1, op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svrhadd_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svrhadd_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srhadd))]
+pub fn svrhadd_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svrhadd_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u8_m(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urhadd.nxv16i8")]
+        fn _svrhadd_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svrhadd_u8_m(pg, op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svrhadd_u8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u8_x(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svrhadd_u8_m(pg, op1, op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svrhadd_u8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u8_z(pg: svbool_t, op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    svrhadd_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: u8) -> svuint8_t {
+    svrhadd_u8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u16_m(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urhadd.nxv8i16")]
+        fn _svrhadd_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svrhadd_u16_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svrhadd_u16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u16_x(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svrhadd_u16_m(pg, op1, op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svrhadd_u16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u16_z(pg: svbool_t, op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    svrhadd_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: u16) -> svuint16_t {
+    svrhadd_u16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u32_m(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urhadd.nxv4i32")]
+        fn _svrhadd_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrhadd_u32_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svrhadd_u32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u32_x(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svrhadd_u32_m(pg, op1, op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svrhadd_u32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u32_z(pg: svbool_t, op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    svrhadd_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: u32) -> svuint32_t {
+    svrhadd_u32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u64_m(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urhadd.nxv2i64")]
+        fn _svrhadd_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrhadd_u64_m(pg.sve_into(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svrhadd_u64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u64_x(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svrhadd_u64_m(pg, op1, op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svrhadd_u64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_u64_z(pg: svbool_t, op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    svrhadd_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Rounding halving add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrhadd[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urhadd))]
+pub fn svrhadd_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: u64) -> svuint64_t {
+    svrhadd_u64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srshl.nxv16i8")]
+        fn _svrshl_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svrshl_s8_m(pg, op1, op2) }
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s8_m(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svrshl_s8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s8_x(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svrshl_s8_m(pg, op1, op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s8_x(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svrshl_s8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s8_z(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t {
+    svrshl_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s8_z(pg: svbool_t, op1: svint8_t, op2: i8) -> svint8_t {
+    svrshl_s8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s16_m(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srshl.nxv8i16")]
+        fn _svrshl_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svrshl_s16_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s16_m(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svrshl_s16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s16_x(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svrshl_s16_m(pg, op1, op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s16_x(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svrshl_s16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s16_z(pg: svbool_t, op1: svint16_t, op2: svint16_t) -> svint16_t {
+    svrshl_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s16_z(pg: svbool_t, op1: svint16_t, op2: i16) -> svint16_t {
+    svrshl_s16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s32_m(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srshl.nxv4i32")]
+        fn _svrshl_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrshl_s32_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s32_m(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svrshl_s32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s32_x(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svrshl_s32_m(pg, op1, op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s32_x(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svrshl_s32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s32_z(pg: svbool_t, op1: svint32_t, op2: svint32_t) -> svint32_t {
+    svrshl_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s32_z(pg: svbool_t, op1: svint32_t, op2: i32) -> svint32_t {
+    svrshl_s32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s64_m(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srshl.nxv2i64")]
+        fn _svrshl_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrshl_s64_m(pg.sve_into(), op1, op2) }
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s64_m(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svrshl_s64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s64_x(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svrshl_s64_m(pg, op1, op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s64_x(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svrshl_s64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_s64_z(pg: svbool_t, op1: svint64_t, op2: svint64_t) -> svint64_t {
+    svrshl_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub fn svrshl_n_s64_z(pg: svbool_t, op1: svint64_t, op2: i64) -> svint64_t {
+    svrshl_s64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u8_m(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urshl.nxv16i8")]
+        fn _svrshl_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svrshl_u8_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svrshl_u8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u8_x(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    svrshl_u8_m(pg, op1, op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svrshl_u8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u8_z(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    svrshl_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svrshl_u8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u16_m(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urshl.nxv8i16")]
+        fn _svrshl_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svrshl_u16_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svrshl_u16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u16_x(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    svrshl_u16_m(pg, op1, op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svrshl_u16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u16_z(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    svrshl_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svrshl_u16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u32_m(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urshl.nxv4i32")]
+        fn _svrshl_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrshl_u32_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svrshl_u32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u32_x(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    svrshl_u32_m(pg, op1, op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svrshl_u32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u32_z(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    svrshl_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svrshl_u32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u64_m(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urshl.nxv2i64")]
+        fn _svrshl_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svrshl_u64_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svrshl_u64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u64_x(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    svrshl_u64_m(pg, op1, op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svrshl_u64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_u64_z(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    svrshl_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshl[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub fn svrshl_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svrshl_u64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s8_m<const IMM2: i32>(pg: svbool_t, op1: svint8_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srshr.nxv16i8")]
+        fn _svrshr_n_s8_m(pg: svbool_t, op1: svint8_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svrshr_n_s8_m(pg, op1, IMM2) }
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s8_x<const IMM2: i32>(pg: svbool_t, op1: svint8_t) -> svint8_t {
+    svrshr_n_s8_m::<IMM2>(pg, op1)
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s8_z<const IMM2: i32>(pg: svbool_t, op1: svint8_t) -> svint8_t {
+    svrshr_n_s8_m::<IMM2>(pg, svsel_s8(pg, op1, svdup_n_s8(0)))
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s16_m<const IMM2: i32>(pg: svbool_t, op1: svint16_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srshr.nxv8i16")]
+        fn _svrshr_n_s16_m(pg: svbool8_t, op1: svint16_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svrshr_n_s16_m(pg.sve_into(), op1, IMM2) }
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s16_x<const IMM2: i32>(pg: svbool_t, op1: svint16_t) -> svint16_t {
+    svrshr_n_s16_m::<IMM2>(pg, op1)
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s16_z<const IMM2: i32>(pg: svbool_t, op1: svint16_t) -> svint16_t {
+    svrshr_n_s16_m::<IMM2>(pg, svsel_s16(pg, op1, svdup_n_s16(0)))
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s32_m<const IMM2: i32>(pg: svbool_t, op1: svint32_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srshr.nxv4i32")]
+        fn _svrshr_n_s32_m(pg: svbool4_t, op1: svint32_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svrshr_n_s32_m(pg.sve_into(), op1, IMM2) }
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s32_x<const IMM2: i32>(pg: svbool_t, op1: svint32_t) -> svint32_t {
+    svrshr_n_s32_m::<IMM2>(pg, op1)
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s32_z<const IMM2: i32>(pg: svbool_t, op1: svint32_t) -> svint32_t {
+    svrshr_n_s32_m::<IMM2>(pg, svsel_s32(pg, op1, svdup_n_s32(0)))
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s64_m<const IMM2: i32>(pg: svbool_t, op1: svint64_t) -> svint64_t {
+    static_assert_range!(IMM2, 1..=64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srshr.nxv2i64")]
+        fn _svrshr_n_s64_m(pg: svbool2_t, op1: svint64_t, imm2: i32) -> svint64_t;
+    }
+    unsafe { _svrshr_n_s64_m(pg.sve_into(), op1, IMM2) }
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s64_x<const IMM2: i32>(pg: svbool_t, op1: svint64_t) -> svint64_t {
+    svrshr_n_s64_m::<IMM2>(pg, op1)
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srshr, IMM2 = 1))]
+pub fn svrshr_n_s64_z<const IMM2: i32>(pg: svbool_t, op1: svint64_t) -> svint64_t {
+    svrshr_n_s64_m::<IMM2>(pg, svsel_s64(pg, op1, svdup_n_s64(0)))
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u8_m<const IMM2: i32>(pg: svbool_t, op1: svuint8_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urshr.nxv16i8")]
+        fn _svrshr_n_u8_m(pg: svbool_t, op1: svint8_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svrshr_n_u8_m(pg, op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u8_x<const IMM2: i32>(pg: svbool_t, op1: svuint8_t) -> svuint8_t {
+    svrshr_n_u8_m::<IMM2>(pg, op1)
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u8_z<const IMM2: i32>(pg: svbool_t, op1: svuint8_t) -> svuint8_t {
+    svrshr_n_u8_m::<IMM2>(pg, svsel_u8(pg, op1, svdup_n_u8(0)))
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u16_m<const IMM2: i32>(pg: svbool_t, op1: svuint16_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urshr.nxv8i16")]
+        fn _svrshr_n_u16_m(pg: svbool8_t, op1: svint16_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svrshr_n_u16_m(pg.sve_into(), op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u16_x<const IMM2: i32>(pg: svbool_t, op1: svuint16_t) -> svuint16_t {
+    svrshr_n_u16_m::<IMM2>(pg, op1)
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u16_z<const IMM2: i32>(pg: svbool_t, op1: svuint16_t) -> svuint16_t {
+    svrshr_n_u16_m::<IMM2>(pg, svsel_u16(pg, op1, svdup_n_u16(0)))
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u32_m<const IMM2: i32>(pg: svbool_t, op1: svuint32_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urshr.nxv4i32")]
+        fn _svrshr_n_u32_m(pg: svbool4_t, op1: svint32_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svrshr_n_u32_m(pg.sve_into(), op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u32_x<const IMM2: i32>(pg: svbool_t, op1: svuint32_t) -> svuint32_t {
+    svrshr_n_u32_m::<IMM2>(pg, op1)
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u32_z<const IMM2: i32>(pg: svbool_t, op1: svuint32_t) -> svuint32_t {
+    svrshr_n_u32_m::<IMM2>(pg, svsel_u32(pg, op1, svdup_n_u32(0)))
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u64_m<const IMM2: i32>(pg: svbool_t, op1: svuint64_t) -> svuint64_t {
+    static_assert_range!(IMM2, 1..=64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.urshr.nxv2i64")]
+        fn _svrshr_n_u64_m(pg: svbool2_t, op1: svint64_t, imm2: i32) -> svint64_t;
+    }
+    unsafe { _svrshr_n_u64_m(pg.sve_into(), op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u64_x<const IMM2: i32>(pg: svbool_t, op1: svuint64_t) -> svuint64_t {
+    svrshr_n_u64_m::<IMM2>(pg, op1)
+}
+#[doc = "Rounding shift right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshr[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(urshr, IMM2 = 1))]
+pub fn svrshr_n_u64_z<const IMM2: i32>(pg: svbool_t, op1: svuint64_t) -> svuint64_t {
+    svrshr_n_u64_m::<IMM2>(pg, svsel_u64(pg, op1, svdup_n_u64(0)))
+}
+#[doc = "Rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnb, IMM2 = 1))]
+pub fn svrshrnb_n_s16<const IMM2: i32>(op1: svint16_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rshrnb.nxv8i16")]
+        fn _svrshrnb_n_s16(op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svrshrnb_n_s16(op1, IMM2) }
+}
+#[doc = "Rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnb, IMM2 = 1))]
+pub fn svrshrnb_n_s32<const IMM2: i32>(op1: svint32_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rshrnb.nxv4i32")]
+        fn _svrshrnb_n_s32(op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svrshrnb_n_s32(op1, IMM2) }
+}
+#[doc = "Rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnb, IMM2 = 1))]
+pub fn svrshrnb_n_s64<const IMM2: i32>(op1: svint64_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rshrnb.nxv2i64")]
+        fn _svrshrnb_n_s64(op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svrshrnb_n_s64(op1, IMM2) }
+}
+#[doc = "Rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnb, IMM2 = 1))]
+pub fn svrshrnb_n_u16<const IMM2: i32>(op1: svuint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe { svrshrnb_n_s16::<IMM2>(op1.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnb, IMM2 = 1))]
+pub fn svrshrnb_n_u32<const IMM2: i32>(op1: svuint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe { svrshrnb_n_s32::<IMM2>(op1.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnb, IMM2 = 1))]
+pub fn svrshrnb_n_u64<const IMM2: i32>(op1: svuint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe { svrshrnb_n_s64::<IMM2>(op1.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnt, IMM2 = 1))]
+pub fn svrshrnt_n_s16<const IMM2: i32>(even: svint8_t, op1: svint16_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rshrnt.nxv8i16")]
+        fn _svrshrnt_n_s16(even: svint8_t, op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svrshrnt_n_s16(even, op1, IMM2) }
+}
+#[doc = "Rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnt, IMM2 = 1))]
+pub fn svrshrnt_n_s32<const IMM2: i32>(even: svint16_t, op1: svint32_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rshrnt.nxv4i32")]
+        fn _svrshrnt_n_s32(even: svint16_t, op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svrshrnt_n_s32(even, op1, IMM2) }
+}
+#[doc = "Rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnt, IMM2 = 1))]
+pub fn svrshrnt_n_s64<const IMM2: i32>(even: svint32_t, op1: svint64_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rshrnt.nxv2i64")]
+        fn _svrshrnt_n_s64(even: svint32_t, op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svrshrnt_n_s64(even, op1, IMM2) }
+}
+#[doc = "Rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnt, IMM2 = 1))]
+pub fn svrshrnt_n_u16<const IMM2: i32>(even: svuint8_t, op1: svuint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe { svrshrnt_n_s16::<IMM2>(even.as_signed(), op1.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnt, IMM2 = 1))]
+pub fn svrshrnt_n_u32<const IMM2: i32>(even: svuint16_t, op1: svuint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe { svrshrnt_n_s32::<IMM2>(even.as_signed(), op1.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrshrnt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rshrnt, IMM2 = 1))]
+pub fn svrshrnt_n_u64<const IMM2: i32>(even: svuint32_t, op1: svuint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe { svrshrnt_n_s64::<IMM2>(even.as_signed(), op1.as_signed()).as_unsigned() }
+}
+#[doc = "Reciprocal square root estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsqrte[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ursqrte))]
+pub fn svrsqrte_u32_m(inactive: svuint32_t, pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ursqrte.nxv4i32"
+        )]
+        fn _svrsqrte_u32_m(inactive: svint32_t, pg: svbool4_t, op: svint32_t) -> svint32_t;
+    }
+    unsafe { _svrsqrte_u32_m(inactive.as_signed(), pg.sve_into(), op.as_signed()).as_unsigned() }
+}
+#[doc = "Reciprocal square root estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsqrte[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ursqrte))]
+pub fn svrsqrte_u32_x(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrsqrte_u32_m(op, pg, op)
+}
+#[doc = "Reciprocal square root estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsqrte[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ursqrte))]
+pub fn svrsqrte_u32_z(pg: svbool_t, op: svuint32_t) -> svuint32_t {
+    svrsqrte_u32_m(svdup_n_u32(0), pg, op)
+}
+#[doc = "Rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsra[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srsra, IMM3 = 1))]
+pub fn svrsra_n_s8<const IMM3: i32>(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    static_assert_range!(IMM3, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srsra.nxv16i8")]
+        fn _svrsra_n_s8(op1: svint8_t, op2: svint8_t, imm3: i32) -> svint8_t;
+    }
+    unsafe { _svrsra_n_s8(op1, op2, IMM3) }
+}
+#[doc = "Rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsra[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srsra, IMM3 = 1))]
+pub fn svrsra_n_s16<const IMM3: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert_range!(IMM3, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srsra.nxv8i16")]
+        fn _svrsra_n_s16(op1: svint16_t, op2: svint16_t, imm3: i32) -> svint16_t;
+    }
+    unsafe { _svrsra_n_s16(op1, op2, IMM3) }
+}
+#[doc = "Rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsra[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srsra, IMM3 = 1))]
+pub fn svrsra_n_s32<const IMM3: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert_range!(IMM3, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srsra.nxv4i32")]
+        fn _svrsra_n_s32(op1: svint32_t, op2: svint32_t, imm3: i32) -> svint32_t;
+    }
+    unsafe { _svrsra_n_s32(op1, op2, IMM3) }
+}
+#[doc = "Rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsra[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(srsra, IMM3 = 1))]
+pub fn svrsra_n_s64<const IMM3: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert_range!(IMM3, 1..=64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.srsra.nxv2i64")]
+        fn _svrsra_n_s64(op1: svint64_t, op2: svint64_t, imm3: i32) -> svint64_t;
+    }
+    unsafe { _svrsra_n_s64(op1, op2, IMM3) }
+}
+#[doc = "Rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsra[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ursra, IMM3 = 1))]
+pub fn svrsra_n_u8<const IMM3: i32>(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    static_assert_range!(IMM3, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ursra.nxv16i8")]
+        fn _svrsra_n_u8(op1: svint8_t, op2: svint8_t, imm3: i32) -> svint8_t;
+    }
+    unsafe { _svrsra_n_u8(op1.as_signed(), op2.as_signed(), IMM3).as_unsigned() }
+}
+#[doc = "Rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsra[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ursra, IMM3 = 1))]
+pub fn svrsra_n_u16<const IMM3: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    static_assert_range!(IMM3, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ursra.nxv8i16")]
+        fn _svrsra_n_u16(op1: svint16_t, op2: svint16_t, imm3: i32) -> svint16_t;
+    }
+    unsafe { _svrsra_n_u16(op1.as_signed(), op2.as_signed(), IMM3).as_unsigned() }
+}
+#[doc = "Rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsra[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ursra, IMM3 = 1))]
+pub fn svrsra_n_u32<const IMM3: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    static_assert_range!(IMM3, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ursra.nxv4i32")]
+        fn _svrsra_n_u32(op1: svint32_t, op2: svint32_t, imm3: i32) -> svint32_t;
+    }
+    unsafe { _svrsra_n_u32(op1.as_signed(), op2.as_signed(), IMM3).as_unsigned() }
+}
+#[doc = "Rounding shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsra[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ursra, IMM3 = 1))]
+pub fn svrsra_n_u64<const IMM3: i32>(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    static_assert_range!(IMM3, 1..=64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ursra.nxv2i64")]
+        fn _svrsra_n_u64(op1: svint64_t, op2: svint64_t, imm3: i32) -> svint64_t;
+    }
+    unsafe { _svrsra_n_u64(op1.as_signed(), op2.as_signed(), IMM3).as_unsigned() }
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_s16(op1: svint16_t, op2: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.rsubhnb.nxv8i16"
+        )]
+        fn _svrsubhnb_s16(op1: svint16_t, op2: svint16_t) -> svint8_t;
+    }
+    unsafe { _svrsubhnb_s16(op1, op2) }
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_n_s16(op1: svint16_t, op2: i16) -> svint8_t {
+    svrsubhnb_s16(op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_s32(op1: svint32_t, op2: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.rsubhnb.nxv4i32"
+        )]
+        fn _svrsubhnb_s32(op1: svint32_t, op2: svint32_t) -> svint16_t;
+    }
+    unsafe { _svrsubhnb_s32(op1, op2) }
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_n_s32(op1: svint32_t, op2: i32) -> svint16_t {
+    svrsubhnb_s32(op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_s64(op1: svint64_t, op2: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.rsubhnb.nxv2i64"
+        )]
+        fn _svrsubhnb_s64(op1: svint64_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svrsubhnb_s64(op1, op2) }
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_n_s64(op1: svint64_t, op2: i64) -> svint32_t {
+    svrsubhnb_s64(op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_u16(op1: svuint16_t, op2: svuint16_t) -> svuint8_t {
+    unsafe { svrsubhnb_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_n_u16(op1: svuint16_t, op2: u16) -> svuint8_t {
+    svrsubhnb_u16(op1, svdup_n_u16(op2))
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_u32(op1: svuint32_t, op2: svuint32_t) -> svuint16_t {
+    unsafe { svrsubhnb_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_n_u32(op1: svuint32_t, op2: u32) -> svuint16_t {
+    svrsubhnb_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_u64(op1: svuint64_t, op2: svuint64_t) -> svuint32_t {
+    unsafe { svrsubhnb_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnb))]
+pub fn svrsubhnb_n_u64(op1: svuint64_t, op2: u64) -> svuint32_t {
+    svrsubhnb_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_s16(even: svint8_t, op1: svint16_t, op2: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.rsubhnt.nxv8i16"
+        )]
+        fn _svrsubhnt_s16(even: svint8_t, op1: svint16_t, op2: svint16_t) -> svint8_t;
+    }
+    unsafe { _svrsubhnt_s16(even, op1, op2) }
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_n_s16(even: svint8_t, op1: svint16_t, op2: i16) -> svint8_t {
+    svrsubhnt_s16(even, op1, svdup_n_s16(op2))
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_s32(even: svint16_t, op1: svint32_t, op2: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.rsubhnt.nxv4i32"
+        )]
+        fn _svrsubhnt_s32(even: svint16_t, op1: svint32_t, op2: svint32_t) -> svint16_t;
+    }
+    unsafe { _svrsubhnt_s32(even, op1, op2) }
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_n_s32(even: svint16_t, op1: svint32_t, op2: i32) -> svint16_t {
+    svrsubhnt_s32(even, op1, svdup_n_s32(op2))
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_s64(even: svint32_t, op1: svint64_t, op2: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.rsubhnt.nxv2i64"
+        )]
+        fn _svrsubhnt_s64(even: svint32_t, op1: svint64_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svrsubhnt_s64(even, op1, op2) }
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_n_s64(even: svint32_t, op1: svint64_t, op2: i64) -> svint32_t {
+    svrsubhnt_s64(even, op1, svdup_n_s64(op2))
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_u16(even: svuint8_t, op1: svuint16_t, op2: svuint16_t) -> svuint8_t {
+    unsafe { svrsubhnt_s16(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_n_u16(even: svuint8_t, op1: svuint16_t, op2: u16) -> svuint8_t {
+    svrsubhnt_u16(even, op1, svdup_n_u16(op2))
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_u32(even: svuint16_t, op1: svuint32_t, op2: svuint32_t) -> svuint16_t {
+    unsafe { svrsubhnt_s32(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_n_u32(even: svuint16_t, op1: svuint32_t, op2: u32) -> svuint16_t {
+    svrsubhnt_u32(even, op1, svdup_n_u32(op2))
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_u64(even: svuint32_t, op1: svuint64_t, op2: svuint64_t) -> svuint32_t {
+    unsafe { svrsubhnt_s64(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Rounding subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svrsubhnt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(rsubhnt))]
+pub fn svrsubhnt_n_u64(even: svuint32_t, op1: svuint64_t, op2: u64) -> svuint32_t {
+    svrsubhnt_u64(even, op1, svdup_n_u64(op2))
+}
+#[doc = "Subtract with borrow long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsbclb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbclb))]
+pub fn svsbclb_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sbclb.nxv4i32")]
+        fn _svsbclb_u32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svsbclb_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract with borrow long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsbclb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbclb))]
+pub fn svsbclb_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svsbclb_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Subtract with borrow long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsbclb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbclb))]
+pub fn svsbclb_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sbclb.nxv2i64")]
+        fn _svsbclb_u64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svsbclb_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract with borrow long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsbclb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbclb))]
+pub fn svsbclb_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svsbclb_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Subtract with borrow long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsbclt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbclt))]
+pub fn svsbclt_u32(op1: svuint32_t, op2: svuint32_t, op3: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sbclt.nxv4i32")]
+        fn _svsbclt_u32(op1: svint32_t, op2: svint32_t, op3: svint32_t) -> svint32_t;
+    }
+    unsafe { _svsbclt_u32(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract with borrow long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsbclt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbclt))]
+pub fn svsbclt_n_u32(op1: svuint32_t, op2: svuint32_t, op3: u32) -> svuint32_t {
+    svsbclt_u32(op1, op2, svdup_n_u32(op3))
+}
+#[doc = "Subtract with borrow long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsbclt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbclt))]
+pub fn svsbclt_u64(op1: svuint64_t, op2: svuint64_t, op3: svuint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sbclt.nxv2i64")]
+        fn _svsbclt_u64(op1: svint64_t, op2: svint64_t, op3: svint64_t) -> svint64_t;
+    }
+    unsafe { _svsbclt_u64(op1.as_signed(), op2.as_signed(), op3.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract with borrow long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsbclt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sbclt))]
+pub fn svsbclt_n_u64(op1: svuint64_t, op2: svuint64_t, op3: u64) -> svuint64_t {
+    svsbclt_u64(op1, op2, svdup_n_u64(op3))
+}
+#[doc = "Shift left long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllb, IMM2 = 0))]
+pub fn svshllb_n_s16<const IMM2: i32>(op1: svint8_t) -> svint16_t {
+    static_assert_range!(IMM2, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sshllb.nxv8i16")]
+        fn _svshllb_n_s16(op1: svint8_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svshllb_n_s16(op1, IMM2) }
+}
+#[doc = "Shift left long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllb, IMM2 = 0))]
+pub fn svshllb_n_s32<const IMM2: i32>(op1: svint16_t) -> svint32_t {
+    static_assert_range!(IMM2, 0..=15);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sshllb.nxv4i32")]
+        fn _svshllb_n_s32(op1: svint16_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svshllb_n_s32(op1, IMM2) }
+}
+#[doc = "Shift left long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllb, IMM2 = 0))]
+pub fn svshllb_n_s64<const IMM2: i32>(op1: svint32_t) -> svint64_t {
+    static_assert_range!(IMM2, 0..=31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sshllb.nxv2i64")]
+        fn _svshllb_n_s64(op1: svint32_t, imm2: i32) -> svint64_t;
+    }
+    unsafe { _svshllb_n_s64(op1, IMM2) }
+}
+#[doc = "Shift left long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllb, IMM2 = 0))]
+pub fn svshllb_n_u16<const IMM2: i32>(op1: svuint8_t) -> svuint16_t {
+    static_assert_range!(IMM2, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ushllb.nxv8i16")]
+        fn _svshllb_n_u16(op1: svint8_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svshllb_n_u16(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Shift left long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllb, IMM2 = 0))]
+pub fn svshllb_n_u32<const IMM2: i32>(op1: svuint16_t) -> svuint32_t {
+    static_assert_range!(IMM2, 0..=15);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ushllb.nxv4i32")]
+        fn _svshllb_n_u32(op1: svint16_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svshllb_n_u32(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Shift left long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllb, IMM2 = 0))]
+pub fn svshllb_n_u64<const IMM2: i32>(op1: svuint32_t) -> svuint64_t {
+    static_assert_range!(IMM2, 0..=31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ushllb.nxv2i64")]
+        fn _svshllb_n_u64(op1: svint32_t, imm2: i32) -> svint64_t;
+    }
+    unsafe { _svshllb_n_u64(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Shift left long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllt, IMM2 = 0))]
+pub fn svshllt_n_s16<const IMM2: i32>(op1: svint8_t) -> svint16_t {
+    static_assert_range!(IMM2, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sshllt.nxv8i16")]
+        fn _svshllt_n_s16(op1: svint8_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svshllt_n_s16(op1, IMM2) }
+}
+#[doc = "Shift left long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllt, IMM2 = 0))]
+pub fn svshllt_n_s32<const IMM2: i32>(op1: svint16_t) -> svint32_t {
+    static_assert_range!(IMM2, 0..=15);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sshllt.nxv4i32")]
+        fn _svshllt_n_s32(op1: svint16_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svshllt_n_s32(op1, IMM2) }
+}
+#[doc = "Shift left long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sshllt, IMM2 = 0))]
+pub fn svshllt_n_s64<const IMM2: i32>(op1: svint32_t) -> svint64_t {
+    static_assert_range!(IMM2, 0..=31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sshllt.nxv2i64")]
+        fn _svshllt_n_s64(op1: svint32_t, imm2: i32) -> svint64_t;
+    }
+    unsafe { _svshllt_n_s64(op1, IMM2) }
+}
+#[doc = "Shift left long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllt, IMM2 = 0))]
+pub fn svshllt_n_u16<const IMM2: i32>(op1: svuint8_t) -> svuint16_t {
+    static_assert_range!(IMM2, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ushllt.nxv8i16")]
+        fn _svshllt_n_u16(op1: svint8_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svshllt_n_u16(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Shift left long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllt, IMM2 = 0))]
+pub fn svshllt_n_u32<const IMM2: i32>(op1: svuint16_t) -> svuint32_t {
+    static_assert_range!(IMM2, 0..=15);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ushllt.nxv4i32")]
+        fn _svshllt_n_u32(op1: svint16_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svshllt_n_u32(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Shift left long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshllt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ushllt, IMM2 = 0))]
+pub fn svshllt_n_u64<const IMM2: i32>(op1: svuint32_t) -> svuint64_t {
+    static_assert_range!(IMM2, 0..=31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ushllt.nxv2i64")]
+        fn _svshllt_n_u64(op1: svint32_t, imm2: i32) -> svint64_t;
+    }
+    unsafe { _svshllt_n_u64(op1.as_signed(), IMM2).as_unsigned() }
+}
+#[doc = "Shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnb, IMM2 = 1))]
+pub fn svshrnb_n_s16<const IMM2: i32>(op1: svint16_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shrnb.nxv8i16")]
+        fn _svshrnb_n_s16(op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svshrnb_n_s16(op1, IMM2) }
+}
+#[doc = "Shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnb, IMM2 = 1))]
+pub fn svshrnb_n_s32<const IMM2: i32>(op1: svint32_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shrnb.nxv4i32")]
+        fn _svshrnb_n_s32(op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svshrnb_n_s32(op1, IMM2) }
+}
+#[doc = "Shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnb, IMM2 = 1))]
+pub fn svshrnb_n_s64<const IMM2: i32>(op1: svint64_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shrnb.nxv2i64")]
+        fn _svshrnb_n_s64(op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svshrnb_n_s64(op1, IMM2) }
+}
+#[doc = "Shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnb, IMM2 = 1))]
+pub fn svshrnb_n_u16<const IMM2: i32>(op1: svuint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe { svshrnb_n_s16::<IMM2>(op1.as_signed()).as_unsigned() }
+}
+#[doc = "Shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnb, IMM2 = 1))]
+pub fn svshrnb_n_u32<const IMM2: i32>(op1: svuint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe { svshrnb_n_s32::<IMM2>(op1.as_signed()).as_unsigned() }
+}
+#[doc = "Shift right narrow (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnb, IMM2 = 1))]
+pub fn svshrnb_n_u64<const IMM2: i32>(op1: svuint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe { svshrnb_n_s64::<IMM2>(op1.as_signed()).as_unsigned() }
+}
+#[doc = "Shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnt, IMM2 = 1))]
+pub fn svshrnt_n_s16<const IMM2: i32>(even: svint8_t, op1: svint16_t) -> svint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shrnt.nxv8i16")]
+        fn _svshrnt_n_s16(even: svint8_t, op1: svint16_t, imm2: i32) -> svint8_t;
+    }
+    unsafe { _svshrnt_n_s16(even, op1, IMM2) }
+}
+#[doc = "Shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnt, IMM2 = 1))]
+pub fn svshrnt_n_s32<const IMM2: i32>(even: svint16_t, op1: svint32_t) -> svint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shrnt.nxv4i32")]
+        fn _svshrnt_n_s32(even: svint16_t, op1: svint32_t, imm2: i32) -> svint16_t;
+    }
+    unsafe { _svshrnt_n_s32(even, op1, IMM2) }
+}
+#[doc = "Shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnt, IMM2 = 1))]
+pub fn svshrnt_n_s64<const IMM2: i32>(even: svint32_t, op1: svint64_t) -> svint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.shrnt.nxv2i64")]
+        fn _svshrnt_n_s64(even: svint32_t, op1: svint64_t, imm2: i32) -> svint32_t;
+    }
+    unsafe { _svshrnt_n_s64(even, op1, IMM2) }
+}
+#[doc = "Shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnt, IMM2 = 1))]
+pub fn svshrnt_n_u16<const IMM2: i32>(even: svuint8_t, op1: svuint16_t) -> svuint8_t {
+    static_assert_range!(IMM2, 1..=8);
+    unsafe { svshrnt_n_s16::<IMM2>(even.as_signed(), op1.as_signed()).as_unsigned() }
+}
+#[doc = "Shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnt, IMM2 = 1))]
+pub fn svshrnt_n_u32<const IMM2: i32>(even: svuint16_t, op1: svuint32_t) -> svuint16_t {
+    static_assert_range!(IMM2, 1..=16);
+    unsafe { svshrnt_n_s32::<IMM2>(even.as_signed(), op1.as_signed()).as_unsigned() }
+}
+#[doc = "Shift right narrow (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svshrnt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(shrnt, IMM2 = 1))]
+pub fn svshrnt_n_u64<const IMM2: i32>(even: svuint32_t, op1: svuint64_t) -> svuint32_t {
+    static_assert_range!(IMM2, 1..=32);
+    unsafe { svshrnt_n_s64::<IMM2>(even.as_signed(), op1.as_signed()).as_unsigned() }
+}
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsli[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sli, IMM3 = 0))]
+pub fn svsli_n_s8<const IMM3: i32>(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    static_assert_range!(IMM3, 0..=7);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sli.nxv16i8")]
+        fn _svsli_n_s8(op1: svint8_t, op2: svint8_t, imm3: i32) -> svint8_t;
+    }
+    unsafe { _svsli_n_s8(op1, op2, IMM3) }
+}
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsli[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sli, IMM3 = 0))]
+pub fn svsli_n_s16<const IMM3: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert_range!(IMM3, 0..=15);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sli.nxv8i16")]
+        fn _svsli_n_s16(op1: svint16_t, op2: svint16_t, imm3: i32) -> svint16_t;
+    }
+    unsafe { _svsli_n_s16(op1, op2, IMM3) }
+}
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsli[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sli, IMM3 = 0))]
+pub fn svsli_n_s32<const IMM3: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert_range!(IMM3, 0..=31);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sli.nxv4i32")]
+        fn _svsli_n_s32(op1: svint32_t, op2: svint32_t, imm3: i32) -> svint32_t;
+    }
+    unsafe { _svsli_n_s32(op1, op2, IMM3) }
+}
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsli[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sli, IMM3 = 0))]
+pub fn svsli_n_s64<const IMM3: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert_range!(IMM3, 0..=63);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sli.nxv2i64")]
+        fn _svsli_n_s64(op1: svint64_t, op2: svint64_t, imm3: i32) -> svint64_t;
+    }
+    unsafe { _svsli_n_s64(op1, op2, IMM3) }
+}
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsli[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sli, IMM3 = 0))]
+pub fn svsli_n_u8<const IMM3: i32>(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    static_assert_range!(IMM3, 0..=7);
+    unsafe { svsli_n_s8::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsli[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sli, IMM3 = 0))]
+pub fn svsli_n_u16<const IMM3: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    static_assert_range!(IMM3, 0..=15);
+    unsafe { svsli_n_s16::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsli[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sli, IMM3 = 0))]
+pub fn svsli_n_u32<const IMM3: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    static_assert_range!(IMM3, 0..=31);
+    unsafe { svsli_n_s32::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Shift left and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsli[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sli, IMM3 = 0))]
+pub fn svsli_n_u64<const IMM3: i32>(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    static_assert_range!(IMM3, 0..=63);
+    unsafe { svsli_n_s64::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "SM4 encryption and decryption"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsm4e[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-sm4")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sm4e))]
+pub fn svsm4e_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sm4e")]
+        fn _svsm4e_u32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svsm4e_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "SM4 key updates"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsm4ekey[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2,sve2-sm4")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sm4ekey))]
+pub fn svsm4ekey_u32(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sm4ekey")]
+        fn _svsm4ekey_u32(op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svsm4ekey_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u8_m(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usqadd.nxv16i8")]
+        fn _svsqadd_u8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svsqadd_u8_m(pg, op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u8_m(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svsqadd_u8_m(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u8_x(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    svsqadd_u8_m(pg, op1, op2)
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u8_x(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svsqadd_u8_x(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u8_z(pg: svbool_t, op1: svuint8_t, op2: svint8_t) -> svuint8_t {
+    svsqadd_u8_m(pg, svsel_u8(pg, op1, svdup_n_u8(0)), op2)
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u8_z(pg: svbool_t, op1: svuint8_t, op2: i8) -> svuint8_t {
+    svsqadd_u8_z(pg, op1, svdup_n_s8(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u16_m(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usqadd.nxv8i16")]
+        fn _svsqadd_u16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svsqadd_u16_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u16_m(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svsqadd_u16_m(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u16_x(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    svsqadd_u16_m(pg, op1, op2)
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u16_x(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svsqadd_u16_x(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u16_z(pg: svbool_t, op1: svuint16_t, op2: svint16_t) -> svuint16_t {
+    svsqadd_u16_m(pg, svsel_u16(pg, op1, svdup_n_u16(0)), op2)
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u16_z(pg: svbool_t, op1: svuint16_t, op2: i16) -> svuint16_t {
+    svsqadd_u16_z(pg, op1, svdup_n_s16(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u32_m(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usqadd.nxv4i32")]
+        fn _svsqadd_u32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svsqadd_u32_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u32_m(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svsqadd_u32_m(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u32_x(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    svsqadd_u32_m(pg, op1, op2)
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u32_x(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svsqadd_u32_x(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u32_z(pg: svbool_t, op1: svuint32_t, op2: svint32_t) -> svuint32_t {
+    svsqadd_u32_m(pg, svsel_u32(pg, op1, svdup_n_u32(0)), op2)
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u32_z(pg: svbool_t, op1: svuint32_t, op2: i32) -> svuint32_t {
+    svsqadd_u32_z(pg, op1, svdup_n_s32(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u64_m(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usqadd.nxv2i64")]
+        fn _svsqadd_u64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svsqadd_u64_m(pg.sve_into(), op1.as_signed(), op2).as_unsigned() }
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u64_m(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svsqadd_u64_m(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u64_x(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    svsqadd_u64_m(pg, op1, op2)
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u64_x(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svsqadd_u64_x(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_u64_z(pg: svbool_t, op1: svuint64_t, op2: svint64_t) -> svuint64_t {
+    svsqadd_u64_m(pg, svsel_u64(pg, op1, svdup_n_u64(0)), op2)
+}
+#[doc = "Saturating add with signed addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsqadd[_n_u64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usqadd))]
+pub fn svsqadd_n_u64_z(pg: svbool_t, op1: svuint64_t, op2: i64) -> svuint64_t {
+    svsqadd_u64_z(pg, op1, svdup_n_s64(op2))
+}
+#[doc = "Shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsra[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssra, IMM3 = 1))]
+pub fn svsra_n_s8<const IMM3: i32>(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    static_assert_range!(IMM3, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssra.nxv16i8")]
+        fn _svsra_n_s8(op1: svint8_t, op2: svint8_t, imm3: i32) -> svint8_t;
+    }
+    unsafe { _svsra_n_s8(op1, op2, IMM3) }
+}
+#[doc = "Shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsra[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssra, IMM3 = 1))]
+pub fn svsra_n_s16<const IMM3: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert_range!(IMM3, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssra.nxv8i16")]
+        fn _svsra_n_s16(op1: svint16_t, op2: svint16_t, imm3: i32) -> svint16_t;
+    }
+    unsafe { _svsra_n_s16(op1, op2, IMM3) }
+}
+#[doc = "Shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsra[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssra, IMM3 = 1))]
+pub fn svsra_n_s32<const IMM3: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert_range!(IMM3, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssra.nxv4i32")]
+        fn _svsra_n_s32(op1: svint32_t, op2: svint32_t, imm3: i32) -> svint32_t;
+    }
+    unsafe { _svsra_n_s32(op1, op2, IMM3) }
+}
+#[doc = "Shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsra[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssra, IMM3 = 1))]
+pub fn svsra_n_s64<const IMM3: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert_range!(IMM3, 1..=64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssra.nxv2i64")]
+        fn _svsra_n_s64(op1: svint64_t, op2: svint64_t, imm3: i32) -> svint64_t;
+    }
+    unsafe { _svsra_n_s64(op1, op2, IMM3) }
+}
+#[doc = "Shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsra[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usra, IMM3 = 1))]
+pub fn svsra_n_u8<const IMM3: i32>(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    static_assert_range!(IMM3, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usra.nxv16i8")]
+        fn _svsra_n_u8(op1: svint8_t, op2: svint8_t, imm3: i32) -> svint8_t;
+    }
+    unsafe { _svsra_n_u8(op1.as_signed(), op2.as_signed(), IMM3).as_unsigned() }
+}
+#[doc = "Shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsra[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usra, IMM3 = 1))]
+pub fn svsra_n_u16<const IMM3: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    static_assert_range!(IMM3, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usra.nxv8i16")]
+        fn _svsra_n_u16(op1: svint16_t, op2: svint16_t, imm3: i32) -> svint16_t;
+    }
+    unsafe { _svsra_n_u16(op1.as_signed(), op2.as_signed(), IMM3).as_unsigned() }
+}
+#[doc = "Shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsra[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usra, IMM3 = 1))]
+pub fn svsra_n_u32<const IMM3: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    static_assert_range!(IMM3, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usra.nxv4i32")]
+        fn _svsra_n_u32(op1: svint32_t, op2: svint32_t, imm3: i32) -> svint32_t;
+    }
+    unsafe { _svsra_n_u32(op1.as_signed(), op2.as_signed(), IMM3).as_unsigned() }
+}
+#[doc = "Shift right and accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsra[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usra, IMM3 = 1))]
+pub fn svsra_n_u64<const IMM3: i32>(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    static_assert_range!(IMM3, 1..=64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usra.nxv2i64")]
+        fn _svsra_n_u64(op1: svint64_t, op2: svint64_t, imm3: i32) -> svint64_t;
+    }
+    unsafe { _svsra_n_u64(op1.as_signed(), op2.as_signed(), IMM3).as_unsigned() }
+}
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsri[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sri, IMM3 = 1))]
+pub fn svsri_n_s8<const IMM3: i32>(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    static_assert_range!(IMM3, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sri.nxv16i8")]
+        fn _svsri_n_s8(op1: svint8_t, op2: svint8_t, imm3: i32) -> svint8_t;
+    }
+    unsafe { _svsri_n_s8(op1, op2, IMM3) }
+}
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsri[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sri, IMM3 = 1))]
+pub fn svsri_n_s16<const IMM3: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert_range!(IMM3, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sri.nxv8i16")]
+        fn _svsri_n_s16(op1: svint16_t, op2: svint16_t, imm3: i32) -> svint16_t;
+    }
+    unsafe { _svsri_n_s16(op1, op2, IMM3) }
+}
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsri[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sri, IMM3 = 1))]
+pub fn svsri_n_s32<const IMM3: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert_range!(IMM3, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sri.nxv4i32")]
+        fn _svsri_n_s32(op1: svint32_t, op2: svint32_t, imm3: i32) -> svint32_t;
+    }
+    unsafe { _svsri_n_s32(op1, op2, IMM3) }
+}
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsri[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sri, IMM3 = 1))]
+pub fn svsri_n_s64<const IMM3: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert_range!(IMM3, 1..=64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.sri.nxv2i64")]
+        fn _svsri_n_s64(op1: svint64_t, op2: svint64_t, imm3: i32) -> svint64_t;
+    }
+    unsafe { _svsri_n_s64(op1, op2, IMM3) }
+}
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsri[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sri, IMM3 = 1))]
+pub fn svsri_n_u8<const IMM3: i32>(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    static_assert_range!(IMM3, 1..=8);
+    unsafe { svsri_n_s8::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsri[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sri, IMM3 = 1))]
+pub fn svsri_n_u16<const IMM3: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    static_assert_range!(IMM3, 1..=16);
+    unsafe { svsri_n_s16::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsri[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sri, IMM3 = 1))]
+pub fn svsri_n_u32<const IMM3: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    static_assert_range!(IMM3, 1..=32);
+    unsafe { svsri_n_s32::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Shift right and insert"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsri[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sri, IMM3 = 1))]
+pub fn svsri_n_u64<const IMM3: i32>(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    static_assert_range!(IMM3, 1..=64);
+    unsafe { svsri_n_s64::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[s64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_s64index_f64(
+    pg: svbool_t,
+    base: *mut f64,
+    indices: svint64_t,
+    data: svfloat64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.index.nxv2f64"
+        )]
+        fn _svstnt1_scatter_s64index_f64(
+            data: svfloat64_t,
+            pg: svbool2_t,
+            base: *mut f64,
+            indices: svint64_t,
+        );
+    }
+    _svstnt1_scatter_s64index_f64(data, pg.sve_into(), base, indices)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[s64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_s64index_s64(
+    pg: svbool_t,
+    base: *mut i64,
+    indices: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.index.nxv2i64"
+        )]
+        fn _svstnt1_scatter_s64index_s64(
+            data: svint64_t,
+            pg: svbool2_t,
+            base: *mut i64,
+            indices: svint64_t,
+        );
+    }
+    _svstnt1_scatter_s64index_s64(data, pg.sve_into(), base, indices)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[s64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_s64index_u64(
+    pg: svbool_t,
+    base: *mut u64,
+    indices: svint64_t,
+    data: svuint64_t,
+) {
+    svstnt1_scatter_s64index_s64(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[u64]index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64index_f64(
+    pg: svbool_t,
+    base: *mut f64,
+    indices: svuint64_t,
+    data: svfloat64_t,
+) {
+    svstnt1_scatter_s64index_f64(pg, base, indices.as_signed(), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[u64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64index_s64(
+    pg: svbool_t,
+    base: *mut i64,
+    indices: svuint64_t,
+    data: svint64_t,
+) {
+    svstnt1_scatter_s64index_s64(pg, base, indices.as_signed(), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[u64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64index_u64(
+    pg: svbool_t,
+    base: *mut u64,
+    indices: svuint64_t,
+    data: svuint64_t,
+) {
+    svstnt1_scatter_s64index_s64(pg, base.as_signed(), indices.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[s64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_s64offset_f64(
+    pg: svbool_t,
+    base: *mut f64,
+    offsets: svint64_t,
+    data: svfloat64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.nxv2f64"
+        )]
+        fn _svstnt1_scatter_s64offset_f64(
+            data: svfloat64_t,
+            pg: svbool2_t,
+            base: *mut f64,
+            offsets: svint64_t,
+        );
+    }
+    _svstnt1_scatter_s64offset_f64(data, pg.sve_into(), base, offsets)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_s64offset_s64(
+    pg: svbool_t,
+    base: *mut i64,
+    offsets: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.nxv2i64"
+        )]
+        fn _svstnt1_scatter_s64offset_s64(
+            data: svint64_t,
+            pg: svbool2_t,
+            base: *mut i64,
+            offsets: svint64_t,
+        );
+    }
+    _svstnt1_scatter_s64offset_s64(data, pg.sve_into(), base, offsets)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_s64offset_u64(
+    pg: svbool_t,
+    base: *mut u64,
+    offsets: svint64_t,
+    data: svuint64_t,
+) {
+    svstnt1_scatter_s64offset_s64(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[u32]offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32offset_f32(
+    pg: svbool_t,
+    base: *mut f32,
+    offsets: svuint32_t,
+    data: svfloat32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4f32"
+        )]
+        fn _svstnt1_scatter_u32offset_f32(
+            data: svfloat32_t,
+            pg: svbool4_t,
+            base: *mut f32,
+            offsets: svint32_t,
+        );
+    }
+    _svstnt1_scatter_u32offset_f32(data, pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[u32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32offset_s32(
+    pg: svbool_t,
+    base: *mut i32,
+    offsets: svuint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i32"
+        )]
+        fn _svstnt1_scatter_u32offset_s32(
+            data: svint32_t,
+            pg: svbool4_t,
+            base: *mut i32,
+            offsets: svint32_t,
+        );
+    }
+    _svstnt1_scatter_u32offset_s32(data, pg.sve_into(), base, offsets.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[u32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32offset_u32(
+    pg: svbool_t,
+    base: *mut u32,
+    offsets: svuint32_t,
+    data: svuint32_t,
+) {
+    svstnt1_scatter_u32offset_s32(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[u64]offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64offset_f64(
+    pg: svbool_t,
+    base: *mut f64,
+    offsets: svuint64_t,
+    data: svfloat64_t,
+) {
+    svstnt1_scatter_s64offset_f64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64offset_s64(
+    pg: svbool_t,
+    base: *mut i64,
+    offsets: svuint64_t,
+    data: svint64_t,
+) {
+    svstnt1_scatter_s64offset_s64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64offset_u64(
+    pg: svbool_t,
+    base: *mut u64,
+    offsets: svuint64_t,
+    data: svuint64_t,
+) {
+    svstnt1_scatter_s64offset_s64(pg, base.as_signed(), offsets.as_signed(), data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u32base_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32base_f32(pg: svbool_t, bases: svuint32_t, data: svfloat32_t) {
+    svstnt1_scatter_u32base_offset_f32(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u32base_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32base_s32(pg: svbool_t, bases: svuint32_t, data: svint32_t) {
+    svstnt1_scatter_u32base_offset_s32(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u32base_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32base_u32(pg: svbool_t, bases: svuint32_t, data: svuint32_t) {
+    svstnt1_scatter_u32base_offset_u32(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u64base_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64base_f64(pg: svbool_t, bases: svuint64_t, data: svfloat64_t) {
+    svstnt1_scatter_u64base_offset_f64(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u64base_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64base_s64(pg: svbool_t, bases: svuint64_t, data: svint64_t) {
+    svstnt1_scatter_u64base_offset_s64(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u64base_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64base_u64(pg: svbool_t, bases: svuint64_t, data: svuint64_t) {
+    svstnt1_scatter_u64base_offset_u64(pg, bases, 0, data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u32base]_index[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32base_index_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svfloat32_t,
+) {
+    svstnt1_scatter_u32base_offset_f32(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u32base]_index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svint32_t,
+) {
+    svstnt1_scatter_u32base_offset_s32(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u32base]_index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svuint32_t,
+) {
+    svstnt1_scatter_u32base_offset_u32(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u64base]_index[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64base_index_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svfloat64_t,
+) {
+    svstnt1_scatter_u64base_offset_f64(pg, bases, index.unchecked_shl(3), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u64base]_index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svint64_t,
+) {
+    svstnt1_scatter_u64base_offset_s64(pg, bases, index.unchecked_shl(3), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u64base]_index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svuint64_t,
+) {
+    svstnt1_scatter_u64base_offset_u64(pg, bases, index.unchecked_shl(3), data)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u32base]_offset[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32base_offset_f32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svfloat32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4f32.nxv4i32"
+        )]
+        fn _svstnt1_scatter_u32base_offset_f32(
+            data: svfloat32_t,
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        );
+    }
+    _svstnt1_scatter_u32base_offset_f32(data, pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u32base]_offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i32.nxv4i32"
+        )]
+        fn _svstnt1_scatter_u32base_offset_s32(
+            data: svint32_t,
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        );
+    }
+    _svstnt1_scatter_u32base_offset_s32(data, pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u32base]_offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1_scatter_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svuint32_t,
+) {
+    svstnt1_scatter_u32base_offset_s32(pg, bases, offset, data.as_signed())
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u64base]_offset[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64base_offset_f64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svfloat64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2f64.nxv2i64"
+        )]
+        fn _svstnt1_scatter_u64base_offset_f64(
+            data: svfloat64_t,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svstnt1_scatter_u64base_offset_f64(data, pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u64base]_offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i64.nxv2i64"
+        )]
+        fn _svstnt1_scatter_u64base_offset_s64(
+            data: svint64_t,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svstnt1_scatter_u64base_offset_s64(data, pg.sve_into(), bases.as_signed(), offset)
+}
+#[doc = "Non-truncating store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1_scatter[_u64base]_offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1d))]
+pub unsafe fn svstnt1_scatter_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svuint64_t,
+) {
+    svstnt1_scatter_u64base_offset_s64(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_s64offset_s64(
+    pg: svbool_t,
+    base: *mut i8,
+    offsets: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.nxv2i8"
+        )]
+        fn _svstnt1b_scatter_s64offset_s64(
+            data: nxv2i8,
+            pg: svbool2_t,
+            base: *mut i8,
+            offsets: svint64_t,
+        );
+    }
+    _svstnt1b_scatter_s64offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets,
+    )
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_s64offset_s64(
+    pg: svbool_t,
+    base: *mut i16,
+    offsets: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.nxv2i16"
+        )]
+        fn _svstnt1h_scatter_s64offset_s64(
+            data: nxv2i16,
+            pg: svbool2_t,
+            base: *mut i16,
+            offsets: svint64_t,
+        );
+    }
+    _svstnt1h_scatter_s64offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets,
+    )
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter_[s64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_s64offset_s64(
+    pg: svbool_t,
+    base: *mut i32,
+    offsets: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.nxv2i32"
+        )]
+        fn _svstnt1w_scatter_s64offset_s64(
+            data: nxv2i32,
+            pg: svbool2_t,
+            base: *mut i32,
+            offsets: svint64_t,
+        );
+    }
+    _svstnt1w_scatter_s64offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets,
+    )
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_s64offset_u64(
+    pg: svbool_t,
+    base: *mut u8,
+    offsets: svint64_t,
+    data: svuint64_t,
+) {
+    svstnt1b_scatter_s64offset_s64(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_s64offset_u64(
+    pg: svbool_t,
+    base: *mut u16,
+    offsets: svint64_t,
+    data: svuint64_t,
+) {
+    svstnt1h_scatter_s64offset_s64(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter_[s64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_s64offset_u64(
+    pg: svbool_t,
+    base: *mut u32,
+    offsets: svint64_t,
+    data: svuint64_t,
+) {
+    svstnt1w_scatter_s64offset_s64(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter_[u32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u32offset_s32(
+    pg: svbool_t,
+    base: *mut i8,
+    offsets: svuint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8"
+        )]
+        fn _svstnt1b_scatter_u32offset_s32(
+            data: nxv4i8,
+            pg: svbool4_t,
+            base: *mut i8,
+            offsets: svint32_t,
+        );
+    }
+    _svstnt1b_scatter_u32offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    )
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[u32]offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u32offset_s32(
+    pg: svbool_t,
+    base: *mut i16,
+    offsets: svuint32_t,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i16"
+        )]
+        fn _svstnt1h_scatter_u32offset_s32(
+            data: nxv4i16,
+            pg: svbool4_t,
+            base: *mut i16,
+            offsets: svint32_t,
+        );
+    }
+    _svstnt1h_scatter_u32offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        offsets.as_signed(),
+    )
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter_[u32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u32offset_u32(
+    pg: svbool_t,
+    base: *mut u8,
+    offsets: svuint32_t,
+    data: svuint32_t,
+) {
+    svstnt1b_scatter_u32offset_s32(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[u32]offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u32offset_u32(
+    pg: svbool_t,
+    base: *mut u16,
+    offsets: svuint32_t,
+    data: svuint32_t,
+) {
+    svstnt1h_scatter_u32offset_s32(pg, base.as_signed(), offsets, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u64offset_s64(
+    pg: svbool_t,
+    base: *mut i8,
+    offsets: svuint64_t,
+    data: svint64_t,
+) {
+    svstnt1b_scatter_s64offset_s64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64offset_s64(
+    pg: svbool_t,
+    base: *mut i16,
+    offsets: svuint64_t,
+    data: svint64_t,
+) {
+    svstnt1h_scatter_s64offset_s64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter_[u64]offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64offset_s64(
+    pg: svbool_t,
+    base: *mut i32,
+    offsets: svuint64_t,
+    data: svint64_t,
+) {
+    svstnt1w_scatter_s64offset_s64(pg, base, offsets.as_signed(), data)
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u64offset_u64(
+    pg: svbool_t,
+    base: *mut u8,
+    offsets: svuint64_t,
+    data: svuint64_t,
+) {
+    svstnt1b_scatter_s64offset_s64(pg, base.as_signed(), offsets.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64offset_u64(
+    pg: svbool_t,
+    base: *mut u16,
+    offsets: svuint64_t,
+    data: svuint64_t,
+) {
+    svstnt1h_scatter_s64offset_s64(pg, base.as_signed(), offsets.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter_[u64]offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64offset_u64(
+    pg: svbool_t,
+    base: *mut u32,
+    offsets: svuint64_t,
+    data: svuint64_t,
+) {
+    svstnt1w_scatter_s64offset_s64(pg, base.as_signed(), offsets.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter[_u32base]_offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32"
+        )]
+        fn _svstnt1b_scatter_u32base_offset_s32(
+            data: nxv4i8,
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        );
+    }
+    _svstnt1b_scatter_u32base_offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u32base]_offset[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u32base_offset_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svint32_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i16.nxv4i32"
+        )]
+        fn _svstnt1h_scatter_u32base_offset_s32(
+            data: nxv4i16,
+            pg: svbool4_t,
+            bases: svint32_t,
+            offset: i64,
+        );
+    }
+    _svstnt1h_scatter_u32base_offset_s32(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter[_u32base]_offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svuint32_t,
+) {
+    svstnt1b_scatter_u32base_offset_s32(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u32base]_offset[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u32base_offset_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    offset: i64,
+    data: svuint32_t,
+) {
+    svstnt1h_scatter_u32base_offset_s32(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter[_u64base]_offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i8.nxv2i64"
+        )]
+        fn _svstnt1b_scatter_u64base_offset_s64(
+            data: nxv2i8,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svstnt1b_scatter_u64base_offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u64base]_offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i16.nxv2i64"
+        )]
+        fn _svstnt1h_scatter_u64base_offset_s64(
+            data: nxv2i16,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svstnt1h_scatter_u64base_offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter[_u64base]_offset[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64base_offset_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv2i32.nxv2i64"
+        )]
+        fn _svstnt1w_scatter_u64base_offset_s64(
+            data: nxv2i32,
+            pg: svbool2_t,
+            bases: svint64_t,
+            offset: i64,
+        );
+    }
+    _svstnt1w_scatter_u64base_offset_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        bases.as_signed(),
+        offset,
+    )
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter[_u64base]_offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svuint64_t,
+) {
+    svstnt1b_scatter_u64base_offset_s64(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u64base]_offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svuint64_t,
+) {
+    svstnt1h_scatter_u64base_offset_s64(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter[_u64base]_offset[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64base_offset_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    offset: i64,
+    data: svuint64_t,
+) {
+    svstnt1w_scatter_u64base_offset_s64(pg, bases, offset, data.as_signed())
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter[_u32base_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u32base_s32(pg: svbool_t, bases: svuint32_t, data: svint32_t) {
+    svstnt1b_scatter_u32base_offset_s32(pg, bases, 0, data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u32base_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u32base_s32(pg: svbool_t, bases: svuint32_t, data: svint32_t) {
+    svstnt1h_scatter_u32base_offset_s32(pg, bases, 0, data)
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter[_u32base_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u32base_u32(pg: svbool_t, bases: svuint32_t, data: svuint32_t) {
+    svstnt1b_scatter_u32base_offset_u32(pg, bases, 0, data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u32base_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u32base_u32(pg: svbool_t, bases: svuint32_t, data: svuint32_t) {
+    svstnt1h_scatter_u32base_offset_u32(pg, bases, 0, data)
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter[_u64base_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u64base_s64(pg: svbool_t, bases: svuint64_t, data: svint64_t) {
+    svstnt1b_scatter_u64base_offset_s64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u64base_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64base_s64(pg: svbool_t, bases: svuint64_t, data: svint64_t) {
+    svstnt1h_scatter_u64base_offset_s64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter[_u64base_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64base_s64(pg: svbool_t, bases: svuint64_t, data: svint64_t) {
+    svstnt1w_scatter_u64base_offset_s64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 8 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1b_scatter[_u64base_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1b))]
+pub unsafe fn svstnt1b_scatter_u64base_u64(pg: svbool_t, bases: svuint64_t, data: svuint64_t) {
+    svstnt1b_scatter_u64base_offset_u64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u64base_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64base_u64(pg: svbool_t, bases: svuint64_t, data: svuint64_t) {
+    svstnt1h_scatter_u64base_offset_u64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter[_u64base_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64base_u64(pg: svbool_t, bases: svuint64_t, data: svuint64_t) {
+    svstnt1w_scatter_u64base_offset_u64(pg, bases, 0, data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[s64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_s64index_s64(
+    pg: svbool_t,
+    base: *mut i16,
+    indices: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.index.nxv2i16"
+        )]
+        fn _svstnt1h_scatter_s64index_s64(
+            data: nxv2i16,
+            pg: svbool2_t,
+            base: *mut i16,
+            indices: svint64_t,
+        );
+    }
+    _svstnt1h_scatter_s64index_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        indices,
+    )
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter_[s64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_s64index_s64(
+    pg: svbool_t,
+    base: *mut i32,
+    indices: svint64_t,
+    data: svint64_t,
+) {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.stnt1.scatter.index.nxv2i32"
+        )]
+        fn _svstnt1w_scatter_s64index_s64(
+            data: nxv2i32,
+            pg: svbool2_t,
+            base: *mut i32,
+            indices: svint64_t,
+        );
+    }
+    _svstnt1w_scatter_s64index_s64(
+        crate::intrinsics::simd::simd_cast(data),
+        pg.sve_into(),
+        base,
+        indices,
+    )
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[s64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_s64index_u64(
+    pg: svbool_t,
+    base: *mut u16,
+    indices: svint64_t,
+    data: svuint64_t,
+) {
+    svstnt1h_scatter_s64index_s64(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter_[s64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_s64index_u64(
+    pg: svbool_t,
+    base: *mut u32,
+    indices: svint64_t,
+    data: svuint64_t,
+) {
+    svstnt1w_scatter_s64index_s64(pg, base.as_signed(), indices, data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[u64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64index_s64(
+    pg: svbool_t,
+    base: *mut i16,
+    indices: svuint64_t,
+    data: svint64_t,
+) {
+    svstnt1h_scatter_s64index_s64(pg, base, indices.as_signed(), data)
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter_[u64]index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64index_s64(
+    pg: svbool_t,
+    base: *mut i32,
+    indices: svuint64_t,
+    data: svint64_t,
+) {
+    svstnt1w_scatter_s64index_s64(pg, base, indices.as_signed(), data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter_[u64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64index_u64(
+    pg: svbool_t,
+    base: *mut u16,
+    indices: svuint64_t,
+    data: svuint64_t,
+) {
+    svstnt1h_scatter_s64index_s64(pg, base.as_signed(), indices.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter_[u64]index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64index_u64(
+    pg: svbool_t,
+    base: *mut u32,
+    indices: svuint64_t,
+    data: svuint64_t,
+) {
+    svstnt1w_scatter_s64index_s64(pg, base.as_signed(), indices.as_signed(), data.as_signed())
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u32base]_index[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u32base_index_s32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svint32_t,
+) {
+    svstnt1h_scatter_u32base_offset_s32(pg, bases, index.unchecked_shl(1), data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u32base]_index[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u32base_index_u32(
+    pg: svbool_t,
+    bases: svuint32_t,
+    index: i64,
+    data: svuint32_t,
+) {
+    svstnt1h_scatter_u32base_offset_u32(pg, bases, index.unchecked_shl(1), data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u64base]_index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svint64_t,
+) {
+    svstnt1h_scatter_u64base_offset_s64(pg, bases, index.unchecked_shl(1), data)
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter[_u64base]_index[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64base_index_s64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svint64_t,
+) {
+    svstnt1w_scatter_u64base_offset_s64(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Truncate to 16 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1h_scatter[_u64base]_index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1h))]
+pub unsafe fn svstnt1h_scatter_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svuint64_t,
+) {
+    svstnt1h_scatter_u64base_offset_u64(pg, bases, index.unchecked_shl(1), data)
+}
+#[doc = "Truncate to 32 bits and store, non-temporal"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svstnt1w_scatter[_u64base]_index[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::offset`](pointer#method.offset) safety constraints must be met for the address calculation for each active element (governed by `pg`)."]
+#[doc = "  * This dereferences and accesses the calculated address for each active element (governed by `pg`)."]
+#[doc = "  * Addresses passed in `bases` lack provenance, so this is similar to using a `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane before  using it."]
+#[doc = "  * Non-temporal accesses have special memory ordering rules, and [explicit barriers may be required for some applications](https://developer.arm.com/documentation/den0024/a/Memory-Ordering/Barriers/Non-temporal-load-and-store-pair?lang=en)."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(stnt1w))]
+pub unsafe fn svstnt1w_scatter_u64base_index_u64(
+    pg: svbool_t,
+    bases: svuint64_t,
+    index: i64,
+    data: svuint64_t,
+) {
+    svstnt1w_scatter_u64base_offset_u64(pg, bases, index.unchecked_shl(2), data)
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_s16(op1: svint16_t, op2: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subhnb.nxv8i16")]
+        fn _svsubhnb_s16(op1: svint16_t, op2: svint16_t) -> svint8_t;
+    }
+    unsafe { _svsubhnb_s16(op1, op2) }
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_n_s16(op1: svint16_t, op2: i16) -> svint8_t {
+    svsubhnb_s16(op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_s32(op1: svint32_t, op2: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subhnb.nxv4i32")]
+        fn _svsubhnb_s32(op1: svint32_t, op2: svint32_t) -> svint16_t;
+    }
+    unsafe { _svsubhnb_s32(op1, op2) }
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_n_s32(op1: svint32_t, op2: i32) -> svint16_t {
+    svsubhnb_s32(op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_s64(op1: svint64_t, op2: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subhnb.nxv2i64")]
+        fn _svsubhnb_s64(op1: svint64_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svsubhnb_s64(op1, op2) }
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_n_s64(op1: svint64_t, op2: i64) -> svint32_t {
+    svsubhnb_s64(op1, svdup_n_s64(op2))
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_u16(op1: svuint16_t, op2: svuint16_t) -> svuint8_t {
+    unsafe { svsubhnb_s16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_n_u16(op1: svuint16_t, op2: u16) -> svuint8_t {
+    svsubhnb_u16(op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_u32(op1: svuint32_t, op2: svuint32_t) -> svuint16_t {
+    unsafe { svsubhnb_s32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_n_u32(op1: svuint32_t, op2: u32) -> svuint16_t {
+    svsubhnb_u32(op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_u64(op1: svuint64_t, op2: svuint64_t) -> svuint32_t {
+    unsafe { svsubhnb_s64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract narrow high part (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnb))]
+pub fn svsubhnb_n_u64(op1: svuint64_t, op2: u64) -> svuint32_t {
+    svsubhnb_u64(op1, svdup_n_u64(op2))
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_s16(even: svint8_t, op1: svint16_t, op2: svint16_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subhnt.nxv8i16")]
+        fn _svsubhnt_s16(even: svint8_t, op1: svint16_t, op2: svint16_t) -> svint8_t;
+    }
+    unsafe { _svsubhnt_s16(even, op1, op2) }
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_n_s16(even: svint8_t, op1: svint16_t, op2: i16) -> svint8_t {
+    svsubhnt_s16(even, op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_s32(even: svint16_t, op1: svint32_t, op2: svint32_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subhnt.nxv4i32")]
+        fn _svsubhnt_s32(even: svint16_t, op1: svint32_t, op2: svint32_t) -> svint16_t;
+    }
+    unsafe { _svsubhnt_s32(even, op1, op2) }
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_n_s32(even: svint16_t, op1: svint32_t, op2: i32) -> svint16_t {
+    svsubhnt_s32(even, op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_s64(even: svint32_t, op1: svint64_t, op2: svint64_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.subhnt.nxv2i64")]
+        fn _svsubhnt_s64(even: svint32_t, op1: svint64_t, op2: svint64_t) -> svint32_t;
+    }
+    unsafe { _svsubhnt_s64(even, op1, op2) }
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_n_s64(even: svint32_t, op1: svint64_t, op2: i64) -> svint32_t {
+    svsubhnt_s64(even, op1, svdup_n_s64(op2))
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_u16(even: svuint8_t, op1: svuint16_t, op2: svuint16_t) -> svuint8_t {
+    unsafe { svsubhnt_s16(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_n_u16(even: svuint8_t, op1: svuint16_t, op2: u16) -> svuint8_t {
+    svsubhnt_u16(even, op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_u32(even: svuint16_t, op1: svuint32_t, op2: svuint32_t) -> svuint16_t {
+    unsafe { svsubhnt_s32(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_n_u32(even: svuint16_t, op1: svuint32_t, op2: u32) -> svuint16_t {
+    svsubhnt_u32(even, op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_u64(even: svuint32_t, op1: svuint64_t, op2: svuint64_t) -> svuint32_t {
+    unsafe { svsubhnt_s64(even.as_signed(), op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract narrow high part (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubhnt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(subhnt))]
+pub fn svsubhnt_n_u64(even: svuint32_t, op1: svuint64_t, op2: u64) -> svuint32_t {
+    svsubhnt_u64(even, op1, svdup_n_u64(op2))
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublb))]
+pub fn svsublb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssublb.nxv8i16")]
+        fn _svsublb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsublb_s16(op1, op2) }
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublb))]
+pub fn svsublb_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svsublb_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublb))]
+pub fn svsublb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssublb.nxv4i32")]
+        fn _svsublb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsublb_s32(op1, op2) }
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublb))]
+pub fn svsublb_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svsublb_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublb))]
+pub fn svsublb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssublb.nxv2i64")]
+        fn _svsublb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsublb_s64(op1, op2) }
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublb))]
+pub fn svsublb_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svsublb_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublb))]
+pub fn svsublb_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usublb.nxv8i16")]
+        fn _svsublb_u16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsublb_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublb))]
+pub fn svsublb_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svsublb_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublb))]
+pub fn svsublb_u32(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usublb.nxv4i32")]
+        fn _svsublb_u32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsublb_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublb))]
+pub fn svsublb_n_u32(op1: svuint16_t, op2: u16) -> svuint32_t {
+    svsublb_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublb))]
+pub fn svsublb_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usublb.nxv2i64")]
+        fn _svsublb_u64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsublb_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract long (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublb))]
+pub fn svsublb_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svsublb_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract long (bottom - top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublbt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublbt))]
+pub fn svsublbt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ssublbt.nxv8i16"
+        )]
+        fn _svsublbt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsublbt_s16(op1, op2) }
+}
+#[doc = "Subtract long (bottom - top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublbt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublbt))]
+pub fn svsublbt_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svsublbt_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract long (bottom - top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublbt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublbt))]
+pub fn svsublbt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ssublbt.nxv4i32"
+        )]
+        fn _svsublbt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsublbt_s32(op1, op2) }
+}
+#[doc = "Subtract long (bottom - top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublbt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublbt))]
+pub fn svsublbt_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svsublbt_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract long (bottom - top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublbt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublbt))]
+pub fn svsublbt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ssublbt.nxv2i64"
+        )]
+        fn _svsublbt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsublbt_s64(op1, op2) }
+}
+#[doc = "Subtract long (bottom - top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublbt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublbt))]
+pub fn svsublbt_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svsublbt_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublt))]
+pub fn svsublt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssublt.nxv8i16")]
+        fn _svsublt_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsublt_s16(op1, op2) }
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublt))]
+pub fn svsublt_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svsublt_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublt))]
+pub fn svsublt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssublt.nxv4i32")]
+        fn _svsublt_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsublt_s32(op1, op2) }
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublt))]
+pub fn svsublt_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svsublt_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublt))]
+pub fn svsublt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssublt.nxv2i64")]
+        fn _svsublt_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsublt_s64(op1, op2) }
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssublt))]
+pub fn svsublt_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svsublt_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublt))]
+pub fn svsublt_u16(op1: svuint8_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usublt.nxv8i16")]
+        fn _svsublt_u16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsublt_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublt))]
+pub fn svsublt_n_u16(op1: svuint8_t, op2: u8) -> svuint16_t {
+    svsublt_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublt))]
+pub fn svsublt_u32(op1: svuint16_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usublt.nxv4i32")]
+        fn _svsublt_u32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsublt_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublt))]
+pub fn svsublt_n_u32(op1: svuint16_t, op2: u16) -> svuint32_t {
+    svsublt_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublt))]
+pub fn svsublt_u64(op1: svuint32_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usublt.nxv2i64")]
+        fn _svsublt_u64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsublt_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract long (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsublt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usublt))]
+pub fn svsublt_n_u64(op1: svuint32_t, op2: u32) -> svuint64_t {
+    svsublt_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract long (top - bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubltb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubltb))]
+pub fn svsubltb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ssubltb.nxv8i16"
+        )]
+        fn _svsubltb_s16(op1: svint8_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsubltb_s16(op1, op2) }
+}
+#[doc = "Subtract long (top - bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubltb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubltb))]
+pub fn svsubltb_n_s16(op1: svint8_t, op2: i8) -> svint16_t {
+    svsubltb_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract long (top - bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubltb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubltb))]
+pub fn svsubltb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ssubltb.nxv4i32"
+        )]
+        fn _svsubltb_s32(op1: svint16_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsubltb_s32(op1, op2) }
+}
+#[doc = "Subtract long (top - bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubltb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubltb))]
+pub fn svsubltb_n_s32(op1: svint16_t, op2: i16) -> svint32_t {
+    svsubltb_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract long (top - bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubltb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubltb))]
+pub fn svsubltb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.ssubltb.nxv2i64"
+        )]
+        fn _svsubltb_s64(op1: svint32_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsubltb_s64(op1, op2) }
+}
+#[doc = "Subtract long (top - bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubltb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubltb))]
+pub fn svsubltb_n_s64(op1: svint32_t, op2: i32) -> svint64_t {
+    svsubltb_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwb))]
+pub fn svsubwb_s16(op1: svint16_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssubwb.nxv8i16")]
+        fn _svsubwb_s16(op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsubwb_s16(op1, op2) }
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwb))]
+pub fn svsubwb_n_s16(op1: svint16_t, op2: i8) -> svint16_t {
+    svsubwb_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwb))]
+pub fn svsubwb_s32(op1: svint32_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssubwb.nxv4i32")]
+        fn _svsubwb_s32(op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsubwb_s32(op1, op2) }
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwb))]
+pub fn svsubwb_n_s32(op1: svint32_t, op2: i16) -> svint32_t {
+    svsubwb_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwb))]
+pub fn svsubwb_s64(op1: svint64_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssubwb.nxv2i64")]
+        fn _svsubwb_s64(op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsubwb_s64(op1, op2) }
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwb))]
+pub fn svsubwb_n_s64(op1: svint64_t, op2: i32) -> svint64_t {
+    svsubwb_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwb))]
+pub fn svsubwb_u16(op1: svuint16_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usubwb.nxv8i16")]
+        fn _svsubwb_u16(op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsubwb_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwb))]
+pub fn svsubwb_n_u16(op1: svuint16_t, op2: u8) -> svuint16_t {
+    svsubwb_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwb))]
+pub fn svsubwb_u32(op1: svuint32_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usubwb.nxv4i32")]
+        fn _svsubwb_u32(op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsubwb_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwb))]
+pub fn svsubwb_n_u32(op1: svuint32_t, op2: u16) -> svuint32_t {
+    svsubwb_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwb))]
+pub fn svsubwb_u64(op1: svuint64_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usubwb.nxv2i64")]
+        fn _svsubwb_u64(op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsubwb_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract wide (bottom)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwb[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwb))]
+pub fn svsubwb_n_u64(op1: svuint64_t, op2: u32) -> svuint64_t {
+    svsubwb_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwt))]
+pub fn svsubwt_s16(op1: svint16_t, op2: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssubwt.nxv8i16")]
+        fn _svsubwt_s16(op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsubwt_s16(op1, op2) }
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwt))]
+pub fn svsubwt_n_s16(op1: svint16_t, op2: i8) -> svint16_t {
+    svsubwt_s16(op1, svdup_n_s8(op2))
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwt))]
+pub fn svsubwt_s32(op1: svint32_t, op2: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssubwt.nxv4i32")]
+        fn _svsubwt_s32(op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsubwt_s32(op1, op2) }
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwt))]
+pub fn svsubwt_n_s32(op1: svint32_t, op2: i16) -> svint32_t {
+    svsubwt_s32(op1, svdup_n_s16(op2))
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwt))]
+pub fn svsubwt_s64(op1: svint64_t, op2: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.ssubwt.nxv2i64")]
+        fn _svsubwt_s64(op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsubwt_s64(op1, op2) }
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(ssubwt))]
+pub fn svsubwt_n_s64(op1: svint64_t, op2: i32) -> svint64_t {
+    svsubwt_s64(op1, svdup_n_s32(op2))
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwt))]
+pub fn svsubwt_u16(op1: svuint16_t, op2: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usubwt.nxv8i16")]
+        fn _svsubwt_u16(op1: svint16_t, op2: svint8_t) -> svint16_t;
+    }
+    unsafe { _svsubwt_u16(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwt))]
+pub fn svsubwt_n_u16(op1: svuint16_t, op2: u8) -> svuint16_t {
+    svsubwt_u16(op1, svdup_n_u8(op2))
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwt))]
+pub fn svsubwt_u32(op1: svuint32_t, op2: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usubwt.nxv4i32")]
+        fn _svsubwt_u32(op1: svint32_t, op2: svint16_t) -> svint32_t;
+    }
+    unsafe { _svsubwt_u32(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwt))]
+pub fn svsubwt_n_u32(op1: svuint32_t, op2: u16) -> svuint32_t {
+    svsubwt_u32(op1, svdup_n_u16(op2))
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwt))]
+pub fn svsubwt_u64(op1: svuint64_t, op2: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.usubwt.nxv2i64")]
+        fn _svsubwt_u64(op1: svint64_t, op2: svint32_t) -> svint64_t;
+    }
+    unsafe { _svsubwt_u64(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Subtract wide (top)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svsubwt[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(usubwt))]
+pub fn svsubwt_n_u64(op1: svuint64_t, op2: u32) -> svuint64_t {
+    svsubwt_u64(op1, svdup_n_u32(op2))
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_f32(data: svfloat32x2_t, indices: svuint32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl2.nxv4f32")]
+        fn _svtbl2_f32(data0: svfloat32_t, data1: svfloat32_t, indices: svint32_t) -> svfloat32_t;
+    }
+    unsafe {
+        _svtbl2_f32(
+            svget2_f32::<0>(data),
+            svget2_f32::<1>(data),
+            indices.as_signed(),
+        )
+    }
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_f64(data: svfloat64x2_t, indices: svuint64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl2.nxv2f64")]
+        fn _svtbl2_f64(data0: svfloat64_t, data1: svfloat64_t, indices: svint64_t) -> svfloat64_t;
+    }
+    unsafe {
+        _svtbl2_f64(
+            svget2_f64::<0>(data),
+            svget2_f64::<1>(data),
+            indices.as_signed(),
+        )
+    }
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_s8(data: svint8x2_t, indices: svuint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl2.nxv16i8")]
+        fn _svtbl2_s8(data0: svint8_t, data1: svint8_t, indices: svint8_t) -> svint8_t;
+    }
+    unsafe {
+        _svtbl2_s8(
+            svget2_s8::<0>(data),
+            svget2_s8::<1>(data),
+            indices.as_signed(),
+        )
+    }
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_s16(data: svint16x2_t, indices: svuint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl2.nxv8i16")]
+        fn _svtbl2_s16(data0: svint16_t, data1: svint16_t, indices: svint16_t) -> svint16_t;
+    }
+    unsafe {
+        _svtbl2_s16(
+            svget2_s16::<0>(data),
+            svget2_s16::<1>(data),
+            indices.as_signed(),
+        )
+    }
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_s32(data: svint32x2_t, indices: svuint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl2.nxv4i32")]
+        fn _svtbl2_s32(data0: svint32_t, data1: svint32_t, indices: svint32_t) -> svint32_t;
+    }
+    unsafe {
+        _svtbl2_s32(
+            svget2_s32::<0>(data),
+            svget2_s32::<1>(data),
+            indices.as_signed(),
+        )
+    }
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_s64(data: svint64x2_t, indices: svuint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbl2.nxv2i64")]
+        fn _svtbl2_s64(data0: svint64_t, data1: svint64_t, indices: svint64_t) -> svint64_t;
+    }
+    unsafe {
+        _svtbl2_s64(
+            svget2_s64::<0>(data),
+            svget2_s64::<1>(data),
+            indices.as_signed(),
+        )
+    }
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_u8(data: svuint8x2_t, indices: svuint8_t) -> svuint8_t {
+    unsafe { svtbl2_s8(data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_u16(data: svuint16x2_t, indices: svuint16_t) -> svuint16_t {
+    unsafe { svtbl2_s16(data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_u32(data: svuint32x2_t, indices: svuint32_t) -> svuint32_t {
+    unsafe { svtbl2_s32(data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in two-vector table"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbl2[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbl))]
+pub fn svtbl2_u64(data: svuint64x2_t, indices: svuint64_t) -> svuint64_t {
+    unsafe { svtbl2_s64(data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_f32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_f32(fallback: svfloat32_t, data: svfloat32_t, indices: svuint32_t) -> svfloat32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbx.nxv4f32")]
+        fn _svtbx_f32(fallback: svfloat32_t, data: svfloat32_t, indices: svint32_t) -> svfloat32_t;
+    }
+    unsafe { _svtbx_f32(fallback, data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_f64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_f64(fallback: svfloat64_t, data: svfloat64_t, indices: svuint64_t) -> svfloat64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbx.nxv2f64")]
+        fn _svtbx_f64(fallback: svfloat64_t, data: svfloat64_t, indices: svint64_t) -> svfloat64_t;
+    }
+    unsafe { _svtbx_f64(fallback, data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_s8(fallback: svint8_t, data: svint8_t, indices: svuint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbx.nxv16i8")]
+        fn _svtbx_s8(fallback: svint8_t, data: svint8_t, indices: svint8_t) -> svint8_t;
+    }
+    unsafe { _svtbx_s8(fallback, data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_s16(fallback: svint16_t, data: svint16_t, indices: svuint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbx.nxv8i16")]
+        fn _svtbx_s16(fallback: svint16_t, data: svint16_t, indices: svint16_t) -> svint16_t;
+    }
+    unsafe { _svtbx_s16(fallback, data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_s32(fallback: svint32_t, data: svint32_t, indices: svuint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbx.nxv4i32")]
+        fn _svtbx_s32(fallback: svint32_t, data: svint32_t, indices: svint32_t) -> svint32_t;
+    }
+    unsafe { _svtbx_s32(fallback, data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_s64(fallback: svint64_t, data: svint64_t, indices: svuint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.tbx.nxv2i64")]
+        fn _svtbx_s64(fallback: svint64_t, data: svint64_t, indices: svint64_t) -> svint64_t;
+    }
+    unsafe { _svtbx_s64(fallback, data, indices.as_signed()) }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_u8(fallback: svuint8_t, data: svuint8_t, indices: svuint8_t) -> svuint8_t {
+    unsafe { svtbx_s8(fallback.as_signed(), data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_u16(fallback: svuint16_t, data: svuint16_t, indices: svuint16_t) -> svuint16_t {
+    unsafe { svtbx_s16(fallback.as_signed(), data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_u32(fallback: svuint32_t, data: svuint32_t, indices: svuint32_t) -> svuint32_t {
+    unsafe { svtbx_s32(fallback.as_signed(), data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Table lookup in single-vector table (merging)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svtbx[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(tbx))]
+pub fn svtbx_u64(fallback: svuint64_t, data: svuint64_t, indices: svuint64_t) -> svuint64_t {
+    unsafe { svtbx_s64(fallback.as_signed(), data.as_signed(), indices).as_unsigned() }
+}
+#[doc = "Unpack and extend high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpkhi[_b])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(punpkhi))]
+pub fn svunpkhi_b(op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.punpkhi.nxv16i1"
+        )]
+        fn _svunpkhi_b(op: svbool_t) -> svbool8_t;
+    }
+    unsafe { _svunpkhi_b(op).sve_into() }
+}
+#[doc = "Unpack and extend high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpkhi[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sunpkhi))]
+pub fn svunpkhi_s16(op: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sunpkhi.nxv8i16"
+        )]
+        fn _svunpkhi_s16(op: svint8_t) -> svint16_t;
+    }
+    unsafe { _svunpkhi_s16(op) }
+}
+#[doc = "Unpack and extend high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpkhi[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sunpkhi))]
+pub fn svunpkhi_s32(op: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sunpkhi.nxv4i32"
+        )]
+        fn _svunpkhi_s32(op: svint16_t) -> svint32_t;
+    }
+    unsafe { _svunpkhi_s32(op) }
+}
+#[doc = "Unpack and extend high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpkhi[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sunpkhi))]
+pub fn svunpkhi_s64(op: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sunpkhi.nxv2i64"
+        )]
+        fn _svunpkhi_s64(op: svint32_t) -> svint64_t;
+    }
+    unsafe { _svunpkhi_s64(op) }
+}
+#[doc = "Unpack and extend high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpkhi[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uunpkhi))]
+pub fn svunpkhi_u16(op: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uunpkhi.nxv8i16"
+        )]
+        fn _svunpkhi_u16(op: svint8_t) -> svint16_t;
+    }
+    unsafe { _svunpkhi_u16(op.as_signed()).as_unsigned() }
+}
+#[doc = "Unpack and extend high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpkhi[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uunpkhi))]
+pub fn svunpkhi_u32(op: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uunpkhi.nxv4i32"
+        )]
+        fn _svunpkhi_u32(op: svint16_t) -> svint32_t;
+    }
+    unsafe { _svunpkhi_u32(op.as_signed()).as_unsigned() }
+}
+#[doc = "Unpack and extend high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpkhi[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uunpkhi))]
+pub fn svunpkhi_u64(op: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uunpkhi.nxv2i64"
+        )]
+        fn _svunpkhi_u64(op: svint32_t) -> svint64_t;
+    }
+    unsafe { _svunpkhi_u64(op.as_signed()).as_unsigned() }
+}
+#[doc = "Unpack and extend low half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpklo[_b])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(punpklo))]
+pub fn svunpklo_b(op: svbool_t) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.punpklo.nxv16i1"
+        )]
+        fn _svunpklo_b(op: svbool_t) -> svbool8_t;
+    }
+    unsafe { _svunpklo_b(op).sve_into() }
+}
+#[doc = "Unpack and extend low half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpklo[_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sunpklo))]
+pub fn svunpklo_s16(op: svint8_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sunpklo.nxv8i16"
+        )]
+        fn _svunpklo_s16(op: svint8_t) -> svint16_t;
+    }
+    unsafe { _svunpklo_s16(op) }
+}
+#[doc = "Unpack and extend low half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpklo[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sunpklo))]
+pub fn svunpklo_s32(op: svint16_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sunpklo.nxv4i32"
+        )]
+        fn _svunpklo_s32(op: svint16_t) -> svint32_t;
+    }
+    unsafe { _svunpklo_s32(op) }
+}
+#[doc = "Unpack and extend low half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpklo[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(sunpklo))]
+pub fn svunpklo_s64(op: svint32_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.sunpklo.nxv2i64"
+        )]
+        fn _svunpklo_s64(op: svint32_t) -> svint64_t;
+    }
+    unsafe { _svunpklo_s64(op) }
+}
+#[doc = "Unpack and extend low half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpklo[_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uunpklo))]
+pub fn svunpklo_u16(op: svuint8_t) -> svuint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uunpklo.nxv8i16"
+        )]
+        fn _svunpklo_u16(op: svint8_t) -> svint16_t;
+    }
+    unsafe { _svunpklo_u16(op.as_signed()).as_unsigned() }
+}
+#[doc = "Unpack and extend low half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpklo[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uunpklo))]
+pub fn svunpklo_u32(op: svuint16_t) -> svuint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uunpklo.nxv4i32"
+        )]
+        fn _svunpklo_u32(op: svint16_t) -> svint32_t;
+    }
+    unsafe { _svunpklo_u32(op.as_signed()).as_unsigned() }
+}
+#[doc = "Unpack and extend low half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svunpklo[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(uunpklo))]
+pub fn svunpklo_u64(op: svuint32_t) -> svuint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.uunpklo.nxv2i64"
+        )]
+        fn _svunpklo_u64(op: svint32_t) -> svint64_t;
+    }
+    unsafe { _svunpklo_u64(op.as_signed()).as_unsigned() }
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svuint8_t) -> svint8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.suqadd.nxv16i8")]
+        fn _svuqadd_s8_m(pg: svbool_t, op1: svint8_t, op2: svint8_t) -> svint8_t;
+    }
+    unsafe { _svuqadd_s8_m(pg, op1, op2.as_signed()) }
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s8]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s8_m(pg: svbool_t, op1: svint8_t, op2: u8) -> svint8_t {
+    svuqadd_s8_m(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s8_x(pg: svbool_t, op1: svint8_t, op2: svuint8_t) -> svint8_t {
+    svuqadd_s8_m(pg, op1, op2)
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s8]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s8_x(pg: svbool_t, op1: svint8_t, op2: u8) -> svint8_t {
+    svuqadd_s8_x(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s8_z(pg: svbool_t, op1: svint8_t, op2: svuint8_t) -> svint8_t {
+    svuqadd_s8_m(pg, svsel_s8(pg, op1, svdup_n_s8(0)), op2)
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s8]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s8_z(pg: svbool_t, op1: svint8_t, op2: u8) -> svint8_t {
+    svuqadd_s8_z(pg, op1, svdup_n_u8(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s16_m(pg: svbool_t, op1: svint16_t, op2: svuint16_t) -> svint16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.suqadd.nxv8i16")]
+        fn _svuqadd_s16_m(pg: svbool8_t, op1: svint16_t, op2: svint16_t) -> svint16_t;
+    }
+    unsafe { _svuqadd_s16_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s16]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s16_m(pg: svbool_t, op1: svint16_t, op2: u16) -> svint16_t {
+    svuqadd_s16_m(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s16_x(pg: svbool_t, op1: svint16_t, op2: svuint16_t) -> svint16_t {
+    svuqadd_s16_m(pg, op1, op2)
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s16]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s16_x(pg: svbool_t, op1: svint16_t, op2: u16) -> svint16_t {
+    svuqadd_s16_x(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s16_z(pg: svbool_t, op1: svint16_t, op2: svuint16_t) -> svint16_t {
+    svuqadd_s16_m(pg, svsel_s16(pg, op1, svdup_n_s16(0)), op2)
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s16]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s16_z(pg: svbool_t, op1: svint16_t, op2: u16) -> svint16_t {
+    svuqadd_s16_z(pg, op1, svdup_n_u16(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s32_m(pg: svbool_t, op1: svint32_t, op2: svuint32_t) -> svint32_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.suqadd.nxv4i32")]
+        fn _svuqadd_s32_m(pg: svbool4_t, op1: svint32_t, op2: svint32_t) -> svint32_t;
+    }
+    unsafe { _svuqadd_s32_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s32]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s32_m(pg: svbool_t, op1: svint32_t, op2: u32) -> svint32_t {
+    svuqadd_s32_m(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s32_x(pg: svbool_t, op1: svint32_t, op2: svuint32_t) -> svint32_t {
+    svuqadd_s32_m(pg, op1, op2)
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s32]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s32_x(pg: svbool_t, op1: svint32_t, op2: u32) -> svint32_t {
+    svuqadd_s32_x(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s32_z(pg: svbool_t, op1: svint32_t, op2: svuint32_t) -> svint32_t {
+    svuqadd_s32_m(pg, svsel_s32(pg, op1, svdup_n_s32(0)), op2)
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s32]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s32_z(pg: svbool_t, op1: svint32_t, op2: u32) -> svint32_t {
+    svuqadd_s32_z(pg, op1, svdup_n_u32(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s64_m(pg: svbool_t, op1: svint64_t, op2: svuint64_t) -> svint64_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.suqadd.nxv2i64")]
+        fn _svuqadd_s64_m(pg: svbool2_t, op1: svint64_t, op2: svint64_t) -> svint64_t;
+    }
+    unsafe { _svuqadd_s64_m(pg.sve_into(), op1, op2.as_signed()) }
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s64]_m)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s64_m(pg: svbool_t, op1: svint64_t, op2: u64) -> svint64_t {
+    svuqadd_s64_m(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s64_x(pg: svbool_t, op1: svint64_t, op2: svuint64_t) -> svint64_t {
+    svuqadd_s64_m(pg, op1, op2)
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s64]_x)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s64_x(pg: svbool_t, op1: svint64_t, op2: u64) -> svint64_t {
+    svuqadd_s64_x(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_s64_z(pg: svbool_t, op1: svint64_t, op2: svuint64_t) -> svint64_t {
+    svuqadd_s64_m(pg, svsel_s64(pg, op1, svdup_n_s64(0)), op2)
+}
+#[doc = "Saturating add with unsigned addend"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svuqadd[_n_s64]_z)"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(suqadd))]
+pub fn svuqadd_n_s64_z(pg: svbool_t, op1: svint64_t, op2: u64) -> svint64_t {
+    svuqadd_s64_z(pg, op1, svdup_n_u64(op2))
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b8[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilege))]
+pub fn svwhilege_b8_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilege.nxv16i1.i32"
+        )]
+        fn _svwhilege_b8_s32(op1: i32, op2: i32) -> svbool_t;
+    }
+    unsafe { _svwhilege_b8_s32(op1, op2) }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b16[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilege))]
+pub fn svwhilege_b16_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilege.nxv8i1.i32"
+        )]
+        fn _svwhilege_b16_s32(op1: i32, op2: i32) -> svbool8_t;
+    }
+    unsafe { _svwhilege_b16_s32(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b32[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilege))]
+pub fn svwhilege_b32_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilege.nxv4i1.i32"
+        )]
+        fn _svwhilege_b32_s32(op1: i32, op2: i32) -> svbool4_t;
+    }
+    unsafe { _svwhilege_b32_s32(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b64[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilege))]
+pub fn svwhilege_b64_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilege.nxv2i1.i32"
+        )]
+        fn _svwhilege_b64_s32(op1: i32, op2: i32) -> svbool2_t;
+    }
+    unsafe { _svwhilege_b64_s32(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b8[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilege))]
+pub fn svwhilege_b8_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilege.nxv16i1.i64"
+        )]
+        fn _svwhilege_b8_s64(op1: i64, op2: i64) -> svbool_t;
+    }
+    unsafe { _svwhilege_b8_s64(op1, op2) }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b16[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilege))]
+pub fn svwhilege_b16_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilege.nxv8i1.i64"
+        )]
+        fn _svwhilege_b16_s64(op1: i64, op2: i64) -> svbool8_t;
+    }
+    unsafe { _svwhilege_b16_s64(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b32[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilege))]
+pub fn svwhilege_b32_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilege.nxv4i1.i64"
+        )]
+        fn _svwhilege_b32_s64(op1: i64, op2: i64) -> svbool4_t;
+    }
+    unsafe { _svwhilege_b32_s64(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b64[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilege))]
+pub fn svwhilege_b64_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilege.nxv2i1.i64"
+        )]
+        fn _svwhilege_b64_s64(op1: i64, op2: i64) -> svbool2_t;
+    }
+    unsafe { _svwhilege_b64_s64(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b8[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehs))]
+pub fn svwhilege_b8_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehs.nxv16i1.i32"
+        )]
+        fn _svwhilege_b8_u32(op1: i32, op2: i32) -> svbool_t;
+    }
+    unsafe { _svwhilege_b8_u32(op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b16[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehs))]
+pub fn svwhilege_b16_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehs.nxv8i1.i32"
+        )]
+        fn _svwhilege_b16_u32(op1: i32, op2: i32) -> svbool8_t;
+    }
+    unsafe { _svwhilege_b16_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b32[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehs))]
+pub fn svwhilege_b32_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehs.nxv4i1.i32"
+        )]
+        fn _svwhilege_b32_u32(op1: i32, op2: i32) -> svbool4_t;
+    }
+    unsafe { _svwhilege_b32_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b64[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehs))]
+pub fn svwhilege_b64_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehs.nxv2i1.i32"
+        )]
+        fn _svwhilege_b64_u32(op1: i32, op2: i32) -> svbool2_t;
+    }
+    unsafe { _svwhilege_b64_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b8[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehs))]
+pub fn svwhilege_b8_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehs.nxv16i1.i64"
+        )]
+        fn _svwhilege_b8_u64(op1: i64, op2: i64) -> svbool_t;
+    }
+    unsafe { _svwhilege_b8_u64(op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b16[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehs))]
+pub fn svwhilege_b16_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehs.nxv8i1.i64"
+        )]
+        fn _svwhilege_b16_u64(op1: i64, op2: i64) -> svbool8_t;
+    }
+    unsafe { _svwhilege_b16_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b32[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehs))]
+pub fn svwhilege_b32_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehs.nxv4i1.i64"
+        )]
+        fn _svwhilege_b32_u64(op1: i64, op2: i64) -> svbool4_t;
+    }
+    unsafe { _svwhilege_b32_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than or equal to"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilege_b64[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehs))]
+pub fn svwhilege_b64_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehs.nxv2i1.i64"
+        )]
+        fn _svwhilege_b64_u64(op1: i64, op2: i64) -> svbool2_t;
+    }
+    unsafe { _svwhilege_b64_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b8[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilegt))]
+pub fn svwhilegt_b8_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilegt.nxv16i1.i32"
+        )]
+        fn _svwhilegt_b8_s32(op1: i32, op2: i32) -> svbool_t;
+    }
+    unsafe { _svwhilegt_b8_s32(op1, op2) }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b16[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilegt))]
+pub fn svwhilegt_b16_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilegt.nxv8i1.i32"
+        )]
+        fn _svwhilegt_b16_s32(op1: i32, op2: i32) -> svbool8_t;
+    }
+    unsafe { _svwhilegt_b16_s32(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b32[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilegt))]
+pub fn svwhilegt_b32_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilegt.nxv4i1.i32"
+        )]
+        fn _svwhilegt_b32_s32(op1: i32, op2: i32) -> svbool4_t;
+    }
+    unsafe { _svwhilegt_b32_s32(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b64[_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilegt))]
+pub fn svwhilegt_b64_s32(op1: i32, op2: i32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilegt.nxv2i1.i32"
+        )]
+        fn _svwhilegt_b64_s32(op1: i32, op2: i32) -> svbool2_t;
+    }
+    unsafe { _svwhilegt_b64_s32(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b8[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilegt))]
+pub fn svwhilegt_b8_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilegt.nxv16i1.i64"
+        )]
+        fn _svwhilegt_b8_s64(op1: i64, op2: i64) -> svbool_t;
+    }
+    unsafe { _svwhilegt_b8_s64(op1, op2) }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b16[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilegt))]
+pub fn svwhilegt_b16_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilegt.nxv8i1.i64"
+        )]
+        fn _svwhilegt_b16_s64(op1: i64, op2: i64) -> svbool8_t;
+    }
+    unsafe { _svwhilegt_b16_s64(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b32[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilegt))]
+pub fn svwhilegt_b32_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilegt.nxv4i1.i64"
+        )]
+        fn _svwhilegt_b32_s64(op1: i64, op2: i64) -> svbool4_t;
+    }
+    unsafe { _svwhilegt_b32_s64(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b64[_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilegt))]
+pub fn svwhilegt_b64_s64(op1: i64, op2: i64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilegt.nxv2i1.i64"
+        )]
+        fn _svwhilegt_b64_s64(op1: i64, op2: i64) -> svbool2_t;
+    }
+    unsafe { _svwhilegt_b64_s64(op1, op2).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b8[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehi))]
+pub fn svwhilegt_b8_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehi.nxv16i1.i32"
+        )]
+        fn _svwhilegt_b8_u32(op1: i32, op2: i32) -> svbool_t;
+    }
+    unsafe { _svwhilegt_b8_u32(op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b16[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehi))]
+pub fn svwhilegt_b16_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehi.nxv8i1.i32"
+        )]
+        fn _svwhilegt_b16_u32(op1: i32, op2: i32) -> svbool8_t;
+    }
+    unsafe { _svwhilegt_b16_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b32[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehi))]
+pub fn svwhilegt_b32_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehi.nxv4i1.i32"
+        )]
+        fn _svwhilegt_b32_u32(op1: i32, op2: i32) -> svbool4_t;
+    }
+    unsafe { _svwhilegt_b32_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b64[_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehi))]
+pub fn svwhilegt_b64_u32(op1: u32, op2: u32) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehi.nxv2i1.i32"
+        )]
+        fn _svwhilegt_b64_u32(op1: i32, op2: i32) -> svbool2_t;
+    }
+    unsafe { _svwhilegt_b64_u32(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b8[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehi))]
+pub fn svwhilegt_b8_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehi.nxv16i1.i64"
+        )]
+        fn _svwhilegt_b8_u64(op1: i64, op2: i64) -> svbool_t;
+    }
+    unsafe { _svwhilegt_b8_u64(op1.as_signed(), op2.as_signed()) }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b16[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehi))]
+pub fn svwhilegt_b16_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehi.nxv8i1.i64"
+        )]
+        fn _svwhilegt_b16_u64(op1: i64, op2: i64) -> svbool8_t;
+    }
+    unsafe { _svwhilegt_b16_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b32[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehi))]
+pub fn svwhilegt_b32_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehi.nxv4i1.i64"
+        )]
+        fn _svwhilegt_b32_u64(op1: i64, op2: i64) -> svbool4_t;
+    }
+    unsafe { _svwhilegt_b32_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[doc = "While decrementing scalar is greater than"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilegt_b64[_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilehi))]
+pub fn svwhilegt_b64_u64(op1: u64, op2: u64) -> svbool_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilehi.nxv2i1.i64"
+        )]
+        fn _svwhilegt_b64_u64(op1: i64, op2: i64) -> svbool2_t;
+    }
+    unsafe { _svwhilegt_b64_u64(op1.as_signed(), op2.as_signed()).sve_into() }
+}
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+unsafe fn svwhilerw_8ptr<T>(op1: *const T, op2: *const T) -> svbool_t {
+    let op1 = op1 as *const crate::ffi::c_void;
+    let op2 = op2 as *const crate::ffi::c_void;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilerw.b.nxv16i1.p0"
+        )]
+        fn _svwhilerw_8ptr(
+            op1: *const crate::ffi::c_void,
+            op2: *const crate::ffi::c_void,
+        ) -> svbool_t;
+    }
+    _svwhilerw_8ptr(op1, op2)
+}
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+unsafe fn svwhilerw_16ptr<T>(op1: *const T, op2: *const T) -> svbool_t {
+    let op1 = op1 as *const crate::ffi::c_void;
+    let op2 = op2 as *const crate::ffi::c_void;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilerw.h.nxv8i1.p0"
+        )]
+        fn _svwhilerw_16ptr(
+            op1: *const crate::ffi::c_void,
+            op2: *const crate::ffi::c_void,
+        ) -> svbool8_t;
+    }
+    _svwhilerw_16ptr(op1, op2).sve_into()
+}
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+unsafe fn svwhilerw_32ptr<T>(op1: *const T, op2: *const T) -> svbool_t {
+    let op1 = op1 as *const crate::ffi::c_void;
+    let op2 = op2 as *const crate::ffi::c_void;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilerw.s.nxv4i1.p0"
+        )]
+        fn _svwhilerw_32ptr(
+            op1: *const crate::ffi::c_void,
+            op2: *const crate::ffi::c_void,
+        ) -> svbool4_t;
+    }
+    _svwhilerw_32ptr(op1, op2).sve_into()
+}
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+unsafe fn svwhilerw_64ptr<T>(op1: *const T, op2: *const T) -> svbool_t {
+    let op1 = op1 as *const crate::ffi::c_void;
+    let op2 = op2 as *const crate::ffi::c_void;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilerw.d.nxv2i1.p0"
+        )]
+        fn _svwhilerw_64ptr(
+            op1: *const crate::ffi::c_void,
+            op2: *const crate::ffi::c_void,
+        ) -> svbool2_t;
+    }
+    _svwhilerw_64ptr(op1, op2).sve_into()
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_f32(op1: *const f32, op2: *const f32) -> svbool_t {
+    svwhilerw_32ptr::<f32>(op1, op2)
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_f64(op1: *const f64, op2: *const f64) -> svbool_t {
+    svwhilerw_64ptr::<f64>(op1, op2)
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_s8(op1: *const i8, op2: *const i8) -> svbool_t {
+    svwhilerw_8ptr::<i8>(op1, op2)
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_s16(op1: *const i16, op2: *const i16) -> svbool_t {
+    svwhilerw_16ptr::<i16>(op1, op2)
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_s32(op1: *const i32, op2: *const i32) -> svbool_t {
+    svwhilerw_32ptr::<i32>(op1, op2)
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_s64(op1: *const i64, op2: *const i64) -> svbool_t {
+    svwhilerw_64ptr::<i64>(op1, op2)
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_u8(op1: *const u8, op2: *const u8) -> svbool_t {
+    svwhilerw_8ptr::<u8>(op1, op2)
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_u16(op1: *const u16, op2: *const u16) -> svbool_t {
+    svwhilerw_16ptr::<u16>(op1, op2)
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_u32(op1: *const u32, op2: *const u32) -> svbool_t {
+    svwhilerw_32ptr::<u32>(op1, op2)
+}
+#[doc = "While free of read-after-write conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilerw[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilerw))]
+pub unsafe fn svwhilerw_u64(op1: *const u64, op2: *const u64) -> svbool_t {
+    svwhilerw_64ptr::<u64>(op1, op2)
+}
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+unsafe fn svwhilewr_8ptr<T>(op1: *const T, op2: *const T) -> svbool_t {
+    let op1 = op1 as *const crate::ffi::c_void;
+    let op2 = op2 as *const crate::ffi::c_void;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilewr.b.nxv16i1.p0"
+        )]
+        fn _svwhilewr_8ptr(
+            op1: *const crate::ffi::c_void,
+            op2: *const crate::ffi::c_void,
+        ) -> svbool_t;
+    }
+    _svwhilewr_8ptr(op1, op2)
+}
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+unsafe fn svwhilewr_16ptr<T>(op1: *const T, op2: *const T) -> svbool_t {
+    let op1 = op1 as *const crate::ffi::c_void;
+    let op2 = op2 as *const crate::ffi::c_void;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilewr.h.nxv8i1.p0"
+        )]
+        fn _svwhilewr_16ptr(
+            op1: *const crate::ffi::c_void,
+            op2: *const crate::ffi::c_void,
+        ) -> svbool8_t;
+    }
+    _svwhilewr_16ptr(op1, op2).sve_into()
+}
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+unsafe fn svwhilewr_32ptr<T>(op1: *const T, op2: *const T) -> svbool_t {
+    let op1 = op1 as *const crate::ffi::c_void;
+    let op2 = op2 as *const crate::ffi::c_void;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilewr.s.nxv4i1.p0"
+        )]
+        fn _svwhilewr_32ptr(
+            op1: *const crate::ffi::c_void,
+            op2: *const crate::ffi::c_void,
+        ) -> svbool4_t;
+    }
+    _svwhilewr_32ptr(op1, op2).sve_into()
+}
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+unsafe fn svwhilewr_64ptr<T>(op1: *const T, op2: *const T) -> svbool_t {
+    let op1 = op1 as *const crate::ffi::c_void;
+    let op2 = op2 as *const crate::ffi::c_void;
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            target_arch = "aarch64",
+            link_name = "llvm.aarch64.sve.whilewr.d.nxv2i1.p0"
+        )]
+        fn _svwhilewr_64ptr(
+            op1: *const crate::ffi::c_void,
+            op2: *const crate::ffi::c_void,
+        ) -> svbool2_t;
+    }
+    _svwhilewr_64ptr(op1, op2).sve_into()
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_f32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_f32(op1: *const f32, op2: *const f32) -> svbool_t {
+    svwhilewr_32ptr::<f32>(op1, op2)
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_f64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_f64(op1: *const f64, op2: *const f64) -> svbool_t {
+    svwhilewr_64ptr::<f64>(op1, op2)
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_s8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_s8(op1: *const i8, op2: *const i8) -> svbool_t {
+    svwhilewr_8ptr::<i8>(op1, op2)
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_s16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_s16(op1: *const i16, op2: *const i16) -> svbool_t {
+    svwhilewr_16ptr::<i16>(op1, op2)
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_s32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_s32(op1: *const i32, op2: *const i32) -> svbool_t {
+    svwhilewr_32ptr::<i32>(op1, op2)
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_s64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_s64(op1: *const i64, op2: *const i64) -> svbool_t {
+    svwhilewr_64ptr::<i64>(op1, op2)
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_u8])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_u8(op1: *const u8, op2: *const u8) -> svbool_t {
+    svwhilewr_8ptr::<u8>(op1, op2)
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_u16])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_u16(op1: *const u16, op2: *const u16) -> svbool_t {
+    svwhilewr_16ptr::<u16>(op1, op2)
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_u32])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_u32(op1: *const u32, op2: *const u32) -> svbool_t {
+    svwhilewr_32ptr::<u32>(op1, op2)
+}
+#[doc = "While free of write-after-read conflicts"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svwhilewr[_u64])"]
+#[doc = "## Safety"]
+#[doc = "  * [`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints must be met for at least the base pointers, `op1` and `op2`."]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(whilewr))]
+pub unsafe fn svwhilewr_u64(op1: *const u64, op2: *const u64) -> svbool_t {
+    svwhilewr_64ptr::<u64>(op1, op2)
+}
+#[doc = "Bitwise exclusive OR and rotate right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svxar[_n_s8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(xar, IMM3 = 1))]
+pub fn svxar_n_s8<const IMM3: i32>(op1: svint8_t, op2: svint8_t) -> svint8_t {
+    static_assert_range!(IMM3, 1..=8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.xar.nxv16i8")]
+        fn _svxar_n_s8(op1: svint8_t, op2: svint8_t, imm3: i32) -> svint8_t;
+    }
+    unsafe { _svxar_n_s8(op1, op2, IMM3) }
+}
+#[doc = "Bitwise exclusive OR and rotate right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svxar[_n_s16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(xar, IMM3 = 1))]
+pub fn svxar_n_s16<const IMM3: i32>(op1: svint16_t, op2: svint16_t) -> svint16_t {
+    static_assert_range!(IMM3, 1..=16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.xar.nxv8i16")]
+        fn _svxar_n_s16(op1: svint16_t, op2: svint16_t, imm3: i32) -> svint16_t;
+    }
+    unsafe { _svxar_n_s16(op1, op2, IMM3) }
+}
+#[doc = "Bitwise exclusive OR and rotate right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svxar[_n_s32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(xar, IMM3 = 1))]
+pub fn svxar_n_s32<const IMM3: i32>(op1: svint32_t, op2: svint32_t) -> svint32_t {
+    static_assert_range!(IMM3, 1..=32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.xar.nxv4i32")]
+        fn _svxar_n_s32(op1: svint32_t, op2: svint32_t, imm3: i32) -> svint32_t;
+    }
+    unsafe { _svxar_n_s32(op1, op2, IMM3) }
+}
+#[doc = "Bitwise exclusive OR and rotate right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svxar[_n_s64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(xar, IMM3 = 1))]
+pub fn svxar_n_s64<const IMM3: i32>(op1: svint64_t, op2: svint64_t) -> svint64_t {
+    static_assert_range!(IMM3, 1..=64);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.xar.nxv2i64")]
+        fn _svxar_n_s64(op1: svint64_t, op2: svint64_t, imm3: i32) -> svint64_t;
+    }
+    unsafe { _svxar_n_s64(op1, op2, IMM3) }
+}
+#[doc = "Bitwise exclusive OR and rotate right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svxar[_n_u8])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(xar, IMM3 = 1))]
+pub fn svxar_n_u8<const IMM3: i32>(op1: svuint8_t, op2: svuint8_t) -> svuint8_t {
+    static_assert_range!(IMM3, 1..=8);
+    unsafe { svxar_n_s8::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR and rotate right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svxar[_n_u16])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(xar, IMM3 = 1))]
+pub fn svxar_n_u16<const IMM3: i32>(op1: svuint16_t, op2: svuint16_t) -> svuint16_t {
+    static_assert_range!(IMM3, 1..=16);
+    unsafe { svxar_n_s16::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR and rotate right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svxar[_n_u32])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(xar, IMM3 = 1))]
+pub fn svxar_n_u32<const IMM3: i32>(op1: svuint32_t, op2: svuint32_t) -> svuint32_t {
+    static_assert_range!(IMM3, 1..=32);
+    unsafe { svxar_n_s32::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
+#[doc = "Bitwise exclusive OR and rotate right"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/svxar[_n_u64])"]
+#[inline]
+#[target_feature(enable = "sve,sve2")]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+#[cfg_attr(test, assert_instr(xar, IMM3 = 1))]
+pub fn svxar_n_u64<const IMM3: i32>(op1: svuint64_t, op2: svuint64_t) -> svuint64_t {
+    static_assert_range!(IMM3, 1..=64);
+    unsafe { svxar_n_s64::<IMM3>(op1.as_signed(), op2.as_signed()).as_unsigned() }
+}
diff --git a/crates/core_arch/src/aarch64/sve2/ld_st_tests_aarch64.rs b/crates/core_arch/src/aarch64/sve2/ld_st_tests_aarch64.rs
new file mode 100644
index 0000000000..2ec3ad6a5d
--- /dev/null
+++ b/crates/core_arch/src/aarch64/sve2/ld_st_tests_aarch64.rs
@@ -0,0 +1,2482 @@
+// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen-arm/spec/sve` and run the following command to re-generate
+// this file:
+//
+// ```
+// cargo run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec
+// ```
+#![allow(unused)]
+use super::*;
+use std::boxed::Box;
+use std::convert::{TryFrom, TryInto};
+use std::sync::LazyLock;
+use std::vec::Vec;
+use stdarch_test::simd_test;
+static F32_DATA: LazyLock<[f32; 64 * 5]> = LazyLock::new(|| {
+    (0..64 * 5)
+        .map(|i| i as f32)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("f32 data incorrectly initialised")
+});
+static F64_DATA: LazyLock<[f64; 32 * 5]> = LazyLock::new(|| {
+    (0..32 * 5)
+        .map(|i| i as f64)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("f64 data incorrectly initialised")
+});
+static I8_DATA: LazyLock<[i8; 256 * 5]> = LazyLock::new(|| {
+    (0..256 * 5)
+        .map(|i| ((i + 128) % 256 - 128) as i8)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("i8 data incorrectly initialised")
+});
+static I16_DATA: LazyLock<[i16; 128 * 5]> = LazyLock::new(|| {
+    (0..128 * 5)
+        .map(|i| i as i16)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("i16 data incorrectly initialised")
+});
+static I32_DATA: LazyLock<[i32; 64 * 5]> = LazyLock::new(|| {
+    (0..64 * 5)
+        .map(|i| i as i32)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("i32 data incorrectly initialised")
+});
+static I64_DATA: LazyLock<[i64; 32 * 5]> = LazyLock::new(|| {
+    (0..32 * 5)
+        .map(|i| i as i64)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("i64 data incorrectly initialised")
+});
+static U8_DATA: LazyLock<[u8; 256 * 5]> = LazyLock::new(|| {
+    (0..256 * 5)
+        .map(|i| i as u8)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("u8 data incorrectly initialised")
+});
+static U16_DATA: LazyLock<[u16; 128 * 5]> = LazyLock::new(|| {
+    (0..128 * 5)
+        .map(|i| i as u16)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("u16 data incorrectly initialised")
+});
+static U32_DATA: LazyLock<[u32; 64 * 5]> = LazyLock::new(|| {
+    (0..64 * 5)
+        .map(|i| i as u32)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("u32 data incorrectly initialised")
+});
+static U64_DATA: LazyLock<[u64; 32 * 5]> = LazyLock::new(|| {
+    (0..32 * 5)
+        .map(|i| i as u64)
+        .collect::<Vec<_>>()
+        .try_into()
+        .expect("u64 data incorrectly initialised")
+});
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_f32(vector: svfloat32_t, expected: svfloat32_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b32(), defined));
+    let cmp = svcmpne_f32(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_f64(vector: svfloat64_t, expected: svfloat64_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b64(), defined));
+    let cmp = svcmpne_f64(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_i8(vector: svint8_t, expected: svint8_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b8(), defined));
+    let cmp = svcmpne_s8(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_i16(vector: svint16_t, expected: svint16_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b16(), defined));
+    let cmp = svcmpne_s16(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_i32(vector: svint32_t, expected: svint32_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b32(), defined));
+    let cmp = svcmpne_s32(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_i64(vector: svint64_t, expected: svint64_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b64(), defined));
+    let cmp = svcmpne_s64(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_u8(vector: svuint8_t, expected: svuint8_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b8(), defined));
+    let cmp = svcmpne_u8(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_u16(vector: svuint16_t, expected: svuint16_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b16(), defined));
+    let cmp = svcmpne_u16(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_u32(vector: svuint32_t, expected: svuint32_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b32(), defined));
+    let cmp = svcmpne_u32(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[target_feature(enable = "sve")]
+fn assert_vector_matches_u64(vector: svuint64_t, expected: svuint64_t) {
+    let defined = svrdffr();
+    assert!(svptest_first(svptrue_b64(), defined));
+    let cmp = svcmpne_u64(defined, vector, expected);
+    assert!(!svptest_any(defined, cmp))
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_s64index_f64_with_svstnt1_scatter_s64index_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let indices = svindex_s64(0, 1);
+    svstnt1_scatter_s64index_f64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_s64index_f64(svptrue_b64(), storage.as_ptr() as *const f64, indices);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_s64index_s64_with_svstnt1_scatter_s64index_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1_scatter_s64index_s64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_s64index_s64(svptrue_b64(), storage.as_ptr() as *const i64, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_s64index_u64_with_svstnt1_scatter_s64index_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1_scatter_s64index_u64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_s64index_u64(svptrue_b64(), storage.as_ptr() as *const u64, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64index_f64_with_svstnt1_scatter_u64index_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let indices = svindex_u64(0, 1);
+    svstnt1_scatter_u64index_f64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_u64index_f64(svptrue_b64(), storage.as_ptr() as *const f64, indices);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64index_s64_with_svstnt1_scatter_u64index_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1_scatter_u64index_s64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_u64index_s64(svptrue_b64(), storage.as_ptr() as *const i64, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64index_u64_with_svstnt1_scatter_u64index_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1_scatter_u64index_u64(svptrue_b64(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_u64index_u64(svptrue_b64(), storage.as_ptr() as *const u64, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_s64offset_f64_with_svstnt1_scatter_s64offset_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svstnt1_scatter_s64offset_f64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_s64offset_f64(svptrue_b64(), storage.as_ptr() as *const f64, offsets);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_s64offset_s64_with_svstnt1_scatter_s64offset_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svstnt1_scatter_s64offset_s64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_s64offset_s64(svptrue_b64(), storage.as_ptr() as *const i64, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_s64offset_u64_with_svstnt1_scatter_s64offset_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 8u32.try_into().unwrap());
+    svstnt1_scatter_s64offset_u64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_s64offset_u64(svptrue_b64(), storage.as_ptr() as *const u64, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u32offset_f32_with_svstnt1_scatter_u32offset_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svstnt1_scatter_u32offset_f32(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_u32offset_f32(svptrue_b32(), storage.as_ptr() as *const f32, offsets);
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u32offset_s32_with_svstnt1_scatter_u32offset_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svstnt1_scatter_u32offset_s32(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_u32offset_s32(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u32offset_u32_with_svstnt1_scatter_u32offset_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 4u32.try_into().unwrap());
+    svstnt1_scatter_u32offset_u32(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_u32offset_u32(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64offset_f64_with_svstnt1_scatter_u64offset_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svstnt1_scatter_u64offset_f64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_u64offset_f64(svptrue_b64(), storage.as_ptr() as *const f64, offsets);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64offset_s64_with_svstnt1_scatter_u64offset_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svstnt1_scatter_u64offset_s64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_u64offset_s64(svptrue_b64(), storage.as_ptr() as *const i64, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64offset_u64_with_svstnt1_scatter_u64offset_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    svstnt1_scatter_u64offset_u64(svptrue_b64(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1_gather_u64offset_u64(svptrue_b64(), storage.as_ptr() as *const u64, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64base_f64_with_svstnt1_scatter_u64base_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svstnt1_scatter_u64base_f64(svptrue_b64(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u64base_f64(svptrue_b64(), bases);
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64base_s64_with_svstnt1_scatter_u64base_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svstnt1_scatter_u64base_s64(svptrue_b64(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u64base_s64(svptrue_b64(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64base_u64_with_svstnt1_scatter_u64base_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svstnt1_scatter_u64base_u64(svptrue_b64(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u64base_u64(svptrue_b64(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u32base_index_f32_with_svstnt1_scatter_u32base_index_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svstnt1_scatter_u32base_index_f32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u32base_index_f32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u32base_index_s32_with_svstnt1_scatter_u32base_index_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svstnt1_scatter_u32base_index_s32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u32base_index_s32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u32base_index_u32_with_svstnt1_scatter_u32base_index_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svstnt1_scatter_u32base_index_u32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u32base_index_u32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 / (4u32 as i64) + 1,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64base_index_f64_with_svstnt1_scatter_u64base_index_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svstnt1_scatter_u64base_index_f64(svptrue_b64(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u64base_index_f64(svptrue_b64(), bases, 1.try_into().unwrap());
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64base_index_s64_with_svstnt1_scatter_u64base_index_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svstnt1_scatter_u64base_index_s64(svptrue_b64(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u64base_index_s64(svptrue_b64(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64base_index_u64_with_svstnt1_scatter_u64base_index_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svstnt1_scatter_u64base_index_u64(svptrue_b64(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u64base_index_u64(svptrue_b64(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u32base_offset_f32_with_svstnt1_scatter_u32base_offset_f32() {
+    let mut storage = [0 as f32; 320usize];
+    let data = svcvt_f32_s32_x(
+        svptrue_b32(),
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svstnt1_scatter_u32base_offset_f32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f32 || val == i as f32);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u32base_offset_f32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+    );
+    assert_vector_matches_f32(
+        loaded,
+        svcvt_f32_s32_x(
+            svptrue_b32(),
+            svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u32base_offset_s32_with_svstnt1_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svstnt1_scatter_u32base_offset_s32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u32base_offset_s32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u32base_offset_u32_with_svstnt1_scatter_u32base_offset_u32() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 4u32.try_into().unwrap());
+    svstnt1_scatter_u32base_offset_u32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u32base_offset_u32(
+        svptrue_b32(),
+        bases,
+        storage.as_ptr() as i64 + 4u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64base_offset_f64_with_svstnt1_scatter_u64base_offset_f64() {
+    let mut storage = [0 as f64; 160usize];
+    let data = svcvt_f64_s64_x(
+        svptrue_b64(),
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svstnt1_scatter_u64base_offset_f64(svptrue_b64(), bases, 8u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as f64 || val == i as f64);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u64base_offset_f64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    assert_vector_matches_f64(
+        loaded,
+        svcvt_f64_s64_x(
+            svptrue_b64(),
+            svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+        ),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64base_offset_s64_with_svstnt1_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i64; 160usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svstnt1_scatter_u64base_offset_s64(svptrue_b64(), bases, 8u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i64 || val == i as i64);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u64base_offset_s64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1_gather_u64base_offset_u64_with_svstnt1_scatter_u64base_offset_u64() {
+    let mut storage = [0 as u64; 160usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 8u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b64(), bases, offsets);
+    svstnt1_scatter_u64base_offset_u64(svptrue_b64(), bases, 8u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u64 || val == i as u64);
+    }
+    svsetffr();
+    let loaded = svldnt1_gather_u64base_offset_u64(svptrue_b64(), bases, 8u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_s64offset_s64_with_svstnt1b_scatter_s64offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_s64offset_s64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sb_gather_s64offset_s64(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_s64offset_s64_with_svstnt1h_scatter_s64offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_s64offset_s64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_s64offset_s64(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_s64offset_s64_with_svstnt1w_scatter_s64offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svstnt1w_scatter_s64offset_s64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_s64offset_s64(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_s64offset_u64_with_svstnt1b_scatter_s64offset_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_s64offset_u64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sb_gather_s64offset_u64(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_s64offset_u64_with_svstnt1h_scatter_s64offset_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_s64offset_u64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_s64offset_u64(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_s64offset_u64_with_svstnt1w_scatter_s64offset_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svstnt1w_scatter_s64offset_u64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_s64offset_u64(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u32offset_s32_with_svstnt1b_scatter_u32offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u32offset_s32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sb_gather_u32offset_s32(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u32offset_s32_with_svstnt1h_scatter_u32offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32offset_s32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_u32offset_s32(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u32offset_u32_with_svstnt1b_scatter_u32offset_u32() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u32offset_u32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sb_gather_u32offset_u32(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u32offset_u32_with_svstnt1h_scatter_u32offset_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32offset_u32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_u32offset_u32(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u64offset_s64_with_svstnt1b_scatter_u64offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u64offset_s64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sb_gather_u64offset_s64(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64offset_s64_with_svstnt1h_scatter_u64offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u64offset_s64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_u64offset_s64(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64offset_s64_with_svstnt1w_scatter_u64offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svstnt1w_scatter_u64offset_s64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_u64offset_s64(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u64offset_u64_with_svstnt1b_scatter_u64offset_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u64offset_u64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sb_gather_u64offset_u64(svptrue_b8(), storage.as_ptr() as *const i8, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64offset_u64_with_svstnt1h_scatter_u64offset_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u64offset_u64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_u64offset_u64(svptrue_b16(), storage.as_ptr() as *const i16, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64offset_u64_with_svstnt1w_scatter_u64offset_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svstnt1w_scatter_u64offset_u64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_u64offset_u64(svptrue_b32(), storage.as_ptr() as *const i32, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u32base_offset_s32_with_svstnt1b_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1sb_gather_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u32base_offset_s32_with_svstnt1h_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1sh_gather_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u32base_offset_u32_with_svstnt1b_scatter_u32base_offset_u32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1sb_gather_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u32base_offset_u32_with_svstnt1h_scatter_u32base_offset_u32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1sh_gather_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u64base_offset_s64_with_svstnt1b_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svstnt1b_scatter_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1sb_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64base_offset_s64_with_svstnt1h_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64base_offset_s64_with_svstnt1w_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u64base_offset_u64_with_svstnt1b_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svstnt1b_scatter_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1sb_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64base_offset_u64_with_svstnt1h_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64base_offset_u64_with_svstnt1w_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u64base_s64_with_svstnt1b_scatter_u64base_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svstnt1b_scatter_u64base_s64(svptrue_b8(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1sb_gather_u64base_s64(svptrue_b8(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64base_s64_with_svstnt1h_scatter_u64base_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_s64(svptrue_b16(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1sh_gather_u64base_s64(svptrue_b16(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64base_s64_with_svstnt1w_scatter_u64base_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_s64(svptrue_b32(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1sw_gather_u64base_s64(svptrue_b32(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sb_gather_u64base_u64_with_svstnt1b_scatter_u64base_u64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svstnt1b_scatter_u64base_u64(svptrue_b8(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1sb_gather_u64base_u64(svptrue_b8(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64base_u64_with_svstnt1h_scatter_u64base_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_u64(svptrue_b16(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1sh_gather_u64base_u64(svptrue_b16(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64base_u64_with_svstnt1w_scatter_u64base_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_u64(svptrue_b32(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1sw_gather_u64base_u64(svptrue_b32(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_s64index_s64_with_svstnt1h_scatter_s64index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1h_scatter_s64index_s64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_s64index_s64(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_s64index_s64_with_svstnt1w_scatter_s64index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1w_scatter_s64index_s64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_s64index_s64(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_s64index_u64_with_svstnt1h_scatter_s64index_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1h_scatter_s64index_u64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_s64index_u64(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_s64index_u64_with_svstnt1w_scatter_s64index_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1w_scatter_s64index_u64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_s64index_u64(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64index_s64_with_svstnt1h_scatter_u64index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1h_scatter_u64index_s64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_u64index_s64(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64index_s64_with_svstnt1w_scatter_u64index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1w_scatter_u64index_s64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_u64index_s64(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64index_u64_with_svstnt1h_scatter_u64index_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1h_scatter_u64index_u64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sh_gather_u64index_u64(svptrue_b16(), storage.as_ptr() as *const i16, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64index_u64_with_svstnt1w_scatter_u64index_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1w_scatter_u64index_u64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1sw_gather_u64index_u64(svptrue_b32(), storage.as_ptr() as *const i32, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u32base_index_s32_with_svstnt1h_scatter_u32base_index_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1sh_gather_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u32base_index_u32_with_svstnt1h_scatter_u32base_index_u32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1sh_gather_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64base_index_s64_with_svstnt1h_scatter_u64base_index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1sh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64base_index_s64_with_svstnt1w_scatter_u64base_index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1sw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sh_gather_u64base_index_u64_with_svstnt1h_scatter_u64base_index_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1sh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1sw_gather_u64base_index_u64_with_svstnt1w_scatter_u64base_index_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1sw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_s64offset_s64_with_svstnt1b_scatter_s64offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_s64offset_s64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1ub_gather_s64offset_s64(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_s64offset_s64_with_svstnt1h_scatter_s64offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_s64offset_s64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_s64offset_s64(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_s64offset_s64_with_svstnt1w_scatter_s64offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svstnt1w_scatter_s64offset_s64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_s64offset_s64(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_s64offset_u64_with_svstnt1b_scatter_s64offset_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_s64offset_u64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1ub_gather_s64offset_u64(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_s64offset_u64_with_svstnt1h_scatter_s64offset_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_s64offset_u64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_s64offset_u64(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_s64offset_u64_with_svstnt1w_scatter_s64offset_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_s64(0, 4u32.try_into().unwrap());
+    svstnt1w_scatter_s64offset_u64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_s64offset_u64(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u32offset_s32_with_svstnt1b_scatter_u32offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u32offset_s32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1ub_gather_u32offset_s32(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u32offset_s32_with_svstnt1h_scatter_u32offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32offset_s32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_u32offset_s32(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u32offset_u32_with_svstnt1b_scatter_u32offset_u32() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u32offset_u32(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1ub_gather_u32offset_u32(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u32offset_u32_with_svstnt1h_scatter_u32offset_u32() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32offset_u32(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_u32offset_u32(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u64offset_s64_with_svstnt1b_scatter_u64offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u64offset_s64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1ub_gather_u64offset_s64(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64offset_s64_with_svstnt1h_scatter_u64offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u64offset_s64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_u64offset_s64(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64offset_s64_with_svstnt1w_scatter_u64offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svstnt1w_scatter_u64offset_s64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_u64offset_s64(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u64offset_u64_with_svstnt1b_scatter_u64offset_u64() {
+    let mut storage = [0 as u8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u64offset_u64(svptrue_b8(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u8 || val == i as u8);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1ub_gather_u64offset_u64(svptrue_b8(), storage.as_ptr() as *const u8, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64offset_u64_with_svstnt1h_scatter_u64offset_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u64offset_u64(svptrue_b16(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_u64offset_u64(svptrue_b16(), storage.as_ptr() as *const u16, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64offset_u64_with_svstnt1w_scatter_u64offset_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    svstnt1w_scatter_u64offset_u64(svptrue_b32(), storage.as_mut_ptr(), offsets, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_u64offset_u64(svptrue_b32(), storage.as_ptr() as *const u32, offsets);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u32base_offset_s32_with_svstnt1b_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1ub_gather_u32base_offset_s32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u32base_offset_s32_with_svstnt1h_scatter_u32base_offset_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1uh_gather_u32base_offset_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u32base_offset_u32_with_svstnt1b_scatter_u32base_offset_u32() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 1u32.try_into().unwrap());
+    svstnt1b_scatter_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1ub_gather_u32base_offset_u32(
+        svptrue_b8(),
+        bases,
+        storage.as_ptr() as i64 + 1u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u32base_offset_u32_with_svstnt1h_scatter_u32base_offset_u32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1uh_gather_u32base_offset_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 + 2u32 as i64,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u64base_offset_s64_with_svstnt1b_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svstnt1b_scatter_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1ub_gather_u64base_offset_s64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64base_offset_s64_with_svstnt1h_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_u64base_offset_s64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64base_offset_s64_with_svstnt1w_scatter_u64base_offset_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_u64base_offset_s64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u64base_offset_u64_with_svstnt1b_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svstnt1b_scatter_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1ub_gather_u64base_offset_u64(svptrue_b8(), bases, 1u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64base_offset_u64_with_svstnt1h_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_u64base_offset_u64(svptrue_b16(), bases, 2u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64base_offset_u64_with_svstnt1w_scatter_u64base_offset_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_u64base_offset_u64(svptrue_b32(), bases, 4u32.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u64base_s64_with_svstnt1b_scatter_u64base_s64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svstnt1b_scatter_u64base_s64(svptrue_b8(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1ub_gather_u64base_s64(svptrue_b8(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64base_s64_with_svstnt1h_scatter_u64base_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_s64(svptrue_b16(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1uh_gather_u64base_s64(svptrue_b16(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64base_s64_with_svstnt1w_scatter_u64base_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_s64(svptrue_b32(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1uw_gather_u64base_s64(svptrue_b32(), bases);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1ub_gather_u64base_u64_with_svstnt1b_scatter_u64base_u64() {
+    let mut storage = [0 as i8; 1280usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 1u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b8(), bases, offsets);
+    svstnt1b_scatter_u64base_u64(svptrue_b8(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i8 || val == i as i8);
+    }
+    svsetffr();
+    let loaded = svldnt1ub_gather_u64base_u64(svptrue_b8(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64base_u64_with_svstnt1h_scatter_u64base_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_u64(svptrue_b16(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1uh_gather_u64base_u64(svptrue_b16(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64base_u64_with_svstnt1w_scatter_u64base_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_u64(svptrue_b32(), bases, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1uw_gather_u64base_u64(svptrue_b32(), bases);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_s64index_s64_with_svstnt1h_scatter_s64index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1h_scatter_s64index_s64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_s64index_s64(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_s64index_s64_with_svstnt1w_scatter_s64index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1w_scatter_s64index_s64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_s64index_s64(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_s64index_u64_with_svstnt1h_scatter_s64index_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1h_scatter_s64index_u64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_s64index_u64(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_s64index_u64_with_svstnt1w_scatter_s64index_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_s64(0, 1);
+    svstnt1w_scatter_s64index_u64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_s64index_u64(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64index_s64_with_svstnt1h_scatter_u64index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1h_scatter_u64index_s64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_u64index_s64(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64index_s64_with_svstnt1w_scatter_u64index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1w_scatter_u64index_s64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_u64index_s64(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64index_u64_with_svstnt1h_scatter_u64index_u64() {
+    let mut storage = [0 as u16; 640usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1h_scatter_u64index_u64(svptrue_b16(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u16 || val == i as u16);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uh_gather_u64index_u64(svptrue_b16(), storage.as_ptr() as *const u16, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64index_u64_with_svstnt1w_scatter_u64index_u64() {
+    let mut storage = [0 as u32; 320usize];
+    let data = svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let indices = svindex_u64(0, 1);
+    svstnt1w_scatter_u64index_u64(svptrue_b32(), storage.as_mut_ptr(), indices, data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as u32 || val == i as u32);
+    }
+    svsetffr();
+    let loaded =
+        svldnt1uw_gather_u64index_u64(svptrue_b32(), storage.as_ptr() as *const u32, indices);
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((0usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u32base_index_s32_with_svstnt1h_scatter_u32base_index_s32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1uh_gather_u32base_index_s32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_i32(
+        loaded,
+        svindex_s32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u32base_index_u32_with_svstnt1h_scatter_u32base_index_u32() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svindex_u32(0, 2u32.try_into().unwrap());
+    svstnt1h_scatter_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+        data,
+    );
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1uh_gather_u32base_index_u32(
+        svptrue_b16(),
+        bases,
+        storage.as_ptr() as i64 / (2u32 as i64) + 1,
+    );
+    assert_vector_matches_u32(
+        loaded,
+        svindex_u32((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64base_index_s64_with_svstnt1h_scatter_u64base_index_s64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1uh_gather_u64base_index_s64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64base_index_s64_with_svstnt1w_scatter_u64base_index_s64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1uw_gather_u64base_index_s64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_i64(
+        loaded,
+        svindex_s64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uh_gather_u64base_index_u64_with_svstnt1h_scatter_u64base_index_u64() {
+    let mut storage = [0 as i16; 640usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 2u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b16(), bases, offsets);
+    svstnt1h_scatter_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i16 || val == i as i16);
+    }
+    svsetffr();
+    let loaded = svldnt1uh_gather_u64base_index_u64(svptrue_b16(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
+#[simd_test(enable = "sve,sve2")]
+unsafe fn test_svldnt1uw_gather_u64base_index_u64_with_svstnt1w_scatter_u64base_index_u64() {
+    let mut storage = [0 as i32; 320usize];
+    let data = svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap());
+    let bases = svdup_n_u64(storage.as_ptr() as u64);
+    let offsets = svindex_u64(0, 4u32.try_into().unwrap());
+    let bases = svadd_u64_x(svptrue_b32(), bases, offsets);
+    svstnt1w_scatter_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap(), data);
+    for (i, &val) in storage.iter().enumerate() {
+        assert!(val == 0 as i32 || val == i as i32);
+    }
+    svsetffr();
+    let loaded = svldnt1uw_gather_u64base_index_u64(svptrue_b32(), bases, 1.try_into().unwrap());
+    assert_vector_matches_u64(
+        loaded,
+        svindex_u64((1usize).try_into().unwrap(), 1usize.try_into().unwrap()),
+    );
+}
diff --git a/crates/core_arch/src/aarch64/sve2/mod.rs b/crates/core_arch/src/aarch64/sve2/mod.rs
new file mode 100644
index 0000000000..acf9070214
--- /dev/null
+++ b/crates/core_arch/src/aarch64/sve2/mod.rs
@@ -0,0 +1,17 @@
+//! SVE2 intrinsics
+
+#![allow(non_camel_case_types)]
+
+// `generated.rs` has a `super::*` and this import is for that
+use super::sve::*;
+use crate::intrinsics::*;
+
+#[rustfmt::skip]
+mod generated;
+#[rustfmt::skip]
+#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]
+pub use self::generated::*;
+
+#[cfg(test)]
+#[path = "ld_st_tests_aarch64.rs"]
+mod ld_st_tests;
diff --git a/crates/core_arch/src/aarch64/tme.rs b/crates/core_arch/src/aarch64/tme.rs
deleted file mode 100644
index 207633c1f8..0000000000
--- a/crates/core_arch/src/aarch64/tme.rs
+++ /dev/null
@@ -1,201 +0,0 @@
-//! ARM's Transactional Memory Extensions (TME).
-//!
-//! This CPU feature is available on Aarch64 - A architecture profile.
-//! This feature is in the non-neon feature set. TME specific vendor documentation can
-//! be found [TME Intrinsics Introduction][tme_intrinsics_intro].
-//!
-//! The reference is [ACLE Q4 2019][acle_q4_2019_ref].
-//!
-//! ACLE has a section for TME extensions and state masks for aborts and failure codes.
-//! [ARM A64 Architecture Register Datasheet][a_profile_future] also describes possible failure code scenarios.
-//!
-//! [acle_q4_2019_ref]: https://static.docs.arm.com/101028/0010/ACLE_2019Q4_release-0010.pdf
-//! [tme_intrinsics_intro]: https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics
-//! [llvm_aarch64_int]: https://github.com/llvm/llvm-project/commit/a36d31478c182903523e04eb271bbf102bfab2cc#diff-ff24e1c35f4d54f1110ce5d90c709319R626-R646
-//! [a_profile_future]: https://static.docs.arm.com/ddi0601/a/SysReg_xml_futureA-2019-04.pdf?_ga=2.116560387.441514988.1590524918-1110153136.1588469296
-
-#[cfg(test)]
-use stdarch_test::assert_instr;
-
-unsafe extern "unadjusted" {
-    #[link_name = "llvm.aarch64.tstart"]
-    fn aarch64_tstart() -> u64;
-    #[link_name = "llvm.aarch64.tcommit"]
-    fn aarch64_tcommit();
-    #[link_name = "llvm.aarch64.tcancel"]
-    fn aarch64_tcancel(imm0: u64);
-    #[link_name = "llvm.aarch64.ttest"]
-    fn aarch64_ttest() -> u64;
-}
-
-/// Transaction successfully started.
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMSTART_SUCCESS: u64 = 0x00_u64;
-
-/// Extraction mask for failure reason
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_REASON: u64 = 0x00007FFF_u64;
-
-/// Transaction retry is possible.
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_RTRY: u64 = 1 << 15;
-
-/// Transaction executed a TCANCEL instruction
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_CNCL: u64 = 1 << 16;
-
-/// Transaction aborted because a conflict occurred
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_MEM: u64 = 1 << 17;
-
-/// Fallback error type for any other reason
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_IMP: u64 = 1 << 18;
-
-/// Transaction aborted because a non-permissible operation was attempted
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_ERR: u64 = 1 << 19;
-
-/// Transaction aborted due to read or write set limit was exceeded
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_SIZE: u64 = 1 << 20;
-
-/// Transaction aborted due to transactional nesting level was exceeded
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_NEST: u64 = 1 << 21;
-
-/// Transaction aborted due to a debug trap.
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_DBG: u64 = 1 << 22;
-
-/// Transaction failed from interrupt
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_INT: u64 = 1 << 23;
-
-/// Indicates a TRIVIAL version of TM is available
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub const _TMFAILURE_TRIVIAL: u64 = 1 << 24;
-
-// NOTE: Tests for these instructions are disabled on MSVC as dumpbin doesn't
-// understand these instructions.
-
-/// Starts a new transaction. When the transaction starts successfully the return value is 0.
-/// If the transaction fails, all state modifications are discarded and a cause of the failure
-/// is encoded in the return value.
-///
-/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
-#[inline]
-#[target_feature(enable = "tme")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tstart))]
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub unsafe fn __tstart() -> u64 {
-    aarch64_tstart()
-}
-
-/// Commits the current transaction. For a nested transaction, the only effect is that the
-/// transactional nesting depth is decreased. For an outer transaction, the state modifications
-/// performed transactionally are committed to the architectural state.
-///
-/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
-#[inline]
-#[target_feature(enable = "tme")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tcommit))]
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub unsafe fn __tcommit() {
-    aarch64_tcommit()
-}
-
-/// Cancels the current transaction and discards all state modifications that were performed transactionally.
-///
-/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
-#[inline]
-#[target_feature(enable = "tme")]
-#[cfg_attr(
-    all(test, not(target_env = "msvc")),
-    assert_instr(tcancel, IMM16 = 0x0)
-)]
-#[rustc_legacy_const_generics(0)]
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub unsafe fn __tcancel<const IMM16: u64>() {
-    static_assert!(IMM16 <= 65535);
-    aarch64_tcancel(IMM16);
-}
-
-/// Tests if executing inside a transaction. If no transaction is currently executing,
-/// the return value is 0. Otherwise, this intrinsic returns the depth of the transaction.
-///
-/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
-#[inline]
-#[target_feature(enable = "tme")]
-#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ttest))]
-#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
-pub unsafe fn __ttest() -> u64 {
-    aarch64_ttest()
-}
-
-#[cfg(test)]
-mod tests {
-    use stdarch_test::simd_test;
-
-    use crate::core_arch::aarch64::*;
-
-    const CANCEL_CODE: u64 = (0 | (0x123 & _TMFAILURE_REASON) as u64) as u64;
-
-    #[simd_test(enable = "tme")]
-    unsafe fn test_tstart() {
-        let mut x = 0;
-        for i in 0..10 {
-            let code = tme::__tstart();
-            if code == _TMSTART_SUCCESS {
-                x += 1;
-                assert_eq!(x, i + 1);
-                break;
-            }
-            assert_eq!(x, 0);
-        }
-    }
-
-    #[simd_test(enable = "tme")]
-    unsafe fn test_tcommit() {
-        let mut x = 0;
-        for i in 0..10 {
-            let code = tme::__tstart();
-            if code == _TMSTART_SUCCESS {
-                x += 1;
-                assert_eq!(x, i + 1);
-                tme::__tcommit();
-            }
-            assert_eq!(x, i + 1);
-        }
-    }
-
-    #[simd_test(enable = "tme")]
-    unsafe fn test_tcancel() {
-        let mut x = 0;
-
-        for i in 0..10 {
-            let code = tme::__tstart();
-            if code == _TMSTART_SUCCESS {
-                x += 1;
-                assert_eq!(x, i + 1);
-                tme::__tcancel::<CANCEL_CODE>();
-                break;
-            }
-        }
-
-        assert_eq!(x, 0);
-    }
-
-    #[simd_test(enable = "tme")]
-    unsafe fn test_ttest() {
-        for _ in 0..10 {
-            let code = tme::__tstart();
-            if code == _TMSTART_SUCCESS {
-                if tme::__ttest() == 2 {
-                    tme::__tcancel::<CANCEL_CODE>();
-                    break;
-                }
-            }
-        }
-    }
-}
diff --git a/crates/core_arch/src/amdgpu/intrinsic_is_convergent.md b/crates/core_arch/src/amdgpu/intrinsic_is_convergent.md
new file mode 100644
index 0000000000..1bc8899d33
--- /dev/null
+++ b/crates/core_arch/src/amdgpu/intrinsic_is_convergent.md
@@ -0,0 +1,6 @@
+This intrinsic does not behave like a normal function call; it is a "[convergent]" operation and as such has non-standard control-flow effects which need special treatment by the language.
+Rust currently does not properly support convergent operations.
+This operation is hence provided on a best-effort basis.
+Using it may result in incorrect code under some circumstances.
+
+[convergent]: https://llvm.org/docs/ConvergentOperations.html
diff --git a/crates/core_arch/src/amdgpu/mod.rs b/crates/core_arch/src/amdgpu/mod.rs
new file mode 100644
index 0000000000..374f582696
--- /dev/null
+++ b/crates/core_arch/src/amdgpu/mod.rs
@@ -0,0 +1,1125 @@
+//! amdgpu intrinsics
+//!
+//! The reference is the [LLVM amdgpu guide] and the [LLVM implementation].
+//! The order of intrinsics here follows the order in the [LLVM implementation].
+//!
+//! [LLVM amdgpu guide]: https://llvm.org/docs/AMDGPUUsage.html#llvm-ir-intrinsics
+//! [LLVM implementation]: https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+
+#[allow(improper_ctypes)]
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.amdgcn.workitem.id.x"]
+    safe fn llvm_workitem_id_x() -> u32;
+    #[link_name = "llvm.amdgcn.workitem.id.y"]
+    safe fn llvm_workitem_id_y() -> u32;
+    #[link_name = "llvm.amdgcn.workitem.id.z"]
+    safe fn llvm_workitem_id_z() -> u32;
+
+    #[link_name = "llvm.amdgcn.workgroup.id.x"]
+    safe fn llvm_workgroup_id_x() -> u32;
+    #[link_name = "llvm.amdgcn.workgroup.id.y"]
+    safe fn llvm_workgroup_id_y() -> u32;
+    #[link_name = "llvm.amdgcn.workgroup.id.z"]
+    safe fn llvm_workgroup_id_z() -> u32;
+
+    #[link_name = "llvm.amdgcn.groupstaticsize"]
+    safe fn llvm_groupstaticsize() -> u32;
+    #[link_name = "llvm.amdgcn.dispatch.id"]
+    safe fn llvm_dispatch_id() -> u64;
+
+    #[link_name = "llvm.amdgcn.wavefrontsize"]
+    safe fn llvm_wavefrontsize() -> u32;
+
+    #[link_name = "llvm.amdgcn.s.barrier"]
+    safe fn llvm_s_barrier();
+    #[link_name = "llvm.amdgcn.s.barrier.signal"]
+    fn llvm_s_barrier_signal(barrier_type: i32);
+    #[link_name = "llvm.amdgcn.s.barrier.signal.isfirst"]
+    fn llvm_s_barrier_signal_isfirst(barrier_type: i32) -> bool;
+    #[link_name = "llvm.amdgcn.s.barrier.wait"]
+    fn llvm_s_barrier_wait(barrier_type: i16);
+    #[link_name = "llvm.amdgcn.s.get.barrier.state"]
+    fn llvm_s_get_barrier_state(barrier_type: i32) -> u32;
+    #[link_name = "llvm.amdgcn.wave.barrier"]
+    safe fn llvm_wave_barrier();
+    #[link_name = "llvm.amdgcn.sched.barrier"]
+    fn llvm_sched_barrier(mask: u32);
+    #[link_name = "llvm.amdgcn.sched.group.barrier"]
+    fn llvm_sched_group_barrier(mask: u32, size: u32, sync_id: u32);
+
+    #[link_name = "llvm.amdgcn.s.sleep"]
+    safe fn llvm_s_sleep(count: u32);
+
+    #[link_name = "llvm.amdgcn.s.sethalt"]
+    safe fn llvm_s_sethalt(value: u32) -> !;
+
+    #[link_name = "llvm.amdgcn.s.getpc"]
+    safe fn llvm_s_getpc() -> i64;
+
+    #[link_name = "llvm.amdgcn.mbcnt.lo"]
+    safe fn llvm_mbcnt_lo(value: u32, init: u32) -> u32;
+    #[link_name = "llvm.amdgcn.mbcnt.hi"]
+    safe fn llvm_mbcnt_hi(value: u32, init: u32) -> u32;
+
+    #[link_name = "llvm.amdgcn.ballot"]
+    safe fn llvm_ballot(b: bool) -> u64;
+
+    #[link_name = "llvm.amdgcn.inverse.ballot"]
+    safe fn llvm_inverse_ballot(value: u64) -> bool;
+
+    #[link_name = "llvm.amdgcn.wave.reduce.umin"]
+    safe fn llvm_wave_reduce_umin(value: u32, strategy: u32) -> u32;
+    #[link_name = "llvm.amdgcn.wave.reduce.min"]
+    safe fn llvm_wave_reduce_min(value: i32, strategy: u32) -> i32;
+    #[link_name = "llvm.amdgcn.wave.reduce.umax"]
+    safe fn llvm_wave_reduce_umax(value: u32, strategy: u32) -> u32;
+    #[link_name = "llvm.amdgcn.wave.reduce.max"]
+    safe fn llvm_wave_reduce_max(value: i32, strategy: u32) -> i32;
+    #[link_name = "llvm.amdgcn.wave.reduce.add"]
+    safe fn llvm_wave_reduce_add(value: u32, strategy: u32) -> u32;
+    #[link_name = "llvm.amdgcn.wave.reduce.and"]
+    safe fn llvm_wave_reduce_and(value: u32, strategy: u32) -> u32;
+    #[link_name = "llvm.amdgcn.wave.reduce.or"]
+    safe fn llvm_wave_reduce_or(value: u32, strategy: u32) -> u32;
+    #[link_name = "llvm.amdgcn.wave.reduce.xor"]
+    safe fn llvm_wave_reduce_xor(value: u32, strategy: u32) -> u32;
+
+    // The following intrinsics can have multiple sizes
+
+    #[link_name = "llvm.amdgcn.readfirstlane.i32"]
+    safe fn llvm_readfirstlane_u32(value: u32) -> u32;
+    #[link_name = "llvm.amdgcn.readfirstlane.i64"]
+    safe fn llvm_readfirstlane_u64(value: u64) -> u64;
+    #[link_name = "llvm.amdgcn.readlane.i32"]
+    fn llvm_readlane_u32(value: u32, lane: u32) -> u32;
+    #[link_name = "llvm.amdgcn.readlane.i64"]
+    fn llvm_readlane_u64(value: u64, lane: u32) -> u64;
+    #[link_name = "llvm.amdgcn.writelane.i32"]
+    fn llvm_writelane_u32(value: u32, lane: u32, default: u32) -> u32;
+    #[link_name = "llvm.amdgcn.writelane.i64"]
+    fn llvm_writelane_u64(value: u64, lane: u32, default: u64) -> u64;
+
+    #[link_name = "llvm.amdgcn.endpgm"]
+    safe fn llvm_endpgm() -> !;
+
+    #[link_name = "llvm.amdgcn.update.dpp.i32"]
+    fn llvm_update_dpp(
+        old: u32,
+        src: u32,
+        dpp_ctrl: u32,
+        row_mask: u32,
+        bank_mask: u32,
+        bound_control: bool,
+    ) -> u32;
+
+    #[link_name = "llvm.amdgcn.s.memrealtime"]
+    safe fn llvm_s_memrealtime() -> u64;
+
+    #[link_name = "llvm.amdgcn.ds.permute"]
+    fn llvm_ds_permute(lane: u32, value: u32) -> u32;
+    #[link_name = "llvm.amdgcn.ds.bpermute"]
+    fn llvm_ds_bpermute(lane: u32, value: u32) -> u32;
+    #[link_name = "llvm.amdgcn.perm"]
+    fn llvm_perm(src0: u32, src1: u32, selector: u32) -> u32;
+
+    // gfx10
+    #[link_name = "llvm.amdgcn.permlane16.i32"]
+    fn llvm_permlane16_u32(
+        old: u32,
+        src0: u32,
+        src1: u32,
+        src2: u32,
+        fi: bool,
+        bound_control: bool,
+    ) -> u32;
+
+    // gfx10
+    #[link_name = "llvm.amdgcn.permlanex16.i32"]
+    fn llvm_permlanex16_u32(
+        old: u32,
+        src0: u32,
+        src1: u32,
+        src2: u32,
+        fi: bool,
+        bound_control: bool,
+    ) -> u32;
+
+    #[link_name = "llvm.amdgcn.s.get.waveid.in.workgroup"]
+    safe fn llvm_s_get_waveid_in_workgroup() -> u32;
+
+    // gfx11
+    #[link_name = "llvm.amdgcn.permlane64.i32"]
+    fn llvm_permlane64_u32(value: u32) -> u32;
+
+    // gfx12
+    #[link_name = "llvm.amdgcn.permlane16.var"]
+    fn llvm_permlane16_var(old: u32, src0: u32, src1: u32, fi: bool, bound_control: bool) -> u32;
+
+    // gfx12
+    #[link_name = "llvm.amdgcn.permlanex16.var"]
+    fn llvm_permlanex16_var(old: u32, src0: u32, src1: u32, fi: bool, bound_control: bool) -> u32;
+
+    #[link_name = "llvm.amdgcn.wave.id"]
+    safe fn llvm_wave_id() -> u32;
+
+    // gfx950
+    #[link_name = "llvm.amdgcn.permlane16.swap"]
+    fn llvm_permlane16_swap(
+        vdst_old: u32,
+        vsrc_src0: u32,
+        fi: bool,
+        bound_control: bool,
+    ) -> (u32, u32);
+
+    // gfx950
+    #[link_name = "llvm.amdgcn.permlane32.swap"]
+    fn llvm_permlane32_swap(
+        vdst_old: u32,
+        vsrc_src0: u32,
+        fi: bool,
+        bound_control: bool,
+    ) -> (u32, u32);
+}
+
+/// Returns the x coordinate of the workitem index within the workgroup.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn workitem_id_x() -> u32 {
+    llvm_workitem_id_x()
+}
+/// Returns the y coordinate of the workitem index within the workgroup.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn workitem_id_y() -> u32 {
+    llvm_workitem_id_y()
+}
+/// Returns the z coordinate of the workitem index within the workgroup.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn workitem_id_z() -> u32 {
+    llvm_workitem_id_z()
+}
+
+/// Returns the x coordinate of the workgroup index within the dispatch.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn workgroup_id_x() -> u32 {
+    llvm_workgroup_id_x()
+}
+/// Returns the y coordinate of the workgroup index within the dispatch.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn workgroup_id_y() -> u32 {
+    llvm_workgroup_id_y()
+}
+/// Returns the z coordinate of the workgroup index within the dispatch.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn workgroup_id_z() -> u32 {
+    llvm_workgroup_id_z()
+}
+
+/// Returns the size of statically allocated shared memory for this program in bytes.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn groupstaticsize() -> u32 {
+    llvm_groupstaticsize()
+}
+/// Returns the id of the dispatch that is currently executed.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn dispatch_id() -> u64 {
+    llvm_dispatch_id()
+}
+
+/// Returns the number of threads in a wavefront.
+///
+/// Is always a power of 2.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wavefrontsize() -> u32 {
+    llvm_wavefrontsize()
+}
+
+/// Synchronize all wavefronts in a workgroup.
+///
+/// Each wavefronts in a workgroup waits at the barrier until all wavefronts in the workgroup arrive at a barrier.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn s_barrier() {
+    llvm_s_barrier()
+}
+
+/// Signal a specific barrier type.
+///
+/// Only for non-named barriers.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn s_barrier_signal<const BARRIER_TYPE: i32>() {
+    unsafe { llvm_s_barrier_signal(BARRIER_TYPE) }
+}
+
+/// Signal a specific barrier type.
+///
+/// Only for non-named barriers.
+/// Provides access to the s_barrier_signal_first instruction;
+/// additionally ensures that the result value is valid even when
+/// the intrinsic is used from a wavefront that is not running in a workgroup.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn s_barrier_signal_isfirst<const BARRIER_TYPE: i32>() -> bool {
+    unsafe { llvm_s_barrier_signal_isfirst(BARRIER_TYPE) }
+}
+
+/// Wait for a specific barrier type.
+///
+/// Only for non-named barriers.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn s_barrier_wait<const BARRIER_TYPE: i16>() {
+    unsafe { llvm_s_barrier_wait(BARRIER_TYPE) }
+}
+
+/// Get the state of a specific barrier type.
+///
+/// The `barrier_type` argument must be uniform, otherwise behavior is undefined.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn s_get_barrier_state<const BARRIER_TYPE: i32>() -> u32 {
+    unsafe { llvm_s_get_barrier_state(BARRIER_TYPE) }
+}
+
+/// A barrier for only the threads within the current wavefront.
+///
+/// Does not result in an instruction but restricts the compiler.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_barrier() {
+    llvm_wave_barrier()
+}
+
+/// Prevent movement of some instruction types.
+///
+/// Controls the types of instructions that may be allowed to cross the intrinsic during instruction scheduling.
+/// The parameter is a mask for the instruction types that can cross the intrinsic.
+///
+/// - 0x0000: No instructions may be scheduled across `sched_barrier`.
+/// - 0x0001: All, non-memory, non-side-effect producing instructions may be scheduled across `sched_barrier`, i.e. allow ALU instructions to pass.
+/// - 0x0002: VALU instructions may be scheduled across `sched_barrier`.
+/// - 0x0004: SALU instructions may be scheduled across `sched_barrier`.
+/// - 0x0008: MFMA/WMMA instructions may be scheduled across `sched_barrier`.
+/// - 0x0010: All VMEM instructions may be scheduled across `sched_barrier`.
+/// - 0x0020: VMEM read instructions may be scheduled across `sched_barrier`.
+/// - 0x0040: VMEM write instructions may be scheduled across `sched_barrier`.
+/// - 0x0080: All DS instructions may be scheduled across `sched_barrier`.
+/// - 0x0100: All DS read instructions may be scheduled across `sched_barrier`.
+/// - 0x0200: All DS write instructions may be scheduled across `sched_barrier`.
+/// - 0x0400: All Transcendental (e.g. V_EXP) instructions may be scheduled across `sched_barrier`.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn sched_barrier<const MASK: u32>() {
+    static_assert_uimm_bits!(MASK, 11);
+    unsafe { llvm_sched_barrier(MASK) }
+}
+
+/// Creates schedule groups with specific properties to create custom scheduling pipelines.
+///
+/// The ordering between groups is enforced by the instruction scheduler.
+/// The intrinsic applies to the code that precedes the intrinsic.
+/// The intrinsic takes three values that control the behavior of the schedule groups.
+///
+/// - `mask`: Classify instruction groups using the [`sched_barrier`] mask values.
+/// - `size`: The number of instructions that are in the group.
+/// - `sync_id`: Order is enforced between groups with matching values.
+///
+/// The mask can include multiple instruction types. It is undefined behavior to set values beyond the range of valid masks.
+///
+/// Combining multiple `sched_group_barrier` intrinsics enables an ordering of specific instruction types during instruction scheduling.
+/// For example, the following enforces a sequence of 1 VMEM read, followed by 1 VALU instruction, followed by 5 MFMA instructions.
+///
+/// ```rust
+/// // 1 VMEM read
+/// sched_group_barrier::<32, 1, 0>()
+/// // 1 VALU
+/// sched_group_barrier::<2, 1, 0>()
+/// // 5 MFMA
+/// sched_group_barrier::<8, 5, 0>()
+/// ```
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn sched_group_barrier<const MASK: u32, const SIZE: u32, const SYNC_ID: u32>() {
+    static_assert_uimm_bits!(MASK, 11);
+    unsafe { llvm_sched_group_barrier(MASK, SIZE, SYNC_ID) }
+}
+
+/// Sleeps for approximately `COUNT * 64` cycles.
+///
+/// `COUNT` must be a constant.
+/// Only the lower 7 bits of `COUNT` are used.
+/// If `COUNT == 0x8000`, sleep forever until woken up, or killed.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn s_sleep<const COUNT: u32>() {
+    llvm_s_sleep(COUNT)
+}
+
+/// Stop execution of the kernel.
+///
+/// This usually signals an error state.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn s_sethalt<const VALUE: u32>() -> ! {
+    static_assert_uimm_bits!(VALUE, 3);
+    llvm_s_sethalt(VALUE)
+}
+
+/// Returns the current process counter.
+///
+/// Provides access to the s_getpc_b64 instruction, but with the return value sign-extended
+/// from the width of the underlying PC hardware register even on processors where the
+/// s_getpc_b64 instruction returns a zero-extended value.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn s_getpc() -> i64 {
+    llvm_s_getpc()
+}
+
+/// Masked bit count, low 32 lanes.
+///
+/// Computes the number of bits set in `value`, masked with a thread mask
+/// which contains 1 for all active threads less than the current thread within a wavefront.
+/// `init` is added to the result.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn mbcnt_lo(value: u32, init: u32) -> u32 {
+    llvm_mbcnt_lo(value, init)
+}
+/// Masked bit count, high 32 lanes.
+///
+/// Computes the number of bits set in `value`, masked with a thread mask
+/// which contains 1 for all active threads less than the current thread within a wavefront.
+/// `init` is added to the result.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn mbcnt_hi(value: u32, init: u32) -> u32 {
+    llvm_mbcnt_hi(value, init)
+}
+
+/// Returns a bitfield (`u32` or `u64`) containing the result of its i1 argument
+/// in all active lanes, and zero in all inactive lanes.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn ballot(b: bool) -> u64 {
+    llvm_ballot(b)
+}
+
+/// Indexes into the `value` with the current lane id and returns for each lane
+/// if the corresponding bit is set.
+///
+/// While [`ballot`] converts a `bool` to a mask, `inverse_ballot` converts a mask back to a `bool`.
+/// This means `inverse_ballot(ballot(b)) == b`.
+/// The inverse of `ballot(inverse_ballot(value)) ~= value` is not always true as inactive lanes are set to zero by `ballot`.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn inverse_ballot(value: u64) -> bool {
+    llvm_inverse_ballot(value)
+}
+
+/// Performs an arithmetic min reduction on the unsigned values provided by each lane in the wavefront.
+///
+/// The `STRATEGY` argument is a hint for the reduction strategy.
+/// - 0: Target default preference
+/// - 1: Iterative strategy
+/// - 2: DPP
+///
+/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_reduce_umin<const STRATEGY: u32>(value: u32) -> u32 {
+    static_assert!(STRATEGY <= 2);
+    llvm_wave_reduce_umin(value, STRATEGY)
+}
+/// Performs an arithmetic min reduction on the signed values provided by each lane in the wavefront.
+///
+/// The `STRATEGY` argument is a hint for the reduction strategy.
+/// - 0: Target default preference
+/// - 1: Iterative strategy
+/// - 2: DPP
+///
+/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_reduce_min<const STRATEGY: u32>(value: i32) -> i32 {
+    static_assert!(STRATEGY <= 2);
+    llvm_wave_reduce_min(value, STRATEGY)
+}
+
+/// Performs an arithmetic max reduction on the unsigned values provided by each lane in the wavefront.
+///
+/// The `STRATEGY` argument is a hint for the reduction strategy.
+/// - 0: Target default preference
+/// - 1: Iterative strategy
+/// - 2: DPP
+///
+/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_reduce_umax<const STRATEGY: u32>(value: u32) -> u32 {
+    static_assert!(STRATEGY <= 2);
+    llvm_wave_reduce_umax(value, STRATEGY)
+}
+/// Performs an arithmetic max reduction on the signed values provided by each lane in the wavefront.
+///
+/// The `STRATEGY` argument is a hint for the reduction strategy.
+/// - 0: Target default preference
+/// - 1: Iterative strategy
+/// - 2: DPP
+///
+/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_reduce_max<const STRATEGY: u32>(value: i32) -> i32 {
+    static_assert!(STRATEGY <= 2);
+    llvm_wave_reduce_max(value, STRATEGY)
+}
+
+/// Performs an arithmetic add reduction on the values provided by each lane in the wavefront.
+///
+/// The `STRATEGY` argument is a hint for the reduction strategy.
+/// - 0: Target default preference
+/// - 1: Iterative strategy
+/// - 2: DPP
+///
+/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_reduce_add<const STRATEGY: u32>(value: u32) -> u32 {
+    static_assert!(STRATEGY <= 2);
+    llvm_wave_reduce_add(value, STRATEGY)
+}
+
+/// Performs a logical and reduction on the unsigned values provided by each lane in the wavefront.
+///
+/// The `STRATEGY` argument is a hint for the reduction strategy.
+/// - 0: Target default preference
+/// - 1: Iterative strategy
+/// - 2: DPP
+///
+/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_reduce_and<const STRATEGY: u32>(value: u32) -> u32 {
+    static_assert!(STRATEGY <= 2);
+    llvm_wave_reduce_and(value, STRATEGY)
+}
+/// Performs a logical or reduction on the unsigned values provided by each lane in the wavefront.
+///
+/// The `STRATEGY` argument is a hint for the reduction strategy.
+/// - 0: Target default preference
+/// - 1: Iterative strategy
+/// - 2: DPP
+///
+/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_reduce_or<const STRATEGY: u32>(value: u32) -> u32 {
+    static_assert!(STRATEGY <= 2);
+    llvm_wave_reduce_or(value, STRATEGY)
+}
+/// Performs a logical xor reduction on the unsigned values provided by each lane in the wavefront.
+///
+/// The `STRATEGY` argument is a hint for the reduction strategy.
+/// - 0: Target default preference
+/// - 1: Iterative strategy
+/// - 2: DPP
+///
+/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_reduce_xor<const STRATEGY: u32>(value: u32) -> u32 {
+    static_assert!(STRATEGY <= 2);
+    llvm_wave_reduce_xor(value, STRATEGY)
+}
+
+// The following intrinsics can have multiple sizes
+
+/// Get `value` from the first active lane in the wavefront.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn readfirstlane_u32(value: u32) -> u32 {
+    llvm_readfirstlane_u32(value)
+}
+/// Get `value` from the first active lane in the wavefront.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn readfirstlane_u64(value: u64) -> u64 {
+    llvm_readfirstlane_u64(value)
+}
+/// Get `value` from the lane at index `lane` in the wavefront.
+///
+/// The lane argument must be uniform across the currently active threads
+/// of the current wavefront. Otherwise, the result is undefined.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn readlane_u32(value: u32, lane: u32) -> u32 {
+    unsafe { llvm_readlane_u32(value, lane) }
+}
+/// Get `value` from the lane at index `lane` in the wavefront.
+///
+/// The lane argument must be uniform across the currently active threads
+/// of the current wavefront. Otherwise, the result is undefined.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn readlane_u64(value: u64, lane: u32) -> u64 {
+    unsafe { llvm_readlane_u64(value, lane) }
+}
+/// Return `value` for the lane at index `lane` in the wavefront.
+/// Return `default` for all other lanes.
+///
+/// The value to write and lane select arguments must be uniform across the
+/// currently active threads of the current wavefront. Otherwise, the result is
+/// undefined.
+///
+/// `value` is the value returned by `lane`.
+/// `default` is the value returned by all lanes other than `lane`.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn writelane_u32(value: u32, lane: u32, default: u32) -> u32 {
+    unsafe { llvm_writelane_u32(value, lane, default) }
+}
+/// Return `value` for the lane at index `lane` in the wavefront.
+/// Return `default` for all other lanes.
+///
+/// The value to write and lane select arguments must be uniform across the
+/// currently active threads of the current wavefront. Otherwise, the result is
+/// undefined.
+///
+/// `value` is the value returned by `lane`.
+/// `default` is the value returned by all lanes other than `lane`.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn writelane_u64(value: u64, lane: u32, default: u64) -> u64 {
+    unsafe { llvm_writelane_u64(value, lane, default) }
+}
+
+/// Stop execution of the wavefront.
+///
+/// This usually signals the end of a successful execution.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn endpgm() -> ! {
+    llvm_endpgm()
+}
+
+/// The `update_dpp` intrinsic represents the `update.dpp` operation in AMDGPU.
+/// It takes an old value, a source operand, a DPP control operand, a row mask, a bank mask, and a bound control.
+/// This operation is equivalent to a sequence of `v_mov_b32` operations.
+///
+/// `llvm.amdgcn.update.dpp.i32 <old> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>`
+/// Should be equivalent to:
+/// ```asm
+/// v_mov_b32 <dest> <old>
+/// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
+/// ```
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn update_dpp<
+    const DPP_CTRL: u32,
+    const ROW_MASK: u32,
+    const BANK_MASK: u32,
+    const BOUND_CONTROL: bool,
+>(
+    old: u32,
+    src: u32,
+) -> u32 {
+    unsafe { llvm_update_dpp(old, src, DPP_CTRL, ROW_MASK, BANK_MASK, BOUND_CONTROL) }
+}
+
+/// Measures time based on a fixed frequency.
+///
+/// Provides a real-time clock counter that runs at constant speed (typically 100 MHz) independent of ALU clock speeds.
+/// The clock is consistent across the chip, so can be used for measuring between different wavefronts.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn s_memrealtime() -> u64 {
+    llvm_s_memrealtime()
+}
+
+/// Scatter data across all lanes in a wavefront.
+///
+/// Writes `value` to the lane `lane`.
+///
+/// Reading from inactive lanes returns `0`.
+/// In case multiple values get written to the same `lane`, the value from the source lane with the higher index is taken.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn ds_permute(lane: u32, value: u32) -> u32 {
+    unsafe { llvm_ds_permute(lane, value) }
+}
+/// Gather data across all lanes in a wavefront.
+///
+/// Returns the `value` given to `ds_permute` by lane `lane`.
+///
+/// Reading from inactive lanes returns `0`.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn ds_bpermute(lane: u32, value: u32) -> u32 {
+    unsafe { llvm_ds_bpermute(lane, value) }
+}
+/// Permute a 64-bit value.
+///
+/// `selector` selects between different patterns in which the 64-bit values represented by `src0` and `src1` are permuted.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn perm(src0: u32, src1: u32, selector: u32) -> u32 {
+    unsafe { llvm_perm(src0, src1, selector) }
+}
+
+// gfx10
+/// Performs arbitrary gather-style operation within a row (16 contiguous lanes) of the second input operand.
+///
+/// The third and fourth inputs must be uniform across the current wavefront.
+/// These are combined into a single 64-bit value representing lane selects used to swizzle within each row.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn permlane16_u32<const FI: bool, const BOUND_CONTROL: bool>(
+    old: u32,
+    src0: u32,
+    src1: u32,
+    src2: u32,
+) -> u32 {
+    unsafe { llvm_permlane16_u32(old, src0, src1, src2, FI, BOUND_CONTROL) }
+}
+
+// gfx10
+/// Performs arbitrary gather-style operation across two rows (16 contiguous lanes) of the second input operand.
+///
+/// The third and fourth inputs must be uniform across the current wavefront.
+/// These are combined into a single 64-bit value representing lane selects used to swizzle within each row.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn permlanex16_u32<const FI: bool, const BOUND_CONTROL: bool>(
+    old: u32,
+    src0: u32,
+    src1: u32,
+    src2: u32,
+) -> u32 {
+    unsafe { llvm_permlanex16_u32(old, src0, src1, src2, FI, BOUND_CONTROL) }
+}
+
+/// Get the index of the current wavefront in the workgroup.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn s_get_waveid_in_workgroup() -> u32 {
+    llvm_s_get_waveid_in_workgroup()
+}
+
+// gfx11
+/// Swap `value` between upper and lower 32 lanes in a wavefront.
+///
+/// Does nothing for wave32.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn permlane64_u32(value: u32) -> u32 {
+    unsafe { llvm_permlane64_u32(value) }
+}
+
+// gfx12
+/// Performs arbitrary gather-style operation within a row (16 contiguous lanes) of the second input operand.
+///
+/// In contrast to [`permlane16_u32`], allows each lane to specify its own gather lane.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn permlane16_var<const FI: bool, const BOUND_CONTROL: bool>(
+    old: u32,
+    src0: u32,
+    src1: u32,
+) -> u32 {
+    unsafe { llvm_permlane16_var(old, src0, src1, FI, BOUND_CONTROL) }
+}
+
+// gfx12
+/// Performs arbitrary gather-style operation across two rows (16 contiguous lanes) of the second input operand.
+///
+/// In contrast to [`permlanex16_u32`], allows each lane to specify its own gather lane.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn permlanex16_var<const FI: bool, const BOUND_CONTROL: bool>(
+    old: u32,
+    src0: u32,
+    src1: u32,
+) -> u32 {
+    unsafe { llvm_permlanex16_var(old, src0, src1, FI, BOUND_CONTROL) }
+}
+
+/// Get the index of the current wavefront in the workgroup.
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub fn wave_id() -> u32 {
+    llvm_wave_id()
+}
+
+// gfx950
+/// Provide direct access to `v_permlane16_swap_b32` instruction on supported targets.
+///
+/// Swaps the values across lanes of first 2 operands.
+/// Odd rows of the first operand are swapped with even rows of the second operand (one row is 16 lanes).
+/// Returns a pair for the swapped registers.
+/// The first element of the return corresponds to the swapped element of the first argument.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn permlane16_swap<const FI: bool, const BOUND_CONTROL: bool>(
+    vdst_old: u32,
+    vsrc_src0: u32,
+) -> (u32, u32) {
+    unsafe { llvm_permlane16_swap(vdst_old, vsrc_src0, FI, BOUND_CONTROL) }
+}
+
+// gfx950
+/// Provide direct access to `v_permlane32_swap_b32` instruction on supported targets.
+///
+/// Swaps the values across lanes of first 2 operands.
+/// Rows 2 and 3 of the first operand are swapped with rows 0 and 1 of the second operand (one row is 16 lanes).
+/// Returns a pair for the swapped registers.
+/// The first element of the return corresponds to the swapped element of the first argument.
+///
+#[doc = include_str!("intrinsic_is_convergent.md")]
+#[inline]
+#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+pub unsafe fn permlane32_swap<const FI: bool, const BOUND_CONTROL: bool>(
+    vdst_old: u32,
+    vsrc_src0: u32,
+) -> (u32, u32) {
+    unsafe { llvm_permlane32_swap(vdst_old, vsrc_src0, FI, BOUND_CONTROL) }
+}
+
+// Functions to generate code, used to check that the intrinsics build.
+// Marked as no_mangle, so the compiler does not remove the functions.
+// To test, uncomment the `#[cfg(test)]` line below and run
+// NORUN=1 NOSTD=1 TARGET=amdgcn-amd-amdhsa CARGO_UNSTABLE_BUILD_STD=core ci/run.sh
+//
+// Note that depending on the target-cpu set in run.sh, some of these intrinsics are not available
+// and compilation fails with `Cannot select: intrinsic %llvm.amdgcn...`.
+// Uncomment these intrinsics to check.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[unsafe(no_mangle)]
+    fn test_workitem_id_x() -> u32 {
+        workitem_id_x()
+    }
+    #[unsafe(no_mangle)]
+    fn test_workitem_id_y() -> u32 {
+        workitem_id_y()
+    }
+    #[unsafe(no_mangle)]
+    fn test_workitem_id_z() -> u32 {
+        workitem_id_z()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_workgroup_id_x() -> u32 {
+        workgroup_id_x()
+    }
+    #[unsafe(no_mangle)]
+    fn test_workgroup_id_y() -> u32 {
+        workgroup_id_y()
+    }
+    #[unsafe(no_mangle)]
+    fn test_workgroup_id_z() -> u32 {
+        workgroup_id_z()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_groupstaticsize() -> u32 {
+        groupstaticsize()
+    }
+    #[unsafe(no_mangle)]
+    fn test_dispatch_id() -> u64 {
+        dispatch_id()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_wavefrontsize() -> u32 {
+        wavefrontsize()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_barrier() {
+        s_barrier()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_barrier_signal() {
+        unsafe { s_barrier_signal::<-1>() }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_barrier_signal_isfirst() -> bool {
+        unsafe { s_barrier_signal_isfirst::<-1>() }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_barrier_wait() {
+        unsafe { s_barrier_wait::<-1>() }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_get_barrier_state() -> u32 {
+        unsafe { s_get_barrier_state::<-1>() }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_wave_barrier() {
+        wave_barrier()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_sched_barrier() {
+        unsafe { sched_barrier::<1>() }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_sched_group_barrier() {
+        unsafe { sched_group_barrier::<1, 1, 0>() }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_sleep() {
+        s_sleep::<1>()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_sethalt() -> ! {
+        s_sethalt::<1>()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_getpc() -> i64 {
+        s_getpc()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_mbcnt_lo(value: u32, init: u32) -> u32 {
+        mbcnt_lo(value, init)
+    }
+    #[unsafe(no_mangle)]
+    fn test_mbcnt_hi(value: u32, init: u32) -> u32 {
+        mbcnt_hi(value, init)
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_ballot(b: bool) -> u64 {
+        ballot(b)
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_inverse_ballot(value: u64) -> bool {
+        inverse_ballot(value)
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_wave_reduce_umin(value: u32) -> u32 {
+        wave_reduce_umin::<0>(value)
+    }
+    #[unsafe(no_mangle)]
+    fn test_wave_reduce_min(value: i32) -> i32 {
+        wave_reduce_min::<0>(value)
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_wave_reduce_umax(value: u32) -> u32 {
+        wave_reduce_umax::<0>(value)
+    }
+    #[unsafe(no_mangle)]
+    fn test_wave_reduce_max(value: i32) -> i32 {
+        wave_reduce_max::<0>(value)
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_wave_reduce_add(value: u32) -> u32 {
+        wave_reduce_add::<0>(value)
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_wave_reduce_and(value: u32) -> u32 {
+        wave_reduce_and::<0>(value)
+    }
+    #[unsafe(no_mangle)]
+    fn test_wave_reduce_or(value: u32) -> u32 {
+        wave_reduce_or::<0>(value)
+    }
+    #[unsafe(no_mangle)]
+    fn test_wave_reduce_xor(value: u32) -> u32 {
+        wave_reduce_xor::<0>(value)
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_readfirstlane_u32(value: u32) -> u32 {
+        readfirstlane_u32(value)
+    }
+    #[unsafe(no_mangle)]
+    fn test_readfirstlane_u64(value: u64) -> u64 {
+        readfirstlane_u64(value)
+    }
+    #[unsafe(no_mangle)]
+    fn test_readlane_u32(value: u32, lane: u32) -> u32 {
+        unsafe { readlane_u32(value, lane) }
+    }
+    #[unsafe(no_mangle)]
+    fn test_readlane_u64(value: u64, lane: u32) -> u64 {
+        unsafe { readlane_u64(value, lane) }
+    }
+    #[unsafe(no_mangle)]
+    fn test_writelane_u32(value: u32, lane: u32, default: u32) -> u32 {
+        unsafe { writelane_u32(value, lane, default) }
+    }
+    #[unsafe(no_mangle)]
+    fn test_writelane_u64(value: u64, lane: u32, default: u64) -> u64 {
+        unsafe { writelane_u64(value, lane, default) }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_endpgm() -> ! {
+        endpgm()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_update_dpp(old: u32, src: u32) -> u32 {
+        unsafe { update_dpp::<0, 0, 0, true>(old, src) }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_memrealtime() -> u64 {
+        s_memrealtime()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_ds_permute(lane: u32, value: u32) -> u32 {
+        unsafe { ds_permute(lane, value) }
+    }
+    #[unsafe(no_mangle)]
+    fn test_ds_bpermute(lane: u32, value: u32) -> u32 {
+        unsafe { ds_bpermute(lane, value) }
+    }
+    #[unsafe(no_mangle)]
+    fn test_perm(src0: u32, src1: u32, selector: u32) -> u32 {
+        unsafe { perm(src0, src1, selector) }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_permlane16_u32(old: u32, src0: u32, src1: u32, src2: u32) -> u32 {
+        unsafe { permlane16_u32::<false, true>(old, src0, src1, src2) }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_permlanex16_u32(old: u32, src0: u32, src1: u32, src2: u32) -> u32 {
+        unsafe { permlanex16_u32::<false, true>(old, src0, src1, src2) }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_s_get_waveid_in_workgroup() -> u32 {
+        s_get_waveid_in_workgroup()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_permlane64_u32(value: u32) -> u32 {
+        unsafe { permlane64_u32(value) }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_permlane16_var(old: u32, src0: u32, src1: u32) -> u32 {
+        unsafe { permlane16_var::<false, true>(old, src0, src1) }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_permlanex16_var(old: u32, src0: u32, src1: u32) -> u32 {
+        unsafe { permlanex16_var::<false, true>(old, src0, src1) }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_wave_id() -> u32 {
+        wave_id()
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_permlane16_swap(vdst_old: u32, vsrc_src0: u32) -> (u32, u32) {
+        unsafe { permlane16_swap::<false, true>(vdst_old, vsrc_src0) }
+    }
+
+    #[unsafe(no_mangle)]
+    fn test_permlane32_swap(vdst_old: u32, vsrc_src0: u32) -> (u32, u32) {
+        unsafe { permlane32_swap::<false, true>(vdst_old, vsrc_src0) }
+    }
+}
diff --git a/crates/core_arch/src/arm_shared/hints.rs b/crates/core_arch/src/arm_shared/hints.rs
index 54fd78270a..8a25cc1163 100644
--- a/crates/core_arch/src/arm_shared/hints.rs
+++ b/crates/core_arch/src/arm_shared/hints.rs
@@ -83,8 +83,11 @@ pub unsafe fn __sevl() {
 /// improve overall system performance.
 // Section 10.1 of ACLE says that the supported arches are: 8, 6K, 6-M
 // LLVM says "instruction requires: armv6k"
+// On ARMv6 in Thumb mode, T2 is required (see Arm DDI0406C Section A8.8.427)
 #[cfg(any(
-    target_feature = "v6",
+    all(target_feature = "v6k", not(target_feature = "thumb-mode")),
+    target_feature = "v6t2",
+    all(target_feature = "v6", target_feature = "mclass"),
     target_arch = "aarch64",
     target_arch = "arm64ec",
     doc
diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs
index b5ba792b18..c5bd5c8917 100644
--- a/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -819,7 +819,14 @@ pub fn vabaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
     assert_instr(fabd)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -842,7 +849,14 @@ pub fn vabd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     assert_instr(fabd)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vabdq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -1406,7 +1420,14 @@ pub fn vabdl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     assert_instr(fabs)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vabs_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_fabs(a) }
@@ -1421,7 +1442,14 @@ pub fn vabs_f16(a: float16x4_t) -> float16x4_t {
     assert_instr(fabs)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vabsq_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_fabs(a) }
@@ -1631,7 +1659,7 @@ pub fn vabsq_s32(a: int32x4_t) -> int32x4_t {
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vabsh_f16(a: f16) -> f16 {
-    unsafe { simd_extract!(vabs_f16(vdup_n_f16(a)), 0) }
+    vget_lane_f16::<0>(vabs_f16(vdup_n_f16(a)))
 }
 #[doc = "Floating-point Add (vector)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vadd_f16)"]
@@ -1643,7 +1671,14 @@ pub fn vabsh_f16(a: f16) -> f16 {
     assert_instr(fadd)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_add(a, b) }
@@ -1658,7 +1693,14 @@ pub fn vadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     assert_instr(fadd)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vaddq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_add(a, b) }
@@ -2147,7 +2189,11 @@ pub fn vaddh_f16(a: f16, b: f16) -> f16 {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2161,7 +2207,7 @@ pub fn vaddh_f16(a: f16, b: f16) -> f16 {
 pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), int16x8_t::splat(8)));
-        simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+        vcombine_s8(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2171,7 +2217,11 @@ pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2185,7 +2235,7 @@ pub fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t {
 pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), int32x4_t::splat(16)));
-        simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
+        vcombine_s16(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2195,7 +2245,11 @@ pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2209,7 +2263,7 @@ pub fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t {
 pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), int64x2_t::splat(32)));
-        simd_shuffle!(r, x, [0, 1, 2, 3])
+        vcombine_s32(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2219,7 +2273,11 @@ pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2233,7 +2291,7 @@ pub fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t {
 pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), uint16x8_t::splat(8)));
-        simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+        vcombine_u8(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2243,7 +2301,11 @@ pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2257,7 +2319,7 @@ pub fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t
 pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), uint32x4_t::splat(16)));
-        simd_shuffle!(r, x, [0, 1, 2, 3, 4, 5, 6, 7])
+        vcombine_u16(r, x)
     }
 }
 #[doc = "Add returning High Narrow (high half)."]
@@ -2267,7 +2329,11 @@ pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(addhn2)
 )]
 #[cfg_attr(
@@ -2281,7 +2347,7 @@ pub fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_
 pub fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t {
     unsafe {
         let x = simd_cast(simd_shr(simd_add(a, b), uint64x2_t::splat(32)));
-        simd_shuffle!(r, x, [0, 1, 2, 3])
+        vcombine_u32(r, x)
     }
 }
 #[doc = "Add returning High Narrow."]
@@ -2417,7 +2483,11 @@ pub fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddl2)
 )]
 #[cfg_attr(
@@ -2429,9 +2499,9 @@ pub fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
+    let a: int16x4_t = vget_high_s16(a);
+    let b: int16x4_t = vget_high_s16(b);
     unsafe {
-        let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
         let a: int32x4_t = simd_cast(a);
         let b: int32x4_t = simd_cast(b);
         simd_add(a, b)
@@ -2444,7 +2514,11 @@ pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddl2)
 )]
 #[cfg_attr(
@@ -2456,9 +2530,9 @@ pub fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
+    let a: int32x2_t = vget_high_s32(a);
+    let b: int32x2_t = vget_high_s32(b);
     unsafe {
-        let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
         let a: int64x2_t = simd_cast(a);
         let b: int64x2_t = simd_cast(b);
         simd_add(a, b)
@@ -2471,7 +2545,11 @@ pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddl2)
 )]
 #[cfg_attr(
@@ -2483,9 +2561,9 @@ pub fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
+    let a: int8x8_t = vget_high_s8(a);
+    let b: int8x8_t = vget_high_s8(b);
     unsafe {
-        let a: int8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
         let a: int16x8_t = simd_cast(a);
         let b: int16x8_t = simd_cast(b);
         simd_add(a, b)
@@ -2498,7 +2576,11 @@ pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddl2)
 )]
 #[cfg_attr(
@@ -2510,9 +2592,9 @@ pub fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
+    let a: uint16x4_t = vget_high_u16(a);
+    let b: uint16x4_t = vget_high_u16(b);
     unsafe {
-        let a: uint16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
         let a: uint32x4_t = simd_cast(a);
         let b: uint32x4_t = simd_cast(b);
         simd_add(a, b)
@@ -2525,7 +2607,11 @@ pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddl2)
 )]
 #[cfg_attr(
@@ -2537,9 +2623,9 @@ pub fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
+    let a: uint32x2_t = vget_high_u32(a);
+    let b: uint32x2_t = vget_high_u32(b);
     unsafe {
-        let a: uint32x2_t = simd_shuffle!(a, a, [2, 3]);
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
         let a: uint64x2_t = simd_cast(a);
         let b: uint64x2_t = simd_cast(b);
         simd_add(a, b)
@@ -2552,7 +2638,11 @@ pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddl2)
 )]
 #[cfg_attr(
@@ -2564,9 +2654,9 @@ pub fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
+    let a: uint8x8_t = vget_high_u8(a);
+    let b: uint8x8_t = vget_high_u8(b);
     unsafe {
-        let a: uint8x8_t = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
         let a: uint16x8_t = simd_cast(a);
         let b: uint16x8_t = simd_cast(b);
         simd_add(a, b)
@@ -2750,7 +2840,11 @@ pub fn vaddq_p128(a: p128, b: p128) -> p128 {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddw2)
 )]
 #[cfg_attr(
@@ -2762,8 +2856,8 @@ pub fn vaddq_p128(a: p128, b: p128) -> p128 {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
+    let b = vget_high_s16(b);
     unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
         let b: int32x4_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -2775,7 +2869,11 @@ pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddw2)
 )]
 #[cfg_attr(
@@ -2787,8 +2885,8 @@ pub fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+    let b = vget_high_s32(b);
     unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
         let b: int64x2_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -2800,7 +2898,11 @@ pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(saddw2)
 )]
 #[cfg_attr(
@@ -2812,8 +2914,8 @@ pub fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
+    let b = vget_high_s8(b);
     unsafe {
-        let b: int8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
         let b: int16x8_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -2825,7 +2927,11 @@ pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddw2)
 )]
 #[cfg_attr(
@@ -2837,8 +2943,8 @@ pub fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+    let b = vget_high_u16(b);
     unsafe {
-        let b: uint16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
         let b: uint32x4_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -2850,7 +2956,11 @@ pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddw2)
 )]
 #[cfg_attr(
@@ -2862,8 +2972,8 @@ pub fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+    let b = vget_high_u32(b);
     unsafe {
-        let b: uint32x2_t = simd_shuffle!(b, b, [2, 3]);
         let b: uint64x2_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -2875,7 +2985,11 @@ pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(uaddw2)
 )]
 #[cfg_attr(
@@ -2887,8 +3001,8 @@ pub fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+    let b = vget_high_u8(b);
     unsafe {
-        let b: uint8x8_t = simd_shuffle!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
         let b: uint16x8_t = simd_cast(b);
         simd_add(a, b)
     }
@@ -3835,7 +3949,14 @@ pub fn vbicq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(bsl)
 )]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vbsl_f16(a: uint16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     let not = int16x4_t::splat(-1);
@@ -3856,7 +3977,14 @@ pub fn vbsl_f16(a: uint16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(bsl)
 )]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vbslq_f16(a: uint16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     let not = int16x8_t::splat(-1);
@@ -4471,7 +4599,14 @@ pub fn vbslq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
     assert_instr(facge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcage_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
@@ -4494,7 +4629,14 @@ pub fn vcage_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     assert_instr(facge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcageq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
@@ -4575,7 +4717,14 @@ pub fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
     assert_instr(facgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcagt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
@@ -4598,7 +4747,14 @@ pub fn vcagt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     assert_instr(facgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcagtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
@@ -4679,7 +4835,14 @@ pub fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
     assert_instr(facge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcale_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     vcage_f16(b, a)
@@ -4694,7 +4857,14 @@ pub fn vcale_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     assert_instr(facge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcaleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     vcageq_f16(b, a)
@@ -4751,7 +4921,14 @@ pub fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
     assert_instr(facgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcalt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     vcagt_f16(b, a)
@@ -4766,7 +4943,14 @@ pub fn vcalt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     assert_instr(facgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcaltq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     vcagtq_f16(b, a)
@@ -4823,7 +5007,14 @@ pub fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
     assert_instr(fcmeq)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vceq_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_eq(a, b) }
@@ -4838,7 +5029,14 @@ pub fn vceq_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     assert_instr(fcmeq)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vceqq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_eq(a, b) }
@@ -5189,7 +5387,14 @@ pub fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t {
     assert_instr(fcmge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcge_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_ge(a, b) }
@@ -5204,7 +5409,14 @@ pub fn vcge_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     assert_instr(fcmge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgeq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_ge(a, b) }
@@ -5513,7 +5725,14 @@ pub fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     assert_instr(fcmge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgez_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
@@ -5529,7 +5748,14 @@ pub fn vcgez_f16(a: float16x4_t) -> uint16x4_t {
     assert_instr(fcmge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgezq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
@@ -5545,7 +5771,14 @@ pub fn vcgezq_f16(a: float16x8_t) -> uint16x8_t {
     assert_instr(fcmgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_gt(a, b) }
@@ -5560,7 +5793,14 @@ pub fn vcgt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     assert_instr(fcmgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgtq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_gt(a, b) }
@@ -5869,7 +6109,14 @@ pub fn vcgtq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     assert_instr(fcmgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgtz_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
@@ -5885,7 +6132,14 @@ pub fn vcgtz_f16(a: float16x4_t) -> uint16x4_t {
     assert_instr(fcmgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcgtzq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
@@ -5901,7 +6155,14 @@ pub fn vcgtzq_f16(a: float16x8_t) -> uint16x8_t {
     assert_instr(fcmge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcle_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_le(a, b) }
@@ -5916,7 +6177,14 @@ pub fn vcle_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     assert_instr(fcmge)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcleq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_le(a, b) }
@@ -6225,7 +6493,14 @@ pub fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     assert_instr(fcmle)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vclez_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
@@ -6241,7 +6516,14 @@ pub fn vclez_f16(a: float16x4_t) -> uint16x4_t {
     assert_instr(fcmle)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vclezq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
@@ -6557,7 +6839,14 @@ pub fn vclsq_u32(a: uint32x4_t) -> int32x4_t {
     assert_instr(fcmgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vclt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     unsafe { simd_lt(a, b) }
@@ -6572,7 +6861,14 @@ pub fn vclt_f16(a: float16x4_t, b: float16x4_t) -> uint16x4_t {
     assert_instr(fcmgt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcltq_f16(a: float16x8_t, b: float16x8_t) -> uint16x8_t {
     unsafe { simd_lt(a, b) }
@@ -6881,7 +7177,14 @@ pub fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     assert_instr(fcmlt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcltz_f16(a: float16x4_t) -> uint16x4_t {
     let b: f16x4 = f16x4::new(0.0, 0.0, 0.0, 0.0);
@@ -6897,7 +7200,14 @@ pub fn vcltz_f16(a: float16x4_t) -> uint16x4_t {
     assert_instr(fcmlt)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcltzq_f16(a: float16x8_t) -> uint16x8_t {
     let b: f16x8 = f16x8::new(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
@@ -7032,7 +7342,6 @@ pub fn vclzq_s32(a: int32x4_t) -> int32x4_t {
 #[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
@@ -7052,35 +7361,8 @@ pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t {
     unsafe { transmute(vclz_s16(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclz_u16(a: uint16x4_t) -> uint16x4_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(vclz_s16(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
@@ -7100,35 +7382,8 @@ pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t {
     unsafe { transmute(vclzq_s16(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclzq_u16(a: uint16x8_t) -> uint16x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(vclzq_s16(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
@@ -7148,35 +7403,8 @@ pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t {
     unsafe { transmute(vclz_s32(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclz_u32(a: uint32x2_t) -> uint32x2_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(vclz_s32(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
@@ -7196,35 +7424,8 @@ pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
     unsafe { transmute(vclzq_s32(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclzq_u32(a: uint32x4_t) -> uint32x4_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(vclzq_s32(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
@@ -7244,35 +7445,8 @@ pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vclz_s8(transmute(a))) }
 }
 #[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclz_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vclz_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Count leading zero bits"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
@@ -7291,37 +7465,6 @@ pub fn vclz_u8(a: uint8x8_t) -> uint8x8_t {
 pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vclzq_s8(transmute(a))) }
 }
-#[doc = "Count leading zero bits"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vclzq_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(clz)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vclzq_u8(a: uint8x16_t) -> uint8x16_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(vclzq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
 #[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_s8)"]
 #[inline]
@@ -7367,7 +7510,6 @@ pub fn vcntq_s8(a: int8x16_t) -> int8x16_t {
 #[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
@@ -7387,35 +7529,8 @@ pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vcnt_s8(transmute(a))) }
 }
 #[doc = "Population count per byte."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcnt_u8(a: uint8x8_t) -> uint8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vcnt_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
@@ -7435,40 +7550,8 @@ pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t {
     unsafe { transmute(vcntq_s8(transmute(a))) }
 }
 #[doc = "Population count per byte."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcntq_u8(a: uint8x16_t) -> uint8x16_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(vcntq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
@@ -7488,35 +7571,8 @@ pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
     unsafe { transmute(vcnt_s8(transmute(a))) }
 }
 #[doc = "Population count per byte."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcnt_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vcnt_s8(transmute(a)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Population count per byte."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
@@ -7535,43 +7591,20 @@ pub fn vcnt_p8(a: poly8x8_t) -> poly8x8_t {
 pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
     unsafe { transmute(vcntq_s8(transmute(a))) }
 }
-#[doc = "Population count per byte."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcntq_p8)"]
+#[doc = "Join two smaller vectors into a single larger vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(cnt)
-)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vcntq_p8(a: poly8x16_t) -> poly8x16_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(vcntq_s8(transmute(a)));
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Join two smaller vectors into a single larger vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcombine_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vcombine_f16(a: float16x4_t, b: float16x4_t) -> float16x8_t {
@@ -7785,14 +7818,22 @@ pub fn vcombine_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcreate_f16(a: u64) -> float16x4_t {
     unsafe { transmute(a) }
@@ -7801,14 +7842,22 @@ pub fn vcreate_f16(a: u64) -> float16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcreate_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcreate_f16(a: u64) -> float16x4_t {
     unsafe {
@@ -8305,14 +8354,22 @@ pub fn vcreate_p64(a: u64) -> poly64x1_t {
 #[doc = "Floating-point convert to lower precision narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f16_f32)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-# [cfg_attr (all (test , target_arch = "arm") , assert_instr (vcvt . f16 . f32))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(fcvtn)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_f16_f32(a: float32x4_t) -> float16x4_t {
     unsafe { simd_cast(a) }
@@ -8327,7 +8384,14 @@ pub fn vcvt_f16_f32(a: float32x4_t) -> float16x4_t {
     assert_instr(scvtf)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_f16_s16(a: int16x4_t) -> float16x4_t {
     unsafe { simd_cast(a) }
@@ -8342,7 +8406,14 @@ pub fn vcvt_f16_s16(a: int16x4_t) -> float16x4_t {
     assert_instr(scvtf)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_f16_s16(a: int16x8_t) -> float16x8_t {
     unsafe { simd_cast(a) }
@@ -8357,7 +8428,14 @@ pub fn vcvtq_f16_s16(a: int16x8_t) -> float16x8_t {
     assert_instr(ucvtf)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_f16_u16(a: uint16x4_t) -> float16x4_t {
     unsafe { simd_cast(a) }
@@ -8372,7 +8450,14 @@ pub fn vcvt_f16_u16(a: uint16x4_t) -> float16x4_t {
     assert_instr(ucvtf)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_f16_u16(a: uint16x8_t) -> float16x8_t {
     unsafe { simd_cast(a) }
@@ -8380,14 +8465,22 @@ pub fn vcvtq_f16_u16(a: uint16x8_t) -> float16x8_t {
 #[doc = "Floating-point convert to higher precision long"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vcvt_f32_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(fcvtl)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_f32_f16(a: float16x4_t) -> float32x4_t {
     unsafe { simd_cast(a) }
@@ -8487,7 +8580,14 @@ pub fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t {
 )]
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_n_f16_s16<const N: i32>(a: int16x4_t) -> float16x4_t {
     static_assert!(N >= 1 && N <= 16);
@@ -8515,7 +8615,14 @@ pub fn vcvt_n_f16_s16<const N: i32>(a: int16x4_t) -> float16x4_t {
 )]
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_n_f16_s16<const N: i32>(a: int16x8_t) -> float16x8_t {
     static_assert!(N >= 1 && N <= 16);
@@ -8543,7 +8650,14 @@ pub fn vcvtq_n_f16_s16<const N: i32>(a: int16x8_t) -> float16x8_t {
 )]
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_n_f16_u16<const N: i32>(a: uint16x4_t) -> float16x4_t {
     static_assert!(N >= 1 && N <= 16);
@@ -8571,7 +8685,14 @@ pub fn vcvt_n_f16_u16<const N: i32>(a: uint16x4_t) -> float16x4_t {
 )]
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_n_f16_u16<const N: i32>(a: uint16x8_t) -> float16x8_t {
     static_assert!(N >= 1 && N <= 16);
@@ -8751,7 +8872,14 @@ pub fn vcvtq_n_f32_u32<const N: i32>(a: uint32x4_t) -> float32x4_t {
 )]
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_n_s16_f16<const N: i32>(a: float16x4_t) -> int16x4_t {
     static_assert!(N >= 1 && N <= 16);
@@ -8779,7 +8907,14 @@ pub fn vcvt_n_s16_f16<const N: i32>(a: float16x4_t) -> int16x4_t {
 )]
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_n_s16_f16<const N: i32>(a: float16x8_t) -> int16x8_t {
     static_assert!(N >= 1 && N <= 16);
@@ -8883,7 +9018,14 @@ pub fn vcvtq_n_s32_f32<const N: i32>(a: float32x4_t) -> int32x4_t {
 )]
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_n_u16_f16<const N: i32>(a: float16x4_t) -> uint16x4_t {
     static_assert!(N >= 1 && N <= 16);
@@ -8911,7 +9053,14 @@ pub fn vcvt_n_u16_f16<const N: i32>(a: float16x4_t) -> uint16x4_t {
 )]
 #[rustc_legacy_const_generics(1)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_n_u16_f16<const N: i32>(a: float16x8_t) -> uint16x8_t {
     static_assert!(N >= 1 && N <= 16);
@@ -9014,7 +9163,14 @@ pub fn vcvtq_n_u32_f32<const N: i32>(a: float32x4_t) -> uint32x4_t {
     assert_instr(fcvtzs)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe { simd_cast(a) }
@@ -9029,7 +9185,14 @@ pub fn vcvt_s16_f16(a: float16x4_t) -> int16x4_t {
     assert_instr(fcvtzs)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_s16_f16(a: float16x8_t) -> int16x8_t {
     unsafe { simd_cast(a) }
@@ -9102,7 +9265,14 @@ pub fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
     assert_instr(fcvtzu)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvt_u16_f16(a: float16x4_t) -> uint16x4_t {
     unsafe { simd_cast(a) }
@@ -9117,7 +9287,14 @@ pub fn vcvt_u16_f16(a: float16x4_t) -> uint16x4_t {
     assert_instr(fcvtzu)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vcvtq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe { simd_cast(a) }
@@ -9201,11 +9378,9 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
 )]
 pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = transmute(c);
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_s32(a, b, transmute(c))
-    }
+    let c = vreinterpret_s32_s8(c);
+    let c = vdup_lane_s32::<LANE>(c);
+    vdot_s32(a, b, vreinterpret_s8_s32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
@@ -9228,12 +9403,9 @@ pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) ->
 )]
 pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = transmute(c);
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_s32(a, b, transmute(c))
-    }
+    let c = vreinterpret_s32_s8(c);
+    let c = vdupq_lane_s32::<LANE>(c);
+    vdotq_s32(a, b, vreinterpretq_s8_s32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
@@ -9256,11 +9428,9 @@ pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t)
 )]
 pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: uint32x2_t = transmute(c);
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vdot_u32(a, b, transmute(c))
-    }
+    let c = vreinterpret_u32_u8(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    vdot_u32(a, b, vreinterpret_u8_u32(c))
 }
 #[doc = "Dot product arithmetic (indexed)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
@@ -9283,12 +9453,81 @@ pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t)
 )]
 pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: uint32x2_t = transmute(c);
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vdotq_u32(a, b, transmute(c))
-    }
+    let c = vreinterpret_u32_u8(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    vdotq_u32(a, b, vreinterpretq_u8_u32(c))
+}
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sdot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
+pub fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdup_laneq_s32::<LANE>(c);
+    vdot_s32(a, b, vreinterpret_s8_s32(c))
+}
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(sdot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
+pub fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdupq_laneq_s32::<LANE>(c);
+    vdotq_s32(a, b, vreinterpretq_s8_s32(c))
+}
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(udot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
+pub fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    vdot_u32(a, b, vreinterpret_u8_u32(c))
+}
+#[doc = "Dot product arithmetic (indexed)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,dotprod")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(udot, LANE = 0)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
+pub fn vdotq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    vdotq_u32(a, b, vreinterpretq_u8_u32(c))
 }
 #[doc = "Dot product arithmetic (vector)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"]
@@ -9409,6 +9648,7 @@ pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
@@ -9416,16 +9656,24 @@ pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
     assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdup_lane_f16<const N: i32>(a: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
 #[cfg_attr(
@@ -9433,18 +9681,19 @@ pub fn vdup_lane_f16<const N: i32>(a: float16x4_t) -> float16x4_t {
     assert_instr(dup, N = 2)
 )]
 #[rustc_legacy_const_generics(1)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdupq_lane_f16<const N: i32>(a: float16x4_t) -> float16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"]
@@ -9605,7 +9854,7 @@ pub fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
 )]
 pub fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"]
@@ -9628,7 +9877,7 @@ pub fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
 )]
 pub fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"]
@@ -9651,7 +9900,7 @@ pub fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
 )]
 pub fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"]
@@ -9674,13 +9923,7 @@ pub fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
 )]
 pub fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"]
@@ -9703,13 +9946,7 @@ pub fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
 )]
 pub fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"]
@@ -9732,13 +9969,7 @@ pub fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
 )]
 pub fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"]
@@ -9761,13 +9992,7 @@ pub fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
 )]
 pub fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"]
@@ -9790,13 +10015,7 @@ pub fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
 )]
 pub fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"]
@@ -9819,13 +10038,7 @@ pub fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
 )]
 pub fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"]
@@ -9848,16 +10061,7 @@ pub fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
 )]
 pub fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"]
@@ -9880,16 +10084,7 @@ pub fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
 )]
 pub fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"]
@@ -9912,16 +10107,7 @@ pub fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
 )]
 pub fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"]
@@ -9972,6 +10158,7 @@ pub fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
@@ -9979,16 +10166,24 @@ pub fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
     assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdup_laneq_f16<const N: i32>(a: float16x8_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
 #[cfg_attr(
@@ -9996,18 +10191,19 @@ pub fn vdup_laneq_f16<const N: i32>(a: float16x8_t) -> float16x4_t {
     assert_instr(dup, N = 4)
 )]
 #[rustc_legacy_const_generics(1)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdupq_laneq_f16<const N: i32>(a: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"]
@@ -10168,7 +10364,7 @@ pub fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
 )]
 pub fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"]
@@ -10191,7 +10387,7 @@ pub fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
 )]
 pub fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"]
@@ -10214,7 +10410,7 @@ pub fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
 )]
 pub fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) }
+    unsafe { simd_shuffle!(a, a, [N as u32; 4]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"]
@@ -10237,13 +10433,7 @@ pub fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
 )]
 pub fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"]
@@ -10266,13 +10456,7 @@ pub fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
 )]
 pub fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"]
@@ -10295,13 +10479,7 @@ pub fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
 )]
 pub fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"]
@@ -10324,13 +10502,7 @@ pub fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
 )]
 pub fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"]
@@ -10353,13 +10525,7 @@ pub fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
 )]
 pub fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"]
@@ -10382,13 +10548,7 @@ pub fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
 )]
 pub fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 8]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"]
@@ -10411,16 +10571,7 @@ pub fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
 )]
 pub fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"]
@@ -10443,16 +10594,7 @@ pub fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
 )]
 pub fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"]
@@ -10475,16 +10617,7 @@ pub fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
 )]
 pub fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        simd_shuffle!(
-            a,
-            a,
-            [
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32,
-                N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32
-            ]
-        )
-    }
+    unsafe { simd_shuffle!(a, a, [N as u32; 16]) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"]
@@ -10507,7 +10640,7 @@ pub fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
 )]
 pub fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { transmute::<i64, _>(simd_extract!(a, N as u32)) }
+    unsafe { transmute(vgetq_lane_s64::<N>(a)) }
 }
 #[doc = "Set all vector lanes to the same value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u64)"]
@@ -10530,18 +10663,19 @@ pub fn vdup_laneq_s64<const N: i32>(a: int64x2_t) -> int64x1_t {
 )]
 pub fn vdup_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x1_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe { transmute::<u64, _>(simd_extract!(a, N as u32)) }
+    unsafe { transmute(vgetq_lane_u64::<N>(a)) }
 }
 #[doc = "Create a new vector with all lanes set to a value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_n_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdup_n_f16(a: f16) -> float16x4_t {
@@ -10550,13 +10684,14 @@ pub fn vdup_n_f16(a: f16) -> float16x4_t {
 #[doc = "Create a new vector with all lanes set to a value"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_n_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(dup)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vdupq_n_f16(a: f16) -> float16x8_t {
@@ -11505,19 +11640,18 @@ pub fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 )]
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vext_f16<const N: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_f32)"]
@@ -11540,13 +11674,7 @@ pub fn vext_f16<const N: i32>(a: float16x4_t, b: float16x4_t) -> float16x4_t {
 )]
 pub fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s32)"]
@@ -11569,13 +11697,7 @@ pub fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
 )]
 pub fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u32)"]
@@ -11598,18 +11720,12 @@ pub fn vext_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 )]
 pub fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -11634,7 +11750,7 @@ pub unsafe fn vext_s64<const N: i32>(a: int64x1_t, _b: int64x1_t) -> int64x1_t {
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -11678,17 +11794,20 @@ pub unsafe fn vext_u64<const N: i32>(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_
 pub fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -11713,17 +11832,20 @@ pub fn vext_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 pub fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -11748,17 +11870,20 @@ pub fn vextq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 pub fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -11783,17 +11908,20 @@ pub fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 pub fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -11818,17 +11946,20 @@ pub fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 pub fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -11853,17 +11984,20 @@ pub fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 pub fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -11877,22 +12011,32 @@ pub fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
 )]
 #[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vextq_f16<const N: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     static_assert_uimm_bits!(N, 3);
     unsafe {
-        match N & 0b111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
-            4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
-            5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
-            6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
-            7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -11916,15 +12060,7 @@ pub fn vextq_f16<const N: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
 )]
 pub fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_s16)"]
@@ -11947,15 +12083,7 @@ pub fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
 )]
 pub fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s32)"]
@@ -11978,15 +12106,7 @@ pub fn vext_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 )]
 pub fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_u16)"]
@@ -12009,15 +12129,7 @@ pub fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 )]
 pub fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u32)"]
@@ -12040,15 +12152,7 @@ pub fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 )]
 pub fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vext_p16)"]
@@ -12071,15 +12175,7 @@ pub fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 pub fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     static_assert_uimm_bits!(N, 2);
-    unsafe {
-        match N & 0b11 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3]),
-            1 => simd_shuffle!(a, b, [1, 2, 3, 4]),
-            2 => simd_shuffle!(a, b, [2, 3, 4, 5]),
-            3 => simd_shuffle!(a, b, [3, 4, 5, 6]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s64)"]
@@ -12102,13 +12198,7 @@ pub fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 )]
 pub fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_u64)"]
@@ -12131,13 +12221,7 @@ pub fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 )]
 pub fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(N, 1);
-    unsafe {
-        match N & 0b1 {
-            0 => simd_shuffle!(a, b, [0, 1]),
-            1 => simd_shuffle!(a, b, [1, 2]),
-            _ => unreachable_unchecked(),
-        }
-    }
+    unsafe { simd_shuffle!(a, b, [N as u32, N as u32 + 1]) }
 }
 #[doc = "Extract vector from pair of vectors"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vextq_s8)"]
@@ -12161,85 +12245,28 @@ pub fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 pub fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     static_assert_uimm_bits!(N, 4);
     unsafe {
-        match N & 0b1111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-            ),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12264,85 +12291,28 @@ pub fn vextq_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 pub fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     static_assert_uimm_bits!(N, 4);
     unsafe {
-        match N & 0b1111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-            ),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        )
     }
 }
 #[doc = "Extract vector from pair of vectors"]
@@ -12367,85 +12337,28 @@ pub fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 pub fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     static_assert_uimm_bits!(N, 4);
     unsafe {
-        match N & 0b1111 {
-            0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
-            1 => simd_shuffle!(
-                a,
-                b,
-                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
-            ),
-            2 => simd_shuffle!(
-                a,
-                b,
-                [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
-            ),
-            3 => simd_shuffle!(
-                a,
-                b,
-                [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
-            ),
-            4 => simd_shuffle!(
-                a,
-                b,
-                [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
-            ),
-            5 => simd_shuffle!(
-                a,
-                b,
-                [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
-            ),
-            6 => simd_shuffle!(
-                a,
-                b,
-                [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-            ),
-            7 => simd_shuffle!(
-                a,
-                b,
-                [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-            ),
-            8 => simd_shuffle!(
-                a,
-                b,
-                [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
-            ),
-            9 => simd_shuffle!(
-                a,
-                b,
-                [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-            ),
-            10 => simd_shuffle!(
-                a,
-                b,
-                [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
-            ),
-            11 => simd_shuffle!(
-                a,
-                b,
-                [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]
-            ),
-            12 => simd_shuffle!(
-                a,
-                b,
-                [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]
-            ),
-            13 => simd_shuffle!(
-                a,
-                b,
-                [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]
-            ),
-            14 => simd_shuffle!(
-                a,
-                b,
-                [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
-            ),
-            15 => simd_shuffle!(
-                a,
-                b,
-                [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
-            ),
-            _ => unreachable_unchecked(),
-        }
+        simd_shuffle!(
+            a,
+            b,
+            [
+                N as u32,
+                N as u32 + 1,
+                N as u32 + 2,
+                N as u32 + 3,
+                N as u32 + 4,
+                N as u32 + 5,
+                N as u32 + 6,
+                N as u32 + 7,
+                N as u32 + 8,
+                N as u32 + 9,
+                N as u32 + 10,
+                N as u32 + 11,
+                N as u32 + 12,
+                N as u32 + 13,
+                N as u32 + 14,
+                N as u32 + 15
+            ]
+        )
     }
 }
 #[doc = "Floating-point fused Multiply-Add to accumulator (vector)"]
@@ -12458,7 +12371,14 @@ pub fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     assert_instr(fmla)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe { simd_fma(b, c, a) }
@@ -12473,7 +12393,14 @@ pub fn vfma_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     assert_instr(fmla)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmaq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe { simd_fma(b, c, a) }
@@ -12573,7 +12500,14 @@ pub fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
     assert_instr(fmls)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     unsafe {
@@ -12592,7 +12526,14 @@ pub fn vfms_f16(a: float16x4_t, b: float16x4_t, c: float16x4_t) -> float16x4_t {
     assert_instr(fmls)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vfmsq_f16(a: float16x8_t, b: float16x8_t, c: float16x8_t) -> float16x8_t {
     unsafe {
@@ -12693,9 +12634,17 @@ pub fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
 #[doc = "Duplicate vector element to vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vget_high_f16(a: float16x8_t) -> float16x4_t {
@@ -12704,9 +12653,17 @@ pub fn vget_high_f16(a: float16x8_t) -> float16x4_t {
 #[doc = "Duplicate vector element to vector"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(nop))]
 pub fn vget_low_f16(a: float16x8_t) -> float16x4_t {
@@ -12902,6 +12859,27 @@ pub fn vget_high_u8(a: uint8x16_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) }
 }
 #[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ext)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vget_high_p64(a: poly64x2_t) -> poly64x1_t {
+    unsafe { transmute(u64x1::new(simd_extract!(a, 1))) }
+}
+#[doc = "Duplicate vector element to vector or scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_high_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -12946,14 +12924,15 @@ pub fn vget_high_u64(a: uint64x2_t) -> uint64x1_t {
 #[doc = "Duplicate vector element to scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_lane_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop, LANE = 0)
 )]
 #[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vget_lane_f16<const LANE: i32>(a: float16x4_t) -> f16 {
@@ -12963,14 +12942,15 @@ pub fn vget_lane_f16<const LANE: i32>(a: float16x4_t) -> f16 {
 #[doc = "Duplicate vector element to scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vgetq_lane_f16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop, LANE = 0)
 )]
 #[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vgetq_lane_f16<const LANE: i32>(a: float16x8_t) -> f16 {
@@ -13587,6 +13567,23 @@ pub fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 #[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_p64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(test, assert_instr(nop))]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vget_low_p64(a: poly64x2_t) -> poly64x1_t {
+    unsafe { transmute(u64x1::new(simd_extract!(a, 0))) }
+}
+#[doc = "Duplicate vector element to vector or scalar"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vget_low_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -14319,15 +14316,16 @@ pub fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[doc = "Load one single-element structure and replicate to all lanes of one register"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1r)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
@@ -14337,15 +14335,16 @@ pub unsafe fn vld1_dup_f16(ptr: *const f16) -> float16x4_t {
 #[doc = "Load one single-element structure and replicate to all lanes of one register"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(ld1r)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t {
@@ -14355,7 +14354,7 @@ pub unsafe fn vld1q_dup_f16(ptr: *const f16) -> float16x8_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14378,7 +14377,7 @@ pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14401,7 +14400,7 @@ pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14424,7 +14423,7 @@ pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14447,7 +14446,7 @@ pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14470,7 +14469,7 @@ pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14493,7 +14492,7 @@ pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14516,7 +14515,7 @@ pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14539,7 +14538,7 @@ pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14562,7 +14561,7 @@ pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14585,7 +14584,7 @@ pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14608,7 +14607,7 @@ pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14631,7 +14630,7 @@ pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14654,7 +14653,7 @@ pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14677,7 +14676,7 @@ pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14700,7 +14699,7 @@ pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14723,7 +14722,7 @@ pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14746,7 +14745,7 @@ pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14769,7 +14768,7 @@ pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14792,7 +14791,7 @@ pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14815,7 +14814,7 @@ pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14847,7 +14846,7 @@ pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14879,7 +14878,7 @@ pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t {
 #[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_dup_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -14911,9 +14910,8 @@ pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[target_feature(enable = "neon,fp16")]
@@ -14927,30 +14925,10 @@ pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
     ))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1_f16(ptr: *const f16) -> float16x4_t {
-    let ret_val: float16x4_t = transmute(vld1_v4f16(
-        ptr as *const i8,
-        crate::mem::align_of::<f16>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[target_feature(enable = "neon,fp16")]
@@ -14963,671 +14941,320 @@ pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
         crate::mem::align_of::<f16>() as i32,
     ))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1q_f16(ptr: *const f16) -> float16x8_t {
-    let ret_val: float16x8_t = transmute(vld1q_v8f16(
-        ptr as *const i8,
-        crate::mem::align_of::<f16>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-}
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_f16_x2(a: *const f16) -> float16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v4f16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f16.p0")]
-        fn _vld1_f16_x2(a: *const f16) -> float16x4x2_t;
-    }
-    _vld1_f16_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_f16_x3(a: *const f16) -> float16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v4f16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f16.p0")]
-        fn _vld1_f16_x3(a: *const f16) -> float16x4x3_t;
-    }
-    _vld1_f16_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_f16_x4(a: *const f16) -> float16x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v4f16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f16.p0")]
-        fn _vld1_f16_x4(a: *const f16) -> float16x4x4_t;
-    }
-    _vld1_f16_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_f16_x2(a: *const f16) -> float16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v8f16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8f16.p0")]
-        fn _vld1q_f16_x2(a: *const f16) -> float16x8x2_t;
-    }
-    _vld1q_f16_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_f16_x3(a: *const f16) -> float16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v8f16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8f16.p0")]
-        fn _vld1q_f16_x3(a: *const f16) -> float16x8x3_t;
-    }
-    _vld1q_f16_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v8f16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8f16.p0")]
-        fn _vld1q_f16_x4(a: *const f16) -> float16x8x4_t;
-    }
-    _vld1q_f16_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
 pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
-    transmute(vld1_v2f32(
-        ptr as *const i8,
-        crate::mem::align_of::<f32>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
-    let ret_val: float32x2_t = transmute(vld1_v2f32(
-        ptr as *const i8,
-        crate::mem::align_of::<f32>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [1, 0])
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
-pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
-    transmute(vld1q_v4f32(
-        ptr as *const i8,
-        crate::mem::align_of::<f32>() as i32,
-    ))
+    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
+    transmute(vld1_v2f32::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
 pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
-    let ret_val: float32x4_t = transmute(vld1q_v4f32(
-        ptr as *const i8,
-        crate::mem::align_of::<f32>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
+    transmute(vld1q_v4f32::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
-    transmute(vld1_v8i8(
-        ptr as *const i8,
-        crate::mem::align_of::<u8>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
-    let ret_val: uint8x8_t = transmute(vld1_v8i8(
-        ptr as *const i8,
-        crate::mem::align_of::<u8>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
-    transmute(vld1q_v16i8(
-        ptr as *const i8,
-        crate::mem::align_of::<u8>() as i32,
-    ))
+    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
+    transmute(vld1_v8i8::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
-    let ret_val: uint8x16_t = transmute(vld1q_v16i8(
-        ptr as *const i8,
-        crate::mem::align_of::<u8>() as i32,
-    ));
-    simd_shuffle!(
-        ret_val,
-        ret_val,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    )
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
-    transmute(vld1_v4i16(
-        ptr as *const i8,
-        crate::mem::align_of::<u16>() as i32,
-    ))
+    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
+    transmute(vld1q_v16i8::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
-    let ret_val: uint16x4_t = transmute(vld1_v4i16(
-        ptr as *const i8,
-        crate::mem::align_of::<u16>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
+    transmute(vld1_v4i16::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
-    transmute(vld1q_v8i16(
-        ptr as *const i8,
-        crate::mem::align_of::<u16>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
-    let ret_val: uint16x8_t = transmute(vld1q_v8i16(
-        ptr as *const i8,
-        crate::mem::align_of::<u16>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
+    transmute(vld1q_v8i16::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
 pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
-    transmute(vld1_v2i32(
-        ptr as *const i8,
-        crate::mem::align_of::<u32>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
-    let ret_val: uint32x2_t = transmute(vld1_v2i32(
-        ptr as *const i8,
-        crate::mem::align_of::<u32>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
+    transmute(vld1_v2i32::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
 pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
-    transmute(vld1q_v4i32(
-        ptr as *const i8,
-        crate::mem::align_of::<u32>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
-pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
-    let ret_val: uint32x4_t = transmute(vld1q_v4i32(
-        ptr as *const i8,
-        crate::mem::align_of::<u32>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
+    transmute(vld1q_v4i32::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
 pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
-    transmute(vld1_v1i64(
-        ptr as *const i8,
-        crate::mem::align_of::<u64>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
-pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
-    transmute(vld1q_v2i64(
-        ptr as *const i8,
-        crate::mem::align_of::<u64>() as i32,
-    ))
+    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
+    transmute(vld1_v1i64::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
 pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
-    let ret_val: uint64x2_t = transmute(vld1q_v2i64(
-        ptr as *const i8,
-        crate::mem::align_of::<u64>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
+    transmute(vld1q_v2i64::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
-    transmute(vld1_v8i8(
-        ptr as *const i8,
-        crate::mem::align_of::<p8>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
-    let ret_val: poly8x8_t = transmute(vld1_v8i8(
-        ptr as *const i8,
-        crate::mem::align_of::<p8>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
+    transmute(vld1_v8i8::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
-pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
-    transmute(vld1q_v16i8(
-        ptr as *const i8,
-        crate::mem::align_of::<p8>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
-    let ret_val: poly8x16_t = transmute(vld1q_v16i8(
-        ptr as *const i8,
-        crate::mem::align_of::<p8>() as i32,
-    ));
-    simd_shuffle!(
-        ret_val,
-        ret_val,
-        [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-    )
+    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
+    transmute(vld1q_v16i8::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
-    transmute(vld1_v4i16(
-        ptr as *const i8,
-        crate::mem::align_of::<p16>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
-    let ret_val: poly16x4_t = transmute(vld1_v4i16(
-        ptr as *const i8,
-        crate::mem::align_of::<p16>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
+    transmute(vld1_v4i16::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
-    transmute(vld1q_v8i16(
-        ptr as *const i8,
-        crate::mem::align_of::<p16>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
-pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
-    let ret_val: poly16x8_t = transmute(vld1q_v8i16(
-        ptr as *const i8,
-        crate::mem::align_of::<p16>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
+    transmute(vld1q_v8i16::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,aes")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
 pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
-    transmute(vld1q_v2i64(
-        ptr as *const i8,
-        crate::mem::align_of::<p64>() as i32,
-    ))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,aes")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
-pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
-    let ret_val: poly64x2_t = transmute(vld1q_v2i64(
-        ptr as *const i8,
-        crate::mem::align_of::<p64>() as i32,
-    ));
-    simd_shuffle!(ret_val, ret_val, [1, 0])
+    const ALIGN: i32 = crate::mem::align_of::<p64>() as i32;
+    transmute(vld1q_v2i64::<ALIGN>(ptr as *const i8))
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15638,27 +15265,19 @@ pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v2f32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2f32.p0")]
-        fn _vld1_f32_x2(a: *const f32) -> float32x2x2_t;
-    }
-    _vld1_f32_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15669,27 +15288,19 @@ pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v2f32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2f32.p0")]
-        fn _vld1_f32_x3(a: *const f32) -> float32x2x3_t;
-    }
-    _vld1_f32_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15700,27 +15311,19 @@ pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v2f32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2f32.p0")]
-        fn _vld1_f32_x4(a: *const f32) -> float32x2x4_t;
-    }
-    _vld1_f32_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15731,27 +15334,19 @@ pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v4f32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f32.p0")]
-        fn _vld1q_f32_x2(a: *const f32) -> float32x4x2_t;
-    }
-    _vld1q_f32_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15762,27 +15357,19 @@ pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v4f32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f32.p0")]
-        fn _vld1q_f32_x3(a: *const f32) -> float32x4x3_t;
-    }
-    _vld1q_f32_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -15793,21 +15380,14 @@ pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v4f32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f32.p0")]
-        fn _vld1q_f32_x4(a: *const f32) -> float32x4x4_t;
-    }
-    _vld1q_f32_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load one single-element structure to one lane of one register"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))]
 #[cfg_attr(
@@ -15815,7 +15395,7 @@ pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t {
     assert_instr(ld1, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x4_t) -> float16x4_t {
@@ -15825,8 +15405,9 @@ pub unsafe fn vld1_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x4_t)
 #[doc = "Load one single-element structure to one lane of one register"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1, LANE = 0))]
 #[cfg_attr(
@@ -15834,7 +15415,7 @@ pub unsafe fn vld1_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x4_t)
     assert_instr(ld1, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vld1q_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x8_t) -> float16x8_t {
@@ -15844,7 +15425,7 @@ pub unsafe fn vld1q_lane_f16<const LANE: i32>(ptr: *const f16, src: float16x8_t)
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -15869,7 +15450,7 @@ pub unsafe fn vld1_lane_f32<const LANE: i32>(ptr: *const f32, src: float32x2_t)
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -15894,7 +15475,7 @@ pub unsafe fn vld1_lane_p16<const LANE: i32>(ptr: *const p16, src: poly16x4_t) -
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -15919,7 +15500,7 @@ pub unsafe fn vld1_lane_p8<const LANE: i32>(ptr: *const p8, src: poly8x8_t) -> p
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -15944,7 +15525,7 @@ pub unsafe fn vld1_lane_s16<const LANE: i32>(ptr: *const i16, src: int16x4_t) ->
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -15969,7 +15550,7 @@ pub unsafe fn vld1_lane_s32<const LANE: i32>(ptr: *const i32, src: int32x2_t) ->
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -15994,7 +15575,7 @@ pub unsafe fn vld1_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x1_t) ->
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16019,7 +15600,7 @@ pub unsafe fn vld1_lane_s8<const LANE: i32>(ptr: *const i8, src: int8x8_t) -> in
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16044,7 +15625,7 @@ pub unsafe fn vld1_lane_u16<const LANE: i32>(ptr: *const u16, src: uint16x4_t) -
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16069,7 +15650,7 @@ pub unsafe fn vld1_lane_u32<const LANE: i32>(ptr: *const u32, src: uint32x2_t) -
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16094,7 +15675,7 @@ pub unsafe fn vld1_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x1_t) -
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16119,7 +15700,7 @@ pub unsafe fn vld1_lane_u8<const LANE: i32>(ptr: *const u8, src: uint8x8_t) -> u
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16144,7 +15725,7 @@ pub unsafe fn vld1q_lane_f32<const LANE: i32>(ptr: *const f32, src: float32x4_t)
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16169,7 +15750,7 @@ pub unsafe fn vld1q_lane_p16<const LANE: i32>(ptr: *const p16, src: poly16x8_t)
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16194,7 +15775,7 @@ pub unsafe fn vld1q_lane_p8<const LANE: i32>(ptr: *const p8, src: poly8x16_t) ->
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16219,7 +15800,7 @@ pub unsafe fn vld1q_lane_s16<const LANE: i32>(ptr: *const i16, src: int16x8_t) -
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16244,7 +15825,7 @@ pub unsafe fn vld1q_lane_s32<const LANE: i32>(ptr: *const i32, src: int32x4_t) -
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16269,7 +15850,7 @@ pub unsafe fn vld1q_lane_s64<const LANE: i32>(ptr: *const i64, src: int64x2_t) -
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16294,7 +15875,7 @@ pub unsafe fn vld1q_lane_s8<const LANE: i32>(ptr: *const i8, src: int8x16_t) ->
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16319,7 +15900,7 @@ pub unsafe fn vld1q_lane_u16<const LANE: i32>(ptr: *const u16, src: uint16x8_t)
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16344,7 +15925,7 @@ pub unsafe fn vld1q_lane_u32<const LANE: i32>(ptr: *const u32, src: uint32x4_t)
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16369,7 +15950,7 @@ pub unsafe fn vld1q_lane_u64<const LANE: i32>(ptr: *const u64, src: uint64x2_t)
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16394,7 +15975,7 @@ pub unsafe fn vld1q_lane_u8<const LANE: i32>(ptr: *const u8, src: uint8x16_t) ->
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16419,7 +16000,7 @@ pub unsafe fn vld1_lane_p64<const LANE: i32>(ptr: *const p64, src: poly64x1_t) -
 #[doc = "Load one single-element structure to one lane of one register."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -16444,7 +16025,7 @@ pub unsafe fn vld1q_lane_p64<const LANE: i32>(ptr: *const p64, src: poly64x2_t)
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,aes")]
@@ -16462,14 +16043,14 @@ pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16480,19 +16061,19 @@ pub unsafe fn vld1_p64(ptr: *const p64) -> poly64x1_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
-    transmute(vld1_s64_x2(transmute(a)))
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16503,19 +16084,19 @@ pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
-    transmute(vld1_s64_x3(transmute(a)))
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16526,44 +16107,19 @@ pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t {
-    transmute(vld1_s64_x4(transmute(a)))
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
-    transmute(vld1q_s64_x2(transmute(a)))
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16574,47 +16130,19 @@ pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t {
-    let mut ret_val: poly64x2x2_t = transmute(vld1q_s64_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
-    transmute(vld1q_s64_x3(transmute(a)))
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16625,48 +16153,19 @@ pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t {
-    let mut ret_val: poly64x2x3_t = transmute(vld1q_s64_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
-    transmute(vld1q_s64_x4(transmute(a)))
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16677,120 +16176,123 @@ pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t {
-    let mut ret_val: poly64x2x4_t = transmute(vld1q_s64_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
-    ret_val
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
-    vld1_v8i8(ptr as *const i8, crate::mem::align_of::<i8>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
+    vld1_v8i8::<ALIGN>(ptr as *const i8)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))]
 pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
-    vld1q_v16i8(ptr as *const i8, crate::mem::align_of::<i8>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
+    vld1q_v16i8::<ALIGN>(ptr as *const i8)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
-    vld1_v4i16(ptr as *const i8, crate::mem::align_of::<i16>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
+    vld1_v4i16::<ALIGN>(ptr as *const i8)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))]
 pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
-    vld1q_v8i16(ptr as *const i8, crate::mem::align_of::<i16>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
+    vld1q_v8i16::<ALIGN>(ptr as *const i8)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
 pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
-    vld1_v2i32(ptr as *const i8, crate::mem::align_of::<i32>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
+    vld1_v2i32::<ALIGN>(ptr as *const i8)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))]
 pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
-    vld1q_v4i32(ptr as *const i8, crate::mem::align_of::<i32>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
+    vld1q_v4i32::<ALIGN>(ptr as *const i8)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
 pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
-    vld1_v1i64(ptr as *const i8, crate::mem::align_of::<i64>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
+    vld1_v1i64::<ALIGN>(ptr as *const i8)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))]
 pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
-    vld1q_v2i64(ptr as *const i8, crate::mem::align_of::<i64>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
+    vld1q_v2i64::<ALIGN>(ptr as *const i8)
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16801,27 +16303,19 @@ pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v8i8.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i8.p0")]
-        fn _vld1_s8_x2(a: *const i8) -> int8x8x2_t;
-    }
-    _vld1_s8_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16832,27 +16326,19 @@ pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v8i8.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i8.p0")]
-        fn _vld1_s8_x3(a: *const i8) -> int8x8x3_t;
-    }
-    _vld1_s8_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16863,27 +16349,19 @@ pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v8i8.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i8.p0")]
-        fn _vld1_s8_x4(a: *const i8) -> int8x8x4_t;
-    }
-    _vld1_s8_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16894,27 +16372,19 @@ pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v16i8.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v16i8.p0")]
-        fn _vld1q_s8_x2(a: *const i8) -> int8x16x2_t;
-    }
-    _vld1q_s8_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16925,27 +16395,19 @@ pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v16i8.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v16i8.p0")]
-        fn _vld1q_s8_x3(a: *const i8) -> int8x16x3_t;
-    }
-    _vld1q_s8_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16956,27 +16418,19 @@ pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v16i8.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v16i8.p0")]
-        fn _vld1q_s8_x4(a: *const i8) -> int8x16x4_t;
-    }
-    _vld1q_s8_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -16987,27 +16441,19 @@ pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v4i16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i16.p0")]
-        fn _vld1_s16_x2(a: *const i16) -> int16x4x2_t;
-    }
-    _vld1_s16_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17018,27 +16464,19 @@ pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v4i16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i16.p0")]
-        fn _vld1_s16_x3(a: *const i16) -> int16x4x3_t;
-    }
-    _vld1_s16_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17049,27 +16487,19 @@ pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v4i16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i16.p0")]
-        fn _vld1_s16_x4(a: *const i16) -> int16x4x4_t;
-    }
-    _vld1_s16_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17080,27 +16510,19 @@ pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v8i16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i16.p0")]
-        fn _vld1q_s16_x2(a: *const i16) -> int16x8x2_t;
-    }
-    _vld1q_s16_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17111,27 +16533,19 @@ pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v8i16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i16.p0")]
-        fn _vld1q_s16_x3(a: *const i16) -> int16x8x3_t;
-    }
-    _vld1q_s16_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17142,27 +16556,19 @@ pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v8i16.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i16.p0")]
-        fn _vld1q_s16_x4(a: *const i16) -> int16x8x4_t;
-    }
-    _vld1q_s16_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17173,27 +16579,19 @@ pub unsafe fn vld1q_s16_x4(a: *const i16) -> int16x8x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v2i32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i32.p0")]
-        fn _vld1_s32_x2(a: *const i32) -> int32x2x2_t;
-    }
-    _vld1_s32_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17204,27 +16602,19 @@ pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v2i32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i32.p0")]
-        fn _vld1_s32_x3(a: *const i32) -> int32x2x3_t;
-    }
-    _vld1_s32_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17235,27 +16625,19 @@ pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v2i32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i32.p0")]
-        fn _vld1_s32_x4(a: *const i32) -> int32x2x4_t;
-    }
-    _vld1_s32_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17266,27 +16648,19 @@ pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v4i32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i32.p0")]
-        fn _vld1q_s32_x2(a: *const i32) -> int32x4x2_t;
-    }
-    _vld1q_s32_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17297,27 +16671,19 @@ pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v4i32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i32.p0")]
-        fn _vld1q_s32_x3(a: *const i32) -> int32x4x3_t;
-    }
-    _vld1q_s32_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17328,27 +16694,19 @@ pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v4i32.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i32.p0")]
-        fn _vld1q_s32_x4(a: *const i32) -> int32x4x4_t;
-    }
-    _vld1q_s32_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17359,27 +16717,19 @@ pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v1i64.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v1i64.p0")]
-        fn _vld1_s64_x2(a: *const i64) -> int64x1x2_t;
-    }
-    _vld1_s64_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17390,27 +16740,19 @@ pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v1i64.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v1i64.p0")]
-        fn _vld1_s64_x3(a: *const i64) -> int64x1x3_t;
-    }
-    _vld1_s64_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17421,27 +16763,19 @@ pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v1i64.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v1i64.p0")]
-        fn _vld1_s64_x4(a: *const i64) -> int64x1x4_t;
-    }
-    _vld1_s64_x4(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17452,27 +16786,19 @@ pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x2.v2i64.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i64.p0")]
-        fn _vld1q_s64_x2(a: *const i64) -> int64x2x2_t;
-    }
-    _vld1q_s64_x2(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17483,27 +16809,19 @@ pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x3.v2i64.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i64.p0")]
-        fn _vld1q_s64_x3(a: *const i64) -> int64x2x3_t;
-    }
-    _vld1q_s64_x3(a)
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17514,52 +16832,19 @@ pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld1x4.v2i64.p0"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i64.p0")]
-        fn _vld1q_s64_x4(a: *const i64) -> int64x2x4_t;
-    }
-    _vld1q_s64_x4(a)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
-    transmute(vld1_s8_x2(transmute(a)))
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17570,47 +16855,19 @@ pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t {
-    let mut ret_val: uint8x8x2_t = transmute(vld1_s8_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
-    transmute(vld1_s8_x3(transmute(a)))
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17621,48 +16878,19 @@ pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t {
-    let mut ret_val: uint8x8x3_t = transmute(vld1_s8_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
-    transmute(vld1_s8_x4(transmute(a)))
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17673,25 +16901,19 @@ pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
-    let mut ret_val: uint8x8x4_t = transmute(vld1_s8_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17702,20 +16924,19 @@ pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
-    transmute(vld1q_s8_x2(transmute(a)))
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17725,36 +16946,20 @@ pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
-    let mut ret_val: uint8x16x2_t = transmute(vld1q_s8_x2(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17764,21 +16969,20 @@ pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
-    transmute(vld1q_s8_x3(transmute(a)))
+pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17788,43 +16992,20 @@ pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
-    let mut ret_val: uint8x16x3_t = transmute(vld1q_s8_x3(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17834,21 +17015,20 @@ pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
-    transmute(vld1q_s8_x4(transmute(a)))
+pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17858,50 +17038,20 @@ pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
-    let mut ret_val: uint8x16x4_t = transmute(vld1q_s8_x4(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.3 = unsafe {
-        simd_shuffle!(
-            ret_val.3,
-            ret_val.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17911,21 +17061,20 @@ pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
-    transmute(vld1_s16_x2(transmute(a)))
+pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17935,24 +17084,20 @@ pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
-    let mut ret_val: uint16x4x2_t = transmute(vld1_s16_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17962,21 +17107,20 @@ pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
-    transmute(vld1_s16_x3(transmute(a)))
+pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -17986,25 +17130,20 @@ pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
-    let mut ret_val: uint16x4x3_t = transmute(vld1_s16_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18014,21 +17153,20 @@ pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
-    transmute(vld1_s16_x4(transmute(a)))
+pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18038,26 +17176,20 @@ pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
-    let mut ret_val: uint16x4x4_t = transmute(vld1_s16_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18067,21 +17199,20 @@ pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
-    transmute(vld1q_s16_x2(transmute(a)))
+pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18091,24 +17222,20 @@ pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
-    let mut ret_val: uint16x8x2_t = transmute(vld1q_s16_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18118,21 +17245,20 @@ pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
-    transmute(vld1q_s16_x3(transmute(a)))
+pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18142,25 +17268,20 @@ pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
-    let mut ret_val: uint16x8x3_t = transmute(vld1q_s16_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18170,21 +17291,20 @@ pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
-    transmute(vld1q_s16_x4(transmute(a)))
+pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18194,26 +17314,20 @@ pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
-    let mut ret_val: uint16x8x4_t = transmute(vld1q_s16_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18223,21 +17337,20 @@ pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
-    transmute(vld1_s32_x2(transmute(a)))
+pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18247,24 +17360,20 @@ pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
-    let mut ret_val: uint32x2x2_t = transmute(vld1_s32_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val
+pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18274,21 +17383,20 @@ pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
-    transmute(vld1_s32_x3(transmute(a)))
+pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18298,25 +17406,20 @@ pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
-    let mut ret_val: uint32x2x3_t = transmute(vld1_s32_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val
+pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18326,21 +17429,20 @@ pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
-    transmute(vld1_s32_x4(transmute(a)))
+pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18350,26 +17452,20 @@ pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
-    let mut ret_val: uint32x2x4_t = transmute(vld1_s32_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
-    ret_val
+pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18379,21 +17475,20 @@ pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
-    transmute(vld1q_s32_x2(transmute(a)))
+pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18403,24 +17498,20 @@ pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
-    let mut ret_val: uint32x4x2_t = transmute(vld1q_s32_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18430,21 +17521,20 @@ pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
-    transmute(vld1q_s32_x3(transmute(a)))
+pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18454,25 +17544,20 @@ pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
-    let mut ret_val: uint32x4x3_t = transmute(vld1q_s32_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18482,21 +17567,20 @@ pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
-    transmute(vld1q_s32_x4(transmute(a)))
+pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18506,25 +17590,20 @@ pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
-    let mut ret_val: uint32x4x4_t = transmute(vld1q_s32_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x2)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18534,20 +17613,20 @@ pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t {
-    transmute(vld1_s64_x2(transmute(a)))
+pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x3)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18557,20 +17636,20 @@ pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t {
-    transmute(vld1_s64_x3(transmute(a)))
+pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
 #[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64_x4)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18580,280 +17659,178 @@ pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t {
-    transmute(vld1_s64_x4(transmute(a)))
+pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
-    transmute(vld1q_s64_x2(transmute(a)))
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v1i64<const ALIGN: i32>(a: *const i8) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")]
+        fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t;
+    }
+    _vld1_v1i64(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t {
-    let mut ret_val: uint64x2x2_t = transmute(vld1q_s64_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v2f32<const ALIGN: i32>(a: *const i8) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")]
+        fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t;
+    }
+    _vld1_v2f32(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
-    transmute(vld1q_s64_x3(transmute(a)))
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v2i32<const ALIGN: i32>(a: *const i8) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")]
+        fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t;
+    }
+    _vld1_v2i32(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t {
-    let mut ret_val: uint64x2x3_t = transmute(vld1q_s64_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v4i16<const ALIGN: i32>(a: *const i8) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")]
+        fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t;
+    }
+    _vld1_v4i16(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
-    transmute(vld1q_s64_x4(transmute(a)))
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1_v8i8<const ALIGN: i32>(a: *const i8) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")]
+        fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t;
+    }
+    _vld1_v8i8(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64_x4)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t {
-    let mut ret_val: uint64x2x4_t = transmute(vld1q_s64_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
-    ret_val
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v16i8<const ALIGN: i32>(a: *const i8) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")]
+        fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t;
+    }
+    _vld1q_v16i8(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
-    transmute(vld1_s8_x2(transmute(a)))
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v2i64<const ALIGN: i32>(a: *const i8) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")]
+        fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t;
+    }
+    _vld1q_v2i64(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x2)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t {
-    let mut ret_val: poly8x8x2_t = transmute(vld1_s8_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v4f32<const ALIGN: i32>(a: *const i8) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")]
+        fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t;
+    }
+    _vld1q_v4f32(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
-    transmute(vld1_s8_x3(transmute(a)))
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v4i32<const ALIGN: i32>(a: *const i8) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")]
+        fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t;
+    }
+    _vld1q_v4i32(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x3)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
-    let mut ret_val: poly8x8x3_t = transmute(vld1_s8_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[rustc_legacy_const_generics(1)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+unsafe fn vld1q_v8i16<const ALIGN: i32>(a: *const i8) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")]
+        fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t;
+    }
+    _vld1q_v8i16(a, ALIGN)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg(not(target_arch = "arm64ec"))]
+unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")]
+        fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t;
+    }
+    _vld1_v4f16(a, b)
+}
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg(not(target_arch = "arm64ec"))]
+unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")]
+        fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t;
+    }
+    _vld1q_v8f16(a, b)
+}
+#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
+#[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld1r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -18863,260 +17840,387 @@ pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
-    transmute(vld1_s8_x4(transmute(a)))
+pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t {
+    let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0)));
+    simd_shuffle!(x, x, [0, 0])
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8_x4)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t {
-    let mut ret_val: poly8x8x4_t = transmute(vld1_s8_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")]
+        fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t;
+    }
+    _vld2_dup_f16(a as _, 2)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
-    transmute(vld1q_s8_x2(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0")]
+        fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t;
+    }
+    _vld2q_dup_f16(a as _, 2)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x2)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld2r)
 )]
-pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t {
-    let mut ret_val: poly8x16x2_t = transmute(vld1q_s8_x2(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v4f16.p0"
+        )]
+        fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t;
+    }
+    _vld2_dup_f16(a as _)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld2r)
 )]
-pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
-    transmute(vld1q_s8_x3(transmute(a)))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v8f16.p0"
+        )]
+        fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t;
+    }
+    _vld2q_dup_f16(a as _)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x3)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t {
-    let mut ret_val: poly8x16x3_t = transmute(vld1q_s8_x3(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")]
+        fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
+    }
+    _vld2_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
-    transmute(vld1q_s8_x4(transmute(a)))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")]
+        fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
+    }
+    _vld2q_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8_x4)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
-    let mut ret_val: poly8x16x4_t = transmute(vld1q_s8_x4(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.3 = unsafe {
-        simd_shuffle!(
-            ret_val.3,
-            ret_val.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")]
+        fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
+    }
+    _vld2_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")]
+        fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
+    }
+    _vld2q_dup_s8(a as *const i8, 1)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")]
+        fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
+    }
+    _vld2_dup_s16(a as *const i8, 2)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")]
+        fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
+    }
+    _vld2q_dup_s16(a as *const i8, 2)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")]
+        fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
+    }
+    _vld2_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")]
+        fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
+    }
+    _vld2q_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v2f32.p0"
+        )]
+        fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t;
+    }
+    _vld2_dup_f32(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v4f32.p0"
+        )]
+        fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t;
+    }
+    _vld2q_dup_f32(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v8i8.p0"
+        )]
+        fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t;
+    }
+    _vld2_dup_s8(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v16i8.p0"
+        )]
+        fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t;
+    }
+    _vld2q_dup_s8(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v4i16.p0"
+        )]
+        fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t;
+    }
+    _vld2_dup_s16(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v8i16.p0"
+        )]
+        fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t;
+    }
+    _vld2q_dup_s16(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v2i32.p0"
+        )]
+        fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t;
+    }
+    _vld2_dup_s32(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v4i32.p0"
+        )]
+        fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t;
+    }
+    _vld2q_dup_s32(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19126,21 +18230,55 @@ pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
-    transmute(vld1_s16_x2(transmute(a)))
+pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t {
+    transmute(vld2_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x2)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")]
+        fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
+    }
+    _vld2_dup_s64(a as *const i8, 8)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2r))]
+pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2r.v1i64.p0"
+        )]
+        fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t;
+    }
+    _vld2_dup_s64(a as _)
+}
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19150,24 +18288,20 @@ pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
-    let mut ret_val: poly16x4x2_t = transmute(vld1_s16_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
+    transmute(vld2_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19177,21 +18311,20 @@ pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
-    transmute(vld1_s16_x3(transmute(a)))
+pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
+    transmute(vld2_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x3)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19201,25 +18334,20 @@ pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
-    let mut ret_val: poly16x4x3_t = transmute(vld1_s16_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
+    transmute(vld2q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19229,21 +18357,20 @@ pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
-    transmute(vld1_s16_x4(transmute(a)))
+pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
+    transmute(vld2_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16_x4)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19253,26 +18380,20 @@ pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
-    let mut ret_val: poly16x4x4_t = transmute(vld1_s16_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
+    transmute(vld2q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19282,21 +18403,20 @@ pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
-    transmute(vld1q_s16_x2(transmute(a)))
+pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
+    transmute(vld2_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x2)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19306,24 +18426,20 @@ pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
-    let mut ret_val: poly16x8x2_t = transmute(vld1q_s16_x2(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
+    transmute(vld2q_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19333,21 +18449,20 @@ pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
-    transmute(vld1q_s16_x3(transmute(a)))
+pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
+    transmute(vld2_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x3)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19357,25 +18472,20 @@ pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
-    let mut ret_val: poly16x8x3_t = transmute(vld1q_s16_x3(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
+    transmute(vld2q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19385,21 +18495,20 @@ pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
-    transmute(vld1q_s16_x4(transmute(a)))
+pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
+    transmute(vld2_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16_x4)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1)
+    assert_instr(ld2r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -19409,656 +18518,809 @@ pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t {
-    let mut ret_val: poly16x8x4_t = transmute(vld1q_s16_x4(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
+    transmute(vld2q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v1i64)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v1i64(a: *const i8, b: i32) -> int64x1_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")]
-        fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")]
+        fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t;
     }
-    _vld1_v1i64(a, b)
+    _vld2_f16(a as _, 2)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2f32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v2f32(a: *const i8, b: i32) -> float32x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")]
-        fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")]
+        fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t;
     }
-    _vld1_v2f32(a, b)
+    _vld2q_f16(a as _, 2)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2i32)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v2i32(a: *const i8, b: i32) -> int32x2_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")]
-        fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v4f16.p0"
+        )]
+        fn _vld2_f16(ptr: *const f16) -> float16x4x2_t;
     }
-    _vld1_v2i32(a, b)
+    _vld2_f16(a as _)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4i16)"]
+#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v4i16(a: *const i8, b: i32) -> int16x4_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2)
+)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")]
-        fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v8f16.p0"
+        )]
+        fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t;
     }
-    _vld1_v4i16(a, b)
+    _vld2q_f16(a as _)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v8i8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1_v8i8(a: *const i8, b: i32) -> int8x8_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")]
-        fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")]
+        fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
     }
-    _vld1_v8i8(a, b)
+    _vld2_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v16i8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")]
-        fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")]
+        fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
     }
-    _vld1q_v16i8(a, b)
+    _vld2q_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v2i64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")]
-        fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")]
+        fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
     }
-    _vld1q_v2i64(a, b)
+    _vld2_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")]
-        fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")]
+        fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
     }
-    _vld1q_v4f32(a, b)
+    _vld2q_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4i32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")]
-        fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")]
+        fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
     }
-    _vld1q_v4i32(a, b)
+    _vld2_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8i16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-unsafe fn vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t {
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")]
-        fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")]
+        fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
     }
-    _vld1q_v8i16(a, b)
+    _vld2q_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f16")]
-        fn _vld1_v4f16(a: *const i8, b: i32) -> float16x4_t;
-    }
-    _vld1_v4f16(a, b)
-}
-#[doc = "Load multiple single-element structures to one, two, three, or four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-unsafe fn vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8f16")]
-        fn _vld1q_v8f16(a: *const i8, b: i32) -> float16x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")]
+        fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
     }
-    _vld1q_v8f16(a, b)
-}
-#[doc = "Load one single-element structure and Replicate to all lanes (of one register)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_dup_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld1r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t {
-    let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0)));
-    simd_shuffle!(x, x, [0, 0])
+    _vld2_s32(a as *const i8, 4)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld2))]
+pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f16.p0")]
-        fn _vld2_dup_f16(ptr: *const f16, size: i32) -> float16x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")]
+        fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
     }
-    _vld2_dup_f16(a as _, 2)
+    _vld2q_s32(a as *const i8, 4)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,fp16")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8f16.p0")]
-        fn _vld2q_dup_f16(ptr: *const f16, size: i32) -> float16x8x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v2f32.p0"
+        )]
+        fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t;
     }
-    _vld2q_dup_f16(a as _, 2)
+    _vld2_f32(a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_dup_f16(a: *const f16) -> float16x4x2_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4f16.p0"
+            link_name = "llvm.aarch64.neon.ld2.v4f32.p0"
         )]
-        fn _vld2_dup_f16(ptr: *const f16) -> float16x4x2_t;
+        fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t;
     }
-    _vld2_dup_f16(a as _)
+    _vld2q_f32(a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_dup_f16(a: *const f16) -> float16x8x2_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v8f16.p0"
+            link_name = "llvm.aarch64.neon.ld2.v8i8.p0"
         )]
-        fn _vld2q_dup_f16(ptr: *const f16) -> float16x8x2_t;
+        fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t;
     }
-    _vld2q_dup_f16(a as _)
+    _vld2_s8(a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0")]
-        fn _vld2_dup_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v16i8.p0"
+        )]
+        fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t;
     }
-    _vld2_dup_f32(a as *const i8, 4)
+    _vld2q_s8(a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0")]
-        fn _vld2q_dup_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v4i16.p0"
+        )]
+        fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t;
     }
-    _vld2q_dup_f32(a as *const i8, 4)
+    _vld2_s16(a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0")]
-        fn _vld2_dup_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v8i16.p0"
+        )]
+        fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t;
     }
-    _vld2_dup_s8(a as *const i8, 1)
+    _vld2q_s16(a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0")]
-        fn _vld2q_dup_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v2i32.p0"
+        )]
+        fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t;
     }
-    _vld2q_dup_s8(a as *const i8, 1)
+    _vld2_s32(a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld2))]
+pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0")]
-        fn _vld2_dup_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v4i32.p0"
+        )]
+        fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t;
     }
-    _vld2_dup_s16(a as *const i8, 2)
+    _vld2q_s32(a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0")]
-        fn _vld2q_dup_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")]
+        fn _vld2_lane_f16(
+            ptr: *const f16,
+            a: float16x4_t,
+            b: float16x4_t,
+            n: i32,
+            size: i32,
+        ) -> float16x4x2_t;
     }
-    _vld2q_dup_s16(a as *const i8, 2)
+    _vld2_lane_f16(a as _, b.0, b.1, LANE, 2)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0")]
-        fn _vld2_dup_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")]
+        fn _vld2q_lane_f16(
+            ptr: *const f16,
+            a: float16x8_t,
+            b: float16x8_t,
+            n: i32,
+            size: i32,
+        ) -> float16x8x2_t;
     }
-    _vld2_dup_s32(a as *const i8, 4)
+    _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0")]
-        fn _vld2q_dup_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0"
+        )]
+        fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16)
+            -> float16x4x2_t;
     }
-    _vld2q_dup_s32(a as *const i8, 4)
+    _vld2_lane_f16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t {
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld2, LANE = 0)
+)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v2f32.p0"
+            link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0"
         )]
-        fn _vld2_dup_f32(ptr: *const f32) -> float32x2x2_t;
+        fn _vld2q_lane_f16(
+            a: float16x8_t,
+            b: float16x8_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x8x2_t;
     }
-    _vld2_dup_f32(a as _)
+    _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_f32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t {
+pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4f32.p0"
+            link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0"
         )]
-        fn _vld2q_dup_f32(ptr: *const f32) -> float32x4x2_t;
+        fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t;
     }
-    _vld2q_dup_f32(a as _)
+    _vld2_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t {
+pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v8i8.p0"
+            link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0"
         )]
-        fn _vld2_dup_s8(ptr: *const i8) -> int8x8x2_t;
+        fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8)
+            -> float32x4x2_t;
     }
-    _vld2_dup_s8(a as _)
+    _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t {
+pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v16i8.p0"
+            link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0"
         )]
-        fn _vld2q_dup_s8(ptr: *const i8) -> int8x16x2_t;
+        fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t;
     }
-    _vld2q_dup_s8(a as _)
+    _vld2_lane_s8(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t {
+pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4i16.p0"
+            link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0"
         )]
-        fn _vld2_dup_s16(ptr: *const i16) -> int16x4x2_t;
+        fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t;
     }
-    _vld2_dup_s16(a as _)
+    _vld2_lane_s16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t {
+pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v8i16.p0"
+            link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0"
         )]
-        fn _vld2q_dup_s16(ptr: *const i16) -> int16x8x2_t;
+        fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t;
     }
-    _vld2q_dup_s16(a as _)
+    _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t {
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v2i32.p0"
+            link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0"
         )]
-        fn _vld2_dup_s32(ptr: *const i32) -> int32x2x2_t;
+        fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t;
     }
-    _vld2_dup_s32(a as _)
+    _vld2_lane_s32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_s32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t {
+pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v4i32.p0"
+            link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0"
         )]
-        fn _vld2q_dup_s32(ptr: *const i32) -> int32x4x2_t;
+        fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t;
     }
-    _vld2q_dup_s32(a as _)
+    _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t {
-    transmute(vld2_dup_s64(transmute(a)))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")]
+        fn _vld2_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x2_t;
+    }
+    _vld2_lane_f32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
+pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0")]
-        fn _vld2_dup_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")]
+        fn _vld2q_lane_f32(
+            ptr: *const i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            n: i32,
+            size: i32,
+        ) -> float32x4x2_t;
     }
-    _vld2_dup_s64(a as *const i8, 8)
+    _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_s64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2r))]
-pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2r.v1i64.p0"
-        )]
-        fn _vld2_dup_s64(ptr: *const i64) -> int64x1x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")]
+        fn _vld2q_lane_s16(
+            ptr: *const i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            n: i32,
+            size: i32,
+        ) -> int16x8x2_t;
     }
-    _vld2_dup_s64(a as _)
+    _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u64)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")]
+        fn _vld2q_lane_s32(
+            ptr: *const i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            n: i32,
+            size: i32,
+        ) -> int32x4x2_t;
+    }
+    _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")]
+        fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32)
+            -> int8x8x2_t;
+    }
+    _vld2_lane_s8(a as _, b.0, b.1, LANE, 1)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")]
+        fn _vld2_lane_s16(
+            ptr: *const i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            n: i32,
+            size: i32,
+        ) -> int16x4x2_t;
+    }
+    _vld2_lane_s16(a as _, b.0, b.1, LANE, 2)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")]
+        fn _vld2_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x2_t;
+    }
+    _vld2_lane_s32(a as _, b.0, b.1, LANE, 4)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20067,22 +19329,23 @@ pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
-    transmute(vld2_dup_s64(transmute(a)))
+pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20091,22 +19354,23 @@ pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
-    transmute(vld2_dup_s8(transmute(a)))
+pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20115,25 +19379,23 @@ pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
-    let mut ret_val: uint8x8x2_t = transmute(vld2_dup_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20142,22 +19404,23 @@ pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
-    transmute(vld2q_dup_s8(transmute(a)))
+pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld2_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20166,37 +19429,23 @@ pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
-    let mut ret_val: uint8x16x2_t = transmute(vld2q_dup_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld2q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20205,22 +19454,23 @@ pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
-    transmute(vld2_dup_s16(transmute(a)))
+pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20229,25 +19479,23 @@ pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
-    let mut ret_val: uint16x4x2_t = transmute(vld2_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -20256,21 +19504,21 @@ pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
-    transmute(vld2q_dup_s16(transmute(a)))
+pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20280,24 +19528,55 @@ pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
-    let mut ret_val: uint16x8x2_t = transmute(vld2q_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t {
+    transmute(vld2_s64(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")]
+        fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
+    }
+    _vld2_s64(a as *const i8, 8)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld2.v1i64.p0"
+        )]
+        fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t;
+    }
+    _vld2_s64(a as _)
+}
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20307,21 +19586,20 @@ pub unsafe fn vld2q_dup_u16(a: *const u16) -> uint16x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
-    transmute(vld2_dup_s32(transmute(a)))
+pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
+    transmute(vld2_s64(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20331,24 +19609,20 @@ pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
-    let mut ret_val: uint32x2x2_t = transmute(vld2_dup_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val
+pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
+    transmute(vld2_s8(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20358,21 +19632,20 @@ pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
-    transmute(vld2q_dup_s32(transmute(a)))
+pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
+    transmute(vld2q_s8(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_u32)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20382,24 +19655,20 @@ pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
-    let mut ret_val: uint32x4x2_t = transmute(vld2q_dup_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
+    transmute(vld2_s16(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20409,21 +19678,20 @@ pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
-    transmute(vld2_dup_s8(transmute(a)))
+pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
+    transmute(vld2q_s16(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20433,24 +19701,20 @@ pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
-    let mut ret_val: poly8x8x2_t = transmute(vld2_dup_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
+    transmute(vld2_s32(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20460,21 +19724,20 @@ pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
-    transmute(vld2q_dup_s8(transmute(a)))
+pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
+    transmute(vld2q_s32(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p8)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20484,36 +19747,20 @@ pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
-    let mut ret_val: poly8x16x2_t = transmute(vld2q_dup_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
+    transmute(vld2_s8(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20523,21 +19770,20 @@ pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
-    transmute(vld2_dup_s16(transmute(a)))
+pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
+    transmute(vld2q_s8(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_dup_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20547,24 +19793,20 @@ pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
-    let mut ret_val: poly16x4x2_t = transmute(vld2_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
+    transmute(vld2_s16(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
+#[doc = "Load multiple 2-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
+    assert_instr(ld2)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -20574,830 +19816,445 @@ pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
-    transmute(vld2q_dup_s16(transmute(a)))
+pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
+    transmute(vld2q_s16(transmute(a)))
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_dup_p16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t {
-    let mut ret_val: poly16x8x2_t = transmute(vld2q_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
-}
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
+pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f16.p0")]
-        fn _vld2_f16(ptr: *const f16, size: i32) -> float16x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")]
+        fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t;
     }
-    _vld2_f16(a as _, 2)
+    _vld3_dup_f16(a as _, 2)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
+pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8f16.p0")]
-        fn _vld2q_f16(ptr: *const f16, size: i32) -> float16x8x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")]
+        fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t;
     }
-    _vld2q_f16(a as _, 2)
+    _vld3q_dup_f16(a as _, 2)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld3r)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_f16(a: *const f16) -> float16x4x2_t {
+pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4f16.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v4f16.p0"
         )]
-        fn _vld2_f16(ptr: *const f16) -> float16x4x2_t;
+        fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t;
     }
-    _vld2_f16(a as _)
+    _vld3_dup_f16(a as _)
 }
-#[doc = "Load single 2-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld3r)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_f16(a: *const f16) -> float16x8x2_t {
+pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v8f16.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v8f16.p0"
         )]
-        fn _vld2q_f16(ptr: *const f16) -> float16x8x2_t;
-    }
-    _vld2q_f16(a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32")]
-        fn _vld2_f32(ptr: *const i8, size: i32) -> float32x2x2_t;
-    }
-    _vld2_f32(a as *const i8, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32")]
-        fn _vld2q_f32(ptr: *const i8, size: i32) -> float32x4x2_t;
-    }
-    _vld2q_f32(a as *const i8, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8")]
-        fn _vld2_s8(ptr: *const i8, size: i32) -> int8x8x2_t;
-    }
-    _vld2_s8(a as *const i8, 1)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v16i8")]
-        fn _vld2q_s8(ptr: *const i8, size: i32) -> int8x16x2_t;
-    }
-    _vld2q_s8(a as *const i8, 1)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16")]
-        fn _vld2_s16(ptr: *const i8, size: i32) -> int16x4x2_t;
-    }
-    _vld2_s16(a as *const i8, 2)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16")]
-        fn _vld2q_s16(ptr: *const i8, size: i32) -> int16x8x2_t;
-    }
-    _vld2q_s16(a as *const i8, 2)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32")]
-        fn _vld2_s32(ptr: *const i8, size: i32) -> int32x2x2_t;
+        fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t;
     }
-    _vld2_s32(a as *const i8, 4)
+    _vld3q_dup_f16(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld2))]
-pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32")]
-        fn _vld2q_s32(ptr: *const i8, size: i32) -> int32x4x2_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld3r.v2f32.p0"
+        )]
+        fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t;
     }
-    _vld2q_s32(a as *const i8, 4)
+    _vld3_dup_f32(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v2f32.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v4f32.p0"
         )]
-        fn _vld2_f32(ptr: *const float32x2_t) -> float32x2x2_t;
+        fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t;
     }
-    _vld2_f32(a as _)
+    _vld3q_dup_f32(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4f32.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v8i8.p0"
         )]
-        fn _vld2q_f32(ptr: *const float32x4_t) -> float32x4x2_t;
+        fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t;
     }
-    _vld2q_f32(a as _)
+    _vld3_dup_s8(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v8i8.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v16i8.p0"
         )]
-        fn _vld2_s8(ptr: *const int8x8_t) -> int8x8x2_t;
+        fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t;
     }
-    _vld2_s8(a as _)
+    _vld3q_dup_s8(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v16i8.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v4i16.p0"
         )]
-        fn _vld2q_s8(ptr: *const int8x16_t) -> int8x16x2_t;
+        fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t;
     }
-    _vld2q_s8(a as _)
+    _vld3_dup_s16(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4i16.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v8i16.p0"
         )]
-        fn _vld2_s16(ptr: *const int16x4_t) -> int16x4x2_t;
+        fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t;
     }
-    _vld2_s16(a as _)
+    _vld3q_dup_s16(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v8i16.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v2i32.p0"
         )]
-        fn _vld2q_s16(ptr: *const int16x8_t) -> int16x8x2_t;
+        fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t;
     }
-    _vld2q_s16(a as _)
+    _vld3_dup_s32(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v2i32.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v4i32.p0"
         )]
-        fn _vld2_s32(ptr: *const int32x2_t) -> int32x2x2_t;
+        fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t;
     }
-    _vld2_s32(a as _)
+    _vld3q_dup_s32(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld2))]
-pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t {
+#[cfg_attr(test, assert_instr(ld3r))]
+pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v4i32.p0"
+            link_name = "llvm.aarch64.neon.ld3r.v1i64.p0"
         )]
-        fn _vld2q_s32(ptr: *const int32x4_t) -> int32x4x2_t;
+        fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t;
     }
-    _vld2q_s32(a as _)
+    _vld3_dup_s64(a as _)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f16.p0")]
-        fn _vld2_lane_f16(
-            ptr: *const f16,
-            a: float16x4_t,
-            b: float16x4_t,
-            n: i32,
-            size: i32,
-        ) -> float16x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")]
+        fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
     }
-    _vld2_lane_f16(a as _, b.0, b.1, LANE, 2)
+    _vld3_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8f16.p0")]
-        fn _vld2q_lane_f16(
-            ptr: *const f16,
-            a: float16x8_t,
-            b: float16x8_t,
-            n: i32,
-            size: i32,
-        ) -> float16x8x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")]
+        fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
     }
-    _vld2q_lane_f16(a as _, b.0, b.1, LANE, 2)
+    _vld3q_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x2_t) -> float16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4f16.p0"
-        )]
-        fn _vld2_lane_f16(a: float16x4_t, b: float16x4_t, n: i64, ptr: *const f16)
-            -> float16x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")]
+        fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
     }
-    _vld2_lane_f16(b.0, b.1, LANE as i64, a as _)
+    _vld3_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld2q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x2_t) -> float16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v8f16.p0"
-        )]
-        fn _vld2q_lane_f16(
-            a: float16x8_t,
-            b: float16x8_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x8x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")]
+        fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
     }
-    _vld2q_lane_f16(b.0, b.1, LANE as i64, a as _)
+    _vld3q_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0"
-        )]
-        fn _vld2_lane_f32(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")]
+        fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
     }
-    _vld2_lane_f32(b.0, b.1, LANE as i64, a as _)
+    _vld3_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0"
-        )]
-        fn _vld2q_lane_f32(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8)
-            -> float32x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")]
+        fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
     }
-    _vld2q_lane_f32(b.0, b.1, LANE as i64, a as _)
+    _vld3q_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0"
-        )]
-        fn _vld2_lane_s8(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")]
+        fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
     }
-    _vld2_lane_s8(b.0, b.1, LANE as i64, a as _)
+    _vld3_dup_s32(a as *const i8, 4)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0"
-        )]
-        fn _vld2_lane_s16(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t;
-    }
-    _vld2_lane_s16(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0"
-        )]
-        fn _vld2q_lane_s16(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t;
-    }
-    _vld2q_lane_s16(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0"
-        )]
-        fn _vld2_lane_s32(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t;
-    }
-    _vld2_lane_s32(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0"
-        )]
-        fn _vld2q_lane_s32(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t;
-    }
-    _vld2q_lane_s32(b.0, b.1, LANE as i64, a as _)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0")]
-        fn _vld2_lane_f32(
-            ptr: *const i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            n: i32,
-            size: i32,
-        ) -> float32x2x2_t;
-    }
-    _vld2_lane_f32(a as _, b.0, b.1, LANE, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0")]
-        fn _vld2q_lane_f32(
-            ptr: *const i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            n: i32,
-            size: i32,
-        ) -> float32x4x2_t;
-    }
-    _vld2q_lane_f32(a as _, b.0, b.1, LANE, 4)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0")]
-        fn _vld2q_lane_s16(
-            ptr: *const i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            n: i32,
-            size: i32,
-        ) -> int16x8x2_t;
-    }
-    _vld2q_lane_s16(a as _, b.0, b.1, LANE, 2)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_s32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0")]
-        fn _vld2q_lane_s32(
-            ptr: *const i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            n: i32,
-            size: i32,
-        ) -> int32x4x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")]
+        fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
     }
-    _vld2q_lane_s32(a as _, b.0, b.1, LANE, 4)
+    _vld3q_dup_s32(a as *const i8, 4)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x2_t) -> int8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0")]
-        fn _vld2_lane_s8(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32)
-            -> int8x8x2_t;
-    }
-    _vld2_lane_s8(a as _, b.0, b.1, LANE, 1)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld3r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t {
+    transmute(vld3_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0")]
-        fn _vld2_lane_s16(
-            ptr: *const i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            n: i32,
-            size: i32,
-        ) -> int16x4x2_t;
-    }
-    _vld2_lane_s16(a as _, b.0, b.1, LANE, 2)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
 #[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld2, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0")]
-        fn _vld2_lane_s32(
-            ptr: *const i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            n: i32,
-            size: i32,
-        ) -> int32x2x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")]
+        fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
     }
-    _vld2_lane_s32(a as _, b.0, b.1, LANE, 4)
+    _vld3_dup_s64(a as *const i8, 8)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21406,23 +20263,21 @@ pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> i
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t {
+    transmute(vld3_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21431,23 +20286,21 @@ pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uin
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
+    transmute(vld3_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21456,23 +20309,21 @@ pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
+    transmute(vld3q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21481,23 +20332,21 @@ pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld2_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
+    transmute(vld3_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_u32)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21506,23 +20355,21 @@ pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2q_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
+    transmute(vld3q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21531,23 +20378,21 @@ pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
+    transmute(vld3_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_lane_p16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21556,23 +20401,21 @@ pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> pol
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
+    transmute(vld3q_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_lane_p16)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2, LANE = 0)
+    assert_instr(ld3r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -21581,21 +20424,20 @@ pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
+    transmute(vld3_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p64)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21605,55 +20447,20 @@ pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t {
-    transmute(vld2_s64(transmute(a)))
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64")]
-        fn _vld2_s64(ptr: *const i8, size: i32) -> int64x1x2_t;
-    }
-    _vld2_s64(a as *const i8, 8)
-}
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld2.v1i64.p0"
-        )]
-        fn _vld2_s64(ptr: *const int64x1_t) -> int64x1x2_t;
-    }
-    _vld2_s64(a as _)
+pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
+    transmute(vld3q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u64)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21663,21 +20470,20 @@ pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
-    transmute(vld2_s64(transmute(a)))
+pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
+    transmute(vld3_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld3r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -21687,912 +20493,775 @@ pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
-    transmute(vld2_s8(transmute(a)))
+pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
+    transmute(vld3q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t {
-    let mut ret_val: uint8x8x2_t = transmute(vld2_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")]
+        fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t;
+    }
+    _vld3_f16(a as _, 2)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")]
+        fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t;
+    }
+    _vld3q_f16(a as _, 2)
+}
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld3)
 )]
-pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
-    transmute(vld2q_s8(transmute(a)))
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
+    crate::core_arch::macros::deinterleaving_load!(f16, 4, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u8)"]
+#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
+    assert_instr(ld3)
 )]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t {
-    let mut ret_val: uint8x16x2_t = transmute(vld2q_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
+    crate::core_arch::macros::deinterleaving_load!(f16, 8, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
-    transmute(vld2_s16(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
+    crate::core_arch::macros::deinterleaving_load!(f32, 2, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t {
-    let mut ret_val: uint16x4x2_t = transmute(vld2_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
+    crate::core_arch::macros::deinterleaving_load!(f32, 4, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
-    transmute(vld2q_s16(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i8, 8, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t {
-    let mut ret_val: uint16x8x2_t = transmute(vld2q_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i8, 16, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
-    transmute(vld2_s32(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i16, 4, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t {
-    let mut ret_val: uint32x2x2_t = transmute(vld2_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i16, 8, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
-    transmute(vld2q_s32(transmute(a)))
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i32, 2, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t {
-    let mut ret_val: uint32x4x2_t = transmute(vld2q_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3))]
+pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
+    crate::core_arch::macros::deinterleaving_load!(i32, 4, 3, a)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
-    transmute(vld2_s8(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")]
+        fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
+    }
+    _vld3_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t {
-    let mut ret_val: poly8x8x2_t = transmute(vld2_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")]
+        fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
+    }
+    _vld3q_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
-    transmute(vld2q_s8(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")]
+        fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
+    }
+    _vld3_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t {
-    let mut ret_val: poly8x16x2_t = transmute(vld2q_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")]
+        fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
+    }
+    _vld3q_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
-    transmute(vld2_s16(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")]
+        fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
+    }
+    _vld3_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t {
-    let mut ret_val: poly16x4x2_t = transmute(vld2_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")]
+        fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
+    }
+    _vld3q_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
-    transmute(vld2q_s16(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")]
+        fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
+    }
+    _vld3_s32(a as *const i8, 4)
 }
-#[doc = "Load multiple 2-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld2q_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t {
-    let mut ret_val: poly16x8x2_t = transmute(vld2q_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld3))]
+pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")]
+        fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
+    }
+    _vld3q_s32(a as *const i8, 4)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
+pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f16.p0")]
-        fn _vld3_dup_f16(ptr: *const f16, size: i32) -> float16x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")]
+        fn _vld3_lane_f16(
+            ptr: *const f16,
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            n: i32,
+            size: i32,
+        ) -> float16x4x3_t;
     }
-    _vld3_dup_f16(a as _, 2)
+    _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
+pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8f16.p0")]
-        fn _vld3q_dup_f16(ptr: *const f16, size: i32) -> float16x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")]
+        fn _vld3q_lane_f16(
+            ptr: *const f16,
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            n: i32,
+            size: i32,
+        ) -> float16x8x3_t;
     }
-    _vld3q_dup_f16(a as _, 2)
+    _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_dup_f16(a: *const f16) -> float16x4x3_t {
+pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4f16.p0"
+            link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0"
         )]
-        fn _vld3_dup_f16(ptr: *const f16) -> float16x4x3_t;
+        fn _vld3_lane_f16(
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x4x3_t;
     }
-    _vld3_dup_f16(a as _)
+    _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
-#[target_feature(enable = "neon,fp16")]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_dup_f16(a: *const f16) -> float16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v8f16.p0"
-        )]
-        fn _vld3q_dup_f16(ptr: *const f16) -> float16x8x3_t;
-    }
-    _vld3q_dup_f16(a as _)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
+pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v2f32.p0"
+            link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0"
         )]
-        fn _vld3_dup_f32(ptr: *const f32) -> float32x2x3_t;
+        fn _vld3q_lane_f16(
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x8x3_t;
     }
-    _vld3_dup_f32(a as _)
+    _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
+pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4f32.p0"
+            link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0"
         )]
-        fn _vld3q_dup_f32(ptr: *const f32) -> float32x4x3_t;
+        fn _vld3_lane_f32(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x2x3_t;
     }
-    _vld3q_dup_f32(a as _)
+    _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
+pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v8i8.p0"
+            link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0"
         )]
-        fn _vld3_dup_s8(ptr: *const i8) -> int8x8x3_t;
+        fn _vld3q_lane_f32(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x4x3_t;
     }
-    _vld3_dup_s8(a as _)
+    _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v16i8.p0"
-        )]
-        fn _vld3q_dup_s8(ptr: *const i8) -> int8x16x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")]
+        fn _vld3_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x3_t;
     }
-    _vld3q_dup_s8(a as _)
+    _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
+pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4i16.p0"
+            link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0"
         )]
-        fn _vld3_dup_s16(ptr: *const i16) -> int16x4x3_t;
+        fn _vld3_lane_s8(
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int8x8x3_t;
     }
-    _vld3_dup_s16(a as _)
+    _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
+pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v8i16.p0"
+            link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0"
         )]
-        fn _vld3q_dup_s16(ptr: *const i16) -> int16x8x3_t;
+        fn _vld3_lane_s16(
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x4x3_t;
     }
-    _vld3q_dup_s16(a as _)
+    _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
+pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
+    static_assert_uimm_bits!(LANE, 4);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v2i32.p0"
+            link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0"
         )]
-        fn _vld3_dup_s32(ptr: *const i32) -> int32x2x3_t;
+        fn _vld3q_lane_s16(
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x8x3_t;
     }
-    _vld3_dup_s32(a as _)
+    _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
+pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v4i32.p0"
+            link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0"
         )]
-        fn _vld3q_dup_s32(ptr: *const i32) -> int32x4x3_t;
+        fn _vld3_lane_s32(
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x2x3_t;
     }
-    _vld3q_dup_s32(a as _)
+    _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld3r))]
-pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
+pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3r.v1i64.p0"
+            link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0"
         )]
-        fn _vld3_dup_s64(ptr: *const i64) -> int64x1x3_t;
+        fn _vld3q_lane_s32(
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x4x3_t;
     }
-    _vld3_dup_s64(a as _)
+    _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_f32)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0")]
-        fn _vld3_dup_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
-    }
-    _vld3_dup_f32(a as *const i8, 4)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
 #[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t {
+pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0")]
-        fn _vld3q_dup_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")]
+        fn _vld3_lane_s8(
+            ptr: *const i8,
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            n: i32,
+            size: i32,
+        ) -> int8x8x3_t;
     }
-    _vld3q_dup_f32(a as *const i8, 4)
+    _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s8)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0")]
-        fn _vld3_dup_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
-    }
-    _vld3_dup_s8(a as *const i8, 1)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
 #[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t {
+pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0")]
-        fn _vld3q_dup_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")]
+        fn _vld3_lane_s16(
+            ptr: *const i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            n: i32,
+            size: i32,
+        ) -> int16x4x3_t;
     }
-    _vld3q_dup_s8(a as *const i8, 1)
+    _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s16)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0")]
-        fn _vld3_dup_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
-    }
-    _vld3_dup_s16(a as *const i8, 2)
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
 #[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t {
+pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0")]
-        fn _vld3q_dup_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")]
+        fn _vld3q_lane_s16(
+            ptr: *const i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            n: i32,
+            size: i32,
+        ) -> int16x8x3_t;
     }
-    _vld3q_dup_s16(a as *const i8, 2)
+    _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s32)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t {
+pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0")]
-        fn _vld3_dup_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")]
+        fn _vld3_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x3_t;
     }
-    _vld3_dup_s32(a as *const i8, 4)
+    _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_s32)"]
+#[doc = "Load multiple 3-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
+pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0")]
-        fn _vld3q_dup_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")]
+        fn _vld3q_lane_s32(
+            ptr: *const i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            n: i32,
+            size: i32,
+        ) -> int32x4x3_t;
     }
-    _vld3q_dup_s32(a as *const i8, 4)
+    _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p64)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22601,37 +21270,23 @@ pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t {
-    transmute(vld3_dup_s64(transmute(a)))
-}
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0")]
-        fn _vld3_dup_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
-    }
-    _vld3_dup_s64(a as *const i8, 8)
+pub unsafe fn vld3_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u64)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22640,22 +21295,23 @@ pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t {
-    transmute(vld3_dup_s64(transmute(a)))
+pub unsafe fn vld3_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22664,22 +21320,23 @@ pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
-    transmute(vld3_dup_s8(transmute(a)))
+pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22688,26 +21345,23 @@ pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
-    let mut ret_val: uint8x8x3_t = transmute(vld3_dup_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld3_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22716,22 +21370,23 @@ pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
-    transmute(vld3q_dup_s8(transmute(a)))
+pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld3q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22740,44 +21395,23 @@ pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
-    let mut ret_val: uint8x16x3_t = transmute(vld3q_dup_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub unsafe fn vld3_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22786,22 +21420,23 @@ pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
-    transmute(vld3_dup_s16(transmute(a)))
+pub unsafe fn vld3_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -22810,25 +21445,21 @@ pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
-    let mut ret_val: uint16x4x3_t = transmute(vld3_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22838,73 +21469,48 @@ pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
-    transmute(vld3q_dup_s16(transmute(a)))
+pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t {
+    transmute(vld3_s64(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t {
-    let mut ret_val: uint16x8x3_t = transmute(vld3q_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
+    crate::ptr::read_unaligned(a.cast())
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
-    transmute(vld3_dup_s32(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")]
+        fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
+    }
+    _vld3_s64(a as *const i8, 8)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22914,25 +21520,20 @@ pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
-    let mut ret_val: uint32x2x3_t = transmute(vld3_dup_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val
+pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t {
+    transmute(vld3_s64(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22942,21 +21543,20 @@ pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
-    transmute(vld3q_dup_s32(transmute(a)))
+pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t {
+    transmute(vld3_s8(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_u32)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22966,25 +21566,20 @@ pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
-    let mut ret_val: uint32x4x3_t = transmute(vld3q_dup_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t {
+    transmute(vld3q_s8(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -22994,21 +21589,20 @@ pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
-    transmute(vld3_dup_s8(transmute(a)))
+pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t {
+    transmute(vld3_s16(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23018,25 +21612,20 @@ pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
-    let mut ret_val: poly8x8x3_t = transmute(vld3_dup_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t {
+    transmute(vld3q_s16(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23046,21 +21635,20 @@ pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
-    transmute(vld3q_dup_s8(transmute(a)))
+pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t {
+    transmute(vld3_s32(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p8)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23070,43 +21658,20 @@ pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
-    let mut ret_val: poly8x16x3_t = transmute(vld3q_dup_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t {
+    transmute(vld3q_s32(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23116,21 +21681,20 @@ pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
-    transmute(vld3_dup_s16(transmute(a)))
+pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
+    transmute(vld3_s8(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_dup_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23140,25 +21704,20 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
-    let mut ret_val: poly16x4x3_t = transmute(vld3_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
+    transmute(vld3q_s8(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23168,21 +21727,20 @@ pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
-    transmute(vld3q_dup_s16(transmute(a)))
+pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t {
+    transmute(vld3_s16(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_dup_p16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3r)
+    assert_instr(ld3)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -23192,843 +21750,489 @@ pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t {
-    let mut ret_val: poly16x8x3_t = transmute(vld3q_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t {
+    transmute(vld3q_s16(transmute(a)))
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"]
+#[doc = "Load multiple 3-element structures to three registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")]
+        fn _vld3q_lane_f32(
+            ptr: *const i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            n: i32,
+            size: i32,
+        ) -> float32x4x3_t;
+    }
+    _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
+pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f16.p0")]
-        fn _vld3_f16(ptr: *const f16, size: i32) -> float16x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")]
+        fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t;
     }
-    _vld3_f16(a as _, 2)
+    _vld4_dup_f16(a as _, 2)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
+pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8f16.p0")]
-        fn _vld3q_f16(ptr: *const f16, size: i32) -> float16x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")]
+        fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t;
     }
-    _vld3q_f16(a as _, 2)
+    _vld4q_dup_f16(a as _, 2)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld4r)
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_f16(a: *const f16) -> float16x4x3_t {
+pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v4f16.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v4f16.p0"
         )]
-        fn _vld3_f16(ptr: *const f16) -> float16x4x3_t;
+        fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t;
     }
-    _vld3_f16(a as _)
+    _vld4_dup_f16(a as _)
 }
-#[doc = "Load single 3-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld4r)
 )]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_f16(a: *const f16) -> float16x8x3_t {
+pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v8f16.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v8f16.p0"
         )]
-        fn _vld3q_f16(ptr: *const f16) -> float16x8x3_t;
+        fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t;
     }
-    _vld3q_f16(a as _)
+    _vld4q_dup_f16(a as _)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v2f32.p0"
-        )]
-        fn _vld3_f32(ptr: *const float32x2_t) -> float32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")]
+        fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
     }
-    _vld3_f32(a as _)
+    _vld4_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v4f32.p0"
-        )]
-        fn _vld3q_f32(ptr: *const float32x4_t) -> float32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")]
+        fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
     }
-    _vld3q_f32(a as _)
+    _vld4q_dup_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v8i8.p0"
-        )]
-        fn _vld3_s8(ptr: *const int8x8_t) -> int8x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")]
+        fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
     }
-    _vld3_s8(a as _)
+    _vld4_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v16i8.p0"
-        )]
-        fn _vld3q_s8(ptr: *const int8x16_t) -> int8x16x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")]
+        fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
     }
-    _vld3q_s8(a as _)
+    _vld4q_dup_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v4i16.p0"
-        )]
-        fn _vld3_s16(ptr: *const int16x4_t) -> int16x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")]
+        fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
     }
-    _vld3_s16(a as _)
+    _vld4_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v8i16.p0"
-        )]
-        fn _vld3q_s16(ptr: *const int16x8_t) -> int16x8x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")]
+        fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
     }
-    _vld3q_s16(a as _)
+    _vld4q_dup_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v2i32.p0"
-        )]
-        fn _vld3_s32(ptr: *const int32x2_t) -> int32x2x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")]
+        fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
     }
-    _vld3_s32(a as _)
+    _vld4_dup_s32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vld4))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0")]
+        fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
+    }
+    _vld4q_dup_s32(a as *const i8, 4)
+}
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3))]
-pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v4i32.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v2f32.p0.p0"
         )]
-        fn _vld3q_s32(ptr: *const int32x4_t) -> int32x4x3_t;
+        fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t;
     }
-    _vld3q_s32(a as _)
+    _vld4_dup_f32(a as _)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0")]
-        fn _vld3_f32(ptr: *const i8, size: i32) -> float32x2x3_t;
-    }
-    _vld3_f32(a as *const i8, 4)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0")]
-        fn _vld3q_f32(ptr: *const i8, size: i32) -> float32x4x3_t;
-    }
-    _vld3q_f32(a as *const i8, 4)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0")]
-        fn _vld3_s8(ptr: *const i8, size: i32) -> int8x8x3_t;
-    }
-    _vld3_s8(a as *const i8, 1)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0")]
-        fn _vld3q_s8(ptr: *const i8, size: i32) -> int8x16x3_t;
-    }
-    _vld3q_s8(a as *const i8, 1)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0")]
-        fn _vld3_s16(ptr: *const i8, size: i32) -> int16x4x3_t;
-    }
-    _vld3_s16(a as *const i8, 2)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0")]
-        fn _vld3q_s16(ptr: *const i8, size: i32) -> int16x8x3_t;
-    }
-    _vld3q_s16(a as *const i8, 2)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0")]
-        fn _vld3_s32(ptr: *const i8, size: i32) -> int32x2x3_t;
-    }
-    _vld3_s32(a as *const i8, 4)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld3))]
-pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0")]
-        fn _vld3q_s32(ptr: *const i8, size: i32) -> int32x4x3_t;
-    }
-    _vld3q_s32(a as *const i8, 4)
-}
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f16.p0")]
-        fn _vld3_lane_f16(
-            ptr: *const f16,
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            n: i32,
-            size: i32,
-        ) -> float16x4x3_t;
-    }
-    _vld3_lane_f16(a as _, b.0, b.1, b.2, LANE, 2)
-}
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8f16.p0")]
-        fn _vld3q_lane_f16(
-            ptr: *const f16,
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            n: i32,
-            size: i32,
-        ) -> float16x8x3_t;
-    }
-    _vld3q_lane_f16(a as _, b.0, b.1, b.2, LANE, 2)
-}
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x3_t) -> float16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4f16.p0"
-        )]
-        fn _vld3_lane_f16(
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x4x3_t;
-    }
-    _vld3_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
-}
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld3q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x3_t) -> float16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(test, assert_instr(ld4r))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v8f16.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v4f32.p0.p0"
         )]
-        fn _vld3q_lane_f16(
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x8x3_t;
+        fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t;
     }
-    _vld3q_lane_f16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld4q_dup_f32(a as _)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v8i8.p0.p0"
         )]
-        fn _vld3_lane_f32(
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x2x3_t;
+        fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t;
     }
-    _vld3_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld4_dup_s8(a as _)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v16i8.p0.p0"
         )]
-        fn _vld3q_lane_f32(
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x4x3_t;
-    }
-    _vld3q_lane_f32(b.0, b.1, b.2, LANE as i64, a as _)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x3_t) -> float32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0")]
-        fn _vld3_lane_f32(
-            ptr: *const i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            n: i32,
-            size: i32,
-        ) -> float32x2x3_t;
+        fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t;
     }
-    _vld3_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vld4q_dup_s8(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
+pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v4i16.p0.p0"
         )]
-        fn _vld3_lane_s8(
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int8x8x3_t;
+        fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t;
     }
-    _vld3_lane_s8(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld4_dup_s16(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v8i16.p0.p0"
         )]
-        fn _vld3_lane_s16(
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x4x3_t;
+        fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t;
     }
-    _vld3_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld4q_dup_s16(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
-    static_assert_uimm_bits!(LANE, 4);
+pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v2i32.p0.p0"
         )]
-        fn _vld3q_lane_s16(
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x8x3_t;
+        fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t;
     }
-    _vld3q_lane_s16(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld4_dup_s32(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
+pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v4i32.p0.p0"
         )]
-        fn _vld3_lane_s32(
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x2x3_t;
+        fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t;
     }
-    _vld3_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld4q_dup_s32(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(ld4r))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0"
+            link_name = "llvm.aarch64.neon.ld4r.v1i64.p0.p0"
         )]
-        fn _vld3q_lane_s32(
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x4x3_t;
+        fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t;
     }
-    _vld3q_lane_s32(b.0, b.1, b.2, LANE as i64, a as _)
+    _vld4_dup_s64(a as _)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x3_t) -> int8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0")]
-        fn _vld3_lane_s8(
-            ptr: *const i8,
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            n: i32,
-            size: i32,
-        ) -> int8x8x3_t;
-    }
-    _vld3_lane_s8(a as _, b.0, b.1, b.2, LANE, 1)
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t {
+    transmute(vld4_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(nop))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x3_t) -> int16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0")]
-        fn _vld3_lane_s16(
-            ptr: *const i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            n: i32,
-            size: i32,
-        ) -> int16x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")]
+        fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
     }
-    _vld3_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+    _vld4_dup_s64(a as *const i8, 8)
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x3_t) -> int16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0")]
-        fn _vld3q_lane_s16(
-            ptr: *const i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            n: i32,
-            size: i32,
-        ) -> int16x8x3_t;
-    }
-    _vld3q_lane_s16(a as _, b.0, b.1, b.2, LANE, 2)
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(ld4r)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t {
+    transmute(vld4_dup_s64(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x3_t) -> int32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0")]
-        fn _vld3_lane_s32(
-            ptr: *const i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            n: i32,
-            size: i32,
-        ) -> int32x2x3_t;
-    }
-    _vld3_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
-}
-#[doc = "Load multiple 3-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) -> int32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0")]
-        fn _vld3q_lane_s32(
-            ptr: *const i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            n: i32,
-            size: i32,
-        ) -> int32x4x3_t;
-    }
-    _vld3q_lane_s32(a as _, b.0, b.1, b.2, LANE, 4)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24037,23 +22241,21 @@ pub unsafe fn vld3q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
+    transmute(vld4_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24062,23 +22264,21 @@ pub unsafe fn vld3_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x3_t) -> uin
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
+    transmute(vld4q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24087,23 +22287,21 @@ pub unsafe fn vld3_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
+    transmute(vld4_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_u32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24112,23 +22310,21 @@ pub unsafe fn vld3q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld3_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
+    transmute(vld4q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_u32)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24137,23 +22333,21 @@ pub unsafe fn vld3_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3q_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
+    transmute(vld4_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24162,23 +22356,21 @@ pub unsafe fn vld3q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
+    transmute(vld4q_dup_s32(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_lane_p16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
+    assert_instr(ld4r)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -24187,46 +22379,20 @@ pub unsafe fn vld3_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x3_t) -> pol
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld3_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
+    transmute(vld4_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_p16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld3q_lane_s16::<LANE>(transmute(a), transmute(b)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24236,55 +22402,20 @@ pub unsafe fn vld3q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x3_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t {
-    transmute(vld3_s64(transmute(a)))
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld3.v1i64.p0"
-        )]
-        fn _vld3_s64(ptr: *const int64x1_t) -> int64x1x3_t;
-    }
-    _vld3_s64(a as _)
-}
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0")]
-        fn _vld3_s64(ptr: *const i8, size: i32) -> int64x1x3_t;
-    }
-    _vld3_s64(a as *const i8, 8)
+pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
+    transmute(vld4q_dup_s8(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u64)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24294,21 +22425,20 @@ pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t {
-    transmute(vld3_s64(transmute(a)))
+pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
+    transmute(vld4_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
+    assert_instr(ld4r)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -24318,998 +22448,812 @@ pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t {
-    transmute(vld3_s8(transmute(a)))
+pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
+    transmute(vld4q_dup_s16(transmute(a)))
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t {
-    let mut ret_val: uint8x8x3_t = transmute(vld3_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")]
+        fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t;
+    }
+    _vld4_f16(a as _, 2)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t {
-    transmute(vld3q_s8(transmute(a)))
+#[cfg(target_arch = "arm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")]
+        fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t;
+    }
+    _vld4q_f16(a as _, 2)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u8)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld4)
 )]
-pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t {
-    let mut ret_val: uint8x16x3_t = transmute(vld3q_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
+    crate::core_arch::macros::deinterleaving_load!(f16, 4, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"]
+#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
+#[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    assert_instr(ld4)
 )]
-pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t {
-    transmute(vld3_s16(transmute(a)))
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
+    crate::core_arch::macros::deinterleaving_load!(f16, 8, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t {
-    let mut ret_val: uint16x4x3_t = transmute(vld3_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
+    crate::core_arch::macros::deinterleaving_load!(f32, 2, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t {
-    transmute(vld3q_s16(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+    crate::core_arch::macros::deinterleaving_load!(f32, 4, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t {
-    let mut ret_val: uint16x8x3_t = transmute(vld3q_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i8, 8, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t {
-    transmute(vld3_s32(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i8, 16, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t {
-    let mut ret_val: uint32x2x3_t = transmute(vld3_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i16, 4, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t {
-    transmute(vld3q_s32(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i16, 8, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t {
-    let mut ret_val: uint32x4x3_t = transmute(vld3q_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i32, 2, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
-    transmute(vld3_s8(transmute(a)))
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(ld4))]
+pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
+    crate::core_arch::macros::deinterleaving_load!(i32, 4, 4, a)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t {
-    let mut ret_val: poly8x8x3_t = transmute(vld3_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")]
+        fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
+    }
+    _vld4_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
-    transmute(vld3q_s8(transmute(a)))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")]
+        fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
+    }
+    _vld4q_f32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t {
-    let mut ret_val: poly8x16x3_t = transmute(vld3q_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")]
+        fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
+    }
+    _vld4_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t {
-    transmute(vld3_s16(transmute(a)))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")]
+        fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
+    }
+    _vld4q_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3_p16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t {
-    let mut ret_val: poly16x4x3_t = transmute(vld3_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")]
+        fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
+    }
+    _vld4_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t {
-    transmute(vld3q_s16(transmute(a)))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")]
+        fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
+    }
+    _vld4q_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_p16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld3)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t {
-    let mut ret_val: poly16x8x3_t = transmute(vld3q_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")]
+        fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
+    }
+    _vld4_s32(a as *const i8, 4)
 }
-#[doc = "Load multiple 3-element structures to three registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld3q_lane_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld3, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
+#[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(test, assert_instr(vld4))]
+pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0")]
-        fn _vld3q_lane_f32(
-            ptr: *const i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            n: i32,
-            size: i32,
-        ) -> float32x4x3_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")]
+        fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
     }
-    _vld3q_lane_f32(a as _, b.0, b.1, b.2, LANE, 4)
+    _vld4q_s32(a as *const i8, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"]
+#[doc = "Load multiple 4-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
+pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f16.p0")]
-        fn _vld4_dup_f16(ptr: *const f16, size: i32) -> float16x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")]
+        fn _vld4_lane_f16(
+            ptr: *const f16,
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            d: float16x4_t,
+            n: i32,
+            size: i32,
+        ) -> float16x4x4_t;
     }
-    _vld4_dup_f16(a as _, 2)
+    _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"]
+#[doc = "Load multiple 4-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
+pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8f16.p0")]
-        fn _vld4q_dup_f16(ptr: *const f16, size: i32) -> float16x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")]
+        fn _vld4q_lane_f16(
+            ptr: *const f16,
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            d: float16x8_t,
+            n: i32,
+            size: i32,
+        ) -> float16x8x4_t;
     }
-    _vld4q_dup_f16(a as _, 2)
+    _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f16)"]
+#[doc = "Load multiple 4-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_dup_f16(a: *const f16) -> float16x4x4_t {
+pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4f16.p0"
+            link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0"
         )]
-        fn _vld4_dup_f16(ptr: *const f16) -> float16x4x4_t;
+        fn _vld4_lane_f16(
+            a: float16x4_t,
+            b: float16x4_t,
+            c: float16x4_t,
+            d: float16x4_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x4x4_t;
     }
-    _vld4_dup_f16(a as _)
+    _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f16)"]
+#[doc = "Load multiple 4-element structures to two registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_dup_f16(a: *const f16) -> float16x8x4_t {
+pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v8f16.p0"
+            link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0"
         )]
-        fn _vld4q_dup_f16(ptr: *const f16) -> float16x8x4_t;
+        fn _vld4q_lane_f16(
+            a: float16x8_t,
+            b: float16x8_t,
+            c: float16x8_t,
+            d: float16x8_t,
+            n: i64,
+            ptr: *const f16,
+        ) -> float16x8x4_t;
     }
-    _vld4q_dup_f16(a as _)
+    _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0")]
-        fn _vld4_dup_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0"
+        )]
+        fn _vld4_lane_f32(
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x2x4_t;
     }
-    _vld4_dup_f32(a as *const i8, 4)
+    _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0")]
-        fn _vld4q_dup_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0"
+        )]
+        fn _vld4q_lane_f32(
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> float32x4x4_t;
     }
-    _vld4q_dup_f32(a as *const i8, 4)
+    _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0")]
-        fn _vld4_dup_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0"
+        )]
+        fn _vld4_lane_s8(
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            d: int8x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int8x8x4_t;
     }
-    _vld4_dup_s8(a as *const i8, 1)
+    _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0")]
-        fn _vld4q_dup_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
-    }
-    _vld4q_dup_s8(a as *const i8, 1)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0")]
-        fn _vld4_dup_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
-    }
-    _vld4_dup_s16(a as *const i8, 2)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0")]
-        fn _vld4q_dup_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
-    }
-    _vld4q_dup_s16(a as *const i8, 2)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0")]
-        fn _vld4_dup_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
-    }
-    _vld4_dup_s32(a as *const i8, 4)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vld4))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0")]
-        fn _vld4q_dup_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
-    }
-    _vld4q_dup_s32(a as *const i8, 4)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t {
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v2f32.p0.p0"
+            link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0"
         )]
-        fn _vld4_dup_f32(ptr: *const f32) -> float32x2x4_t;
+        fn _vld4_lane_s16(
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            d: int16x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x4x4_t;
     }
-    _vld4_dup_f32(a as _)
+    _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_f32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t {
+pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4f32.p0.p0"
+            link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0"
         )]
-        fn _vld4q_dup_f32(ptr: *const f32) -> float32x4x4_t;
+        fn _vld4q_lane_s16(
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            d: int16x8_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int16x8x4_t;
     }
-    _vld4q_dup_f32(a as _)
+    _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t {
+pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v8i8.p0.p0"
+            link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0"
         )]
-        fn _vld4_dup_s8(ptr: *const i8) -> int8x8x4_t;
+        fn _vld4_lane_s32(
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x2x4_t;
     }
-    _vld4_dup_s8(a as _)
+    _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
+#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t {
+pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v16i8.p0.p0"
+            link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0"
         )]
-        fn _vld4q_dup_s8(ptr: *const i8) -> int8x16x4_t;
+        fn _vld4q_lane_s32(
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            n: i64,
+            ptr: *const i8,
+        ) -> int32x4x4_t;
     }
-    _vld4q_dup_s8(a as _)
+    _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t {
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4i16.p0.p0"
-        )]
-        fn _vld4_dup_s16(ptr: *const i16) -> int16x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")]
+        fn _vld4_lane_f32(
+            ptr: *const i8,
+            a: float32x2_t,
+            b: float32x2_t,
+            c: float32x2_t,
+            d: float32x2_t,
+            n: i32,
+            size: i32,
+        ) -> float32x2x4_t;
     }
-    _vld4_dup_s16(a as _)
+    _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t {
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v8i16.p0.p0"
-        )]
-        fn _vld4q_dup_s16(ptr: *const i16) -> int16x8x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")]
+        fn _vld4q_lane_f32(
+            ptr: *const i8,
+            a: float32x4_t,
+            b: float32x4_t,
+            c: float32x4_t,
+            d: float32x4_t,
+            n: i32,
+            size: i32,
+        ) -> float32x4x4_t;
     }
-    _vld4q_dup_s16(a as _)
+    _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t {
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v2i32.p0.p0"
-        )]
-        fn _vld4_dup_s32(ptr: *const i32) -> int32x2x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")]
+        fn _vld4_lane_s8(
+            ptr: *const i8,
+            a: int8x8_t,
+            b: int8x8_t,
+            c: int8x8_t,
+            d: int8x8_t,
+            n: i32,
+            size: i32,
+        ) -> int8x8x4_t;
     }
-    _vld4_dup_s32(a as _)
+    _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_s32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t {
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v4i32.p0.p0"
-        )]
-        fn _vld4q_dup_s32(ptr: *const i32) -> int32x4x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")]
+        fn _vld4_lane_s16(
+            ptr: *const i8,
+            a: int16x4_t,
+            b: int16x4_t,
+            c: int16x4_t,
+            d: int16x4_t,
+            n: i32,
+            size: i32,
+        ) -> int16x4x4_t;
     }
-    _vld4q_dup_s32(a as _)
+    _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4r))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4r.v1i64.p0.p0"
-        )]
-        fn _vld4_dup_s64(ptr: *const i64) -> int64x1x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")]
+        fn _vld4q_lane_s16(
+            ptr: *const i8,
+            a: int16x8_t,
+            b: int16x8_t,
+            c: int16x8_t,
+            d: int16x8_t,
+            n: i32,
+            size: i32,
+        ) -> int16x8x4_t;
     }
-    _vld4_dup_s64(a as _)
+    _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p64)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t {
-    transmute(vld4_dup_s64(transmute(a)))
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")]
+        fn _vld4_lane_s32(
+            ptr: *const i8,
+            a: int32x2_t,
+            b: int32x2_t,
+            c: int32x2_t,
+            d: int32x2_t,
+            n: i32,
+            size: i32,
+        ) -> int32x2x4_t;
+    }
+    _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_s64)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(nop))]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
+pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0")]
-        fn _vld4_dup_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")]
+        fn _vld4q_lane_s32(
+            ptr: *const i8,
+            a: int32x4_t,
+            b: int32x4_t,
+            c: int32x4_t,
+            d: int32x4_t,
+            n: i32,
+            size: i32,
+        ) -> int32x4x4_t;
     }
-    _vld4_dup_s64(a as *const i8, 8)
+    _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u64)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25318,22 +23262,23 @@ pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t {
-    transmute(vld4_dup_s64(transmute(a)))
+pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25342,22 +23287,23 @@ pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
-    transmute(vld4_dup_s8(transmute(a)))
+pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25366,27 +23312,23 @@ pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
-    let mut ret_val: uint8x8x4_t = transmute(vld4_dup_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25395,22 +23337,23 @@ pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
-    transmute(vld4q_dup_s8(transmute(a)))
+pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    transmute(vld4_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25419,51 +23362,23 @@ pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
-    let mut ret_val: uint8x16x4_t = transmute(vld4q_dup_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.3 = unsafe {
-        simd_shuffle!(
-            ret_val.3,
-            ret_val.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4q_lane_s32::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25472,22 +23387,23 @@ pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
-    transmute(vld4_dup_s16(transmute(a)))
+pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25496,27 +23412,23 @@ pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
-    let mut ret_val: uint16x4x4_t = transmute(vld4_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -25525,21 +23437,21 @@ pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
-    transmute(vld4q_dup_s16(transmute(a)))
+pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25549,26 +23461,48 @@ pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
-    let mut ret_val: uint16x8x4_t = transmute(vld4q_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t {
+    transmute(vld4_s64(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
+    crate::ptr::read_unaligned(a.cast())
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
+#[inline]
+#[target_feature(enable = "neon,v7")]
+#[cfg(target_arch = "arm")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[cfg_attr(test, assert_instr(nop))]
+pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")]
+        fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
+    }
+    _vld4_s64(a as *const i8, 8)
+}
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"]
+#[doc = "## Safety"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25578,21 +23512,20 @@ pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
-    transmute(vld4_dup_s32(transmute(a)))
+pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t {
+    transmute(vld4_s64(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25602,26 +23535,20 @@ pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
-    let mut ret_val: uint32x2x4_t = transmute(vld4_dup_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
-    ret_val
+pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
+    transmute(vld4_s8(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25631,21 +23558,20 @@ pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
-    transmute(vld4q_dup_s32(transmute(a)))
+pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t {
+    transmute(vld4q_s8(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_u32)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25655,26 +23581,20 @@ pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
-    let mut ret_val: uint32x4x4_t = transmute(vld4q_dup_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
+    transmute(vld4_s16(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25684,21 +23604,20 @@ pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
-    transmute(vld4_dup_s8(transmute(a)))
+pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
+    transmute(vld4q_s16(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25708,26 +23627,20 @@ pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
-    let mut ret_val: poly8x8x4_t = transmute(vld4_dup_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
+    transmute(vld4_s32(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25737,21 +23650,20 @@ pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
-    transmute(vld4q_dup_s8(transmute(a)))
+pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
+    transmute(vld4q_s32(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p8)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25761,50 +23673,20 @@ pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
-    let mut ret_val: poly8x16x4_t = transmute(vld4q_dup_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.3 = unsafe {
-        simd_shuffle!(
-            ret_val.3,
-            ret_val.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
+    transmute(vld4_s8(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25814,21 +23696,20 @@ pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
-    transmute(vld4_dup_s16(transmute(a)))
+pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
+    transmute(vld4q_s8(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_dup_p16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25838,26 +23719,20 @@ pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
-    let mut ret_val: poly16x4x4_t = transmute(vld4_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) };
-    ret_val
+pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
+    transmute(vld4_s16(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
+#[doc = "Load multiple 4-element structures to four registers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(ld4)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25867,21 +23742,20 @@ pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
-    transmute(vld4q_dup_s16(transmute(a)))
+pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
+    transmute(vld4q_s16(transmute(a)))
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_dup_p16)"]
+#[doc = "Store SIMD&FP register (immediate offset)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldrq_p128)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4r)
+    assert_instr(nop)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -25891,887 +23765,632 @@ pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t {
-    let mut ret_val: poly16x8x4_t = transmute(vld4q_dup_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f16.p0")]
-        fn _vld4_f16(ptr: *const f16, size: i32) -> float16x4x4_t;
-    }
-    _vld4_f16(a as _, 2)
-}
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8f16.p0")]
-        fn _vld4q_f16(ptr: *const f16, size: i32) -> float16x8x4_t;
-    }
-    _vld4q_f16(a as _, 2)
+pub unsafe fn vldrq_p128(a: *const p128) -> p128 {
+    *a
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fmax)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_f16(a: *const f16) -> float16x4x4_t {
+pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v4f16.p0"
+            link_name = "llvm.aarch64.neon.fmax.v4f16"
         )]
-        fn _vld4_f16(ptr: *const f16) -> float16x4x4_t;
+        fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
     }
-    _vld4_f16(a as _)
+    unsafe { _vmax_f16(a, b) }
 }
-#[doc = "Load single 4-element structure and replicate to all lanes of two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fmax)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_f16(a: *const f16) -> float16x8x4_t {
+pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v8f16.p0"
+            link_name = "llvm.aarch64.neon.fmax.v8f16"
         )]
-        fn _vld4q_f16(ptr: *const f16) -> float16x8x4_t;
+        fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
     }
-    _vld4q_f16(a as _)
+    unsafe { _vmaxq_f16(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v2f32.p0"
+            link_name = "llvm.aarch64.neon.fmax.v2f32"
         )]
-        fn _vld4_f32(ptr: *const float32x2_t) -> float32x2x4_t;
+        fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
-    _vld4_f32(a as _)
+    unsafe { _vmax_f32(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v4f32.p0"
+            link_name = "llvm.aarch64.neon.fmax.v4f32"
         )]
-        fn _vld4q_f32(ptr: *const float32x4_t) -> float32x4x4_t;
+        fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
     }
-    _vld4q_f32(a as _)
+    unsafe { _vmaxq_f32(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v8i8.p0"
-        )]
-        fn _vld4_s8(ptr: *const int8x8_t) -> int8x8x4_t;
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe {
+        let mask: int8x8_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4_s8(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v16i8.p0"
-        )]
-        fn _vld4q_s8(ptr: *const int8x16_t) -> int8x16x4_t;
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let mask: int8x16_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4q_s8(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v4i16.p0"
-        )]
-        fn _vld4_s16(ptr: *const int16x4_t) -> int16x4x4_t;
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe {
+        let mask: int16x4_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4_s16(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v8i16.p0"
-        )]
-        fn _vld4q_s16(ptr: *const int16x8_t) -> int16x8x4_t;
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe {
+        let mask: int16x8_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4q_s16(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v2i32.p0"
-        )]
-        fn _vld4_s32(ptr: *const int32x2_t) -> int32x2x4_t;
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let mask: int32x2_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4_s32(a as _)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(ld4))]
-pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v4i32.p0"
-        )]
-        fn _vld4q_s32(ptr: *const int32x4_t) -> int32x4x4_t;
-    }
-    _vld4q_s32(a as _)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0")]
-        fn _vld4_f32(ptr: *const i8, size: i32) -> float32x2x4_t;
-    }
-    _vld4_f32(a as *const i8, 4)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0")]
-        fn _vld4q_f32(ptr: *const i8, size: i32) -> float32x4x4_t;
-    }
-    _vld4q_f32(a as *const i8, 4)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0")]
-        fn _vld4_s8(ptr: *const i8, size: i32) -> int8x8x4_t;
-    }
-    _vld4_s8(a as *const i8, 1)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0")]
-        fn _vld4q_s8(ptr: *const i8, size: i32) -> int8x16x4_t;
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(smax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let mask: int32x4_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4q_s8(a as *const i8, 1)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0")]
-        fn _vld4_s16(ptr: *const i8, size: i32) -> int16x4x4_t;
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let mask: uint8x8_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0")]
-        fn _vld4q_s16(ptr: *const i8, size: i32) -> int16x8x4_t;
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let mask: uint8x16_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4q_s16(a as *const i8, 2)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2i32.p0")]
-        fn _vld4_s32(ptr: *const i8, size: i32) -> int32x2x4_t;
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let mask: uint16x4_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4_s32(a as *const i8, 4)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vld4))]
-pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0")]
-        fn _vld4q_s32(ptr: *const i8, size: i32) -> int32x4x4_t;
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let mask: uint16x8_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4q_s32(a as *const i8, 4)
 }
-#[doc = "Load multiple 4-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f16.p0")]
-        fn _vld4_lane_f16(
-            ptr: *const f16,
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            d: float16x4_t,
-            n: i32,
-            size: i32,
-        ) -> float16x4x4_t;
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe {
+        let mask: uint32x2_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Load multiple 4-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Maximum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"]
 #[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8f16.p0")]
-        fn _vld4q_lane_f16(
-            ptr: *const f16,
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            d: float16x8_t,
-            n: i32,
-            size: i32,
-        ) -> float16x8x4_t;
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(umax)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe {
+        let mask: uint32x4_t = simd_ge(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4q_lane_f16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
 }
-#[doc = "Load multiple 4-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(fmaxnm)
 )]
-#[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4_lane_f16<const LANE: i32>(a: *const f16, b: float16x4x4_t) -> float16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4f16.p0"
+            link_name = "llvm.aarch64.neon.fmaxnm.v4f16"
         )]
-        fn _vld4_lane_f16(
-            a: float16x4_t,
-            b: float16x4_t,
-            c: float16x4_t,
-            d: float16x4_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x4x4_t;
+        fn _vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
     }
-    _vld4_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    unsafe { _vmaxnm_f16(a, b) }
 }
-#[doc = "Load multiple 4-element structures to two registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"]
 #[inline]
-#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(fmaxnm)
 )]
-#[rustc_legacy_const_generics(2)]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub unsafe fn vld4q_lane_f16<const LANE: i32>(a: *const f16, b: float16x8x4_t) -> float16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v8f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v8f16.p0"
+            link_name = "llvm.aarch64.neon.fmaxnm.v8f16"
         )]
-        fn _vld4q_lane_f16(
-            a: float16x8_t,
-            b: float16x8_t,
-            c: float16x8_t,
-            d: float16x8_t,
-            n: i64,
-            ptr: *const f16,
-        ) -> float16x8x4_t;
+        fn _vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
     }
-    _vld4q_lane_f16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    unsafe { _vmaxnmq_f16(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmaxnm)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0"
+            link_name = "llvm.aarch64.neon.fmaxnm.v2f32"
         )]
-        fn _vld4_lane_f32(
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x2x4_t;
+        fn _vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
-    _vld4_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    unsafe { _vmaxnm_f32(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point Maximum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmaxnm)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0"
+            link_name = "llvm.aarch64.neon.fmaxnm.v4f32"
         )]
-        fn _vld4q_lane_f32(
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> float32x4x4_t;
+        fn _vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
     }
-    _vld4q_lane_f32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    unsafe { _vmaxnmq_f32(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmin)
+)]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0"
+            link_name = "llvm.aarch64.neon.fmin.v4f16"
         )]
-        fn _vld4_lane_s8(
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            d: int8x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int8x8x4_t;
+        fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
     }
-    _vld4_lane_s8(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    unsafe { _vmin_f16(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmin)
+)]
+#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0"
+            link_name = "llvm.aarch64.neon.fmin.v8f16"
         )]
-        fn _vld4_lane_s16(
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            d: int16x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x4x4_t;
+        fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
     }
-    _vld4_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
+    unsafe { _vminq_f16(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0"
-        )]
-        fn _vld4q_lane_s16(
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            d: int16x8_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int16x8x4_t;
-    }
-    _vld4q_lane_s16(b.0, b.1, b.2, b.3, LANE as i64, a as _)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0"
-        )]
-        fn _vld4_lane_s32(
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            d: int32x2_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x2x4_t;
-    }
-    _vld4_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(ld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmin)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0"
+            link_name = "llvm.aarch64.neon.fmin.v2f32"
         )]
-        fn _vld4q_lane_s32(
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            d: int32x4_t,
-            n: i64,
-            ptr: *const i8,
-        ) -> int32x4x4_t;
-    }
-    _vld4q_lane_s32(b.0, b.1, b.2, b.3, LANE as i64, a as _)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x4_t) -> float32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0")]
-        fn _vld4_lane_f32(
-            ptr: *const i8,
-            a: float32x2_t,
-            b: float32x2_t,
-            c: float32x2_t,
-            d: float32x2_t,
-            n: i32,
-            size: i32,
-        ) -> float32x2x4_t;
-    }
-    _vld4_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x4_t) -> float32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0")]
-        fn _vld4q_lane_f32(
-            ptr: *const i8,
-            a: float32x4_t,
-            b: float32x4_t,
-            c: float32x4_t,
-            d: float32x4_t,
-            n: i32,
-            size: i32,
-        ) -> float32x4x4_t;
-    }
-    _vld4q_lane_f32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0")]
-        fn _vld4_lane_s8(
-            ptr: *const i8,
-            a: int8x8_t,
-            b: int8x8_t,
-            c: int8x8_t,
-            d: int8x8_t,
-            n: i32,
-            size: i32,
-        ) -> int8x8x4_t;
-    }
-    _vld4_lane_s8(a as _, b.0, b.1, b.2, b.3, LANE, 1)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0")]
-        fn _vld4_lane_s16(
-            ptr: *const i8,
-            a: int16x4_t,
-            b: int16x4_t,
-            c: int16x4_t,
-            d: int16x4_t,
-            n: i32,
-            size: i32,
-        ) -> int16x4x4_t;
-    }
-    _vld4_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x4_t) -> int16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0")]
-        fn _vld4q_lane_s16(
-            ptr: *const i8,
-            a: int16x8_t,
-            b: int16x8_t,
-            c: int16x8_t,
-            d: int16x8_t,
-            n: i32,
-            size: i32,
-        ) -> int16x8x4_t;
-    }
-    _vld4q_lane_s16(a as _, b.0, b.1, b.2, b.3, LANE, 2)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0")]
-        fn _vld4_lane_s32(
-            ptr: *const i8,
-            a: int32x2_t,
-            b: int32x2_t,
-            c: int32x2_t,
-            d: int32x2_t,
-            n: i32,
-            size: i32,
-        ) -> int32x2x4_t;
-    }
-    _vld4_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_s32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(test, assert_instr(vld4, LANE = 0))]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) -> int32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0")]
-        fn _vld4q_lane_s32(
-            ptr: *const i8,
-            a: int32x4_t,
-            b: int32x4_t,
-            c: int32x4_t,
-            d: int32x4_t,
-            n: i32,
-            size: i32,
-        ) -> int32x4x4_t;
+        fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
     }
-    _vld4q_lane_s32(a as _, b.0, b.1, b.2, b.3, LANE, 4)
+    unsafe { _vmin_f32(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(fmin)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26780,23 +24399,27 @@ pub unsafe fn vld4q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmin.v4f32"
+        )]
+        fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vminq_f32(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26805,23 +24428,22 @@ pub unsafe fn vld4_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x4_t) -> uin
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe {
+        let mask: int8x8_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26830,23 +24452,22 @@ pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe {
+        let mask: int8x16_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26855,23 +24476,22 @@ pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    transmute(vld4_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe {
+        let mask: int16x4_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26880,23 +24500,22 @@ pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4q_lane_s32::<LANE>(transmute(a), transmute(b)))
+pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe {
+        let mask: int16x8_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26905,23 +24524,22 @@ pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b)))
+pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe {
+        let mask: int32x2_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(smin)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26930,23 +24548,22 @@ pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> pol
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe {
+        let mask: int32x4_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_lane_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4, LANE = 0)
+    assert_instr(umin)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -26955,21 +24572,21 @@ pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b)))
+pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe {
+        let mask: uint8x8_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -26979,55 +24596,21 @@ pub unsafe fn vld4q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x4_t) ->
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t {
-    transmute(vld4_s64(transmute(a)))
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ld4.v1i64.p0"
-        )]
-        fn _vld4_s64(ptr: *const int64x1_t) -> int64x1x4_t;
-    }
-    _vld4_s64(a as _)
-}
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_s64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
-#[inline]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(nop))]
-pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0")]
-        fn _vld4_s64(ptr: *const i8, size: i32) -> int64x1x4_t;
+pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe {
+        let mask: uint8x16_t = simd_le(a, b);
+        simd_select(mask, a, b)
     }
-    _vld4_s64(a as *const i8, 8)
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27037,21 +24620,21 @@ pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t {
-    transmute(vld4_s64(transmute(a)))
+pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe {
+        let mask: uint16x4_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27061,21 +24644,21 @@ pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
-    transmute(vld4_s8(transmute(a)))
+pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe {
+        let mask: uint16x8_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27085,26 +24668,21 @@ pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
-    let mut ret_val: uint8x8x4_t = transmute(vld4_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe {
+        let mask: uint32x2_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Minimum (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(umin)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27114,98 +24692,81 @@ pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t {
-    transmute(vld4q_s8(transmute(a)))
+pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe {
+        let mask: uint32x4_t = simd_le(a, b);
+        simd_select(mask, a, b)
+    }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fminnm)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t {
-    let mut ret_val: uint8x16x4_t = transmute(vld4q_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.3 = unsafe {
-        simd_shuffle!(
-            ret_val.3,
-            ret_val.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v4f16"
+        )]
+        fn _vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vminnm_f16(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fminnm)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
-    transmute(vld4_s16(transmute(a)))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v8f16"
+        )]
+        fn _vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+    }
+    unsafe { _vminnmq_f16(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fminnm)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27215,26 +24776,26 @@ pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
-    let mut ret_val: uint16x4x4_t = transmute(vld4_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) };
-    ret_val
+pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v2f32"
+        )]
+        fn _vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vminnm_f32(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point Minimum Number (vector)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fminnm)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27244,21 +24805,26 @@ pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
-    transmute(vld4q_s16(transmute(a)))
+pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminnm.v4f32"
+        )]
+        fn _vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+    unsafe { _vminnmq_f32(a, b) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27268,26 +24834,18 @@ pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
-    let mut ret_val: uint16x8x4_t = transmute(vld4q_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Floating-point multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27297,22 +24855,20 @@ pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
-    transmute(vld4_s32(transmute(a)))
+pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27321,27 +24877,25 @@ pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
-    let mut ret_val: uint32x2x4_t = transmute(vld4_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) };
-    ret_val
+pub fn vmla_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmla_f32(a, b, vdup_lane_f32::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27350,22 +24904,25 @@ pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
-    transmute(vld4q_s32(transmute(a)))
+pub fn vmla_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_f32(a, b, vdup_laneq_f32::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_u32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27374,27 +24931,25 @@ pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
-    let mut ret_val: uint32x4x4_t = transmute(vld4q_s32(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) };
-    ret_val
+pub fn vmlaq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlaq_f32(a, b, vdupq_lane_f32::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27403,22 +24958,25 @@ pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
-    transmute(vld4_s8(transmute(a)))
+pub fn vmlaq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_f32(a, b, vdupq_laneq_f32::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27427,27 +24985,21 @@ pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
-    let mut ret_val: poly8x8x4_t = transmute(vld4_s8(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27456,22 +25008,21 @@ pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
-    transmute(vld4q_s8(transmute(a)))
+pub fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27480,51 +25031,21 @@ pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
-    let mut ret_val: poly8x16x4_t = transmute(vld4q_s8(transmute(a)));
-    ret_val.0 = unsafe {
-        simd_shuffle!(
-            ret_val.0,
-            ret_val.0,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.1 = unsafe {
-        simd_shuffle!(
-            ret_val.1,
-            ret_val.1,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.2 = unsafe {
-        simd_shuffle!(
-            ret_val.2,
-            ret_val.2,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val.3 = unsafe {
-        simd_shuffle!(
-            ret_val.3,
-            ret_val.3,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    };
-    ret_val
+pub fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmla_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27533,22 +25054,21 @@ pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
-    transmute(vld4_s16(transmute(a)))
+pub fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmla_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27557,27 +25077,21 @@ pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
-    let mut ret_val: poly16x4x4_t = transmute(vld4_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [3, 2, 1, 0]) };
-    ret_val
+pub fn vmlaq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27586,22 +25100,21 @@ pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
-    transmute(vld4q_s16(transmute(a)))
+pub fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
-#[doc = "Load multiple 4-element structures to four registers"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld4q_p16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ld4)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27610,26 +25123,21 @@ pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
-    let mut ret_val: poly16x8x4_t = transmute(vld4q_s16(transmute(a)));
-    ret_val.0 = unsafe { simd_shuffle!(ret_val.0, ret_val.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.1 = unsafe { simd_shuffle!(ret_val.1, ret_val.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.2 = unsafe { simd_shuffle!(ret_val.2, ret_val.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    ret_val
+pub fn vmlaq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlaq_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
-#[doc = "Store SIMD&FP register (immediate offset)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vldrq_p128)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27638,65 +25146,21 @@ pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub unsafe fn vldrq_p128(a: *const p128) -> p128 {
-    *a
-}
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v4f16"
-        )]
-        fn _vmax_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vmax_f16(a, b) }
-}
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v8f16"
-        )]
-        fn _vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
-    }
-    unsafe { _vmaxq_f16(a, b) }
+pub fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlaq_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27705,27 +25169,21 @@ pub fn vmaxq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v2f32"
-        )]
-        fn _vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vmax_f32(a, b) }
+pub fn vmla_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmla_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmax)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27734,27 +25192,21 @@ pub fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmax.v4f32"
-        )]
-        fn _vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vmaxq_f32(a, b) }
+pub fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmla_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s8)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27763,22 +25215,21 @@ pub fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe {
-        let mask: int8x8_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s8)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27787,22 +25238,21 @@ pub fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe {
-        let mask: int8x16_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmla_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27811,22 +25261,21 @@ pub fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe {
-        let mask: int16x4_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlaq_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27835,22 +25284,21 @@ pub fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe {
-        let mask: int16x8_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlaq_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27859,22 +25307,21 @@ pub fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe {
-        let mask: int32x2_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_s32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smax)
+    assert_instr(mla, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -27883,21 +25330,19 @@ pub fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe {
-        let mask: int32x4_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlaq_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u8)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27907,21 +25352,18 @@ pub fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let mask: uint8x8_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vmla_f32(a, b, vdup_n_f32(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u8)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27931,21 +25373,18 @@ pub fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe {
-        let mask: uint8x16_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vmlaq_f32(a, b, vdupq_n_f32(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27955,21 +25394,18 @@ pub fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe {
-        let mask: uint16x4_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
+    vmla_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u16)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -27979,21 +25415,18 @@ pub fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe {
-        let mask: uint16x8_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
+    vmlaq_s16(a, b, vdupq_n_s16(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28003,21 +25436,18 @@ pub fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe {
-        let mask: uint32x2_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
+    vmla_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Maximum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxq_u32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umax)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28027,51 +25457,18 @@ pub fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe {
-        let mask: uint32x4_t = simd_ge(a, b);
-        simd_select(mask, a, b)
-    }
-}
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmaxnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_fmax(a, b) }
-}
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_fmax(a, b) }
+pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
+    vmlaq_u16(a, b, vdupq_n_u16(c))
 }
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnm_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28081,18 +25478,18 @@ pub fn vmaxnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe { simd_fmax(a, b) }
+pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
+    vmla_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Floating-point Maximum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmaxnmq_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxnm)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28102,64 +25499,18 @@ pub fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_fmax(a, b) }
-}
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v4f16"
-        )]
-        fn _vmin_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vmin_f16(a, b) }
-}
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v8f16"
-        )]
-        fn _vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
-    }
-    unsafe { _vminq_f16(a, b) }
+pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
+    vmlaq_s32(a, b, vdupq_n_s32(c))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28169,26 +25520,18 @@ pub fn vminq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v2f32"
-        )]
-        fn _vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vmin_f32(a, b) }
+pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
+    vmla_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_f32)"]
+#[doc = "Vector multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28198,26 +25541,18 @@ pub fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmin.v4f32"
-        )]
-        fn _vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vminq_f32(a, b) }
+pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlaq_u32(a, b, vdupq_n_u32(c))
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s8)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28227,21 +25562,18 @@ pub fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe {
-        let mask: int8x8_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s8)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28251,21 +25583,18 @@ pub fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe {
-        let mask: int8x16_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28275,21 +25604,18 @@ pub fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe {
-        let mask: int16x4_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28299,21 +25625,18 @@ pub fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe {
-        let mask: int16x8_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_s32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28323,21 +25646,18 @@ pub fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe {
-        let mask: int32x2_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_s32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28347,21 +25667,18 @@ pub fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe {
-        let mask: int32x4_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u8)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28371,21 +25688,18 @@ pub fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe {
-        let mask: uint8x8_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u8)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28395,21 +25709,18 @@ pub fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe {
-        let mask: uint8x16_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28419,21 +25730,18 @@ pub fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe {
-        let mask: uint16x4_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u16)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28443,21 +25751,18 @@ pub fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe {
-        let mask: uint16x8_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmin_u32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28467,21 +25772,18 @@ pub fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe {
-        let mask: uint32x2_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
+pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Minimum (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminq_u32)"]
+#[doc = "Multiply-add to accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umin)
+    assert_instr(mla)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -28491,52 +25793,20 @@ pub fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe {
-        let mask: uint32x4_t = simd_le(a, b);
-        simd_select(mask, a, b)
-    }
-}
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vminnm_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_fmin(a, b) }
-}
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
-    unsafe { simd_fmin(a, b) }
+pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_add(a, simd_mul(b, c)) }
 }
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnm_f32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28545,19 +25815,21 @@ pub fn vminnmq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe { simd_fmin(a, b) }
+pub fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
-#[doc = "Floating-point Minimum Number (vector)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vminnmq_f32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminnm)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28566,19 +25838,21 @@ pub fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
-    unsafe { simd_fmin(a, b) }
+pub fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
-#[doc = "Floating-point multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_f32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28587,19 +25861,21 @@ pub fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlal_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
-#[doc = "Floating-point multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_f32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(smlal, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28608,18 +25884,19 @@ pub fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_f32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(umlal, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -28630,23 +25907,19 @@ pub fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_lane_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x2_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlal_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_f32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(umlal, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -28657,23 +25930,19 @@ pub fn vmla_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_laneq_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x4_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmla_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlal_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_f32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(umlal, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -28684,29 +25953,19 @@ pub fn vmla_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_lane_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x2_t,
-) -> float32x4_t {
+pub fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlaq_f32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_f32)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(umlal, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -28717,31 +25976,20 @@ pub fn vmlaq_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_laneq_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x4_t,
-) -> float32x4_t {
+pub fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_f32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlal_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s16)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28750,27 +25998,19 @@ pub fn vmlaq_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmla_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlal_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u16)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28779,27 +26019,19 @@ pub fn vmla_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmla_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlal_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s16)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(umlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28808,27 +26040,19 @@ pub fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmla_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlal_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u16)"]
+#[doc = "Vector widening multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(umlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28837,27 +26061,19 @@ pub fn vmla_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmla_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
+    vmlal_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s16)"]
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28866,40 +26082,19 @@ pub fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    unsafe { simd_add(a, vmull_s8(b, c)) }
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u16)"]
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28908,40 +26103,19 @@ pub fn vmlaq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    unsafe { simd_add(a, vmull_s16(b, c)) }
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s16)"]
+#[doc = "Signed multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(smlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28950,40 +26124,19 @@ pub fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlaq_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    unsafe { simd_add(a, vmull_s32(b, c)) }
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u16)"]
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(umlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -28992,40 +26145,19 @@ pub fn vmlaq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlaq_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    unsafe { simd_add(a, vmull_u8(b, c)) }
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_s32)"]
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(umlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29034,21 +26166,19 @@ pub fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    unsafe { simd_add(a, vmull_u16(b, c)) }
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_lane_u32)"]
+#[doc = "Unsigned multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(umlal)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29057,21 +26187,19 @@ pub fn vmla_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    unsafe { simd_add(a, vmull_u32(b, c)) }
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_s32)"]
+#[doc = "Floating-point multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(fmul)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29080,21 +26208,19 @@ pub fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmla_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_laneq_u32)"]
+#[doc = "Floating-point multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(fmul)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29103,19 +26229,18 @@ pub fn vmla_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmla_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(fmul, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -29126,25 +26251,23 @@ pub fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+pub fn vmls_lane_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x2_t,
+) -> float32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlaq_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmls_f32(a, b, vdup_lane_f32::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_lane_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(fmul, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -29155,25 +26278,23 @@ pub fn vmlaq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlaq_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmls_laneq_f32<const LANE: i32>(
+    a: float32x2_t,
+    b: float32x2_t,
+    c: float32x4_t,
+) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_f32(a, b, vdup_laneq_f32::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(fmul, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -29184,25 +26305,23 @@ pub fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlsq_lane_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x2_t,
+) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsq_f32(a, b, vdupq_lane_f32::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_laneq_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla, LANE = 1)
+    assert_instr(fmul, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -29213,26 +26332,25 @@ pub fn vmlaq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+pub fn vmlsq_laneq_f32<const LANE: i32>(
+    a: float32x4_t,
+    b: float32x4_t,
+    c: float32x4_t,
+) -> float32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlaq_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsq_f32(a, b, vdupq_laneq_f32::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29241,19 +26359,21 @@ pub fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vmla_f32(a, b, vdup_n_f32(c))
+pub fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_f32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29262,19 +26382,21 @@ pub fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vmlaq_f32(a, b, vdupq_n_f32(c))
+pub fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29283,19 +26405,21 @@ pub fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
-    vmla_s16(a, b, vdup_n_s16(c))
+pub fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmls_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29304,19 +26428,21 @@ pub fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
-    vmlaq_s16(a, b, vdupq_n_s16(c))
+pub fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmls_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29325,19 +26451,21 @@ pub fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
-    vmla_u16(a, b, vdup_n_u16(c))
+pub fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsq_s16(a, b, vdupq_lane_s16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29346,19 +26474,21 @@ pub fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
-    vmlaq_u16(a, b, vdupq_n_u16(c))
+pub fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsq_u16(a, b, vdupq_lane_u16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29367,19 +26497,21 @@ pub fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
-    vmla_s32(a, b, vdup_n_s32(c))
+pub fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsq_s16(a, b, vdupq_laneq_s16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29388,19 +26520,21 @@ pub fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
-    vmlaq_s32(a, b, vdupq_n_s32(c))
+pub fn vmlsq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsq_u16(a, b, vdupq_laneq_u16::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_n_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29409,19 +26543,21 @@ pub fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
-    vmla_u32(a, b, vdup_n_u32(c))
+pub fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmls_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
-#[doc = "Vector multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_n_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29430,19 +26566,21 @@ pub fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
-    vmlaq_u32(a, b, vdupq_n_u32(c))
+pub fn vmls_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmls_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29451,19 +26589,21 @@ pub fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmls_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29472,19 +26612,21 @@ pub fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmls_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmls_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29493,19 +26635,21 @@ pub fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsq_s32(a, b, vdupq_lane_s32::<LANE>(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29514,19 +26658,21 @@ pub fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsq_u32(a, b, vdupq_lane_u32::<LANE>(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29535,19 +26681,21 @@ pub fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsq_s32(a, b, vdupq_laneq_s32::<LANE>(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29556,18 +26704,19 @@ pub fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmlsq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsq_u32(a, b, vdupq_laneq_u32::<LANE>(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29577,18 +26726,18 @@ pub fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vmls_f32(a, b, vdup_n_f32(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u8)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29598,18 +26747,18 @@ pub fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vmlsq_f32(a, b, vdupq_n_f32(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29619,18 +26768,18 @@ pub fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
+    vmls_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29640,18 +26789,18 @@ pub fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
+    vmlsq_s16(a, b, vdupq_n_s16(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmla_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29661,18 +26810,18 @@ pub fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
+    vmls_u16(a, b, vdup_n_u16(c))
 }
-#[doc = "Multiply-add to accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlaq_u32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mla)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29682,20 +26831,19 @@ pub fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_add(a, simd_mul(b, c)) }
+pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
+    vmlsq_u16(a, b, vdupq_n_u16(c))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29704,27 +26852,19 @@ pub fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
+    vmls_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s16)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29733,27 +26873,19 @@ pub fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlal_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
+    vmlsq_s32(a, b, vdupq_n_s32(c))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29762,21 +26894,19 @@ pub fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
+    vmls_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_s32)"]
+#[doc = "Vector multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29785,21 +26915,19 @@ pub fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlal_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlsq_u32(a, b, vdupq_n_u32(c))
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29808,27 +26936,19 @@ pub fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlal_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29837,27 +26957,19 @@ pub fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlal_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_lane_u32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29866,21 +26978,19 @@ pub fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_laneq_u32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal, LANE = 1)
+    assert_instr(mls)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -29889,19 +26999,18 @@ pub fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlal_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29911,18 +27020,18 @@ pub fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vmlal_s16(a, b, vdup_n_s16(c))
+pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_s32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29932,18 +27041,18 @@ pub fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vmlal_s32(a, b, vdup_n_s32(c))
+pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29953,18 +27062,18 @@ pub fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
-    vmlal_u16(a, b, vdup_n_u16(c))
+pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Vector widening multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_n_u32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29974,18 +27083,18 @@ pub fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
-    vmlal_u32(a, b, vdup_n_u32(c))
+pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s8)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -29995,18 +27104,18 @@ pub fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    unsafe { simd_add(a, vmull_s8(b, c)) }
+pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s16)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30016,18 +27125,18 @@ pub fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    unsafe { simd_add(a, vmull_s16(b, c)) }
+pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Signed multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_s32)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlal)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30037,18 +27146,18 @@ pub fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    unsafe { simd_add(a, vmull_s32(b, c)) }
+pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u8)"]
+#[doc = "Multiply-subtract from accumulator"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(mls)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30058,19 +27167,20 @@ pub fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    unsafe { simd_add(a, vmull_u8(b, c)) }
+pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_sub(a, simd_mul(b, c)) }
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30079,19 +27189,21 @@ pub fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    unsafe { simd_add(a, vmull_u16(b, c)) }
+pub fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_s16(a, b, vdup_lane_s16::<LANE>(c))
 }
-#[doc = "Unsigned multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_u32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlal)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30100,19 +27212,21 @@ pub fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    unsafe { simd_add(a, vmull_u32(b, c)) }
+pub fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsl_s16(a, b, vdup_laneq_s16::<LANE>(c))
 }
-#[doc = "Floating-point multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_f32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30121,19 +27235,21 @@ pub fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    vmlsl_s32(a, b, vdup_lane_s32::<LANE>(c))
 }
-#[doc = "Floating-point multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_f32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(smlsl, LANE = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30142,18 +27258,19 @@ pub fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_s32(a, b, vdup_laneq_s32::<LANE>(c))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_f32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(umlsl, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -30164,23 +27281,19 @@ pub fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x2_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmlsl_u16(a, b, vdup_lane_u16::<LANE>(c))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_f32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(umlsl, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -30191,23 +27304,19 @@ pub fn vmls_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_f32<const LANE: i32>(
-    a: float32x2_t,
-    b: float32x2_t,
-    c: float32x4_t,
-) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmls_f32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmlsl_u16(a, b, vdup_laneq_u16::<LANE>(c))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_f32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(umlsl, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -30218,29 +27327,19 @@ pub fn vmls_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_lane_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x2_t,
-) -> float32x4_t {
+pub fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsq_f32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_u32(a, b, vdup_lane_u32::<LANE>(c))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_f32)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(umlsl, LANE = 1)
 )]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(
@@ -30251,31 +27350,20 @@ pub fn vmlsq_lane_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_f32<const LANE: i32>(
-    a: float32x4_t,
-    b: float32x4_t,
-    c: float32x4_t,
-) -> float32x4_t {
+pub fn vmlsl_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_f32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmlsl_u32(a, b, vdup_laneq_u32::<LANE>(c))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30284,27 +27372,19 @@ pub fn vmlsq_laneq_f32<const LANE: i32>(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmls_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlsl_s16(a, b, vdup_n_s16(c))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30313,27 +27393,19 @@ pub fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmls_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlsl_s32(a, b, vdup_n_s32(c))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s16)"]
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30342,27 +27414,19 @@ pub fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmls_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
-}
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u16)"]
+pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlsl_u16(a, b, vdup_n_u16(c))
+}
+#[doc = "Vector widening multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30371,27 +27435,19 @@ pub fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmls_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
+    vmlsl_u32(a, b, vdup_n_u32(c))
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s16)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30400,40 +27456,19 @@ pub fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    unsafe { simd_sub(a, vmull_s8(b, c)) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u16)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30442,40 +27477,19 @@ pub fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    unsafe { simd_sub(a, vmull_s16(b, c)) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s16)"]
+#[doc = "Signed multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(smlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30484,40 +27498,19 @@ pub fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsq_s16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    unsafe { simd_sub(a, vmull_s32(b, c)) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u16)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30526,40 +27519,19 @@ pub fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsq_u16(
-            a,
-            b,
-            simd_shuffle!(
-                c,
-                c,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    unsafe { simd_sub(a, vmull_u8(b, c)) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_s32)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30568,21 +27540,19 @@ pub fn vmlsq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    unsafe { simd_sub(a, vmull_u16(b, c)) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_lane_u32)"]
+#[doc = "Unsigned multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(umlsl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30591,96 +27561,109 @@ pub fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    unsafe { simd_sub(a, vmull_u32(b, c)) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_s32)"]
+#[doc = "8-bit integer matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(smmla)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmls_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")]
+        fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
+    }
+    unsafe { _vmmlaq_s32(a, b, c) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_laneq_u32)"]
+#[doc = "8-bit integer matrix multiply-accumulate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(ummla)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmls_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")]
+        fn _vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t;
+    }
+    unsafe { _vmmlaq_u32(a, b, c) }
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_s32)"]
+#[doc = "Duplicate element to vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
-)]
-#[rustc_legacy_const_generics(3)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    assert_instr(dup)
 )]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmov_n_f16(a: f16) -> float16x4_t {
+    vdup_n_f16(a)
+}
+#[doc = "Duplicate element to vector"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(dup)
 )]
-pub fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsq_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmovq_n_f16(a: f16) -> float16x8_t {
+    vdupq_n_f16(a)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_lane_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30689,27 +27672,19 @@ pub fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        vmlsq_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmov_n_f32(value: f32) -> float32x2_t {
+    vdup_n_f32(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_s32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30718,27 +27693,19 @@ pub fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_s32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmov_n_p16(value: p16) -> poly16x4_t {
+    vdup_n_p16(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_laneq_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls, LANE = 1)
+    assert_instr(dup)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -30747,25 +27714,18 @@ pub fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsq_u32(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmov_n_p8(value: p8) -> poly8x8_t {
+    vdup_n_p8(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_f32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30775,18 +27735,18 @@ pub fn vmlsq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
-    vmls_f32(a, b, vdup_n_f32(c))
+pub fn vmov_n_s16(value: i16) -> int16x4_t {
+    vdup_n_s16(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_f32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30796,18 +27756,18 @@ pub fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
-    vmlsq_f32(a, b, vdupq_n_f32(c))
+pub fn vmov_n_s32(value: i32) -> int32x2_t {
+    vdup_n_s32(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmov)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30817,18 +27777,18 @@ pub fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
-    vmls_s16(a, b, vdup_n_s16(c))
+pub fn vmov_n_s64(value: i64) -> int64x1_t {
+    vdup_n_s64(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30838,18 +27798,18 @@ pub fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
-    vmlsq_s16(a, b, vdupq_n_s16(c))
+pub fn vmov_n_s8(value: i8) -> int8x8_t {
+    vdup_n_s8(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30859,18 +27819,18 @@ pub fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
-    vmls_u16(a, b, vdup_n_u16(c))
+pub fn vmov_n_u16(value: u16) -> uint16x4_t {
+    vdup_n_u16(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30880,18 +27840,18 @@ pub fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
-    vmlsq_u16(a, b, vdupq_n_u16(c))
+pub fn vmov_n_u32(value: u32) -> uint32x2_t {
+    vdup_n_u32(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_s32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(fmov)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30901,18 +27861,18 @@ pub fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
-    vmls_s32(a, b, vdup_n_s32(c))
+pub fn vmov_n_u64(value: u64) -> uint64x1_t {
+    vdup_n_u64(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_s32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30922,18 +27882,18 @@ pub fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
-    vmlsq_s32(a, b, vdupq_n_s32(c))
+pub fn vmov_n_u8(value: u8) -> uint8x8_t {
+    vdup_n_u8(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_n_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30943,18 +27903,18 @@ pub fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
-    vmls_u32(a, b, vdup_n_u32(c))
+pub fn vmovq_n_f32(value: f32) -> float32x4_t {
+    vdupq_n_f32(value)
 }
-#[doc = "Vector multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_n_u32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30964,18 +27924,18 @@ pub fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
-    vmlsq_u32(a, b, vdupq_n_u32(c))
+pub fn vmovq_n_p16(value: p16) -> poly16x8_t {
+    vdupq_n_p16(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -30985,18 +27945,18 @@ pub fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovq_n_p8(value: p8) -> poly8x16_t {
+    vdupq_n_p8(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31006,18 +27966,18 @@ pub fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovq_n_s16(value: i16) -> int16x8_t {
+    vdupq_n_s16(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31027,18 +27987,18 @@ pub fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovq_n_s32(value: i32) -> int32x4_t {
+    vdupq_n_s32(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31048,18 +28008,18 @@ pub fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovq_n_s64(value: i64) -> int64x2_t {
+    vdupq_n_s64(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_s32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31069,18 +28029,18 @@ pub fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovq_n_s8(value: i8) -> int8x16_t {
+    vdupq_n_s8(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_s32)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31090,18 +28050,18 @@ pub fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovq_n_u16(value: u16) -> uint16x8_t {
+    vdupq_n_u16(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31111,18 +28071,18 @@ pub fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovq_n_u32(value: u32) -> uint32x4_t {
+    vdupq_n_u32(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u8)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31132,18 +28092,18 @@ pub fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovq_n_u64(value: u64) -> uint64x2_t {
+    vdupq_n_u64(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u16)"]
+#[doc = "Duplicate vector element to vector or scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(dup)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31153,18 +28113,18 @@ pub fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovq_n_u8(value: u8) -> uint8x16_t {
+    vdupq_n_u8(value)
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u16)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(sxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31174,18 +28134,18 @@ pub fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovl_s16(a: int16x4_t) -> int32x4_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmls_u32)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(sxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31195,18 +28155,18 @@ pub fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovl_s32(a: int32x2_t) -> int64x2_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Multiply-subtract from accumulator"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsq_u32)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mls)
+    assert_instr(sxtl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31216,20 +28176,19 @@ pub fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_sub(a, simd_mul(b, c)) }
+pub fn vmovl_s8(a: int8x8_t) -> int16x8_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s16)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(uxtl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31238,27 +28197,19 @@ pub fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s16)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(uxtl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31267,27 +28218,19 @@ pub fn vmlsl_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsl_s16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_s32)"]
+#[doc = "Vector long move."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(uxtl)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31296,21 +28239,19 @@ pub fn vmlsl_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_s32)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl, LANE = 1)
+    assert_instr(xtn)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31319,21 +28260,19 @@ pub fn vmlsl_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlsl_s32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmovn_s16(a: int16x8_t) -> int8x8_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u16)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(xtn)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31342,27 +28281,19 @@ pub fn vmlsl_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmlsl_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmovn_s32(a: int32x4_t) -> int16x4_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u16)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(xtn)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31371,27 +28302,19 @@ pub fn vmlsl_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmlsl_u16(
-            a,
-            b,
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmovn_s64(a: int64x2_t) -> int32x2_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_lane_u32)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(xtn)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31400,21 +28323,19 @@ pub fn vmlsl_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_laneq_u32)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl, LANE = 1)
+    assert_instr(xtn)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31423,19 +28344,18 @@ pub fn vmlsl_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmlsl_u32(a, b, simd_shuffle!(c, c, [LANE as u32, LANE as u32])) }
+pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s16)"]
+#[doc = "Vector narrow integer."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(xtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31445,60 +28365,62 @@ pub fn vmlsl_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vmlsl_s16(a, b, vdup_n_s16(c))
+pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t {
+    unsafe { simd_cast(a) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(fmul)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vmlsl_s32(a, b, vdup_n_s32(c))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
-    vmlsl_u16(a, b, vdup_n_u16(c))
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector widening multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_n_u32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31508,18 +28430,18 @@ pub fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
-    vmlsl_u32(a, b, vdup_n_u32(c))
+pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -31529,61 +28451,68 @@ pub fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
-    unsafe { simd_sub(a, vmull_s8(b, c)) }
+pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    unsafe { simd_sub(a, vmull_s16(b, c)) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_lane_f16::<LANE>(v)) }
 }
-#[doc = "Signed multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smlsl)
+    assert_instr(fmul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    unsafe { simd_sub(a, vmull_s32(b, c)) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_lane_f16::<LANE>(v)) }
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u8)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31592,19 +28521,21 @@ pub fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
-    unsafe { simd_sub(a, vmull_u8(b, c)) }
+pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdup_lane_f32::<LANE>(b)) }
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u16)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31613,19 +28544,21 @@ pub fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
-    unsafe { simd_sub(a, vmull_u16(b, c)) }
+pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_laneq_f32::<LANE>(b)) }
 }
-#[doc = "Unsigned multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_u32)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umlsl)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31634,107 +28567,90 @@ pub fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
-    unsafe { simd_sub(a, vmull_u32(b, c)) }
+pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdupq_lane_f32::<LANE>(b)) }
 }
-#[doc = "8-bit integer matrix multiply-accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_s32)"]
+#[doc = "Floating-point multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smmla)
+    assert_instr(fmul, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")]
-        fn _vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t;
-    }
-    unsafe { _vmmlaq_s32(a, b, c) }
+pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_laneq_f32::<LANE>(b)) }
 }
-#[doc = "8-bit integer matrix multiply-accumulate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmmlaq_u32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"]
 #[inline]
-#[target_feature(enable = "neon,i8mm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(ummla)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
-    unstable(feature = "stdarch_neon_i8mm", issue = "117223")
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")]
-        fn _vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t;
-    }
-    unsafe { _vmmlaq_u32(a, b, c) }
+pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_lane_s16::<LANE>(b)) }
 }
-#[doc = "Duplicate element to vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmov_n_f16(a: f16) -> float16x4_t {
-    vdup_n_f16(a)
-}
-#[doc = "Duplicate element to vector"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmovq_n_f16(a: f16) -> float16x8_t {
-    vdupq_n_f16(a)
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_lane_s16::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_f32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31743,19 +28659,21 @@ pub fn vmovq_n_f16(a: f16) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_f32(value: f32) -> float32x2_t {
-    vdup_n_f32(value)
+pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdup_lane_s32::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31764,19 +28682,21 @@ pub fn vmov_n_f32(value: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_p16(value: p16) -> poly16x4_t {
-    vdup_n_p16(value)
+pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdupq_lane_s32::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_p8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31785,19 +28705,21 @@ pub fn vmov_n_p16(value: p16) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_p8(value: p8) -> poly8x8_t {
-    vdup_n_p8(value)
+pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_lane_u16::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31806,19 +28728,21 @@ pub fn vmov_n_p8(value: p8) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_s16(value: i16) -> int16x4_t {
-    vdup_n_s16(value)
+pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_lane_u16::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31827,19 +28751,21 @@ pub fn vmov_n_s16(value: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_s32(value: i32) -> int32x2_t {
-    vdup_n_s32(value)
+pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdup_lane_u32::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31848,19 +28774,21 @@ pub fn vmov_n_s32(value: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_s64(value: i64) -> int64x1_t {
-    vdup_n_s64(value)
+pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    unsafe { simd_mul(a, vdupq_lane_u32::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_s8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31869,19 +28797,21 @@ pub fn vmov_n_s64(value: i64) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_s8(value: i8) -> int8x8_t {
-    vdup_n_s8(value)
+pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_mul(a, vdup_laneq_s16::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31890,19 +28820,21 @@ pub fn vmov_n_s8(value: i8) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_u16(value: u16) -> uint16x4_t {
-    vdup_n_u16(value)
+pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_mul(a, vdupq_laneq_s16::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31911,19 +28843,21 @@ pub fn vmov_n_u16(value: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_u32(value: u32) -> uint32x2_t {
-    vdup_n_u32(value)
+pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_laneq_s32::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmov)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31932,19 +28866,21 @@ pub fn vmov_n_u32(value: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_u64(value: u64) -> uint64x1_t {
-    vdup_n_u64(value)
+pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_laneq_s32::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmov_n_u8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31953,19 +28889,21 @@ pub fn vmov_n_u64(value: u64) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmov_n_u8(value: u8) -> uint8x8_t {
-    vdup_n_u8(value)
+pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_mul(a, vdup_laneq_u16::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_f32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31974,19 +28912,21 @@ pub fn vmov_n_u8(value: u8) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_f32(value: f32) -> float32x4_t {
-    vdupq_n_f32(value)
+pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    unsafe { simd_mul(a, vdupq_laneq_u16::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -31995,19 +28935,21 @@ pub fn vmovq_n_f32(value: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_p16(value: p16) -> poly16x8_t {
-    vdupq_n_p16(value)
+pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdup_laneq_u32::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_p8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32016,18 +28958,49 @@ pub fn vmovq_n_p16(value: p16) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_p8(value: p8) -> poly8x16_t {
-    vdupq_n_p8(value)
+pub fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    unsafe { simd_mul(a, vdupq_laneq_u32::<LANE>(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmul)
+)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t {
+    unsafe { simd_mul(a, vdup_n_f16(b)) }
+}
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(fmul)
+)]
+#[target_feature(enable = "neon,fp16")]
+#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t {
+    unsafe { simd_mul(a, vdupq_n_f16(b)) }
+}
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32037,18 +29010,18 @@ pub fn vmovq_n_p8(value: p8) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_s16(value: i16) -> int16x8_t {
-    vdupq_n_s16(value)
+pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
+    unsafe { simd_mul(a, vdup_n_f32(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(fmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32058,18 +29031,18 @@ pub fn vmovq_n_s16(value: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_s32(value: i32) -> int32x4_t {
-    vdupq_n_s32(value)
+pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
+    unsafe { simd_mul(a, vdupq_n_f32(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s64)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32079,18 +29052,18 @@ pub fn vmovq_n_s32(value: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_s64(value: i64) -> int64x2_t {
-    vdupq_n_s64(value)
+pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    unsafe { simd_mul(a, vdup_n_s16(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_s8)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32100,18 +29073,18 @@ pub fn vmovq_n_s64(value: i64) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_s8(value: i8) -> int8x16_t {
-    vdupq_n_s8(value)
+pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    unsafe { simd_mul(a, vdupq_n_s16(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32121,18 +29094,18 @@ pub fn vmovq_n_s8(value: i8) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_u16(value: u16) -> uint16x8_t {
-    vdupq_n_u16(value)
+pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    unsafe { simd_mul(a, vdup_n_s32(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32142,18 +29115,18 @@ pub fn vmovq_n_u16(value: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_u32(value: u32) -> uint32x4_t {
-    vdupq_n_u32(value)
+pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    unsafe { simd_mul(a, vdupq_n_s32(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u64)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32163,18 +29136,18 @@ pub fn vmovq_n_u32(value: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_u64(value: u64) -> uint64x2_t {
-    vdupq_n_u64(value)
+pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
+    unsafe { simd_mul(a, vdup_n_u16(b)) }
 }
-#[doc = "Duplicate vector element to vector or scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovq_n_u8)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(dup)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32184,18 +29157,18 @@ pub fn vmovq_n_u64(value: u64) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovq_n_u8(value: u8) -> uint8x16_t {
-    vdupq_n_u8(value)
+pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
+    unsafe { simd_mul(a, vdupq_n_u16(b)) }
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s16)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sxtl)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32205,18 +29178,18 @@ pub fn vmovq_n_u8(value: u8) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_s16(a: int16x4_t) -> int32x4_t {
-    unsafe { simd_cast(a) }
+pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
+    unsafe { simd_mul(a, vdup_n_u32(b)) }
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s32)"]
+#[doc = "Vector multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sxtl)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32226,18 +29199,18 @@ pub fn vmovl_s16(a: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_s32(a: int32x2_t) -> int64x2_t {
-    unsafe { simd_cast(a) }
+pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
+    unsafe { simd_mul(a, vdupq_n_u32(b)) }
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_s8)"]
+#[doc = "Polynomial multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sxtl)
+    assert_instr(pmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32247,18 +29220,26 @@ pub fn vmovl_s32(a: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_s8(a: int8x8_t) -> int16x8_t {
-    unsafe { simd_cast(a) }
+pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.pmul.v8i8"
+        )]
+        fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t;
+    }
+    unsafe { _vmul_p8(a, b) }
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u16)"]
+#[doc = "Polynomial multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uxtl)
+    assert_instr(pmul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32268,18 +29249,26 @@ pub fn vmovl_s8(a: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
-    unsafe { simd_cast(a) }
+pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.pmul.v16i8"
+        )]
+        fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t;
+    }
+    unsafe { _vmulq_p8(a, b) }
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uxtl)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32289,18 +29278,18 @@ pub fn vmovl_u16(a: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
-    unsafe { simd_cast(a) }
+pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector long move."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovl_u8)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uxtl)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32310,18 +29299,18 @@ pub fn vmovl_u32(a: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
-    unsafe { simd_cast(a) }
+pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32331,18 +29320,18 @@ pub fn vmovl_u8(a: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_s16(a: int16x8_t) -> int8x8_t {
-    unsafe { simd_cast(a) }
+pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32352,18 +29341,18 @@ pub fn vmovn_s16(a: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_s32(a: int32x4_t) -> int16x4_t {
-    unsafe { simd_cast(a) }
+pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_s64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32373,18 +29362,18 @@ pub fn vmovn_s32(a: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_s64(a: int64x2_t) -> int32x2_t {
-    unsafe { simd_cast(a) }
+pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u16)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32394,18 +29383,18 @@ pub fn vmovn_s64(a: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
-    unsafe { simd_cast(a) }
+pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u32)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32415,18 +29404,18 @@ pub fn vmovn_u16(a: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
-    unsafe { simd_cast(a) }
+pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Vector narrow integer."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmovn_u64)"]
+#[doc = "Multiply"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(xtn)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32436,48 +29425,39 @@ pub fn vmovn_u32(a: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmovn_u64(a: uint64x2_t) -> uint32x2_t {
-    unsafe { simd_cast(a) }
+pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_mul(a, b) }
 }
 #[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mul)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmul_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe { simd_mul(a, b) }
-}
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe { simd_mul(a, b) }
 }
 #[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32487,18 +29467,18 @@ pub fn vmulq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe { simd_mul(a, b) }
 }
 #[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mul)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -32508,75 +29488,39 @@ pub fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { simd_mul(a, b) }
 }
 #[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    assert_instr(mul)
 )]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(v, v, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
-}
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 1)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                v,
-                v,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_mul(a, b) }
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(smull, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -32587,19 +29531,19 @@ pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float1
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+pub fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_s16(a, vdup_lane_s16::<LANE>(b))
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(smull, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -32610,19 +29554,19 @@ pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+pub fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_s16(a, vdup_laneq_s16::<LANE>(b))
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(smull, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -32633,24 +29577,19 @@ pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
+pub fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_s32(a, vdup_lane_s32::<LANE>(b))
 }
-#[doc = "Floating-point multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul, LANE = 0)
+    assert_instr(smull, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -32661,24 +29600,19 @@ pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float3
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+pub fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_s32(a, vdup_laneq_s32::<LANE>(b))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(umull, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -32689,24 +29623,19 @@ pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+pub fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+    vmull_u16(a, vdup_lane_u16::<LANE>(b))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(umull, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -32717,37 +29646,19 @@ pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
-}
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"]
+pub fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vmull_u16(a, vdup_laneq_u16::<LANE>(b))
+}
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(umull, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -32758,19 +29669,19 @@ pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+    vmull_u32(a, vdup_lane_u32::<LANE>(b))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"]
+#[doc = "Vector long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(umull, LANE = 1)
 )]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(
@@ -32781,26 +29692,20 @@ pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vmull_u32(a, vdup_laneq_u32::<LANE>(b))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(smull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32809,26 +29714,19 @@ pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+    vmull_s16(a, vdup_n_s16(b))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(smull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32837,39 +29735,19 @@ pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+    vmull_s32(a, vdup_n_s32(b))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(umull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32878,21 +29756,19 @@ pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
+    vmull_u16(a, vdup_n_u16(b))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"]
+#[doc = "Vector long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(umull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32901,26 +29777,19 @@ pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
+    vmull_u32(a, vdup_n_u32(b))
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"]
+#[doc = "Polynomial multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(pmull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32929,26 +29798,27 @@ pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
+pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.pmull.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")]
+        fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
     }
+    unsafe { _vmull_p8(a, b) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(smull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32957,39 +29827,19 @@ pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(smull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -32998,21 +29848,19 @@ pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"]
+#[doc = "Signed multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(smull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33021,26 +29869,19 @@ pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(umull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33049,26 +29890,19 @@ pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(umull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33077,39 +29911,19 @@ pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(
-                b,
-                b,
-                [
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32,
-                    LANE as u32
-                ]
-            ),
-        )
-    }
+pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"]
+#[doc = "Unsigned multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(umull)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33118,21 +29932,19 @@ pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
+    unsafe { simd_mul(simd_cast(a), simd_cast(b)) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul, LANE = 1)
+    assert_instr(mvn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33141,54 +29953,41 @@ pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        simd_mul(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
+    let b = poly8x8_t::splat(255);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s16)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mvn)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmul_n_f16(a: float16x4_t, b: f16) -> float16x4_t {
-    unsafe { simd_mul(a, vdup_n_f16(b)) }
-}
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t {
-    unsafe { simd_mul(a, vdupq_n_f16(b)) }
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vmvn_s16(a: int16x4_t) -> int16x4_t {
+    let b = int16x4_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33198,18 +29997,19 @@ pub fn vmulq_n_f16(a: float16x8_t, b: f16) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
-    unsafe { simd_mul(a, vdup_n_f32(b)) }
+pub fn vmvn_s32(a: int32x2_t) -> int32x2_t {
+    let b = int32x2_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_f32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33219,18 +30019,19 @@ pub fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
-    unsafe { simd_mul(a, vdupq_n_f32(b)) }
+pub fn vmvn_s8(a: int8x8_t) -> int8x8_t {
+    let b = int8x8_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33240,18 +30041,19 @@ pub fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    unsafe { simd_mul(a, vdup_n_s16(b)) }
+pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
+    let b = uint16x4_t::splat(65_535);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33261,18 +30063,19 @@ pub fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    unsafe { simd_mul(a, vdupq_n_s16(b)) }
+pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
+    let b = uint32x2_t::splat(4_294_967_295);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_s32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33282,18 +30085,19 @@ pub fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    unsafe { simd_mul(a, vdup_n_s32(b)) }
+pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
+    let b = uint8x8_t::splat(255);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_s32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_p8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33303,18 +30107,19 @@ pub fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    unsafe { simd_mul(a, vdupq_n_s32(b)) }
+pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
+    let b = poly8x16_t::splat(255);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33324,18 +30129,19 @@ pub fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
-    unsafe { simd_mul(a, vdup_n_u16(b)) }
+pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
+    let b = int16x8_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u16)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33345,18 +30151,19 @@ pub fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
-    unsafe { simd_mul(a, vdupq_n_u16(b)) }
+pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
+    let b = int32x4_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_u32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33366,18 +30173,19 @@ pub fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
-    unsafe { simd_mul(a, vdup_n_u32(b)) }
+pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
+    let b = int8x16_t::splat(-1);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Vector multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_n_u32)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33387,18 +30195,19 @@ pub fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
-    unsafe { simd_mul(a, vdupq_n_u32(b)) }
+pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
+    let b = uint16x8_t::splat(65_535);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Polynomial multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_p8)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(pmul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33408,26 +30217,19 @@ pub fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmul.v8i8"
-        )]
-        fn _vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t;
-    }
-    unsafe { _vmul_p8(a, b) }
+pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
+    let b = uint32x4_t::splat(4_294_967_295);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Polynomial multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_p8)"]
+#[doc = "Vector bitwise not."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(pmul)
+    assert_instr(mvn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33437,68 +30239,63 @@ pub fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmul.v16i8"
-        )]
-        fn _vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t;
-    }
-    unsafe { _vmulq_p8(a, b) }
+pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
+    let b = uint8x16_t::splat(255);
+    unsafe { simd_xor(a, b) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s16)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(fneg)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_mul(a, b) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vneg_f16(a: float16x4_t) -> float16x4_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s16)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(fneg)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_mul(a, b) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vnegq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u16)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(fneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33508,18 +30305,18 @@ pub fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_mul(a, b) }
+pub fn vneg_f32(a: float32x2_t) -> float32x2_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u16)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(fneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33529,18 +30326,18 @@ pub fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_mul(a, b) }
+pub fn vnegq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33550,18 +30347,18 @@ pub fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_mul(a, b) }
+pub fn vneg_s8(a: int8x8_t) -> int8x8_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33571,18 +30368,18 @@ pub fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_mul(a, b) }
+pub fn vnegq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33592,18 +30389,18 @@ pub fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_mul(a, b) }
+pub fn vneg_s16(a: int16x4_t) -> int16x4_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u32)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33613,18 +30410,18 @@ pub fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_mul(a, b) }
+pub fn vnegq_s16(a: int16x8_t) -> int16x8_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_s8)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33634,18 +30431,18 @@ pub fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_mul(a, b) }
+pub fn vneg_s32(a: int32x2_t) -> int32x2_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_s8)"]
+#[doc = "Negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(neg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33655,18 +30452,18 @@ pub fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_mul(a, b) }
+pub fn vnegq_s32(a: int32x4_t) -> int32x4_t {
+    unsafe { simd_neg(a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_u8)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33676,18 +30473,19 @@ pub fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_mul(a, b) }
+pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    let c = int16x4_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Multiply"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_u8)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mul)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33697,20 +30495,20 @@ pub fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_mul(a, b) }
+pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    let c = int32x2_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33719,26 +30517,20 @@ pub fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_s16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    let c = int64x1_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33747,26 +30539,20 @@ pub fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmull_s16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    let c = int8x8_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33775,21 +30561,20 @@ pub fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    let c = int16x8_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull, LANE = 1)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33798,21 +30583,20 @@ pub fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmull_s32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    let c = int32x4_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33821,26 +30605,20 @@ pub fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        vmull_u16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    let c = int64x2_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33849,26 +30627,20 @@ pub fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        vmull_u16(
-            a,
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
-        )
-    }
+pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    let c = int8x16_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, c), a) }
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_lane_u32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33877,21 +30649,20 @@ pub fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe { vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    let c = int16x4_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_laneq_u32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull, LANE = 1)
+    assert_instr(orn)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -33900,19 +30671,19 @@ pub fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vmull_u32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
+pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    let c = int32x2_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33922,18 +30693,19 @@ pub fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
-    vmull_s16(a, vdup_n_s16(b))
+pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    let c = int64x1_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_s32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33943,18 +30715,19 @@ pub fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
-    vmull_s32(a, vdup_n_s32(b))
+pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    let c = int8x8_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33964,18 +30737,19 @@ pub fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
-    vmull_u16(a, vdup_n_u16(b))
+pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    let c = int16x8_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Vector long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_n_u32)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -33985,18 +30759,19 @@ pub fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
-    vmull_u32(a, vdup_n_u32(b))
+pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    let c = int32x4_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Polynomial multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_p8)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(pmull)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34006,26 +30781,19 @@ pub fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.pmull.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i16")]
-        fn _vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
-    }
-    unsafe { _vmull_p8(a, b) }
+pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    let c = int64x2_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s16)"]
+#[doc = "Vector bitwise inclusive OR NOT"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(orn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34035,26 +30803,19 @@ pub fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smull.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i32")]
-        fn _vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
-    }
-    unsafe { _vmull_s16(a, b) }
+pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    let c = int8x16_t::splat(-1);
+    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34064,26 +30825,18 @@ pub fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smull.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i64")]
-        fn _vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
-    }
-    unsafe { _vmull_s32(a, b) }
+pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Signed multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_s8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smull)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34093,26 +30846,18 @@ pub fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smull.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i16")]
-        fn _vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t;
-    }
-    unsafe { _vmull_s8(a, b) }
+pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34122,26 +30867,18 @@ pub fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umull.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i16")]
-        fn _vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t;
-    }
-    unsafe { _vmull_u8(a, b) }
+pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34151,26 +30888,18 @@ pub fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umull.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i32")]
-        fn _vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t;
-    }
-    unsafe { _vmull_u16(a, b) }
+pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Unsigned multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_u32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umull)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34180,26 +30909,18 @@ pub fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umull.v2i64"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i64")]
-        fn _vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t;
-    }
-    unsafe { _vmull_u32(a, b) }
+pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_p8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34209,19 +30930,18 @@ pub fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
-    let b = poly8x8_t::splat(255);
-    unsafe { simd_xor(a, b) }
+pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34231,19 +30951,18 @@ pub fn vmvn_p8(a: poly8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_s16(a: int16x4_t) -> int16x4_t {
-    let b = int16x4_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34253,19 +30972,18 @@ pub fn vmvn_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_s32(a: int32x2_t) -> int32x2_t {
-    let b = int32x2_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_s8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34275,19 +30993,18 @@ pub fn vmvn_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_s8(a: int8x8_t) -> int8x8_t {
-    let b = int8x8_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34297,19 +31014,18 @@ pub fn vmvn_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
-    let b = uint16x4_t::splat(65_535);
-    unsafe { simd_xor(a, b) }
+pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34319,19 +31035,18 @@ pub fn vmvn_u16(a: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
-    let b = uint32x2_t::splat(4_294_967_295);
-    unsafe { simd_xor(a, b) }
+pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvn_u8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34341,19 +31056,18 @@ pub fn vmvn_u32(a: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
-    let b = uint8x8_t::splat(255);
-    unsafe { simd_xor(a, b) }
+pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_p8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34363,19 +31077,18 @@ pub fn vmvn_u8(a: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
-    let b = poly8x16_t::splat(255);
-    unsafe { simd_xor(a, b) }
+pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s16)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34385,19 +31098,18 @@ pub fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
-    let b = int16x8_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s32)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34407,19 +31119,18 @@ pub fn vmvnq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
-    let b = int32x4_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_s8)"]
+#[doc = "Vector bitwise or (immediate, inclusive)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(orr)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34429,19 +31140,18 @@ pub fn vmvnq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
-    let b = int8x16_t::splat(-1);
-    unsafe { simd_xor(a, b) }
+pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_or(a, b) }
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34451,19 +31161,27 @@ pub fn vmvnq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
-    let b = uint16x8_t::splat(65_535);
-    unsafe { simd_xor(a, b) }
+pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
+    let x: int16x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_s8(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_s8(b), a);
+    };
+    x
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34473,19 +31191,27 @@ pub fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
-    let b = uint32x4_t::splat(4_294_967_295);
-    unsafe { simd_xor(a, b) }
+pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
+    let x: int16x8_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_s8(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_s8(b), a);
+    };
+    x
 }
-#[doc = "Vector bitwise not."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmvnq_u8)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(mvn)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34495,49 +31221,57 @@ pub fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t {
-    let b = uint8x16_t::splat(255);
-    unsafe { simd_xor(a, b) }
+pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
+    let x: int32x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_s16(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_s16(b), a);
+    };
+    x
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"]
 #[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    assert_instr(sadalp)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vneg_f16(a: float16x4_t) -> float16x4_t {
-    unsafe { simd_neg(a) }
-}
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vnegq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe { simd_neg(a) }
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
+    let x: int32x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_s16(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_s16(b), a);
+    };
+    x
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34547,18 +31281,27 @@ pub fn vnegq_f16(a: float16x8_t) -> float16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vneg_f32(a: float32x2_t) -> float32x2_t {
-    unsafe { simd_neg(a) }
+pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
+    let x: int64x1_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_s32(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_s32(b), a);
+    };
+    x
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_f32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fneg)
+    assert_instr(sadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34568,18 +31311,27 @@ pub fn vneg_f32(a: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vnegq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe { simd_neg(a) }
+pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
+    let x: int64x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_s32(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_s32(b), a);
+    };
+    x
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s8)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34589,18 +31341,27 @@ pub fn vnegq_f32(a: float32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vneg_s8(a: int8x8_t) -> int8x8_t {
-    unsafe { simd_neg(a) }
+pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
+    let x: uint16x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_u8(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_u8(b), a);
+    };
+    x
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s8)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34610,18 +31371,27 @@ pub fn vneg_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vnegq_s8(a: int8x16_t) -> int8x16_t {
-    unsafe { simd_neg(a) }
+pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
+    let x: uint16x8_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_u8(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_u8(b), a);
+    };
+    x
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34631,18 +31401,27 @@ pub fn vnegq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vneg_s16(a: int16x4_t) -> int16x4_t {
-    unsafe { simd_neg(a) }
+pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
+    let x: uint32x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_u16(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_u16(b), a);
+    };
+    x
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34652,18 +31431,27 @@ pub fn vneg_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vnegq_s16(a: int16x8_t) -> int16x8_t {
-    unsafe { simd_neg(a) }
+pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
+    let x: uint32x4_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_u16(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_u16(b), a);
+    };
+    x
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_s32)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34673,18 +31461,27 @@ pub fn vnegq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vneg_s32(a: int32x2_t) -> int32x2_t {
-    unsafe { simd_neg(a) }
+pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
+    let x: uint64x1_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadal_u32(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddl_u32(b), a);
+    };
+    x
 }
-#[doc = "Negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vnegq_s32)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(neg)
+    assert_instr(uadalp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34694,40 +31491,57 @@ pub fn vneg_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vnegq_s32(a: int32x4_t) -> int32x4_t {
-    unsafe { simd_neg(a) }
+pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
+    let x: uint64x2_t;
+    #[cfg(target_arch = "arm")]
+    {
+        x = priv_vpadalq_u32(a, b);
+    }
+    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
+    unsafe {
+        x = simd_add(vpaddlq_u32(b), a);
+    };
+    x
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s16)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(faddp)
 )]
+#[target_feature(enable = "neon,fp16")]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    let c = int16x4_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.faddp.v4f16"
+        )]
+        fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vpadd_f16(a, b) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s32)"]
+#[doc = "Floating-point add pairwise"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(faddp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34737,19 +31551,26 @@ pub fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    let c = int32x2_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.faddp.v2f32"
+        )]
+        fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vpadd_f32(a, b) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s64)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34759,19 +31580,26 @@ pub fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    let c = int64x1_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")]
+        fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vpadd_s8(a, b) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_s8)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34781,19 +31609,26 @@ pub fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    let c = int8x8_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
+        fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vpadd_s16(a, b) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s16)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34803,19 +31638,26 @@ pub fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    let c = int16x8_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.addp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")]
+        fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vpadd_s32(a, b) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s32)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34825,19 +31667,18 @@ pub fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    let c = int32x4_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s64)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34847,19 +31688,18 @@ pub fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    let c = int64x2_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_s8)"]
+#[doc = "Add pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(addp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34869,19 +31709,18 @@ pub fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    let c = int8x16_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, c), a) }
+pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u16)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34891,19 +31730,26 @@ pub fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    let c = int16x4_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")]
+        fn _vpaddl_s8(a: int8x8_t) -> int16x4_t;
+    }
+    unsafe { _vpaddl_s8(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34913,19 +31759,26 @@ pub fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    let c = int32x2_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")]
+        fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t;
+    }
+    unsafe { _vpaddlq_s8(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u64)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34935,19 +31788,26 @@ pub fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    let c = int64x1_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")]
+        fn _vpaddl_s16(a: int16x4_t) -> int32x2_t;
+    }
+    unsafe { _vpaddl_s16(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorn_u8)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34957,19 +31817,26 @@ pub fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    let c = int8x8_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
-}
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u16)"]
+pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")]
+        fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t;
+    }
+    unsafe { _vpaddlq_s16(a) }
+}
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -34979,19 +31846,26 @@ pub fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    let c = int16x8_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")]
+        fn _vpaddl_s32(a: int32x2_t) -> int64x1_t;
+    }
+    unsafe { _vpaddl_s32(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u32)"]
+#[doc = "Signed Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(saddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35001,19 +31875,26 @@ pub fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    let c = int32x4_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")]
+        fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t;
+    }
+    unsafe { _vpaddlq_s32(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u64)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35023,19 +31904,26 @@ pub fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    let c = int64x2_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")]
+        fn _vpaddl_u8(a: uint8x8_t) -> uint16x4_t;
+    }
+    unsafe { _vpaddl_u8(a) }
 }
-#[doc = "Vector bitwise inclusive OR NOT"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vornq_u8)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orn)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35045,19 +31933,26 @@ pub fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    let c = int8x16_t::splat(-1);
-    unsafe { simd_or(simd_xor(b, transmute(c)), a) }
+pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")]
+        fn _vpaddlq_u8(a: uint8x16_t) -> uint16x8_t;
+    }
+    unsafe { _vpaddlq_u8(a) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s8)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35067,18 +31962,26 @@ pub fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_or(a, b) }
+pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")]
+        fn _vpaddl_u16(a: uint16x4_t) -> uint32x2_t;
+    }
+    unsafe { _vpaddl_u16(a) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s8)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35088,18 +31991,26 @@ pub fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_or(a, b) }
+pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")]
+        fn _vpaddlq_u16(a: uint16x8_t) -> uint32x4_t;
+    }
+    unsafe { _vpaddlq_u16(a) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35109,18 +32020,26 @@ pub fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_or(a, b) }
+pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")]
+        fn _vpaddl_u32(a: uint32x2_t) -> uint64x1_t;
+    }
+    unsafe { _vpaddl_u32(a) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s16)"]
+#[doc = "Unsigned Add and Accumulate Long Pairwise."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(uaddlp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35130,18 +32049,26 @@ pub fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_or(a, b) }
+pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")]
+        fn _vpaddlq_u32(a: uint32x4_t) -> uint64x2_t;
+    }
+    unsafe { _vpaddlq_u32(a) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s32)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(fmaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35151,18 +32078,26 @@ pub fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fmaxp.v2f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
+        fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vpmax_f32(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s32)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(smaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35172,18 +32107,26 @@ pub fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
+        fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vpmax_s8(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_s64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(smaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35193,18 +32136,26 @@ pub fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
+        fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vpmax_s16(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_s64)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(smaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35214,18 +32165,26 @@ pub fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.smaxp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
+        fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vpmax_s32(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u8)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(umaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35235,18 +32194,26 @@ pub fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
+        fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vpmax_u8(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u8)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(umaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35256,18 +32223,26 @@ pub fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
+        fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+    }
+    unsafe { _vpmax_u16(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u16)"]
+#[doc = "Folding maximum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(umaxp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35277,18 +32252,26 @@ pub fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.umaxp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
+        fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vpmax_u32(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u16)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(fminp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35298,18 +32281,26 @@ pub fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.fminp.v2f32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
+        fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vpmin_f32(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(sminp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35319,18 +32310,26 @@ pub fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
+        fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vpmin_s8(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u32)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(sminp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35340,18 +32339,26 @@ pub fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
+        fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vpmin_s16(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorr_u64)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(sminp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35361,18 +32368,26 @@ pub fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sminp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
+        fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vpmin_s32(a, b) }
 }
-#[doc = "Vector bitwise or (immediate, inclusive)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vorrq_u64)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(orr)
+    assert_instr(uminp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35382,18 +32397,26 @@ pub fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_or(a, b) }
+pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
+        fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+    }
+    unsafe { _vpmin_u8(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s8)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(uminp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35403,27 +32426,26 @@ pub fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
-    let x: int16x4_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_s8(a, b);
+pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
+        fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
     }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_s8(b), a);
-    };
-    x
+    unsafe { _vpmin_u16(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s8)"]
+#[doc = "Folding minimum of adjacent pairs"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(uminp)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35433,27 +32455,26 @@ pub fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
-    let x: int16x8_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_s8(a, b);
+pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uminp.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
+        fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
     }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_s8(b), a);
-    };
-    x
+    unsafe { _vpmin_u32(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s16)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35463,27 +32484,26 @@ pub fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
-    let x: int32x2_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_s16(a, b);
+pub fn vqabs_s8(a: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
+        fn _vqabs_s8(a: int8x8_t) -> int8x8_t;
     }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_s16(b), a);
-    };
-    x
+    unsafe { _vqabs_s8(a) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s16)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35493,27 +32513,26 @@ pub fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
-    let x: int32x4_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_s16(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_s16(b), a);
-    };
-    x
+pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
+        fn _vqabsq_s8(a: int8x16_t) -> int8x16_t;
+    }
+    unsafe { _vqabsq_s8(a) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_s32)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35523,27 +32542,26 @@ pub fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
-    let x: int64x1_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_s32(a, b);
+pub fn vqabs_s16(a: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
+        fn _vqabs_s16(a: int16x4_t) -> int16x4_t;
     }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_s32(b), a);
-    };
-    x
+    unsafe { _vqabs_s16(a) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_s32)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sadalp)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35553,27 +32571,26 @@ pub fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
-    let x: int64x2_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_s32(a, b);
+pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
+        fn _vqabsq_s16(a: int16x8_t) -> int16x8_t;
     }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_s32(b), a);
-    };
-    x
+    unsafe { _vqabsq_s16(a) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u8)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35583,27 +32600,26 @@ pub fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
-    let x: uint16x4_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_u8(a, b);
+pub fn vqabs_s32(a: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
+        fn _vqabs_s32(a: int32x2_t) -> int32x2_t;
     }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_u8(b), a);
-    };
-    x
+    unsafe { _vqabs_s32(a) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u8)"]
+#[doc = "Signed saturating Absolute value"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(sqabs)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35613,27 +32629,26 @@ pub fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
-    let x: uint16x8_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_u8(a, b);
+pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqabs.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
+        fn _vqabsq_s32(a: int32x4_t) -> int32x4_t;
     }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_u8(b), a);
-    };
-    x
+    unsafe { _vqabsq_s32(a) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u16)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35643,27 +32658,18 @@ pub fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
-    let x: uint32x2_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_u16(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_u16(b), a);
-    };
-    x
+pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u16)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35673,27 +32679,18 @@ pub fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
-    let x: uint32x4_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_u16(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_u16(b), a);
-    };
-    x
+pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadal_u32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35703,27 +32700,18 @@ pub fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
-    let x: uint64x1_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadal_u32(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddl_u32(b), a);
-    };
-    x
+pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadalq_u32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpadal.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uadalp)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35733,50 +32721,18 @@ pub fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t {
-    let x: uint64x2_t;
-    #[cfg(target_arch = "arm")]
-    {
-        x = priv_vpadalq_u32(a, b);
-    }
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    unsafe {
-        x = simd_add(vpaddlq_u32(b), a);
-    };
-    x
-}
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(faddp)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v4f16"
-        )]
-        fn _vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vpadd_f16(a, b) }
+pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Floating-point add pairwise"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_f32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(faddp)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35786,26 +32742,18 @@ pub fn vpadd_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.faddp.v2f32"
-        )]
-        fn _vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vpadd_f32(a, b) }
+pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s8)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35815,26 +32763,18 @@ pub fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v8i8")]
-        fn _vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vpadd_s8(a, b) }
+pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s16)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35844,26 +32784,18 @@ pub fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
-        fn _vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vpadd_s16(a, b) }
+pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_s32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(sqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35873,27 +32805,18 @@ pub fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.addp.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")]
-        fn _vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vpadd_s32(a, b) }
+pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35903,19 +32826,18 @@ pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) }
+pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35925,24 +32847,18 @@ pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vpadd_s8(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35952,19 +32868,18 @@ pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) }
+pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -35974,24 +32889,18 @@ pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    let b: uint16x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(vpadd_s16(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36001,19 +32910,18 @@ pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) }
+pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Add pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(addp)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36023,23 +32931,18 @@ pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    let b: uint32x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(vpadd_s32(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36049,26 +32952,18 @@ pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")]
-        fn _vpaddl_s8(a: int8x8_t) -> int16x4_t;
-    }
-    unsafe { _vpaddl_s8(a) }
+pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s8)"]
+#[doc = "Saturating add"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(uqadd)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36078,27 +32973,24 @@ pub fn vpaddl_s8(a: int8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")]
-        fn _vpaddlq_s8(a: int8x16_t) -> int16x8_t;
-    }
-    unsafe { _vpaddlq_s8(a) }
+pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_saturating_add(a, b) }
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s16)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sqdmlal, N = 2)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36107,27 +32999,25 @@ pub fn vpaddlq_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")]
-        fn _vpaddl_s16(a: int16x4_t) -> int32x2_t;
-    }
-    unsafe { _vpaddl_s16(a) }
+pub fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s16)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sqdmlal, N = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36136,26 +33026,19 @@ pub fn vpaddl_s16(a: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")]
-        fn _vpaddlq_s16(a: int16x8_t) -> int32x4_t;
-    }
-    unsafe { _vpaddlq_s16(a) }
+pub fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s32)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(sqdmlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36165,26 +33048,18 @@ pub fn vpaddlq_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")]
-        fn _vpaddl_s32(a: int32x2_t) -> int64x1_t;
-    }
-    unsafe { _vpaddl_s32(a) }
+pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_n_s16(b, c))
 }
-#[doc = "Signed Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_s32)"]
+#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(saddlp)
+    assert_instr(sqdmlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36194,26 +33069,18 @@ pub fn vpaddl_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")]
-        fn _vpaddlq_s32(a: int32x4_t) -> int64x2_t;
-    }
-    unsafe { _vpaddlq_s32(a) }
+pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_n_s32(b, c))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u8)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(sqdmlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36223,26 +33090,18 @@ pub fn vpaddlq_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")]
-        fn _vpaddl_u8(a: uint8x8_t) -> uint16x4_t;
-    }
-    unsafe { _vpaddl_u8(a) }
+pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_s16(b, c))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u8)"]
+#[doc = "Signed saturating doubling multiply-add long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(sqdmlal)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36252,27 +33111,24 @@ pub fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")]
-        fn _vpaddlq_u8(a: uint8x16_t) -> uint16x8_t;
-    }
-    unsafe { _vpaddlq_u8(a) }
+pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_s32(b, c))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u16)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sqdmlsl, N = 2)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36281,27 +33137,25 @@ pub fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")]
-        fn _vpaddl_u16(a: uint16x4_t) -> uint32x2_t;
-    }
-    unsafe { _vpaddl_u16(a) }
+pub fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u16)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sqdmlsl, N = 1)
 )]
+#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36310,26 +33164,19 @@ pub fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")]
-        fn _vpaddlq_u16(a: uint16x8_t) -> uint32x4_t;
-    }
-    unsafe { _vpaddlq_u16(a) }
+pub fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_u32)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(sqdmlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36339,26 +33186,18 @@ pub fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")]
-        fn _vpaddl_u32(a: uint32x2_t) -> uint64x1_t;
-    }
-    unsafe { _vpaddl_u32(a) }
+pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_n_s16(b, c))
 }
-#[doc = "Unsigned Add and Accumulate Long Pairwise."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddlq_u32)"]
+#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vpaddl.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uaddlp)
+    assert_instr(sqdmlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36368,26 +33207,18 @@ pub fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")]
-        fn _vpaddlq_u32(a: uint32x4_t) -> uint64x2_t;
-    }
-    unsafe { _vpaddlq_u32(a) }
+pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_n_s32(b, c))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_f32)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fmaxp)
+    assert_instr(sqdmlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36397,26 +33228,18 @@ pub fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fmaxp.v2f32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
-        fn _vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vpmax_f32(a, b) }
+pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    vqsubq_s32(a, vqdmull_s16(b, c))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s8)"]
+#[doc = "Signed saturating doubling multiply-subtract long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smaxp)
+    assert_instr(sqdmlsl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36426,27 +33249,20 @@ pub fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smaxp.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")]
-        fn _vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vpmax_s8(a, b) }
+pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    vqsubq_s64(a, vqdmull_s32(b, c))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s16)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smaxp)
+    assert_instr(sqdmulh, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36455,27 +33271,21 @@ pub fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smaxp.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")]
-        fn _vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vpmax_s16(a, b) }
+pub fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmulh_s16(a, vdup_n_s16(vgetq_lane_s16::<LANE>(b)))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_s32)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(smaxp)
+    assert_instr(sqdmulh, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36484,27 +33294,21 @@ pub fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.smaxp.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")]
-        fn _vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vpmax_s32(a, b) }
+pub fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    vqdmulhq_s16(a, vdupq_n_s16(vgetq_lane_s16::<LANE>(b)))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u8)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umaxp)
+    assert_instr(sqdmulh, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36513,27 +33317,21 @@ pub fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umaxp.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")]
-        fn _vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vpmax_u8(a, b) }
+pub fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulh_s32(a, vdup_n_s32(vgetq_lane_s32::<LANE>(b)))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u16)"]
+#[doc = "Vector saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umaxp)
+    assert_instr(sqdmulh, LANE = 0)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36542,26 +33340,19 @@ pub fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umaxp.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")]
-        fn _vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vpmax_u16(a, b) }
+pub fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    vqdmulhq_s32(a, vdupq_n_s32(vgetq_lane_s32::<LANE>(b)))
 }
-#[doc = "Folding maximum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmax_u32)"]
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(umaxp)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36571,26 +33362,19 @@ pub fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.umaxp.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")]
-        fn _vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vpmax_u32(a, b) }
+pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    let b: int16x4_t = vdup_n_s16(b);
+    vqdmulh_s16(a, b)
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_f32)"]
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(fminp)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36600,26 +33384,19 @@ pub fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.fminp.v2f32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")]
-        fn _vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vpmin_f32(a, b) }
+pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    let b: int16x8_t = vdupq_n_s16(b);
+    vqdmulhq_s16(a, b)
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s8)"]
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sminp)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36629,26 +33406,19 @@ pub fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sminp.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
-        fn _vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vpmin_s8(a, b) }
+pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    let b: int32x2_t = vdup_n_s32(b);
+    vqdmulh_s32(a, b)
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s16)"]
+#[doc = "Vector saturating doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sminp)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36658,26 +33428,19 @@ pub fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sminp.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
-        fn _vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vpmin_s16(a, b) }
+pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    let b: int32x4_t = vdupq_n_s32(b);
+    vqdmulhq_s32(a, b)
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_s32)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sminp)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36687,26 +33450,26 @@ pub fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sminp.v2i32"
+            link_name = "llvm.aarch64.neon.sqdmulh.v4i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
-        fn _vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    unsafe { _vpmin_s32(a, b) }
+    unsafe { _vqdmulh_s16(a, b) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u8)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uminp)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36716,26 +33479,26 @@ pub fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uminp.v8i8"
+            link_name = "llvm.aarch64.neon.sqdmulh.v8i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")]
-        fn _vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
+        fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-    unsafe { _vpmin_u8(a, b) }
+    unsafe { _vqdmulhq_s16(a, b) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u16)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uminp)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36745,26 +33508,26 @@ pub fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uminp.v4i16"
+            link_name = "llvm.aarch64.neon.sqdmulh.v2i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")]
-        fn _vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
+        fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    unsafe { _vpmin_u16(a, b) }
+    unsafe { _vqdmulh_s32(a, b) }
 }
-#[doc = "Folding minimum of adjacent pairs"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmin_u32)"]
+#[doc = "Signed saturating doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uminp)
+    assert_instr(sqdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36774,27 +33537,28 @@ pub fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uminp.v2i32"
+            link_name = "llvm.aarch64.neon.sqdmulh.v4i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")]
-        fn _vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
+        fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    unsafe { _vpmin_u32(a, b) }
+    unsafe { _vqdmulhq_s32(a, b) }
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s8)"]
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(sqdmull, N = 2)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36803,27 +33567,22 @@ pub fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabs_s8(a: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
-        fn _vqabs_s8(a: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vqabs_s8(a) }
+pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 2);
+    let b = vdup_lane_s16::<N>(b);
+    vqdmull_s16(a, b)
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s8)"]
+#[doc = "Vector saturating doubling long multiply by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(sqdmull, N = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -36832,26 +33591,20 @@ pub fn vqabs_s8(a: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v16i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
-        fn _vqabsq_s8(a: int8x16_t) -> int8x16_t;
-    }
-    unsafe { _vqabsq_s8(a) }
+pub fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 1);
+    let b = vdup_lane_s32::<N>(b);
+    vqdmull_s32(a, b)
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s16)"]
+#[doc = "Vector saturating doubling long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(sqdmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36861,26 +33614,18 @@ pub fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabs_s16(a: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
-        fn _vqabs_s16(a: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vqabs_s16(a) }
+pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+    vqdmull_s16(a, vdup_n_s16(b))
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s16)"]
+#[doc = "Vector saturating doubling long multiply with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(sqdmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36890,26 +33635,18 @@ pub fn vqabs_s16(a: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v8i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
-        fn _vqabsq_s16(a: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vqabsq_s16(a) }
+pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+    vqdmull_s32(a, vdup_n_s32(b))
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabs_s32)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(sqdmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36919,26 +33656,26 @@ pub fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabs_s32(a: int32x2_t) -> int32x2_t {
+pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v2i32"
+            link_name = "llvm.aarch64.neon.sqdmull.v4i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
-        fn _vqabs_s32(a: int32x2_t) -> int32x2_t;
+        fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
     }
-    unsafe { _vqabs_s32(a) }
+    unsafe { _vqdmull_s16(a, b) }
 }
-#[doc = "Signed saturating Absolute value"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqabsq_s32)"]
+#[doc = "Signed saturating doubling multiply long"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqabs)
+    assert_instr(sqdmull)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36948,26 +33685,26 @@ pub fn vqabs_s32(a: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
+pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqabs.v4i32"
+            link_name = "llvm.aarch64.neon.sqdmull.v2i64"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
-        fn _vqabsq_s32(a: int32x4_t) -> int32x4_t;
+        fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
     }
-    unsafe { _vqabsq_s32(a) }
+    unsafe { _vqdmull_s32(a, b) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s8)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(sqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36977,18 +33714,26 @@ pub fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v8i8"
+        )]
+        fn _vqmovn_s16(a: int16x8_t) -> int8x8_t;
+    }
+    unsafe { _vqmovn_s16(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s8)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(sqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -36998,18 +33743,26 @@ pub fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v4i16"
+        )]
+        fn _vqmovn_s32(a: int32x4_t) -> int16x4_t;
+    }
+    unsafe { _vqmovn_s32(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s16)"]
+#[doc = "Signed saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(sqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37019,18 +33772,26 @@ pub fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtn.v2i32"
+        )]
+        fn _vqmovn_s64(a: int64x2_t) -> int32x2_t;
+    }
+    unsafe { _vqmovn_s64(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s16)"]
+#[doc = "Unsigned saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(uqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37040,18 +33801,26 @@ pub fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqxtn.v8i8"
+        )]
+        fn _vqmovn_u16(a: uint16x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqmovn_u16(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s32)"]
+#[doc = "Unsigned saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(uqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37061,18 +33830,26 @@ pub fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqxtn.v4i16"
+        )]
+        fn _vqmovn_u32(a: uint32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqmovn_u32(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s32)"]
+#[doc = "Unsigned saturating extract narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(uqxtn)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37082,18 +33859,26 @@ pub fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.uqxtn.v2i32"
+        )]
+        fn _vqmovn_u64(a: uint64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqmovn_u64(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_s64)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(sqxtun)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37103,18 +33888,26 @@ pub fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v8i8"
+        )]
+        fn _vqmovun_s16(a: int16x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqmovun_s16(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_s64)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqadd)
+    assert_instr(sqxtun)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37124,18 +33917,26 @@ pub fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v4i16"
+        )]
+        fn _vqmovun_s32(a: int32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqmovun_s32(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u8)"]
+#[doc = "Signed saturating extract unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(sqxtun)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37145,18 +33946,26 @@ pub fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqxtun.v2i32"
+        )]
+        fn _vqmovun_s64(a: int64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqmovun_s64(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u8)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37166,18 +33975,26 @@ pub fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqneg_s8(a: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")]
+        fn _vqneg_s8(a: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vqneg_s8(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u16)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37187,18 +34004,26 @@ pub fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v16i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")]
+        fn _vqnegq_s8(a: int8x16_t) -> int8x16_t;
+    }
+    unsafe { _vqnegq_s8(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u16)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37208,18 +34033,26 @@ pub fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqneg_s16(a: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")]
+        fn _vqneg_s16(a: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vqneg_s16(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u32)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37229,18 +34062,26 @@ pub fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v8i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")]
+        fn _vqnegq_s16(a: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vqnegq_s16(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u32)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37250,18 +34091,26 @@ pub fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqneg_s32(a: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")]
+        fn _vqneg_s32(a: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vqneg_s32(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqadd_u64)"]
+#[doc = "Signed saturating negate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(sqneg)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37271,19 +34120,28 @@ pub fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqneg.v4i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")]
+        fn _vqnegq_s32(a: int32x4_t) -> int32x4_t;
+    }
+    unsafe { _vqnegq_s32(a) }
 }
-#[doc = "Saturating add"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqaddq_u64)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqadd)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37292,20 +34150,22 @@ pub fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_saturating_add(a, b) }
+pub fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b = vdup_lane_s16::<LANE>(b);
+    vqrdmulh_s16(a, b)
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s16)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal, N = 2)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
-#[rustc_legacy_const_generics(3)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37314,21 +34174,22 @@ pub fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
+pub fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let b = vdup_lane_s32::<LANE>(b);
+    vqrdmulh_s32(a, b)
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_lane_s32)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal, N = 1)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
-#[rustc_legacy_const_generics(3)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37337,20 +34198,22 @@ pub fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
+pub fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let b = vdup_laneq_s16::<LANE>(b);
+    vqrdmulh_s16(a, b)
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s16)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37359,19 +34222,22 @@ pub fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_n_s16(b, c))
+pub fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b = vdup_laneq_s32::<LANE>(b);
+    vqrdmulh_s32(a, b)
 }
-#[doc = "Vector widening saturating doubling multiply accumulate with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_n_s32)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37380,19 +34246,22 @@ pub fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_n_s32(b, c))
+pub fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b = vdupq_lane_s16::<LANE>(b);
+    vqrdmulhq_s16(a, b)
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s16)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37401,19 +34270,22 @@ pub fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqaddq_s32(a, vqdmull_s16(b, c))
+pub fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 1);
+    let b = vdupq_lane_s32::<LANE>(b);
+    vqrdmulhq_s32(a, b)
 }
-#[doc = "Signed saturating doubling multiply-add long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlal_s32)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlal)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37422,20 +34294,22 @@ pub fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqaddq_s64(a, vqdmull_s32(b, c))
+pub fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(LANE, 3);
+    let b = vdupq_laneq_s16::<LANE>(b);
+    vqrdmulhq_s16(a, b)
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s16)"]
+#[doc = "Vector rounding saturating doubling multiply high by scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl, N = 2)
+    assert_instr(sqrdmulh, LANE = 1)
 )]
-#[rustc_legacy_const_generics(3)]
+#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37444,21 +34318,21 @@ pub fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
+pub fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let b = vdupq_laneq_s32::<LANE>(b);
+    vqrdmulhq_s32(a, b)
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_lane_s32)"]
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl, N = 1)
+    assert_instr(sqrdmulh)
 )]
-#[rustc_legacy_const_generics(3)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37467,19 +34341,18 @@ pub fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
+pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    vqrdmulh_s16(a, vdup_n_s16(b))
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s16)"]
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37489,18 +34362,18 @@ pub fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t)
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_n_s16(b, c))
+pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    vqrdmulhq_s16(a, vdupq_n_s16(b))
 }
-#[doc = "Vector widening saturating doubling multiply subtract with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_n_s32)"]
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37510,18 +34383,18 @@ pub fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_n_s32(b, c))
+pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    vqrdmulh_s32(a, vdup_n_s32(b))
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s16)"]
+#[doc = "Vector saturating rounding doubling multiply high with scalar"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37531,18 +34404,18 @@ pub fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
-    vqsubq_s32(a, vqdmull_s16(b, c))
+pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    vqrdmulhq_s32(a, vdupq_n_s32(b))
 }
-#[doc = "Signed saturating doubling multiply-subtract long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmlsl_s32)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmlsl)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37552,20 +34425,27 @@ pub fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
-    vqsubq_s64(a, vqdmull_s32(b, c))
+pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrdmulh.v4i16"
+        )]
+        fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vqrdmulh_s16(a, b) }
 }
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s16)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
+    assert_instr(sqrdmulh)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37574,205 +34454,26 @@ pub fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqdmulh_s16(a, vdup_n_s16(simd_extract!(b, LANE as u32))) }
-}
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe { vqdmulhq_s16(a, vdupq_n_s16(simd_extract!(b, LANE as u32))) }
-}
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_laneq_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmulh_s32(a, vdup_n_s32(simd_extract!(b, LANE as u32))) }
-}
-#[doc = "Vector saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_laneq_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh, LANE = 0)
-)]
-#[rustc_legacy_const_generics(2)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe { vqdmulhq_s32(a, vdupq_n_s32(simd_extract!(b, LANE as u32))) }
-}
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    let b: int16x4_t = vdup_n_s16(b);
-    vqdmulh_s16(a, b)
-}
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    let b: int16x8_t = vdupq_n_s16(b);
-    vqdmulhq_s16(a, b)
-}
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    let b: int32x2_t = vdup_n_s32(b);
-    vqdmulh_s32(a, b)
-}
-#[doc = "Vector saturating doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    let b: int32x4_t = vdupq_n_s32(b);
-    vqdmulhq_s32(a, b)
-}
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v4i16"
+            link_name = "llvm.aarch64.neon.sqrdmulh.v8i16"
         )]
-        fn _vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-    unsafe { _vqdmulh_s16(a, b) }
+    unsafe { _vqrdmulhq_s16(a, b) }
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s16)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37782,26 +34483,26 @@ pub fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v8i16"
+            link_name = "llvm.aarch64.neon.sqrdmulh.v2i32"
         )]
-        fn _vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    unsafe { _vqdmulhq_s16(a, b) }
+    unsafe { _vqrdmulh_s32(a, b) }
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulh_s32)"]
+#[doc = "Signed saturating rounding doubling multiply returning high half"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(sqrdmulh)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37811,26 +34512,26 @@ pub fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v2i32"
+            link_name = "llvm.aarch64.neon.sqrdmulh.v4i32"
         )]
-        fn _vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    unsafe { _vqdmulh_s32(a, b) }
+    unsafe { _vqrdmulhq_s32(a, b) }
 }
-#[doc = "Signed saturating doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmulhq_s32)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmulh)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37840,28 +34541,27 @@ pub fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmulh.v4i32"
+            link_name = "llvm.aarch64.neon.sqrshl.v8i8"
         )]
-        fn _vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    unsafe { _vqdmulhq_s32(a, b) }
+    unsafe { _vqrshl_s8(a, b) }
 }
-#[doc = "Vector saturating doubling long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull, N = 2)
+    assert_instr(sqrshl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37870,24 +34570,27 @@ pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 2);
-    unsafe {
-        let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
-        vqdmull_s16(a, b)
+pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v16i8"
+        )]
+        fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
+    unsafe { _vqrshlq_s8(a, b) }
 }
-#[doc = "Vector saturating doubling long multiply by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_lane_s32)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull, N = 1)
+    assert_instr(sqrshl)
 )]
-#[rustc_legacy_const_generics(2)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -37896,22 +34599,26 @@ pub fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 1);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
-        vqdmull_s32(a, b)
+pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v4i16"
+        )]
+        fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
+    unsafe { _vqrshl_s16(a, b) }
 }
-#[doc = "Vector saturating doubling long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37921,18 +34628,26 @@ pub fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
-    vqdmull_s16(a, vdup_n_s16(b))
+pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v8i16"
+        )]
+        fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vqrshlq_s16(a, b) }
 }
-#[doc = "Vector saturating doubling long multiply with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_n_s32)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37942,18 +34657,26 @@ pub fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
-    vqdmull_s32(a, vdup_n_s32(b))
+pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshl.v2i32"
+        )]
+        fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vqrshl_s32(a, b) }
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37963,26 +34686,26 @@ pub fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmull.v4i32"
+            link_name = "llvm.aarch64.neon.sqrshl.v4i32"
         )]
-        fn _vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t;
+        fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    unsafe { _vqdmull_s16(a, b) }
+    unsafe { _vqrshlq_s32(a, b) }
 }
-#[doc = "Signed saturating doubling multiply long"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_s32)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqdmull)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -37992,26 +34715,26 @@ pub fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqdmull.v2i64"
+            link_name = "llvm.aarch64.neon.sqrshl.v1i64"
         )]
-        fn _vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t;
+        fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
     }
-    unsafe { _vqdmull_s32(a, b) }
+    unsafe { _vqrshl_s64(a, b) }
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s16)"]
+#[doc = "Signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtn)
+    assert_instr(sqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38021,26 +34744,26 @@ pub fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
+pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtn.v8i8"
+            link_name = "llvm.aarch64.neon.sqrshl.v2i64"
         )]
-        fn _vqmovn_s16(a: int16x8_t) -> int8x8_t;
+        fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
-    unsafe { _vqmovn_s16(a) }
+    unsafe { _vqrshlq_s64(a, b) }
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s32)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtn)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38050,26 +34773,26 @@ pub fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
+pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtn.v4i16"
+            link_name = "llvm.aarch64.neon.uqrshl.v8i8"
         )]
-        fn _vqmovn_s32(a: int32x4_t) -> int16x4_t;
+        fn _vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
     }
-    unsafe { _vqmovn_s32(a) }
+    unsafe { _vqrshl_u8(a, b) }
 }
-#[doc = "Signed saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_s64)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtn)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38079,26 +34802,26 @@ pub fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
+pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtn.v2i32"
+            link_name = "llvm.aarch64.neon.uqrshl.v16i8"
         )]
-        fn _vqmovn_s64(a: int64x2_t) -> int32x2_t;
+        fn _vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
     }
-    unsafe { _vqmovn_s64(a) }
+    unsafe { _vqrshlq_u8(a, b) }
 }
-#[doc = "Unsigned saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqxtn)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38108,26 +34831,26 @@ pub fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t {
+pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqxtn.v8i8"
+            link_name = "llvm.aarch64.neon.uqrshl.v4i16"
         )]
-        fn _vqmovn_u16(a: uint16x8_t) -> uint8x8_t;
+        fn _vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
     }
-    unsafe { _vqmovn_u16(a) }
+    unsafe { _vqrshl_u16(a, b) }
 }
-#[doc = "Unsigned saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u32)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqxtn)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38137,26 +34860,26 @@ pub fn vqmovn_u16(a: uint16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t {
+pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqxtn.v4i16"
+            link_name = "llvm.aarch64.neon.uqrshl.v8i16"
         )]
-        fn _vqmovn_u32(a: uint32x4_t) -> uint16x4_t;
+        fn _vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
     }
-    unsafe { _vqmovn_u32(a) }
+    unsafe { _vqrshlq_u16(a, b) }
 }
-#[doc = "Unsigned saturating extract narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovn_u64)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqxtn)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38166,26 +34889,26 @@ pub fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
+pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqxtn.v2i32"
+            link_name = "llvm.aarch64.neon.uqrshl.v2i32"
         )]
-        fn _vqmovn_u64(a: uint64x2_t) -> uint32x2_t;
+        fn _vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
     }
-    unsafe { _vqmovn_u64(a) }
+    unsafe { _vqrshl_u32(a, b) }
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s16)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtun)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38195,26 +34918,26 @@ pub fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t {
+pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtun.v8i8"
+            link_name = "llvm.aarch64.neon.uqrshl.v4i32"
         )]
-        fn _vqmovun_s16(a: int16x8_t) -> uint8x8_t;
+        fn _vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
     }
-    unsafe { _vqmovun_s16(a) }
+    unsafe { _vqrshlq_u32(a, b) }
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s32)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtun)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38224,26 +34947,26 @@ pub fn vqmovun_s16(a: int16x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
+pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtun.v4i16"
+            link_name = "llvm.aarch64.neon.uqrshl.v1i64"
         )]
-        fn _vqmovun_s32(a: int32x4_t) -> uint16x4_t;
+        fn _vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
     }
-    unsafe { _vqmovun_s32(a) }
+    unsafe { _vqrshl_u64(a, b) }
 }
-#[doc = "Signed saturating extract unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqmovun_s64)"]
+#[doc = "Unsigned signed saturating rounding shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqxtun)
+    assert_instr(uqrshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38253,202 +34976,343 @@ pub fn vqmovun_s32(a: int32x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqmovun_s64(a: int64x2_t) -> uint32x2_t {
+pub fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqxtun.v2i32"
+            link_name = "llvm.aarch64.neon.uqrshl.v2i64"
         )]
-        fn _vqmovun_s64(a: int64x2_t) -> uint32x2_t;
+        fn _vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
     }
-    unsafe { _vqmovun_s64(a) }
+    unsafe { _vqrshlq_u64(a, b) }
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s8)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
+        fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+    unsafe { _vqrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
+        fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+    unsafe { _vqrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
+        fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+    unsafe { _vqrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+}
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqneg_s8(a: int8x8_t) -> int8x8_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N >= 1 && N <= 8);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v8i8"
+            link_name = "llvm.aarch64.neon.sqrshrn.v8i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")]
-        fn _vqneg_s8(a: int8x8_t) -> int8x8_t;
+        fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
     }
-    unsafe { _vqneg_s8(a) }
+    unsafe { _vqrshrn_n_s16(a, N) }
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s8)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqnegq_s8(a: int8x16_t) -> int8x16_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v16i8"
+            link_name = "llvm.aarch64.neon.sqrshrn.v4i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")]
-        fn _vqnegq_s8(a: int8x16_t) -> int8x16_t;
+        fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
     }
-    unsafe { _vqnegq_s8(a) }
+    unsafe { _vqrshrn_n_s32(a, N) }
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s16)"]
+#[doc = "Signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqneg_s16(a: int16x4_t) -> int16x4_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N >= 1 && N <= 32);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v4i16"
+            link_name = "llvm.aarch64.neon.sqrshrn.v2i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")]
-        fn _vqneg_s16(a: int16x4_t) -> int16x4_t;
+        fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
     }
-    unsafe { _vqneg_s16(a) }
+    unsafe { _vqrshrn_n_s64(a, N) }
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s16)"]
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
+        fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqrshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) }
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
+        fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqrshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) }
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
+        fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) }
+}
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqnegq_s16(a: int16x8_t) -> int16x8_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v8i16"
+            link_name = "llvm.aarch64.neon.uqrshrn.v8i8"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")]
-        fn _vqnegq_s16(a: int16x8_t) -> int16x8_t;
+        fn _vqrshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t;
     }
-    unsafe { _vqnegq_s16(a) }
+    unsafe { _vqrshrn_n_u16(a, N) }
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqneg_s32)"]
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqneg_s32(a: int32x2_t) -> int32x2_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v2i32"
+            link_name = "llvm.aarch64.neon.uqrshrn.v4i16"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")]
-        fn _vqneg_s32(a: int32x2_t) -> int32x2_t;
+        fn _vqrshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t;
     }
-    unsafe { _vqneg_s32(a) }
+    unsafe { _vqrshrn_n_u32(a, N) }
 }
-#[doc = "Signed saturating negate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqnegq_s32)"]
+#[doc = "Unsigned signed saturating rounded shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqneg)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqneg.v4i32"
+            link_name = "llvm.aarch64.neon.uqrshrn.v2i32"
         )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")]
-        fn _vqnegq_s32(a: int32x4_t) -> int32x4_t;
+        fn _vqrshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t;
     }
-    unsafe { _vqnegq_s32(a) }
+    unsafe { _vqrshrn_n_u64(a, N) }
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s16)"]
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
+        fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqrshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
+        fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqrshrun_n_s32(a, const { int32x4_t([-N; 4]) }) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")]
+        fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqrshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrun.v8i8"
+        )]
+        fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t;
+    }
+    unsafe { _vqrshrun_n_s16(a, N) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrun.v4i16"
+        )]
+        fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t;
+    }
+    unsafe { _vqrshrun_n_s32(a, N) }
+}
+#[doc = "Signed saturating rounded shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqrshrun.v2i32"
+        )]
+        fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t;
+    }
+    unsafe { _vqrshrun_n_s64(a, N) }
+}
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(sqshl, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38457,25 +35321,21 @@ pub fn vqnegq_s32(a: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: int16x4_t =
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmulh_s16(a, b)
-    }
+pub fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshl_s8(a, vdup_n_s8(N as _))
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_lane_s32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(sqshl, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38484,24 +35344,21 @@ pub fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]);
-        vqrdmulh_s32(a, b)
-    }
+pub fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshlq_s8(a, vdupq_n_s8(N as _))
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s16)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(sqshl, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38510,25 +35367,21 @@ pub fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let b: int16x4_t =
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmulh_s16(a, b)
-    }
+pub fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshl_s16(a, vdup_n_s16(N as _))
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_laneq_s32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(sqshl, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38537,24 +35390,21 @@ pub fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: int32x2_t = simd_shuffle!(b, b, [LANE as u32, LANE as u32]);
-        vqrdmulh_s32(a, b)
-    }
+pub fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshlq_s16(a, vdupq_n_s16(N as _))
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s16)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(sqshl, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38563,37 +35413,21 @@ pub fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: int16x8_t = simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
-        vqrdmulhq_s16(a, b)
-    }
+pub fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshl_s32(a, vdup_n_s32(N as _))
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_lane_s32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(sqshl, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38602,25 +35436,21 @@ pub fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let b: int32x4_t =
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmulhq_s32(a, b)
-    }
+pub fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshlq_s32(a, vdupq_n_s32(N as _))
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s16)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(sqshl, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38629,37 +35459,21 @@ pub fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(LANE, 3);
-    unsafe {
-        let b: int16x8_t = simd_shuffle!(
-            b,
-            b,
-            [
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32,
-                LANE as u32
-            ]
-        );
-        vqrdmulhq_s16(a, b)
-    }
+pub fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshl_s64(a, vdup_n_s64(N as _))
 }
-#[doc = "Vector rounding saturating doubling multiply high by scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_laneq_s32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh, LANE = 1)
+    assert_instr(sqshl, N = 2)
 )]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38668,24 +35482,21 @@ pub fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(LANE, 2);
-    unsafe {
-        let b: int32x4_t =
-            simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vqrdmulhq_s32(a, b)
-    }
+pub fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshlq_s64(a, vdupq_n_s64(N as _))
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s16)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38694,19 +35505,21 @@ pub fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
-    vqrdmulh_s16(a, vdup_n_s16(b))
+pub fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshl_u8(a, vdup_n_s8(N as _))
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s16)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38715,19 +35528,21 @@ pub fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
-    vqrdmulhq_s16(a, vdupq_n_s16(b))
+pub fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
+    vqshlq_u8(a, vdupq_n_s8(N as _))
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_n_s32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38736,19 +35551,21 @@ pub fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
-    vqrdmulh_s32(a, vdup_n_s32(b))
+pub fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshl_u16(a, vdup_n_s16(N as _))
 }
-#[doc = "Vector saturating rounding doubling multiply high with scalar"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_n_s32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38757,19 +35574,21 @@ pub fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
-    vqrdmulhq_s32(a, vdupq_n_s32(b))
+pub fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    vqshlq_u16(a, vdupq_n_s16(N as _))
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s16)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38778,27 +35597,21 @@ pub fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v4i16"
-        )]
-        fn _vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vqrdmulh_s16(a, b) }
+pub fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshl_u32(a, vdup_n_s32(N as _))
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s16)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38807,27 +35620,21 @@ pub fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v8i16"
-        )]
-        fn _vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vqrdmulhq_s16(a, b) }
+pub fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    vqshlq_u32(a, vdupq_n_s32(N as _))
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulh_s32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38836,27 +35643,21 @@ pub fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v2i32"
-        )]
-        fn _vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vqrdmulh_s32(a, b) }
+pub fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshl_u64(a, vdup_n_s64(N as _))
 }
-#[doc = "Signed saturating rounding doubling multiply returning high half"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrdmulhq_s32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrdmulh)
+    assert_instr(uqshl, N = 2)
 )]
+#[rustc_legacy_const_generics(1)]
 #[cfg_attr(
     not(target_arch = "arm"),
     stable(feature = "neon_intrinsics", since = "1.59.0")
@@ -38865,26 +35666,19 @@ pub fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrdmulh.v4i32"
-        )]
-        fn _vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vqrdmulhq_s32(a, b) }
+pub fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    vqshlq_u64(a, vdupq_n_s64(N as _))
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s8)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38894,26 +35688,26 @@ pub fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v8i8"
+            link_name = "llvm.aarch64.neon.sqshl.v8i8"
         )]
-        fn _vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+        fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
     }
-    unsafe { _vqrshl_s8(a, b) }
+    unsafe { _vqshl_s8(a, b) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s8)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38923,26 +35717,26 @@ pub fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v16i8"
+            link_name = "llvm.aarch64.neon.sqshl.v16i8"
         )]
-        fn _vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+        fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
     }
-    unsafe { _vqrshlq_s8(a, b) }
+    unsafe { _vqshlq_s8(a, b) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s16)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38952,26 +35746,26 @@ pub fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v4i16"
+            link_name = "llvm.aarch64.neon.sqshl.v4i16"
         )]
-        fn _vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+        fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
     }
-    unsafe { _vqrshl_s16(a, b) }
+    unsafe { _vqshl_s16(a, b) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s16)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -38981,26 +35775,26 @@ pub fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v8i16"
+            link_name = "llvm.aarch64.neon.sqshl.v8i16"
         )]
-        fn _vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+        fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
     }
-    unsafe { _vqrshlq_s16(a, b) }
+    unsafe { _vqshlq_s16(a, b) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39010,26 +35804,26 @@ pub fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v2i32"
+            link_name = "llvm.aarch64.neon.sqshl.v2i32"
         )]
-        fn _vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+        fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
     }
-    unsafe { _vqrshl_s32(a, b) }
+    unsafe { _vqshl_s32(a, b) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s32)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39039,26 +35833,26 @@ pub fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v4i32"
+            link_name = "llvm.aarch64.neon.sqshl.v4i32"
         )]
-        fn _vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+        fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
     }
-    unsafe { _vqrshlq_s32(a, b) }
+    unsafe { _vqshlq_s32(a, b) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_s64)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39068,26 +35862,26 @@ pub fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v1i64"
+            link_name = "llvm.aarch64.neon.sqshl.v1i64"
         )]
-        fn _vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+        fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
     }
-    unsafe { _vqrshl_s64(a, b) }
+    unsafe { _vqshl_s64(a, b) }
 }
-#[doc = "Signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_s64)"]
+#[doc = "Signed saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqrshl)
+    assert_instr(sqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39097,26 +35891,26 @@ pub fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshl.v2i64"
+            link_name = "llvm.aarch64.neon.sqshl.v2i64"
         )]
-        fn _vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+        fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
     }
-    unsafe { _vqrshlq_s64(a, b) }
+    unsafe { _vqshlq_s64(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u8)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39126,26 +35920,26 @@ pub fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v8i8"
+            link_name = "llvm.aarch64.neon.uqshl.v8i8"
         )]
-        fn _vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+        fn _vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
     }
-    unsafe { _vqrshl_u8(a, b) }
+    unsafe { _vqshl_u8(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u8)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39155,26 +35949,26 @@ pub fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v16i8"
+            link_name = "llvm.aarch64.neon.uqshl.v16i8"
         )]
-        fn _vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+        fn _vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
     }
-    unsafe { _vqrshlq_u8(a, b) }
+    unsafe { _vqshlq_u8(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u16)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39184,26 +35978,26 @@ pub fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v4i16"
+            link_name = "llvm.aarch64.neon.uqshl.v4i16"
         )]
-        fn _vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+        fn _vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
     }
-    unsafe { _vqrshl_u16(a, b) }
+    unsafe { _vqshl_u16(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u16)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39213,26 +36007,26 @@ pub fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v8i16"
+            link_name = "llvm.aarch64.neon.uqshl.v8i16"
         )]
-        fn _vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+        fn _vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
     }
-    unsafe { _vqrshlq_u16(a, b) }
+    unsafe { _vqshlq_u16(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39242,26 +36036,26 @@ pub fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v2i32"
+            link_name = "llvm.aarch64.neon.uqshl.v2i32"
         )]
-        fn _vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+        fn _vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
     }
-    unsafe { _vqrshl_u32(a, b) }
+    unsafe { _vqshl_u32(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u32)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39271,26 +36065,26 @@ pub fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v4i32"
+            link_name = "llvm.aarch64.neon.uqshl.v4i32"
         )]
-        fn _vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+        fn _vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
     }
-    unsafe { _vqrshlq_u32(a, b) }
+    unsafe { _vqshlq_u32(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshl_u64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39300,26 +36094,26 @@ pub fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v1i64"
+            link_name = "llvm.aarch64.neon.uqshl.v1i64"
         )]
-        fn _vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+        fn _vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
     }
-    unsafe { _vqrshl_u64(a, b) }
+    unsafe { _vqshl_u64(a, b) }
 }
-#[doc = "Unsigned signed saturating rounding shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshlq_u64)"]
+#[doc = "Unsigned saturating shift left"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqrshl)
+    assert_instr(uqshl)
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
@@ -39329,4564 +36123,2212 @@ pub fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+pub fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshl.v2i64"
+            link_name = "llvm.aarch64.neon.uqshl.v2i64"
         )]
-        fn _vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+        fn _vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
     }
-    unsafe { _vqrshlq_u64(a, b) }
+    unsafe { _vqshlq_u64(a, b) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
+pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
-        fn _vqrshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
+        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
     }
-    unsafe { _vqrshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
+    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
+pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
-        fn _vqrshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")]
+        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
     }
-    unsafe { _vqrshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
+    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
+pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
-        fn _vqrshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")]
+        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
     }
-    unsafe { _vqrshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")]
+        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
+    }
+    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")]
+        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")]
+        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")]
+        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
+    }
+    unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")]
+        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
+    }
+    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
+pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_uimm_bits!(N, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrn.v8i8"
+            link_name = "llvm.aarch64.neon.sqshlu.v8i8"
         )]
-        fn _vqrshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
+        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
     }
-    unsafe { _vqrshrn_n_s16(a, N) }
+    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s32)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
+pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
+    static_assert_uimm_bits!(N, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrn.v4i16"
+            link_name = "llvm.aarch64.neon.sqshlu.v16i8"
         )]
-        fn _vqrshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
+        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
     }
-    unsafe { _vqrshrn_n_s32(a, N) }
+    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
 }
-#[doc = "Signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_s64)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
+pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_uimm_bits!(N, 4);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrn.v2i32"
+            link_name = "llvm.aarch64.neon.sqshlu.v4i16"
         )]
-        fn _vqrshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
+        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
     }
-    unsafe { _vqrshrn_n_s64(a, N) }
+    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
 }
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
+    static_assert_uimm_bits!(N, 4);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v8i16"
+        )]
+        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
+    }
+    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v2i32"
+        )]
+        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
+    static_assert_uimm_bits!(N, 5);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v4i32"
+        )]
+        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
+    }
+    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v1i64"
+        )]
+        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
+    }
+    unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) }
+}
+#[doc = "Signed saturating shift left unsigned"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
+    static_assert_uimm_bits!(N, 6);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.sqshlu.v2i64"
+        )]
+        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
+    }
+    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
+}
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+pub fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
     static_assert!(N >= 1 && N <= 8);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
-        fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
-    }
-    unsafe {
-        _vqrshrn_n_u16(
-            a,
-            const {
-                uint16x8_t([
-                    -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16,
-                    -N as u16,
-                ])
-            },
-        )
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
+        fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
     }
+    unsafe { _vqshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
 }
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+pub fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
-        fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
-    }
-    unsafe {
-        _vqrshrn_n_u32(
-            a,
-            const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) },
-        )
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")]
+        fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
     }
+    unsafe { _vqshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
 }
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+pub fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
     static_assert!(N >= 1 && N <= 32);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
-        fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
+        fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
     }
-    unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64, -N as u64]) }) }
+    unsafe { _vqshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
 }
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+pub fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
     static_assert!(N >= 1 && N <= 8);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshrn.v8i8"
+            link_name = "llvm.aarch64.neon.sqshrn.v8i8"
         )]
-        fn _vqrshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t;
+        fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
     }
-    unsafe { _vqrshrn_n_u16(a, N) }
+    unsafe { _vqshrn_n_s16(a, N) }
 }
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+pub fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshrn.v4i16"
+            link_name = "llvm.aarch64.neon.sqshrn.v4i16"
         )]
-        fn _vqrshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t;
+        fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
     }
-    unsafe { _vqrshrn_n_u32(a, N) }
+    unsafe { _vqshrn_n_s32(a, N) }
 }
-#[doc = "Unsigned signed saturating rounded shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"]
+#[doc = "Signed saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+pub fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
     static_assert!(N >= 1 && N <= 32);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqrshrn.v2i32"
+            link_name = "llvm.aarch64.neon.sqshrn.v2i32"
         )]
-        fn _vqrshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t;
+        fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
     }
-    unsafe { _vqrshrn_n_u64(a, N) }
+    unsafe { _vqshrn_n_s64(a, N) }
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+pub fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
     static_assert!(N >= 1 && N <= 8);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
-        fn _vqrshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
+        fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
     }
-    unsafe { _vqrshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
+    unsafe { _vqshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) }
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+pub fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
-        fn _vqrshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
+        fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
     }
-    unsafe { _vqrshrun_n_s32(a, const { int32x4_t([-N; 4]) }) }
+    unsafe { _vqshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) }
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+pub fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
     static_assert!(N >= 1 && N <= 32);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")]
-        fn _vqrshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
+        fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
     }
-    unsafe { _vqrshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
+    unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) }
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s16)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+pub fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
     static_assert!(N >= 1 && N <= 8);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrun.v8i8"
+            link_name = "llvm.aarch64.neon.uqshrn.v8i8"
         )]
-        fn _vqrshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t;
+        fn _vqshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t;
     }
-    unsafe { _vqrshrun_n_s16(a, N) }
+    unsafe { _vqshrn_n_u16(a, N) }
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s32)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+pub fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
     static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrun.v4i16"
+            link_name = "llvm.aarch64.neon.uqshrn.v4i16"
         )]
-        fn _vqrshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t;
+        fn _vqshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t;
     }
-    unsafe { _vqrshrun_n_s32(a, N) }
+    unsafe { _vqshrn_n_u32(a, N) }
 }
-#[doc = "Signed saturating rounded shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrun_n_s64)"]
+#[doc = "Unsigned saturating shift right narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+pub fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
     static_assert!(N >= 1 && N <= 32);
     unsafe extern "unadjusted" {
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqrshrun.v2i32"
+            link_name = "llvm.aarch64.neon.uqshrn.v2i32"
         )]
-        fn _vqrshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t;
+        fn _vqshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t;
     }
-    unsafe { _vqrshrun_n_s64(a, N) }
+    unsafe { _vqshrn_n_u64(a, N) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s8)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
-)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshl_s8(a, vdup_n_s8(N as _))
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
+        fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+    unsafe { _vqshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s8)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
-)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshlq_s8(a, vdupq_n_s8(N as _))
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
+        fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+    unsafe { _vqshrun_n_s32(a, const { int32x4_t([-N; 4]) }) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s16)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
 #[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
-)]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshl_s16(a, vdup_n_s16(N as _))
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
+        fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+    }
+    unsafe { _vqshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
 }
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s16)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
-)]
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshlq_s16(a, vdupq_n_s16(N as _))
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshl_s32(a, vdup_n_s32(N as _))
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshlq_s32(a, vdupq_n_s32(N as _))
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshl_s64(a, vdup_n_s64(N as _))
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshlq_s64(a, vdupq_n_s64(N as _))
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshl_u8(a, vdup_n_s8(N as _))
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    vqshlq_u8(a, vdupq_n_s8(N as _))
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshl_u16(a, vdup_n_s16(N as _))
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    vqshlq_u16(a, vdupq_n_s16(N as _))
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshl_u32(a, vdup_n_s32(N as _))
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    vqshlq_u32(a, vdupq_n_s32(N as _))
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_n_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshl_u64(a, vdup_n_s64(N as _))
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_n_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl, N = 2)
-)]
-#[rustc_legacy_const_generics(1)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    vqshlq_u64(a, vdupq_n_s64(N as _))
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v8i8"
-        )]
-        fn _vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
-    }
-    unsafe { _vqshl_s8(a, b) }
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v16i8"
-        )]
-        fn _vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t;
-    }
-    unsafe { _vqshlq_s8(a, b) }
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v4i16"
-        )]
-        fn _vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
-    }
-    unsafe { _vqshl_s16(a, b) }
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v8i16"
-        )]
-        fn _vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t;
-    }
-    unsafe { _vqshlq_s16(a, b) }
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v2i32"
-        )]
-        fn _vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
-    }
-    unsafe { _vqshl_s32(a, b) }
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v4i32"
-        )]
-        fn _vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t;
-    }
-    unsafe { _vqshlq_s32(a, b) }
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v1i64"
-        )]
-        fn _vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t;
-    }
-    unsafe { _vqshl_s64(a, b) }
-}
-#[doc = "Signed saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshl.v2i64"
-        )]
-        fn _vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t;
-    }
-    unsafe { _vqshlq_s64(a, b) }
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v8i8"
-        )]
-        fn _vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqshl_u8(a, b) }
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v16i8"
-        )]
-        fn _vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vqshlq_u8(a, b) }
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v4i16"
-        )]
-        fn _vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqshl_u16(a, b) }
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v8i16"
-        )]
-        fn _vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vqshlq_u16(a, b) }
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v2i32"
-        )]
-        fn _vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshl_u32(a, b) }
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v4i32"
-        )]
-        fn _vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vqshlq_u32(a, b) }
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshl_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v1i64"
-        )]
-        fn _vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
-    }
-    unsafe { _vqshl_u64(a, b) }
-}
-#[doc = "Unsigned saturating shift left"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlq_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqshl)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshl.v2i64"
-        )]
-        fn _vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vqshlq_u64(a, b) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
-        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")]
-        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")]
-        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")]
-        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")]
-        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")]
-        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")]
-        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
-    }
-    unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i64")]
-        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v8i8"
-        )]
-        fn _vqshlu_n_s8(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqshlu_n_s8(a, const { int8x8_t([N as i8; 8]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshluq_n_s8<const N: i32>(a: int8x16_t) -> uint8x16_t {
-    static_assert_uimm_bits!(N, 3);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v16i8"
-        )]
-        fn _vqshluq_n_s8(a: int8x16_t, n: int8x16_t) -> uint8x16_t;
-    }
-    unsafe { _vqshluq_n_s8(a, const { int8x16_t([N as i8; 16]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v4i16"
-        )]
-        fn _vqshlu_n_s16(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqshlu_n_s16(a, const { int16x4_t([N as i16; 4]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshluq_n_s16<const N: i32>(a: int16x8_t) -> uint16x8_t {
-    static_assert_uimm_bits!(N, 4);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v8i16"
-        )]
-        fn _vqshluq_n_s16(a: int16x8_t, n: int16x8_t) -> uint16x8_t;
-    }
-    unsafe { _vqshluq_n_s16(a, const { int16x8_t([N as i16; 8]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v2i32"
-        )]
-        fn _vqshlu_n_s32(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshlu_n_s32(a, const { int32x2_t([N; 2]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshluq_n_s32<const N: i32>(a: int32x4_t) -> uint32x4_t {
-    static_assert_uimm_bits!(N, 5);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v4i32"
-        )]
-        fn _vqshluq_n_s32(a: int32x4_t, n: int32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vqshluq_n_s32(a, const { int32x4_t([N; 4]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshlu_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v1i64"
-        )]
-        fn _vqshlu_n_s64(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
-    }
-    unsafe { _vqshlu_n_s64(a, const { int64x1_t([N as i64]) }) }
-}
-#[doc = "Signed saturating shift left unsigned"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshluq_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshluq_n_s64<const N: i32>(a: int64x2_t) -> uint64x2_t {
-    static_assert_uimm_bits!(N, 6);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshlu.v2i64"
-        )]
-        fn _vqshluq_n_s64(a: int64x2_t, n: int64x2_t) -> uint64x2_t;
-    }
-    unsafe { _vqshluq_n_s64(a, const { int64x2_t([N as i64; 2]) }) }
-}
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
-        fn _vqshrn_n_s16(a: int16x8_t, n: int16x8_t) -> int8x8_t;
-    }
-    unsafe { _vqshrn_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
-}
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")]
-        fn _vqshrn_n_s32(a: int32x4_t, n: int32x4_t) -> int16x4_t;
-    }
-    unsafe { _vqshrn_n_s32(a, const { int32x4_t([-N; 4]) }) }
-}
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
-        fn _vqshrn_n_s64(a: int64x2_t, n: int64x2_t) -> int32x2_t;
-    }
-    unsafe { _vqshrn_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
-}
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.v8i8"
-        )]
-        fn _vqshrn_n_s16(a: int16x8_t, n: i32) -> int8x8_t;
-    }
-    unsafe { _vqshrn_n_s16(a, N) }
-}
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.v4i16"
-        )]
-        fn _vqshrn_n_s32(a: int32x4_t, n: i32) -> int16x4_t;
-    }
-    unsafe { _vqshrn_n_s32(a, N) }
-}
-#[doc = "Signed saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrn.v2i32"
-        )]
-        fn _vqshrn_n_s64(a: int64x2_t, n: i32) -> int32x2_t;
-    }
-    unsafe { _vqshrn_n_s64(a, N) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
-        fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
-    }
-    unsafe {
-        _vqshrn_n_u16(
-            a,
-            const {
-                uint16x8_t([
-                    -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16,
-                    -N as u16,
-                ])
-            },
-        )
-    }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
-        fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
-    }
-    unsafe {
-        _vqshrn_n_u32(
-            a,
-            const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) },
-        )
-    }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
-        fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64, -N as u64]) }) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.v8i8"
-        )]
-        fn _vqshrn_n_u16(a: uint16x8_t, n: i32) -> uint8x8_t;
-    }
-    unsafe { _vqshrn_n_u16(a, N) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.v4i16"
-        )]
-        fn _vqshrn_n_u32(a: uint32x4_t, n: i32) -> uint16x4_t;
-    }
-    unsafe { _vqshrn_n_u32(a, N) }
-}
-#[doc = "Unsigned saturating shift right narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.uqshrn.v2i32"
-        )]
-        fn _vqshrn_n_u64(a: uint64x2_t, n: i32) -> uint32x2_t;
-    }
-    unsafe { _vqshrn_n_u64(a, N) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
-        fn _vqshrun_n_s16(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
-    }
-    unsafe { _vqshrun_n_s16(a, const { int16x8_t([-N as i16; 8]) }) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
-        fn _vqshrun_n_s32(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
-    }
-    unsafe { _vqshrun_n_s32(a, const { int32x4_t([-N; 4]) }) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
-#[inline]
-#[cfg(target_arch = "arm")]
-#[target_feature(enable = "neon,v7")]
-#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
-        fn _vqshrun_n_s64(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
-    }
-    unsafe { _vqshrun_n_s64(a, const { int64x2_t([-N as i64; 2]) }) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
-    static_assert!(N >= 1 && N <= 8);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrun.v8i8"
-        )]
-        fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t;
-    }
-    unsafe { _vqshrun_n_s16(a, N) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
-    static_assert!(N >= 1 && N <= 16);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrun.v4i16"
-        )]
-        fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t;
-    }
-    unsafe { _vqshrun_n_s32(a, N) }
-}
-#[doc = "Signed saturating shift right unsigned narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg(not(target_arch = "arm"))]
-#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
-#[rustc_legacy_const_generics(1)]
-#[stable(feature = "neon_intrinsics", since = "1.59.0")]
-pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
-    static_assert!(N >= 1 && N <= 32);
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.sqshrun.v2i32"
-        )]
-        fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t;
-    }
-    unsafe { _vqshrun_n_s64(a, N) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(sqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Saturating subtract"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(uqsub)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
-    unsafe { simd_saturating_sub(a, b) }
-}
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let x = vraddhn_s16(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
-}
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let x = vraddhn_s32(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7]) }
-}
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let x = vraddhn_s64(b, c);
-    unsafe { simd_shuffle!(a, x, [0, 1, 2, 3]) }
-}
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    unsafe {
-        let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c)));
-        simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
-    }
-}
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    unsafe {
-        let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c)));
-        simd_shuffle!(a, x, [0, 1, 2, 3, 4, 5, 6, 7])
-    }
-}
-#[doc = "Rounding Add returning High Narrow (high half)."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn2)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    unsafe {
-        let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c)));
-        simd_shuffle!(a, x, [0, 1, 2, 3])
-    }
-}
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.raddhn.v8i8"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")]
-        fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t;
-    }
-    unsafe { _vraddhn_s16(a, b) }
-}
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.raddhn.v4i16"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")]
-        fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t;
-    }
-    unsafe { _vraddhn_s32(a, b) }
-}
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.raddhn.v2i32"
-        )]
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")]
-        fn _vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t;
-    }
-    unsafe { _vraddhn_s64(a, b) }
-}
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) }
-}
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint16x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vraddhn_s16(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) }
-}
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    let b: uint32x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(vraddhn_s32(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) }
-}
-#[doc = "Rounding Add returning High Narrow."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(raddhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(vraddhn_s64(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v4f16"
-        )]
-        fn _vrecpe_f16(a: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vrecpe_f16(a) }
-}
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v8f16"
-        )]
-        fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t;
-    }
-    unsafe { _vrecpeq_f16(a) }
-}
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v2f32"
-        )]
-        fn _vrecpe_f32(a: float32x2_t) -> float32x2_t;
-    }
-    unsafe { _vrecpe_f32(a) }
-}
-#[doc = "Reciprocal estimate."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecpe)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecpe.v4f32"
-        )]
-        fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vrecpeq_f32(a) }
-}
-#[doc = "Unsigned reciprocal estimate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urecpe)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urecpe.v2i32"
-        )]
-        fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t;
-    }
-    unsafe { _vrecpe_u32(a) }
-}
-#[doc = "Unsigned reciprocal estimate"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"]
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(urecpe)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.urecpe.v4i32"
-        )]
-        fn _vrecpeq_u32(a: uint32x4_t) -> uint32x4_t;
-    }
-    unsafe { _vrecpeq_u32(a) }
-}
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")]
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v4f16"
-        )]
-        fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
-    }
-    unsafe { _vrecps_f16(a, b) }
-}
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"]
-#[inline]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N >= 1 && N <= 8);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v8f16"
+            link_name = "llvm.aarch64.neon.sqshrun.v8i8"
         )]
-        fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
+        fn _vqshrun_n_s16(a: int16x8_t, n: i32) -> uint8x8_t;
     }
-    unsafe { _vrecpsq_f16(a, b) }
+    unsafe { _vqshrun_n_s16(a, N) }
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s32)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N >= 1 && N <= 16);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v2f32"
+            link_name = "llvm.aarch64.neon.sqshrun.v4i16"
         )]
-        fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+        fn _vqshrun_n_s32(a: int32x4_t, n: i32) -> uint16x4_t;
     }
-    unsafe { _vrecps_f32(a, b) }
+    unsafe { _vqshrun_n_s32(a, N) }
 }
-#[doc = "Floating-point reciprocal step"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"]
+#[doc = "Signed saturating shift right unsigned narrow"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrun_n_s64)"]
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(frecps)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+#[cfg(not(target_arch = "arm"))]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N >= 1 && N <= 32);
     unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")]
         #[cfg_attr(
             any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.frecps.v4f32"
+            link_name = "llvm.aarch64.neon.sqshrun.v2i32"
         )]
-        fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
-    }
-    unsafe { _vrecpsq_f32(a, b) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+        fn _vqshrun_n_s64(a: int64x2_t, n: i32) -> uint32x2_t;
+    }
+    unsafe { _vqshrun_n_s64(a, N) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
-    unsafe { transmute(a) }
+pub fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
-    unsafe { transmute(a) }
+pub fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
-    unsafe { transmute(a) }
+pub fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
-    unsafe { transmute(a) }
+pub fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(sqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsub_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
+#[doc = "Saturating subtract"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqsubq_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(uqsub)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    unsafe { simd_saturating_sub(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
+    let x = vraddhn_s16(b, c);
+    vcombine_s8(a, x)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
+    let x = vraddhn_s32(b, c);
+    vcombine_s16(a, x)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
+    let x = vraddhn_s64(b, c);
+    vcombine_s32(a, x)
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
     unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let x: uint8x8_t = transmute(vraddhn_s16(transmute(b), transmute(c)));
+        vcombine_u8(a, x)
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
     unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let x: uint16x4_t = transmute(vraddhn_s32(transmute(b), transmute(c)));
+        vcombine_u16(a, x)
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
+#[doc = "Rounding Add returning High Narrow (high half)."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_high_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(raddhn2)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
     unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let x: uint32x2_t = transmute(vraddhn_s64(transmute(b), transmute(c)));
+        vcombine_u32(a, x)
     }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.raddhn.v8i8"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")]
+        fn _vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t;
+    }
+    unsafe { _vraddhn_s16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.raddhn.v4i16"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")]
+        fn _vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t;
     }
+    unsafe { _vraddhn_s32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.raddhn.v2i32"
+        )]
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")]
+        fn _vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t;
+    }
+    unsafe { _vraddhn_s64(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i16"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    unsafe { transmute(vraddhn_s16(transmute(a), transmute(b))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i32"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    unsafe { transmute(vraddhn_s32(transmute(a), transmute(b))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
+#[doc = "Rounding Add returning High Narrow."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vraddhn_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vraddhn.i64"))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(raddhn)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    unsafe { transmute(vraddhn_s64(transmute(a), transmute(b))) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecpe)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
-    unsafe { transmute(a) }
+pub fn vrecpe_f16(a: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v4f16"
+        )]
+        fn _vrecpe_f16(a: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vrecpe_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecpe)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+pub fn vrecpeq_f16(a: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v8f16"
+        )]
+        fn _vrecpeq_f16(a: float16x8_t) -> float16x8_t;
     }
+    unsafe { _vrecpeq_f16(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecpe)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vrecpe_f32(a: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v2f32"
+        )]
+        fn _vrecpe_f32(a: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vrecpe_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
+#[doc = "Reciprocal estimate."]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecpe)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vrecpeq_f32(a: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecpe.v4f32"
+        )]
+        fn _vrecpeq_f32(a: float32x4_t) -> float32x4_t;
     }
+    unsafe { _vrecpeq_f32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
+#[doc = "Unsigned reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpe_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urecpe)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urecpe.v2i32"
+        )]
+        fn _vrecpe_u32(a: uint32x2_t) -> uint32x2_t;
+    }
+    unsafe { _vrecpe_u32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
+#[doc = "Unsigned reciprocal estimate"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpeq_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(urecpe)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.urecpe.v4i32"
+        )]
+        fn _vrecpeq_u32(a: uint32x4_t) -> uint32x4_t;
     }
+    unsafe { _vrecpeq_u32(a) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecps)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
-    unsafe { transmute(a) }
+pub fn vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v4f16"
+        )]
+        fn _vrecps_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t;
+    }
+    unsafe { _vrecps_f16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecps)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+pub fn vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v8f16")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v8f16"
+        )]
+        fn _vrecpsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t;
     }
+    unsafe { _vrecpsq_f16(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecps_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecps)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
-    unsafe { transmute(a) }
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v2f32"
+        )]
+        fn _vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+    unsafe { _vrecps_f32(a, b) }
 }
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
+#[doc = "Floating-point reciprocal step"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrecpsq_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
+    assert_instr(frecps)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
-#[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")]
+        #[cfg_attr(
+            any(target_arch = "aarch64", target_arch = "arm64ec"),
+            link_name = "llvm.aarch64.neon.frecps.v4f32"
+        )]
+        fn _vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t;
     }
+    unsafe { _vrecpsq_f32(a, b) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
+pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_f32_f16(a: float16x4_t) -> float32x2_t {
     unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
+pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
+pub fn vreinterpret_s8_f16(a: float16x4_t) -> int8x8_t {
     unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
+pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s32_f16(a: float16x4_t) -> int32x2_t {
     unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
+pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s64_f16(a: float16x4_t) -> int64x1_t {
     unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
+pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u8_f16(a: float16x4_t) -> uint8x8_t {
     unsafe {
-        let ret_val: float16x8_t = transmute(a);
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
+pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_u32_f16(a: float16x4_t) -> uint32x2_t {
     unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
+pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u64_f16(a: float16x4_t) -> uint64x1_t {
     unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
+pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
+pub fn vreinterpret_p8_f16(a: float16x4_t) -> poly8x8_t {
     unsafe {
-        let ret_val: float16x8_t = transmute(a);
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
+pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
-    let a: float16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_f32_f16(a: float16x8_t) -> float32x4_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
+pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s8_f16(a: float16x8_t) -> int8x16_t {
+    unsafe {
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
+pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
-    let a: float16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s32_f16(a: float16x8_t) -> int32x4_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
+pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
+pub fn vreinterpretq_s64_f16(a: float16x8_t) -> int64x2_t {
     unsafe {
-        let ret_val: float16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
+pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(nop)
 )]
-#[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
-pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u8_f16(a: float16x8_t) -> uint8x16_t {
     unsafe {
-        let ret_val: float16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -43898,17 +38340,18 @@ pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -43920,20 +38363,22 @@ pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u32_f16(a: float16x8_t) -> uint32x4_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -43945,17 +38390,18 @@ pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -43967,21 +38413,22 @@ pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u64_f16(a: float16x8_t) -> uint64x2_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -43993,17 +38440,18 @@ pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44015,21 +38463,26 @@ pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p8_f16(a: float16x8_t) -> poly8x16_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44041,17 +38494,18 @@ pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44063,21 +38517,22 @@ pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_f32(a: float32x2_t) -> float16x4_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44089,17 +38544,18 @@ pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44111,18 +38567,22 @@ pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_f32(a: float32x4_t) -> float16x8_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44134,17 +38594,18 @@ pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44156,21 +38617,22 @@ pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s8(a: int8x8_t) -> float16x4_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44182,17 +38644,18 @@ pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44204,21 +38667,23 @@ pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s8(a: int8x16_t) -> float16x8_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44230,17 +38695,18 @@ pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44252,21 +38718,22 @@ pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s32(a: int32x2_t) -> float16x4_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44278,17 +38745,18 @@ pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44300,18 +38768,22 @@ pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s32(a: int32x4_t) -> float16x8_t {
+    unsafe {
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44323,17 +38795,18 @@ pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44345,21 +38818,21 @@ pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s64(a: int64x1_t) -> float16x4_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44371,17 +38844,18 @@ pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44393,21 +38867,22 @@ pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
-    let a: float32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s64(a: int64x2_t) -> float16x8_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44419,17 +38894,18 @@ pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44441,18 +38917,22 @@ pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u8(a: uint8x8_t) -> float16x4_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44464,17 +38944,18 @@ pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44486,25 +38967,23 @@ pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u8(a: uint8x16_t) -> float16x8_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44516,17 +38995,18 @@ pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44538,21 +39018,22 @@ pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u32(a: uint32x2_t) -> float16x4_t {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44564,17 +39045,18 @@ pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44586,21 +39068,22 @@ pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u32(a: uint32x4_t) -> float16x8_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44612,17 +39095,18 @@ pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44634,21 +39118,21 @@ pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u64(a: uint64x1_t) -> float16x4_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44660,17 +39144,18 @@ pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44682,25 +39167,22 @@ pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u64(a: uint64x2_t) -> float16x8_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44712,17 +39194,18 @@ pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44734,21 +39217,22 @@ pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p8(a: poly8x8_t) -> float16x4_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -44760,17 +39244,18 @@ pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -44782,23 +39267,24 @@ pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p8(a: poly8x16_t) -> float16x8_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44808,19 +39294,19 @@ pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_s16_f16(a: float16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44830,23 +39316,19 @@ pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_u16_f16(a: float16x4_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44856,19 +39338,19 @@ pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p16_f16(a: float16x4_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44878,27 +39360,19 @@ pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_s16_f16(a: float16x8_t) -> int16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44908,19 +39382,19 @@ pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_u16_f16(a: float16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44930,23 +39404,19 @@ pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
-    let a: float32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p16_f16(a: float16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44956,19 +39426,19 @@ pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_s16(a: int16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -44978,23 +39448,19 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_s16(a: int16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45004,19 +39470,19 @@ pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_u16(a: uint16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45026,23 +39492,19 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_u16(a: uint16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45052,19 +39514,19 @@ pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p16(a: poly16x4_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45074,23 +39536,19 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p16(a: poly16x8_t) -> float16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45106,13 +39564,12 @@ pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
+pub fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45128,14 +39585,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_f32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45151,13 +39606,12 @@ pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
+pub fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_f32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45173,17 +39627,12 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45199,13 +39648,12 @@ pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
+pub fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45221,17 +39669,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45247,13 +39690,12 @@ pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
+pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45269,17 +39711,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45295,13 +39732,12 @@ pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
+pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45317,14 +39753,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45340,13 +39774,12 @@ pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
+pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45362,17 +39795,12 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45388,13 +39816,12 @@ pub fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
+pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45410,17 +39837,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45436,13 +39858,12 @@ pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
+pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45458,18 +39879,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45485,13 +39900,12 @@ pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
+pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45507,18 +39921,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45534,13 +39942,12 @@ pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
+pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45556,18 +39963,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45583,13 +39984,12 @@ pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
+pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45605,18 +40005,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45632,13 +40026,12 @@ pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
+pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45654,22 +40047,12 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45685,13 +40068,12 @@ pub fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45707,18 +40089,12 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45734,13 +40110,12 @@ pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
+pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45756,18 +40131,12 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
+pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45783,13 +40152,12 @@ pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
+pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45805,18 +40173,12 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45832,13 +40194,12 @@ pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
+pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45854,22 +40215,12 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
-    }
+pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45885,13 +40236,12 @@ pub fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
+pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45907,18 +40257,12 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45934,13 +40278,12 @@ pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
+pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45956,17 +40299,12 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -45982,13 +40320,12 @@ pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
+pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46004,17 +40341,12 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
+pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46030,13 +40362,12 @@ pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
+pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
 #[inline]
-#[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -46052,19 +40383,15 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
+pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+    unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46072,21 +40399,22 @@ pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p128)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46094,22 +40422,25 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p128(a: p128) -> float16x8_t {
+    unsafe {
+        let ret_val: float16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46117,21 +40448,22 @@ pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46139,25 +40471,25 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_p64_f16(a: float16x4_t) -> poly64x1_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46165,21 +40497,22 @@ pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46187,25 +40520,25 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p128_f16(a: float16x8_t) -> p128 {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46213,21 +40546,22 @@ pub fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_f16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46235,25 +40569,26 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_p64_f16(a: float16x8_t) -> poly64x2_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
+        let a: float16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46261,21 +40596,22 @@ pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46283,22 +40619,25 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpret_f16_p64(a: poly64x1_t) -> float16x4_t {
+    unsafe {
+        let ret_val: float16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46306,21 +40645,22 @@ pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -46328,21 +40668,22 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
 )]
 #[cfg_attr(
     not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
 )]
 #[cfg_attr(
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+#[cfg(not(target_arch = "arm64ec"))]
+pub fn vreinterpretq_f16_p64(a: poly64x2_t) -> float16x8_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46360,11 +40701,11 @@ pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
+pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p128)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46382,15 +40723,14 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_f32_p128(a: p128) -> float32x4_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
+        let ret_val: float32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46408,11 +40748,11 @@ pub fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
+pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46430,15 +40770,15 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46456,11 +40796,11 @@ pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
+pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46478,19 +40818,15 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46508,11 +40844,11 @@ pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
+pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46530,15 +40866,14 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46556,11 +40891,11 @@ pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
+pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46578,15 +40913,15 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46604,11 +40939,11 @@ pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
+pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46626,19 +40961,15 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46656,11 +40987,11 @@ pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
+pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46678,15 +41009,14 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46704,11 +41034,11 @@ pub fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
+pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46726,15 +41056,15 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46752,11 +41082,11 @@ pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
+pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46774,15 +41104,15 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46800,11 +41130,11 @@ pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
+pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46822,19 +41152,14 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46852,11 +41177,11 @@ pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
+pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46874,15 +41199,19 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46900,11 +41229,11 @@ pub fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
+pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46922,15 +41251,15 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46948,11 +41277,11 @@ pub fn vreinterpret_f32_s32(a: int32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
+pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -46970,15 +41299,15 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -46996,11 +41325,11 @@ pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
+pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47018,15 +41347,19 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47044,11 +41377,11 @@ pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
+pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47066,12 +41399,15 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t {
+    unsafe {
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47089,11 +41425,11 @@ pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
+pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47111,15 +41447,15 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47137,11 +41473,11 @@ pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
+pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47159,15 +41495,19 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47185,11 +41525,11 @@ pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
+pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_f32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47207,15 +41547,15 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: float32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47233,11 +41573,11 @@ pub fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
+pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47255,12 +41595,15 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t {
+    unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47278,11 +41621,11 @@ pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
+pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47300,15 +41643,15 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47326,11 +41669,11 @@ pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
+pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47348,15 +41691,15 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47374,11 +41717,11 @@ pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
+pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47396,15 +41739,14 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47422,11 +41764,11 @@ pub fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
+pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47444,19 +41786,15 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47474,11 +41812,11 @@ pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
+pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47496,15 +41834,15 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47522,11 +41860,11 @@ pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
+pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47544,15 +41882,14 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47570,11 +41907,11 @@ pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
+pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47592,19 +41929,15 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47622,11 +41955,11 @@ pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
+pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47644,15 +41977,14 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47670,11 +42002,11 @@ pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
+pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47692,15 +42024,15 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47718,11 +42050,11 @@ pub fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
+pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47740,15 +42072,16 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47766,11 +42099,11 @@ pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
+pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47788,19 +42121,16 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47818,11 +42148,11 @@ pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
+pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47840,15 +42170,16 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47866,11 +42197,11 @@ pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47888,14 +42219,16 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+pub fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47913,11 +42246,11 @@ pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47935,14 +42268,16 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
+pub fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -47960,11 +42295,11 @@ pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -47982,14 +42317,16 @@ pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
+pub fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48007,11 +42344,11 @@ pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48029,14 +42366,16 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+pub fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48054,11 +42393,11 @@ pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48076,14 +42415,16 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
+pub fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48101,11 +42442,11 @@ pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48123,14 +42464,16 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
+pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48148,11 +42491,11 @@ pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
+pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48170,15 +42513,17 @@ pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
+pub fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -48194,11 +42539,37 @@ pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
+pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t {
+    unsafe {
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48216,11 +42587,11 @@ pub fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48238,14 +42609,15 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
+pub fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48263,11 +42635,11 @@ pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48285,14 +42657,14 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
+pub fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48310,11 +42682,11 @@ pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
+pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48332,15 +42704,15 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48358,11 +42730,11 @@ pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
+pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48380,19 +42752,15 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48410,11 +42778,11 @@ pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
+pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48432,15 +42800,14 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48458,11 +42825,11 @@ pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
+pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48480,15 +42847,15 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48506,11 +42873,11 @@ pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
+pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48528,19 +42895,14 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48558,11 +42920,11 @@ pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48580,15 +42942,14 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48606,11 +42967,11 @@ pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
+pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48628,15 +42989,15 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48654,11 +43015,11 @@ pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
+pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48676,15 +43037,19 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48702,11 +43067,11 @@ pub fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
+pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48724,19 +43089,15 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48754,11 +43115,11 @@ pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
+pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48776,15 +43137,15 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48802,11 +43163,11 @@ pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
+pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48824,15 +43185,19 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48850,11 +43215,11 @@ pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
+pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48872,15 +43237,15 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48898,11 +43263,11 @@ pub fn vreinterpret_s8_u8(a: uint8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48920,15 +43285,15 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48946,11 +43311,11 @@ pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
+pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -48968,15 +43333,19 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -48994,11 +43363,11 @@ pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
+pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49016,12 +43385,15 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+    unsafe {
+        let a: int16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49039,11 +43411,11 @@ pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49061,15 +43433,15 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49087,11 +43459,11 @@ pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
+pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49109,15 +43481,15 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49135,11 +43507,11 @@ pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
+pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49157,12 +43529,14 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t {
+    unsafe {
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49180,11 +43554,11 @@ pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
+pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49202,15 +43576,15 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49228,11 +43602,11 @@ pub fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
+pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49250,15 +43624,15 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49276,11 +43650,11 @@ pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
+pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49298,16 +43672,14 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49325,11 +43697,11 @@ pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
+pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49347,20 +43719,15 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49378,11 +43745,11 @@ pub fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
+pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49400,16 +43767,15 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49427,11 +43793,11 @@ pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
+pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49449,16 +43815,14 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49476,11 +43840,11 @@ pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
+pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49498,16 +43862,14 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49525,11 +43887,11 @@ pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49547,16 +43909,19 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49574,11 +43939,11 @@ pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
+pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49596,16 +43961,15 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49623,11 +43987,11 @@ pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
+pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49645,16 +44009,15 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49672,11 +44035,11 @@ pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
+pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49694,11 +44057,10 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
             ret_val,
@@ -49707,7 +44069,7 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49725,11 +44087,11 @@ pub fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
+pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49747,16 +44109,15 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49774,11 +44135,11 @@ pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
+pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49796,15 +44157,15 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49822,11 +44183,11 @@ pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
+pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49844,15 +44205,19 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49870,11 +44235,11 @@ pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
+pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49892,15 +44257,15 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49918,11 +44283,11 @@ pub fn vreinterpret_s16_u16(a: uint16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49940,15 +44305,15 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
+        let a: int32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -49966,11 +44331,11 @@ pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
+pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -49988,12 +44353,14 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t {
+    unsafe {
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50011,11 +44378,11 @@ pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
+pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50033,15 +44400,14 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
+        let ret_val: int8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50059,11 +44425,11 @@ pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
+pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50081,15 +44447,14 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50107,11 +44472,11 @@ pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
+pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50129,12 +44494,14 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t {
+    unsafe {
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50152,11 +44519,11 @@ pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
+pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50174,15 +44541,14 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
+        let ret_val: uint8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50200,11 +44566,11 @@ pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
+pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50222,15 +44588,14 @@ pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
+        let ret_val: uint16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50248,11 +44613,11 @@ pub fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
+pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50270,15 +44635,14 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50296,11 +44660,11 @@ pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
+pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50318,19 +44682,14 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50348,11 +44707,11 @@ pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
+pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50370,15 +44729,14 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50396,11 +44754,11 @@ pub fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
+pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50418,15 +44776,14 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50444,11 +44801,11 @@ pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
+pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50466,15 +44823,15 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50492,11 +44849,11 @@ pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
+pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50514,10 +44871,10 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
             ret_val,
@@ -50526,7 +44883,7 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50544,11 +44901,11 @@ pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
+pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50566,15 +44923,15 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50592,11 +44949,11 @@ pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50614,15 +44971,15 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50640,11 +44997,11 @@ pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
+pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50662,10 +45019,10 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
             ret_val,
@@ -50674,7 +45031,7 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50692,11 +45049,11 @@ pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
+pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50714,15 +45071,15 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50740,11 +45097,11 @@ pub fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
+pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50762,15 +45119,15 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50788,11 +45145,11 @@ pub fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
+pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50810,15 +45167,19 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50836,11 +45197,11 @@ pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
+pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_s64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50858,15 +45219,15 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: int64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50884,11 +45245,11 @@ pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
+pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50906,15 +45267,15 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50932,11 +45293,11 @@ pub fn vreinterpret_s32_u32(a: uint32x2_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
+pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50954,12 +45315,15 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -50977,11 +45341,11 @@ pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -50999,15 +45363,15 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51025,11 +45389,11 @@ pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
+pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51047,15 +45411,14 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51073,11 +45436,11 @@ pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
+pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51095,12 +45458,15 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t {
+    unsafe {
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51118,11 +45484,11 @@ pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
+pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51140,15 +45506,15 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51166,11 +45532,11 @@ pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51188,15 +45554,14 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51214,11 +45579,11 @@ pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
+pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51236,15 +45601,15 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51262,11 +45627,11 @@ pub fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
+pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51284,19 +45649,14 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51314,11 +45674,11 @@ pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
+pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51336,15 +45696,15 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51362,11 +45722,11 @@ pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
+pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51384,15 +45744,16 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51410,11 +45771,11 @@ pub fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
+pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51432,15 +45793,16 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51458,11 +45820,11 @@ pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
+pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51480,19 +45842,16 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51510,11 +45869,11 @@ pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
+pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51532,15 +45891,16 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51558,11 +45918,11 @@ pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
+pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51580,15 +45940,16 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51606,11 +45967,11 @@ pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
+pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51628,19 +45989,16 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51658,11 +46016,11 @@ pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
+pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51680,15 +46038,16 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51706,11 +46065,11 @@ pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51728,14 +46087,16 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
+pub fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51753,11 +46114,11 @@ pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51775,14 +46136,16 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
+pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     unsafe {
-        let ret_val: int8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51800,11 +46163,11 @@ pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51822,14 +46185,15 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
+pub fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t {
     unsafe {
-        let ret_val: int16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51847,11 +46211,11 @@ pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51869,15 +46233,17 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
+pub fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
 #[inline]
+#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
@@ -51893,11 +46259,37 @@ pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
+pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u16)"]
+#[inline]
+#[cfg(target_endian = "big")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t {
+    unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51915,11 +46307,11 @@ pub fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51937,14 +46329,14 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+pub fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -51962,11 +46354,11 @@ pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -51984,14 +46376,15 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
+pub fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52009,11 +46402,11 @@ pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52031,14 +46424,15 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
+pub fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t {
     unsafe {
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: uint32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52056,11 +46450,11 @@ pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52078,14 +46472,14 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
+pub fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52103,11 +46497,11 @@ pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52125,14 +46519,15 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+pub fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52150,11 +46545,11 @@ pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
+pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52172,15 +46567,14 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52198,11 +46592,11 @@ pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
+pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52220,19 +46614,14 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52250,11 +46639,11 @@ pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
+pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52272,15 +46661,15 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52298,11 +46687,11 @@ pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
+pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52320,15 +46709,19 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52346,11 +46739,11 @@ pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
+pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52368,15 +46761,15 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52394,11 +46787,11 @@ pub fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
+pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52416,19 +46809,15 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52446,11 +46835,11 @@ pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
+pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52468,15 +46857,19 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52494,11 +46887,11 @@ pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
+pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52516,15 +46909,15 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t {
     unsafe {
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: uint32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52542,11 +46935,11 @@ pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
+pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52564,19 +46957,15 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52594,11 +46983,11 @@ pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
+pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52616,15 +47005,19 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
+pub fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52642,11 +47035,11 @@ pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
+pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52664,15 +47057,15 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     unsafe {
-        let ret_val: float32x2_t = transmute(a);
+        let a: uint16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52690,11 +47083,11 @@ pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
+pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52712,15 +47105,15 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t {
     unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52738,11 +47131,11 @@ pub fn vreinterpret_s8_p8(a: poly8x8_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
+pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52760,15 +47153,15 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t {
     unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52786,11 +47179,11 @@ pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
+pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52808,15 +47201,14 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t {
     unsafe {
-        let ret_val: int32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52834,11 +47226,11 @@ pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
+pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52856,12 +47248,15 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t {
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52879,11 +47274,11 @@ pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
+pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52901,15 +47296,15 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52927,11 +47322,11 @@ pub fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
+pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52949,15 +47344,14 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -52975,11 +47369,11 @@ pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
+pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -52997,15 +47391,15 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53023,11 +47417,11 @@ pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
+pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53045,12 +47439,15 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t {
+    unsafe {
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53068,11 +47465,11 @@ pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
+pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53090,15 +47487,14 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     unsafe {
-        let ret_val: poly16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53116,11 +47512,11 @@ pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
+pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53138,16 +47534,14 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     unsafe {
-        let ret_val: float32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53165,11 +47559,11 @@ pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
+pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53187,10 +47581,9 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t {
     unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: int8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
@@ -53200,7 +47593,7 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53218,11 +47611,11 @@ pub fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
+pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53240,16 +47633,15 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t {
     unsafe {
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
         let ret_val: int16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53267,11 +47659,11 @@ pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53289,16 +47681,15 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53316,11 +47707,11 @@ pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
+pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53338,16 +47729,19 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53365,11 +47759,11 @@ pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
+pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53387,20 +47781,15 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53418,11 +47807,11 @@ pub fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
+pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53440,16 +47829,15 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53467,11 +47855,11 @@ pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53489,16 +47877,19 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53516,11 +47907,11 @@ pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
+pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53538,16 +47929,15 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53565,11 +47955,11 @@ pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
+pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53587,16 +47977,15 @@ pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint32x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53614,11 +48003,11 @@ pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53636,15 +48025,14 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t {
     unsafe {
         let ret_val: float32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53662,11 +48050,11 @@ pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
+pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53684,15 +48072,14 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t {
     unsafe {
         let ret_val: int8x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53710,11 +48097,11 @@ pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
+pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53732,15 +48119,14 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t {
     unsafe {
         let ret_val: int16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53758,11 +48144,11 @@ pub fn vreinterpret_s16_p16(a: poly16x4_t) -> int16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
+pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53780,15 +48166,14 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t {
     unsafe {
         let ret_val: int32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53806,11 +48191,11 @@ pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
+pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53828,12 +48213,14 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t {
+    unsafe {
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53851,11 +48238,11 @@ pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
+pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53873,15 +48260,14 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t {
     unsafe {
-        let ret_val: uint8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53899,11 +48285,11 @@ pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
+pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53921,15 +48307,14 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t {
     unsafe {
-        let ret_val: uint16x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53947,11 +48332,11 @@ pub fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
+pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -53969,15 +48354,14 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t {
     unsafe {
-        let ret_val: uint32x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -53995,11 +48379,11 @@ pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
+pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54017,12 +48401,14 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t {
+    unsafe {
+        let ret_val: poly16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54040,11 +48426,11 @@ pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
+pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54062,15 +48448,14 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     unsafe {
-        let ret_val: poly8x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54088,11 +48473,11 @@ pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54110,15 +48495,15 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t {
     unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: float32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54136,11 +48521,11 @@ pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
+pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54158,9 +48543,9 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t {
     unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
@@ -54170,7 +48555,7 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54188,11 +48573,11 @@ pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
+pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54210,15 +48595,15 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t {
     unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54236,11 +48621,11 @@ pub fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54258,15 +48643,15 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t {
     unsafe {
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54284,11 +48669,11 @@ pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
+pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54306,15 +48691,19 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54332,11 +48721,11 @@ pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54354,19 +48743,15 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54384,11 +48769,11 @@ pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
+pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54406,15 +48791,15 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54432,11 +48817,11 @@ pub fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
+pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54454,15 +48839,19 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54480,11 +48869,11 @@ pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
+pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_u64)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54502,15 +48891,15 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: uint64x2_t = simd_shuffle!(a, a, [1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
@@ -54528,11 +48917,11 @@ pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
+pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
 #[target_feature(enable = "neon")]
@@ -54550,23 +48939,19 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54580,15 +48965,15 @@ pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54602,22 +48987,19 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+pub fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t {
     unsafe {
-        let ret_val: int8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54631,15 +49013,15 @@ pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54653,18 +49035,19 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+pub fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t {
     unsafe {
-        let ret_val: int16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54678,15 +49061,15 @@ pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
+pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54700,18 +49083,18 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
+pub fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t {
     unsafe {
-        let ret_val: int32x4_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54725,15 +49108,15 @@ pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54747,18 +49130,19 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+pub fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t {
     unsafe {
-        let ret_val: int64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54772,15 +49156,15 @@ pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
+pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54794,22 +49178,19 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
+pub fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t {
     unsafe {
-        let ret_val: uint8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54823,15 +49204,15 @@ pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54845,18 +49226,18 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+pub fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t {
     unsafe {
-        let ret_val: uint16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54870,15 +49251,15 @@ pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
+pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54892,18 +49273,19 @@ pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
+pub fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t {
     unsafe {
-        let ret_val: uint32x4_t = transmute(a);
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54917,15 +49299,15 @@ pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54939,18 +49321,18 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     unsafe {
-        let ret_val: uint64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54964,15 +49346,15 @@ pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
+pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -54986,22 +49368,19 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
+pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     unsafe {
-        let ret_val: poly8x16_t = transmute(a);
-        simd_shuffle!(
-            ret_val,
-            ret_val,
-            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-        )
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55015,15 +49394,15 @@ pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55037,18 +49416,20 @@ pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+pub fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t {
     unsafe {
-        let ret_val: poly16x8_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55062,15 +49443,15 @@ pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
+pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55084,18 +49465,20 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
+pub fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55109,15 +49492,15 @@ pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
+pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55131,16 +49514,20 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55154,15 +49541,15 @@ pub fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
+pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55176,17 +49563,20 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55200,15 +49590,15 @@ pub fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
+pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55222,20 +49612,20 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
-    let a: int8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55249,15 +49639,15 @@ pub fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
+pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55271,16 +49661,20 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
-    let a: int16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55294,15 +49688,15 @@ pub fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
+pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55316,16 +49710,20 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55339,15 +49737,15 @@ pub fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
+pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55361,19 +49759,20 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
-    let a: int16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly16x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55387,15 +49786,15 @@ pub fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
+pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55409,16 +49808,20 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
-    let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+    unsafe {
+        let a: poly8x16_t =
+            simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55432,15 +49835,15 @@ pub fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
+pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_f32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55454,16 +49857,19 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: float32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55477,15 +49883,15 @@ pub fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
+pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_s32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55499,19 +49905,19 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
-    let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55525,15 +49931,15 @@ pub fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
+pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_s64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55547,16 +49953,19 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
-    let a: int64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: int32x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55570,15 +49979,15 @@ pub fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
+pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55592,16 +50001,18 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55615,15 +50026,15 @@ pub fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
+pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55637,17 +50048,19 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55661,15 +50074,15 @@ pub fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
+pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55683,20 +50096,19 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
-    let a: uint8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: uint32x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55710,15 +50122,15 @@ pub fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
+pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55732,16 +50144,18 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
-    let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55755,15 +50169,15 @@ pub fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
+pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55777,16 +50191,19 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t {
+    unsafe {
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        let ret_val: poly8x8_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55800,15 +50217,15 @@ pub fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
+pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55822,19 +50239,18 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly16x4_t = simd_shuffle!(a, a, [3, 2, 1, 0]);
+        transmute(a)
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55848,15 +50264,15 @@ pub fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
+pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55870,16 +50286,18 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
-    let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        transmute(a)
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55893,15 +50311,15 @@ pub fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
+pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_f32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55915,16 +50333,19 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: float32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55938,15 +50359,15 @@ pub fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
+pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_u32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55960,19 +50381,23 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
+pub fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -55986,15 +50411,15 @@ pub fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
+pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_u64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56008,16 +50433,19 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56031,15 +50459,15 @@ pub fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
+pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56053,16 +50481,19 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: int64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56076,15 +50507,15 @@ pub fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
+pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56098,17 +50529,23 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56122,15 +50559,15 @@ pub fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
+pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56144,20 +50581,19 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
-    let a: poly8x16_t =
-        unsafe { simd_shuffle!(a, a, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) };
+pub fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t {
     unsafe {
-        let ret_val: poly64x2_t = transmute(a);
-        simd_shuffle!(ret_val, ret_val, [1, 0])
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint32x4_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56171,15 +50607,15 @@ pub fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
+pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p64_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56193,16 +50629,19 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
-    let a: poly16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: uint64x2_t = transmute(a);
+        simd_shuffle!(ret_val, ret_val, [1, 0])
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56216,15 +50655,15 @@ pub fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
+pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
     unsafe { transmute(a) }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56238,16 +50677,23 @@ pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
     target_arch = "arm",
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
-pub fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe { transmute(a) }
+pub fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t {
+    unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let ret_val: poly8x16_t = transmute(a);
+        simd_shuffle!(
+            ret_val,
+            ret_val,
+            [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
+        )
+    }
 }
 #[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56268,8 +50714,8 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p16)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56284,8 +50730,8 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
-    let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
     unsafe {
+        let a: poly16x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: poly64x2_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [1, 0])
     }
@@ -56294,8 +50740,8 @@ pub fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56316,8 +50762,8 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56341,8 +50787,8 @@ pub fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56363,8 +50809,8 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56388,8 +50834,8 @@ pub fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56410,8 +50856,8 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_s32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56435,8 +50881,8 @@ pub fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56457,8 +50903,8 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56482,8 +50928,8 @@ pub fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56504,8 +50950,8 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56529,8 +50975,8 @@ pub fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56551,8 +50997,8 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_u32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56576,8 +51022,8 @@ pub fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56598,8 +51044,8 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56623,8 +51069,8 @@ pub fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56645,8 +51091,8 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpret_p16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56667,56 +51113,11 @@ pub fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t {
     }
 }
 #[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
-#[inline]
-#[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(nop)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    unsafe { transmute(a) }
-}
-#[doc = "Vector reinterpret cast operation"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56737,8 +51138,8 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56753,8 +51154,8 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
@@ -56767,8 +51168,8 @@ pub fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56789,8 +51190,8 @@ pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56805,8 +51206,8 @@ pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
@@ -56815,8 +51216,8 @@ pub fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56837,8 +51238,8 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56853,8 +51254,8 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: int32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
@@ -56863,8 +51264,8 @@ pub fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56885,8 +51286,8 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56901,8 +51302,8 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: uint8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
@@ -56915,8 +51316,8 @@ pub fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56937,8 +51338,8 @@ pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56953,8 +51354,8 @@ pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: uint16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
@@ -56963,8 +51364,8 @@ pub fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -56985,8 +51386,8 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -57001,8 +51402,8 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: uint32x4_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
     }
@@ -57011,8 +51412,8 @@ pub fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -57033,8 +51434,8 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -57049,8 +51450,8 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: poly8x16_t = transmute(a);
         simd_shuffle!(
             ret_val,
@@ -57063,8 +51464,8 @@ pub fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
 #[inline]
 #[cfg(target_endian = "little")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -57085,8 +51486,8 @@ pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p64)"]
 #[inline]
 #[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,aes")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
 #[cfg_attr(
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
@@ -57101,12 +51502,264 @@ pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t {
-    let a: poly64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
     unsafe {
+        let a: poly64x2_t = simd_shuffle!(a, a, [1, 0]);
         let ret_val: poly16x8_t = transmute(a);
         simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
     }
 }
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s8_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s8_p128(a: p128) -> int8x16_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s16_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s16_p128(a: p128) -> int16x8_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s32_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s32_p128(a: p128) -> int32x4_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_s64_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_s64_p128(a: p128) -> int64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u8_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u16_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u32_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_u64_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p8_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p16_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p64_p128)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t {
+    unsafe { transmute(a) }
+}
+#[doc = "Vector reinterpret cast operation"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vreinterpretq_p128_p64)"]
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(nop)
+)]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
+pub fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 {
+    unsafe { transmute(a) }
+}
 #[doc = "Reversing vector elements (swap endianness)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrev16_p8)"]
 #[inline]
@@ -57873,7 +52526,14 @@ pub fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t {
     assert_instr(rev64)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vrev64_f16(a: float16x4_t) -> float16x4_t {
     unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }
@@ -57888,7 +52548,14 @@ pub fn vrev64_f16(a: float16x4_t) -> float16x4_t {
     assert_instr(rev64)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vrev64q_f16(a: float16x8_t) -> float16x8_t {
     unsafe { simd_shuffle!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) }
@@ -58251,7 +52918,14 @@ pub fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     assert_instr(frintn)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vrndn_f16(a: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -58273,7 +52947,14 @@ pub fn vrndn_f16(a: float16x4_t) -> float16x4_t {
     assert_instr(frintn)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vrndnq_f16(a: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -59357,7 +54038,14 @@ pub fn vrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(frsqrte)
 )]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -59380,7 +54068,14 @@ pub fn vrsqrte_f16(a: float16x4_t) -> float16x4_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(frsqrte)
 )]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrteq_f16(a: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -59519,7 +54214,14 @@ pub fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(frsqrts)
 )]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe extern "unadjusted" {
@@ -59542,7 +54244,14 @@ pub fn vrsqrts_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
     assert_instr(frsqrts)
 )]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vrsqrtsq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe extern "unadjusted" {
@@ -60071,7 +54780,6 @@ pub fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
 #[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
@@ -60091,36 +54799,8 @@ pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
     unsafe { transmute(vrsubhn_s16(transmute(a), transmute(b))) }
 }
 #[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u16)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
-    let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint16x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vrsubhn_s16(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
@@ -60140,36 +54820,8 @@ pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
     unsafe { transmute(vrsubhn_s32(transmute(a), transmute(b))) }
 }
 #[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u32)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
-    let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
-    let b: uint32x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint16x4_t = transmute(vrsubhn_s32(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
-    }
-}
-#[doc = "Rounding subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
@@ -60188,33 +54840,6 @@ pub fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
 pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
     unsafe { transmute(vrsubhn_s64(transmute(a), transmute(b))) }
 }
-#[doc = "Rounding subtract returning high narrow"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrsubhn_u64)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
-#[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
-    assert_instr(rsubhn)
-)]
-#[cfg_attr(
-    not(target_arch = "arm"),
-    stable(feature = "neon_intrinsics", since = "1.59.0")
-)]
-#[cfg_attr(
-    target_arch = "arm",
-    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
-)]
-pub fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
-    let a: uint64x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
-    let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) };
-    unsafe {
-        let ret_val: uint32x2_t = transmute(vrsubhn_s64(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [1, 0])
-    }
-}
 #[doc = "Insert vector element from another vector element"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vset_lane_f16)"]
 #[inline]
@@ -61060,117 +55685,229 @@ pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t)
     }
     unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) }
 }
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v16i8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")]
+        fn _vshiftlins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+    }
+    unsafe { _vshiftlins_v16i8(a, b, const { int8x16_t([N as i8; 16]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v1i64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")]
+        fn _vshiftlins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t;
+    }
+    unsafe { _vshiftlins_v1i64(a, b, const { int64x1_t([N as i64; 1]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v2i32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")]
+        fn _vshiftlins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
+    }
+    unsafe { _vshiftlins_v2i32(a, b, const { int32x2_t([N; 2]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v2i64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")]
+        fn _vshiftlins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+    }
+    unsafe { _vshiftlins_v2i64(a, b, const { int64x2_t([N as i64; 2]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v4i16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")]
+        fn _vshiftlins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
+    }
+    unsafe { _vshiftlins_v4i16(a, b, const { int16x4_t([N as i16; 4]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v4i32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")]
+        fn _vshiftlins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+    }
+    unsafe { _vshiftlins_v4i32(a, b, const { int32x4_t([N; 4]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v8i16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")]
+        fn _vshiftlins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+    }
+    unsafe { _vshiftlins_v8i16(a, b, const { int16x8_t([N as i16; 8]) }) }
+}
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg(target_arch = "arm")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
+#[rustc_legacy_const_generics(2)]
+fn vshiftlins_v8i8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    unsafe extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")]
+        fn _vshiftlins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
+    }
+    unsafe { _vshiftlins_v8i8(a, b, const { int8x8_t([N as i8; 8]) }) }
+}
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v16i8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v16i8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t {
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v16i8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")]
-        fn _vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
+        fn _vshiftrins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t;
     }
-    unsafe { _vshiftins_v16i8(a, b, c) }
+    unsafe { _vshiftrins_v16i8(a, b, const { int8x16_t([-N as i8; 16]) }) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v1i64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v1i64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t {
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v1i64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")]
-        fn _vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t;
+        fn _vshiftrins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t;
     }
-    unsafe { _vshiftins_v1i64(a, b, c) }
+    unsafe { _vshiftrins_v1i64(a, b, const { int64x1_t([-N as i64; 1]) }) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v2i32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")]
-        fn _vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
+        fn _vshiftrins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t;
     }
-    unsafe { _vshiftins_v2i32(a, b, c) }
+    unsafe { _vshiftrins_v2i32(a, b, const { int32x2_t([-N; 2]) }) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i64)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i64)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t {
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v2i64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")]
-        fn _vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
+        fn _vshiftrins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t;
     }
-    unsafe { _vshiftins_v2i64(a, b, c) }
+    unsafe { _vshiftrins_v2i64(a, b, const { int64x2_t([-N as i64; 2]) }) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v4i16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")]
-        fn _vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
+        fn _vshiftrins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t;
     }
-    unsafe { _vshiftins_v4i16(a, b, c) }
+    unsafe { _vshiftrins_v4i16(a, b, const { int16x4_t([-N as i16; 4]) }) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i32)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i32)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v4i32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")]
-        fn _vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
+        fn _vshiftrins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t;
     }
-    unsafe { _vshiftins_v4i32(a, b, c) }
+    unsafe { _vshiftrins_v4i32(a, b, const { int32x4_t([-N; 4]) }) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i16)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i16)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v8i16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")]
-        fn _vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
+        fn _vshiftrins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t;
     }
-    unsafe { _vshiftins_v8i16(a, b, c) }
+    unsafe { _vshiftrins_v8i16(a, b, const { int16x8_t([-N as i16; 8]) }) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i8)"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i8)"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
+#[rustc_legacy_const_generics(2)]
+fn vshiftrins_v8i8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")]
-        fn _vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
+        fn _vshiftrins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
     }
-    unsafe { _vshiftins_v8i8(a, b, c) }
+    unsafe { _vshiftrins_v8i8(a, b, const { int8x8_t([-N as i8; 8]) }) }
 }
 #[doc = "Shift left"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"]
@@ -62706,7 +57443,7 @@ pub fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     static_assert_uimm_bits!(N, 3);
-    vshiftins_v8i8(a, b, int8x8_t::splat(N as i8))
+    vshiftlins_v8i8::<N>(a, b)
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"]
@@ -62718,7 +57455,7 @@ pub fn vsli_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     static_assert_uimm_bits!(N, 3);
-    vshiftins_v16i8(a, b, int8x16_t::splat(N as i8))
+    vshiftlins_v16i8::<N>(a, b)
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"]
@@ -62730,7 +57467,7 @@ pub fn vsliq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert_uimm_bits!(N, 4);
-    vshiftins_v4i16(a, b, int16x4_t::splat(N as i16))
+    vshiftlins_v4i16::<N>(a, b)
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"]
@@ -62742,7 +57479,7 @@ pub fn vsli_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     static_assert_uimm_bits!(N, 4);
-    vshiftins_v8i16(a, b, int16x8_t::splat(N as i16))
+    vshiftlins_v8i16::<N>(a, b)
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"]
@@ -62754,7 +57491,7 @@ pub fn vsliq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert!(N >= 0 && N <= 31);
-    vshiftins_v2i32(a, b, int32x2_t::splat(N))
+    vshiftlins_v2i32::<N>(a, b)
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"]
@@ -62766,7 +57503,7 @@ pub fn vsli_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     static_assert!(N >= 0 && N <= 31);
-    vshiftins_v4i32(a, b, int32x4_t::splat(N))
+    vshiftlins_v4i32::<N>(a, b)
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"]
@@ -62778,7 +57515,7 @@ pub fn vsliq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     static_assert!(N >= 0 && N <= 63);
-    vshiftins_v1i64(a, b, int64x1_t::splat(N as i64))
+    vshiftlins_v1i64::<N>(a, b)
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"]
@@ -62790,7 +57527,7 @@ pub fn vsli_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     static_assert!(N >= 0 && N <= 63);
-    vshiftins_v2i64(a, b, int64x2_t::splat(N as i64))
+    vshiftlins_v2i64::<N>(a, b)
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"]
@@ -62802,13 +57539,7 @@ pub fn vsliq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        transmute(vshiftins_v8i8(
-            transmute(a),
-            transmute(b),
-            int8x8_t::splat(N as i8),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v8i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"]
@@ -62820,13 +57551,7 @@ pub fn vsli_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        transmute(vshiftins_v16i8(
-            transmute(a),
-            transmute(b),
-            int8x16_t::splat(N as i8),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v16i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"]
@@ -62838,13 +57563,7 @@ pub fn vsliq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        transmute(vshiftins_v4i16(
-            transmute(a),
-            transmute(b),
-            int16x4_t::splat(N as i16),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v4i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"]
@@ -62856,13 +57575,7 @@ pub fn vsli_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        transmute(vshiftins_v8i16(
-            transmute(a),
-            transmute(b),
-            int16x8_t::splat(N as i16),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v8i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"]
@@ -62874,13 +57587,7 @@ pub fn vsliq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     static_assert!(N >= 0 && N <= 31);
-    unsafe {
-        transmute(vshiftins_v2i32(
-            transmute(a),
-            transmute(b),
-            int32x2_t::splat(N),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v2i32::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"]
@@ -62892,13 +57599,7 @@ pub fn vsli_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     static_assert!(N >= 0 && N <= 31);
-    unsafe {
-        transmute(vshiftins_v4i32(
-            transmute(a),
-            transmute(b),
-            int32x4_t::splat(N),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v4i32::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"]
@@ -62910,13 +57611,7 @@ pub fn vsliq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     static_assert!(N >= 0 && N <= 63);
-    unsafe {
-        transmute(vshiftins_v1i64(
-            transmute(a),
-            transmute(b),
-            int64x1_t::splat(N as i64),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v1i64::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"]
@@ -62928,13 +57623,7 @@ pub fn vsli_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     static_assert!(N >= 0 && N <= 63);
-    unsafe {
-        transmute(vshiftins_v2i64(
-            transmute(a),
-            transmute(b),
-            int64x2_t::splat(N as i64),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v2i64::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"]
@@ -62946,13 +57635,7 @@ pub fn vsliq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        transmute(vshiftins_v8i8(
-            transmute(a),
-            transmute(b),
-            int8x8_t::splat(N as i8),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v8i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"]
@@ -62964,13 +57647,7 @@ pub fn vsli_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     static_assert_uimm_bits!(N, 3);
-    unsafe {
-        transmute(vshiftins_v16i8(
-            transmute(a),
-            transmute(b),
-            int8x16_t::splat(N as i8),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v16i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"]
@@ -62982,13 +57659,7 @@ pub fn vsliq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        transmute(vshiftins_v4i16(
-            transmute(a),
-            transmute(b),
-            int16x4_t::splat(N as i16),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v4i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Left and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"]
@@ -63000,13 +57671,7 @@ pub fn vsli_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsliq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     static_assert_uimm_bits!(N, 4);
-    unsafe {
-        transmute(vshiftins_v8i16(
-            transmute(a),
-            transmute(b),
-            int16x8_t::splat(N as i16),
-        ))
-    }
+    unsafe { transmute(vshiftlins_v8i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Signed shift right and accumulate"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"]
@@ -63386,7 +58051,7 @@ pub fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
     static_assert!(1 <= N && N <= 8);
-    vshiftins_v8i8(a, b, int8x8_t::splat(-N as i8))
+    vshiftrins_v8i8::<N>(a, b)
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"]
@@ -63398,7 +58063,7 @@ pub fn vsri_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
     static_assert!(1 <= N && N <= 8);
-    vshiftins_v16i8(a, b, int8x16_t::splat(-N as i8))
+    vshiftrins_v16i8::<N>(a, b)
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"]
@@ -63410,7 +58075,7 @@ pub fn vsriq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
     static_assert!(1 <= N && N <= 16);
-    vshiftins_v4i16(a, b, int16x4_t::splat(-N as i16))
+    vshiftrins_v4i16::<N>(a, b)
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"]
@@ -63422,7 +58087,7 @@ pub fn vsri_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
     static_assert!(1 <= N && N <= 16);
-    vshiftins_v8i16(a, b, int16x8_t::splat(-N as i16))
+    vshiftrins_v8i16::<N>(a, b)
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"]
@@ -63434,7 +58099,7 @@ pub fn vsriq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
     static_assert!(1 <= N && N <= 32);
-    vshiftins_v2i32(a, b, int32x2_t::splat(-N))
+    vshiftrins_v2i32::<N>(a, b)
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"]
@@ -63446,7 +58111,7 @@ pub fn vsri_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
     static_assert!(1 <= N && N <= 32);
-    vshiftins_v4i32(a, b, int32x4_t::splat(-N))
+    vshiftrins_v4i32::<N>(a, b)
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"]
@@ -63458,7 +58123,7 @@ pub fn vsriq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
     static_assert!(1 <= N && N <= 64);
-    vshiftins_v1i64(a, b, int64x1_t::splat(-N as i64))
+    vshiftrins_v1i64::<N>(a, b)
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"]
@@ -63470,7 +58135,7 @@ pub fn vsri_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
     static_assert!(1 <= N && N <= 64);
-    vshiftins_v2i64(a, b, int64x2_t::splat(-N as i64))
+    vshiftrins_v2i64::<N>(a, b)
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"]
@@ -63482,13 +58147,7 @@ pub fn vsriq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     static_assert!(1 <= N && N <= 8);
-    unsafe {
-        transmute(vshiftins_v8i8(
-            transmute(a),
-            transmute(b),
-            int8x8_t::splat(-N as i8),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v8i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"]
@@ -63500,13 +58159,7 @@ pub fn vsri_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     static_assert!(1 <= N && N <= 8);
-    unsafe {
-        transmute(vshiftins_v16i8(
-            transmute(a),
-            transmute(b),
-            int8x16_t::splat(-N as i8),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v16i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"]
@@ -63518,13 +58171,7 @@ pub fn vsriq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
     static_assert!(1 <= N && N <= 16);
-    unsafe {
-        transmute(vshiftins_v4i16(
-            transmute(a),
-            transmute(b),
-            int16x4_t::splat(-N as i16),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v4i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"]
@@ -63536,13 +58183,7 @@ pub fn vsri_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
     static_assert!(1 <= N && N <= 16);
-    unsafe {
-        transmute(vshiftins_v8i16(
-            transmute(a),
-            transmute(b),
-            int16x8_t::splat(-N as i16),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v8i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"]
@@ -63554,13 +58195,7 @@ pub fn vsriq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
     static_assert!(1 <= N && N <= 32);
-    unsafe {
-        transmute(vshiftins_v2i32(
-            transmute(a),
-            transmute(b),
-            int32x2_t::splat(-N),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v2i32::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"]
@@ -63572,13 +58207,7 @@ pub fn vsri_n_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
     static_assert!(1 <= N && N <= 32);
-    unsafe {
-        transmute(vshiftins_v4i32(
-            transmute(a),
-            transmute(b),
-            int32x4_t::splat(-N),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v4i32::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"]
@@ -63590,13 +58219,7 @@ pub fn vsriq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
     static_assert!(1 <= N && N <= 64);
-    unsafe {
-        transmute(vshiftins_v1i64(
-            transmute(a),
-            transmute(b),
-            int64x1_t::splat(-N as i64),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v1i64::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"]
@@ -63608,13 +58231,7 @@ pub fn vsri_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
     static_assert!(1 <= N && N <= 64);
-    unsafe {
-        transmute(vshiftins_v2i64(
-            transmute(a),
-            transmute(b),
-            int64x2_t::splat(-N as i64),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v2i64::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"]
@@ -63626,13 +58243,7 @@ pub fn vsriq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
     static_assert!(1 <= N && N <= 8);
-    unsafe {
-        transmute(vshiftins_v8i8(
-            transmute(a),
-            transmute(b),
-            int8x8_t::splat(-N as i8),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v8i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"]
@@ -63644,13 +58255,7 @@ pub fn vsri_n_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
     static_assert!(1 <= N && N <= 8);
-    unsafe {
-        transmute(vshiftins_v16i8(
-            transmute(a),
-            transmute(b),
-            int8x16_t::splat(-N as i8),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v16i8::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"]
@@ -63662,13 +58267,7 @@ pub fn vsriq_n_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsri_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
     static_assert!(1 <= N && N <= 16);
-    unsafe {
-        transmute(vshiftins_v4i16(
-            transmute(a),
-            transmute(b),
-            int16x4_t::splat(-N as i16),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v4i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Shift Right and Insert (immediate)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"]
@@ -63680,18 +58279,12 @@ pub fn vsri_n_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
 #[rustc_legacy_const_generics(2)]
 pub fn vsriq_n_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
     static_assert!(1 <= N && N <= 16);
-    unsafe {
-        transmute(vshiftins_v8i16(
-            transmute(a),
-            transmute(b),
-            int16x8_t::splat(-N as i16),
-        ))
-    }
+    unsafe { transmute(vshiftrins_v8i16::<N>(transmute(a), transmute(b))) }
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -63709,7 +58302,7 @@ pub unsafe fn vst1_f16(ptr: *mut f16, a: float16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -63727,7 +58320,7 @@ pub unsafe fn vst1q_f16(ptr: *mut f16, a: float16x8_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -63745,7 +58338,7 @@ pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -63763,7 +58356,7 @@ pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(test, assert_instr(st1))]
@@ -63783,7 +58376,7 @@ pub unsafe fn vst1_f16_x2(a: *mut f16, b: float16x4x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(test, assert_instr(st1))]
@@ -63803,7 +58396,7 @@ pub unsafe fn vst1q_f16_x2(a: *mut f16, b: float16x8x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -63821,7 +58414,7 @@ pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -63839,7 +58432,7 @@ pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(test, assert_instr(st1))]
@@ -63859,7 +58452,7 @@ pub unsafe fn vst1_f16_x3(a: *mut f16, b: float16x4x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(test, assert_instr(st1))]
@@ -63879,11 +58472,12 @@ pub unsafe fn vst1q_f16_x3(a: *mut f16, b: float16x8x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst1))]
@@ -63903,11 +58497,12 @@ pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst1))]
@@ -63927,7 +58522,7 @@ pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(test, assert_instr(st1))]
@@ -63953,7 +58548,7 @@ pub unsafe fn vst1_f16_x4(a: *mut f16, b: float16x4x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(not(target_arch = "arm"))]
 #[cfg_attr(test, assert_instr(st1))]
@@ -63979,7 +58574,7 @@ pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -63987,16 +58582,13 @@ pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
 pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
-    vst1_v2f32(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<f32>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
+    vst1_v2f32::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64004,16 +58596,13 @@ pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
 pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
-    vst1q_v4f32(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<f32>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<f32>() as i32;
+    vst1q_v4f32::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64021,12 +58610,13 @@ pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
 pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
-    vst1_v8i8(ptr as *const i8, a, crate::mem::align_of::<i8>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
+    vst1_v8i8::<ALIGN>(ptr as *const i8, a)
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64034,12 +58624,13 @@ pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
 pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
-    vst1q_v16i8(ptr as *const i8, a, crate::mem::align_of::<i8>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i8>() as i32;
+    vst1q_v16i8::<ALIGN>(ptr as *const i8, a)
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64047,12 +58638,13 @@ pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
-    vst1_v4i16(ptr as *const i8, a, crate::mem::align_of::<i16>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
+    vst1_v4i16::<ALIGN>(ptr as *const i8, a)
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64060,12 +58652,13 @@ pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
-    vst1q_v8i16(ptr as *const i8, a, crate::mem::align_of::<i16>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i16>() as i32;
+    vst1q_v8i16::<ALIGN>(ptr as *const i8, a)
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64073,12 +58666,13 @@ pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
 pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
-    vst1_v2i32(ptr as *const i8, a, crate::mem::align_of::<i32>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
+    vst1_v2i32::<ALIGN>(ptr as *const i8, a)
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64086,12 +58680,13 @@ pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
 pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
-    vst1q_v4i32(ptr as *const i8, a, crate::mem::align_of::<i32>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i32>() as i32;
+    vst1q_v4i32::<ALIGN>(ptr as *const i8, a)
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64099,12 +58694,13 @@ pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
 pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
-    vst1_v1i64(ptr as *const i8, a, crate::mem::align_of::<i64>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
+    vst1_v1i64::<ALIGN>(ptr as *const i8, a)
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64112,12 +58708,13 @@ pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
 pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
-    vst1q_v2i64(ptr as *const i8, a, crate::mem::align_of::<i64>() as i32)
+    const ALIGN: i32 = crate::mem::align_of::<i64>() as i32;
+    vst1q_v2i64::<ALIGN>(ptr as *const i8, a)
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64125,16 +58722,13 @@ pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
 pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
-    vst1_v8i8(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<u8>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
+    vst1_v8i8::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64142,16 +58736,13 @@ pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
 pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
-    vst1q_v16i8(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<u8>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<u8>() as i32;
+    vst1q_v16i8::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64159,16 +58750,13 @@ pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
-    vst1_v4i16(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<u16>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
+    vst1_v4i16::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64176,16 +58764,13 @@ pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
-    vst1q_v8i16(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<u16>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<u16>() as i32;
+    vst1q_v8i16::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64193,16 +58778,13 @@ pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
 pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
-    vst1_v2i32(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<u32>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
+    vst1_v2i32::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64210,16 +58792,13 @@ pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
 pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
-    vst1q_v4i32(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<u32>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<u32>() as i32;
+    vst1q_v4i32::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64227,16 +58806,13 @@ pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
 pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
-    vst1_v1i64(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<u64>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
+    vst1_v1i64::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64244,16 +58820,13 @@ pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
 pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
-    vst1q_v2i64(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<u64>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<u64>() as i32;
+    vst1q_v2i64::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64261,16 +58834,13 @@ pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
 pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
-    vst1_v8i8(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<p8>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
+    vst1_v8i8::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64278,16 +58848,13 @@ pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
 pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
-    vst1q_v16i8(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<p8>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<p8>() as i32;
+    vst1q_v16i8::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64295,16 +58862,13 @@ pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
-    vst1_v4i16(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<p16>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
+    vst1_v4i16::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64312,16 +58876,13 @@ pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
-    vst1q_v8i16(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<p16>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<p16>() as i32;
+    vst1q_v8i16::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64329,16 +58890,13 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
 pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
-    vst1_v1i64(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<p64>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<p64>() as i32;
+    vst1_v1i64::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
@@ -64346,16 +58904,13 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
 pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
-    vst1q_v2i64(
-        ptr as *const i8,
-        transmute(a),
-        crate::mem::align_of::<p64>() as i32,
-    )
+    const ALIGN: i32 = crate::mem::align_of::<p64>() as i32;
+    vst1q_v2i64::<ALIGN>(ptr as *const i8, transmute(a))
 }
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -64371,7 +58926,7 @@ pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -64387,7 +58942,7 @@ pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -64406,7 +58961,7 @@ pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -64425,7 +58980,7 @@ pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -64444,7 +58999,7 @@ pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -64463,7 +59018,7 @@ pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -64485,7 +59040,7 @@ pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -64507,7 +59062,7 @@ pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -64532,7 +59087,7 @@ pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -64557,8 +59112,9 @@ pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
@@ -64566,7 +59122,7 @@ pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) {
     assert_instr(nop, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4_t) {
@@ -64576,8 +59132,9 @@ pub unsafe fn vst1_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
 #[cfg_attr(
@@ -64585,7 +59142,7 @@ pub unsafe fn vst1_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4_t) {
     assert_instr(nop, LANE = 0)
 )]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst1q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8_t) {
@@ -64595,7 +59152,7 @@ pub unsafe fn vst1q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64620,7 +59177,7 @@ pub unsafe fn vst1_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64645,7 +59202,7 @@ pub unsafe fn vst1q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64670,7 +59227,7 @@ pub unsafe fn vst1_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64695,7 +59252,7 @@ pub unsafe fn vst1q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64720,7 +59277,7 @@ pub unsafe fn vst1_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64745,7 +59302,7 @@ pub unsafe fn vst1q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64770,7 +59327,7 @@ pub unsafe fn vst1_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64795,7 +59352,7 @@ pub unsafe fn vst1q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64820,7 +59377,7 @@ pub unsafe fn vst1q_lane_s64<const LANE: i32>(a: *mut i64, b: int64x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64845,7 +59402,7 @@ pub unsafe fn vst1_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64870,7 +59427,7 @@ pub unsafe fn vst1q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64895,7 +59452,7 @@ pub unsafe fn vst1_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64920,7 +59477,7 @@ pub unsafe fn vst1q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64945,7 +59502,7 @@ pub unsafe fn vst1_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64970,7 +59527,7 @@ pub unsafe fn vst1q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -64995,7 +59552,7 @@ pub unsafe fn vst1q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -65020,7 +59577,7 @@ pub unsafe fn vst1_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -65045,7 +59602,7 @@ pub unsafe fn vst1q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -65070,7 +59627,7 @@ pub unsafe fn vst1_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -65095,7 +59652,7 @@ pub unsafe fn vst1q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,aes")]
@@ -65120,7 +59677,7 @@ pub unsafe fn vst1_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -65145,7 +59702,7 @@ pub unsafe fn vst1_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_lane_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -65170,7 +59727,7 @@ pub unsafe fn vst1_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x1_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
@@ -65193,7 +59750,7 @@ pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
@@ -65216,7 +59773,7 @@ pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
@@ -65239,7 +59796,7 @@ pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
@@ -65262,7 +59819,7 @@ pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
@@ -65285,7 +59842,7 @@ pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,aes")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
@@ -65308,7 +59865,7 @@ pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65327,7 +59884,7 @@ pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65346,7 +59903,7 @@ pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65365,7 +59922,7 @@ pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65384,7 +59941,7 @@ pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65403,7 +59960,7 @@ pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65422,7 +59979,7 @@ pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65441,7 +59998,7 @@ pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65460,7 +60017,7 @@ pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65476,7 +60033,7 @@ pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65492,7 +60049,7 @@ pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65508,7 +60065,7 @@ pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65524,7 +60081,7 @@ pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65540,7 +60097,7 @@ pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65556,7 +60113,7 @@ pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65572,7 +60129,7 @@ pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65588,7 +60145,7 @@ pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65607,7 +60164,7 @@ pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65626,7 +60183,7 @@ pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65645,7 +60202,7 @@ pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65664,7 +60221,7 @@ pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65683,7 +60240,7 @@ pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65702,7 +60259,7 @@ pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65721,7 +60278,7 @@ pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65740,7 +60297,7 @@ pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65756,7 +60313,7 @@ pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65772,7 +60329,7 @@ pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65788,7 +60345,7 @@ pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65804,7 +60361,7 @@ pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65820,7 +60377,7 @@ pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65836,7 +60393,7 @@ pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65852,7 +60409,7 @@ pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
@@ -65868,7 +60425,7 @@ pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65887,7 +60444,7 @@ pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65906,7 +60463,7 @@ pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65925,7 +60482,7 @@ pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65944,7 +60501,7 @@ pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65963,7 +60520,7 @@ pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -65982,7 +60539,7 @@ pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -66001,7 +60558,7 @@ pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -66020,7 +60577,7 @@ pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -66036,7 +60593,7 @@ pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -66052,7 +60609,7 @@ pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -66068,7 +60625,7 @@ pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -66084,7 +60641,7 @@ pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -66100,7 +60657,7 @@ pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -66116,7 +60673,7 @@ pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -66132,7 +60689,7 @@ pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -66148,7 +60705,7 @@ pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66171,7 +60728,7 @@ pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66194,7 +60751,7 @@ pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66217,7 +60774,7 @@ pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66240,7 +60797,7 @@ pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66263,7 +60820,7 @@ pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66286,7 +60843,7 @@ pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66309,7 +60866,7 @@ pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66332,7 +60889,7 @@ pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66355,7 +60912,7 @@ pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66378,7 +60935,7 @@ pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66401,7 +60958,7 @@ pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66424,7 +60981,7 @@ pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66447,7 +61004,7 @@ pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66470,7 +61027,7 @@ pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66493,7 +61050,7 @@ pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66516,7 +61073,7 @@ pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66539,7 +61096,7 @@ pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66562,7 +61119,7 @@ pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66585,7 +61142,7 @@ pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66608,7 +61165,7 @@ pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66631,7 +61188,7 @@ pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66654,7 +61211,7 @@ pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66677,7 +61234,7 @@ pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66700,7 +61257,7 @@ pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66723,7 +61280,7 @@ pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66746,7 +61303,7 @@ pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66769,7 +61326,7 @@ pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66792,7 +61349,7 @@ pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66815,7 +61372,7 @@ pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66838,7 +61395,7 @@ pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66861,7 +61418,7 @@ pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66884,7 +61441,7 @@ pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66907,7 +61464,7 @@ pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x2)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66930,7 +61487,7 @@ pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x3)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66953,7 +61510,7 @@ pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) {
 #[doc = "Store multiple single-element structures to one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16_x4)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -66973,185 +61530,155 @@ pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) {
 pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) {
     vst1q_s16_x4(transmute(a), transmute(b))
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v1i64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
-unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1_v1i64<const ALIGN: i32>(addr: *const i8, val: int64x1_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")]
         fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32);
     }
-    _vst1_v1i64(addr, val, align)
+    _vst1_v1i64(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1_v2f32<const ALIGN: i32>(addr: *const i8, val: float32x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")]
         fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32);
     }
-    _vst1_v2f32(addr, val, align)
+    _vst1_v2f32(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2i32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1_v2i32<const ALIGN: i32>(addr: *const i8, val: int32x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")]
         fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32);
     }
-    _vst1_v2i32(addr, val, align)
+    _vst1_v2i32(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4i16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1_v4i16<const ALIGN: i32>(addr: *const i8, val: int16x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")]
         fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32);
     }
-    _vst1_v4i16(addr, val, align)
+    _vst1_v4i16(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v8i8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
-unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1_v8i8<const ALIGN: i32>(addr: *const i8, val: int8x8_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")]
         fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32);
     }
-    _vst1_v8i8(addr, val, align)
+    _vst1_v8i8(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v16i8)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))]
-unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1q_v16i8<const ALIGN: i32>(addr: *const i8, val: int8x16_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")]
         fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32);
     }
-    _vst1q_v16i8(addr, val, align)
+    _vst1q_v16i8(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v2i64)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))]
-unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1q_v2i64<const ALIGN: i32>(addr: *const i8, val: int64x2_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")]
         fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32);
     }
-    _vst1q_v2i64(addr, val, align)
+    _vst1q_v2i64(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4f32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1q_v4f32<const ALIGN: i32>(addr: *const i8, val: float32x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")]
         fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32);
     }
-    _vst1q_v4f32(addr, val, align)
+    _vst1q_v4f32(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4i32)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))]
-unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1q_v4i32<const ALIGN: i32>(addr: *const i8, val: int32x4_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")]
         fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32);
     }
-    _vst1q_v4i32(addr, val, align)
+    _vst1q_v4i32(addr, val, ALIGN)
 }
-#[doc = "Store multiple single-element structures from one, two, three, or four registers."]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8i16)"]
-#[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
-unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32) {
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))]
+#[rustc_legacy_const_generics(2)]
+unsafe fn vst1q_v8i16<const ALIGN: i32>(addr: *const i8, val: int16x8_t) {
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")]
         fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32);
     }
-    _vst1q_v8i16(addr, val, align)
+    _vst1q_v8i16(addr, val, ALIGN)
 }
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) {
@@ -67164,12 +61691,12 @@ unsafe fn vst1_v4f16(addr: *const i8, val: float16x4_t, align: i32) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers."]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))]
 unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) {
@@ -67182,7 +61709,7 @@ unsafe fn vst1q_v8f16(addr: *const i8, val: float16x8_t, align: i32) {
 #[doc = "Store multiple single-element structures from one, two, three, or four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_lane_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,aes")]
@@ -67207,10 +61734,11 @@ pub unsafe fn vst1q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st2))]
@@ -67227,10 +61755,11 @@ pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st2))]
@@ -67247,11 +61776,12 @@ pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst2))]
@@ -67265,11 +61795,12 @@ pub unsafe fn vst2_f16(a: *mut f16, b: float16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst2))]
@@ -67283,159 +61814,103 @@ pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2f32.p0"
-        )]
-        fn _vst2_f32(a: float32x2_t, b: float32x2_t, ptr: *mut i8);
-    }
-    _vst2_f32(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v4f32.p0"
-        )]
-        fn _vst2q_f32(a: float32x4_t, b: float32x4_t, ptr: *mut i8);
-    }
-    _vst2q_f32(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v8i8.p0"
-        )]
-        fn _vst2_s8(a: int8x8_t, b: int8x8_t, ptr: *mut i8);
-    }
-    _vst2_s8(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v16i8.p0"
-        )]
-        fn _vst2q_s8(a: int8x16_t, b: int8x16_t, ptr: *mut i8);
-    }
-    _vst2q_s8(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v4i16.p0"
-        )]
-        fn _vst2_s16(a: int16x4_t, b: int16x4_t, ptr: *mut i8);
-    }
-    _vst2_s16(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v8i16.p0"
-        )]
-        fn _vst2q_s16(a: int16x8_t, b: int16x8_t, ptr: *mut i8);
-    }
-    _vst2q_s16(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v2i32.p0"
-        )]
-        fn _vst2_s32(a: int32x2_t, b: int32x2_t, ptr: *mut i8);
-    }
-    _vst2_s32(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st2))]
 pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v4i32.p0"
-        )]
-        fn _vst2q_s32(a: int32x4_t, b: int32x4_t, ptr: *mut i8);
-    }
-    _vst2q_s32(b.0, b.1, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67451,7 +61926,7 @@ pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67467,7 +61942,7 @@ pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67483,7 +61958,7 @@ pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67499,7 +61974,7 @@ pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67515,7 +61990,7 @@ pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67531,7 +62006,7 @@ pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67547,7 +62022,7 @@ pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67563,12 +62038,13 @@ pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
@@ -67585,12 +62061,13 @@ pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(st2, LANE = 0))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
@@ -67607,13 +62084,14 @@ pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(test, assert_instr(vst2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
@@ -67627,17 +62105,18 @@ pub unsafe fn vst2_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(test, assert_instr(vst2, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
-    static_assert_uimm_bits!(LANE, 1);
+    static_assert_uimm_bits!(LANE, 3);
     unsafe extern "unadjusted" {
         #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0.v8f16")]
         fn _vst2q_lane_f16(ptr: *mut i8, a: float16x8_t, b: float16x8_t, n: i32, size: i32);
@@ -67647,7 +62126,7 @@ pub unsafe fn vst2q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -67668,7 +62147,7 @@ pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -67689,7 +62168,7 @@ pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -67710,7 +62189,7 @@ pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -67731,7 +62210,7 @@ pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -67752,7 +62231,7 @@ pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -67773,7 +62252,7 @@ pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -67794,7 +62273,7 @@ pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67812,7 +62291,7 @@ pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67830,7 +62309,7 @@ pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67848,7 +62327,7 @@ pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67866,7 +62345,7 @@ pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67884,7 +62363,7 @@ pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67902,7 +62381,7 @@ pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -67920,7 +62399,7 @@ pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -67945,7 +62424,7 @@ pub unsafe fn vst2_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -67970,7 +62449,7 @@ pub unsafe fn vst2_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -67995,7 +62474,7 @@ pub unsafe fn vst2q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68020,7 +62499,7 @@ pub unsafe fn vst2_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68045,7 +62524,7 @@ pub unsafe fn vst2q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68070,7 +62549,7 @@ pub unsafe fn vst2_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68095,7 +62574,7 @@ pub unsafe fn vst2_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68120,7 +62599,7 @@ pub unsafe fn vst2q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,aes")]
@@ -68143,42 +62622,31 @@ pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v1i64.p0")]
-        fn _vst2_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32);
-    }
-    _vst2_s64(a as _, b.0, b.1, 8)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st2.v1i64.p0"
-        )]
-        fn _vst2_s64(a: int64x1_t, b: int64x1_t, ptr: *mut i8);
-    }
-    _vst2_s64(b.0, b.1, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68201,7 +62669,7 @@ pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68224,7 +62692,7 @@ pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68247,7 +62715,7 @@ pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68270,7 +62738,7 @@ pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68293,7 +62761,7 @@ pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68316,7 +62784,7 @@ pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68339,7 +62807,7 @@ pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68362,7 +62830,7 @@ pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68385,7 +62853,7 @@ pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68408,7 +62876,7 @@ pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) {
 #[doc = "Store multiple 2-element structures from two registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -68431,11 +62899,12 @@ pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst3))]
@@ -68449,11 +62918,12 @@ pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst3))]
@@ -68467,10 +62937,11 @@ pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st3))]
@@ -68487,10 +62958,11 @@ pub unsafe fn vst3_f16(a: *mut f16, b: float16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st3))]
@@ -68507,293 +62979,206 @@ pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2f32")]
-        fn _vst3_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, size: i32);
-    }
-    _vst3_f32(a as _, b.0, b.1, b.2, 4)
+    crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f32")]
-        fn _vst3q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, size: i32);
-    }
-    _vst3q_f32(a as _, b.0, b.1, b.2, 4)
+    crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i8")]
-        fn _vst3_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, size: i32);
-    }
-    _vst3_s8(a as _, b.0, b.1, b.2, 1)
+    crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v16i8")]
-        fn _vst3q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, size: i32);
-    }
-    _vst3q_s8(a as _, b.0, b.1, b.2, 1)
+    crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i16")]
-        fn _vst3_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, size: i32);
-    }
-    _vst3_s16(a as _, b.0, b.1, b.2, 2)
+    crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i16")]
-        fn _vst3q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, size: i32);
-    }
-    _vst3q_s16(a as _, b.0, b.1, b.2, 2)
+    crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2i32")]
-        fn _vst3_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, size: i32);
-    }
-    _vst3_s32(a as _, b.0, b.1, b.2, 4)
+    crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(vst3))]
 pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i32")]
-        fn _vst3q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, size: i32);
-    }
-    _vst3q_s32(a as _, b.0, b.1, b.2, 4)
+    crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v2f32.p0"
-        )]
-        fn _vst3_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut i8);
-    }
-    _vst3_f32(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v4f32.p0"
-        )]
-        fn _vst3q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut i8);
-    }
-    _vst3q_f32(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v8i8.p0"
-        )]
-        fn _vst3_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8);
-    }
-    _vst3_s8(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v16i8.p0"
-        )]
-        fn _vst3q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8);
-    }
-    _vst3q_s8(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v4i16.p0"
-        )]
-        fn _vst3_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i8);
-    }
-    _vst3_s16(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v8i16.p0"
-        )]
-        fn _vst3q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i8);
-    }
-    _vst3q_s16(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v2i32.p0"
-        )]
-        fn _vst3_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i8);
-    }
-    _vst3_s32(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st3))]
 pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v4i32.p0"
-        )]
-        fn _vst3q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i8);
-    }
-    _vst3q_s32(b.0, b.1, b.2, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
@@ -68814,13 +63199,14 @@ pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(test, assert_instr(vst3, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
@@ -68841,12 +63227,13 @@ pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
@@ -68863,12 +63250,13 @@ pub unsafe fn vst3_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(st3, LANE = 0))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
@@ -68885,7 +63273,7 @@ pub unsafe fn vst3q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -68910,7 +63298,7 @@ pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -68935,7 +63323,7 @@ pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -68953,7 +63341,7 @@ pub unsafe fn vst3_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -68978,7 +63366,7 @@ pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69003,7 +63391,7 @@ pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69028,7 +63416,7 @@ pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69053,7 +63441,7 @@ pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -69074,7 +63462,7 @@ pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -69095,7 +63483,7 @@ pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -69116,7 +63504,7 @@ pub unsafe fn vst3_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -69137,7 +63525,7 @@ pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -69158,7 +63546,7 @@ pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -69179,7 +63567,7 @@ pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -69200,7 +63588,7 @@ pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69225,7 +63613,7 @@ pub unsafe fn vst3_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69250,7 +63638,7 @@ pub unsafe fn vst3_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69275,7 +63663,7 @@ pub unsafe fn vst3q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69300,7 +63688,7 @@ pub unsafe fn vst3_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69325,7 +63713,7 @@ pub unsafe fn vst3q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69350,7 +63738,7 @@ pub unsafe fn vst3_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69375,7 +63763,7 @@ pub unsafe fn vst3_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69400,7 +63788,7 @@ pub unsafe fn vst3q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,aes")]
@@ -69423,42 +63811,31 @@ pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st3.v1i64.p0"
-        )]
-        fn _vst3_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i8);
-    }
-    _vst3_s64(b.0, b.1, b.2, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v1i64")]
-        fn _vst3_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, size: i32);
-    }
-    _vst3_s64(a as _, b.0, b.1, b.2, 8)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69481,7 +63858,7 @@ pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69504,7 +63881,7 @@ pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69527,7 +63904,7 @@ pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69550,7 +63927,7 @@ pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69573,7 +63950,7 @@ pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69596,7 +63973,7 @@ pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69619,7 +63996,7 @@ pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69642,7 +64019,7 @@ pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69665,7 +64042,7 @@ pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69688,7 +64065,7 @@ pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) {
 #[doc = "Store multiple 3-element structures from three registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -69711,11 +64088,12 @@ pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst4))]
@@ -69736,11 +64114,12 @@ pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(vst4))]
@@ -69761,10 +64140,11 @@ pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st4))]
@@ -69781,10 +64161,11 @@ pub unsafe fn vst4_f16(a: *mut f16, b: float16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 #[cfg_attr(test, assert_instr(st4))]
@@ -69801,7 +64182,7 @@ pub unsafe fn vst4q_f16(a: *mut f16, b: float16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69824,7 +64205,7 @@ pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69847,7 +64228,7 @@ pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69863,7 +64244,7 @@ pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69886,7 +64267,7 @@ pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69909,7 +64290,7 @@ pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69932,7 +64313,7 @@ pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69955,7 +64336,7 @@ pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -69978,165 +64359,110 @@ pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2f32.p0"
-        )]
-        fn _vst4_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut i8);
-    }
-    _vst4_f32(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v4f32.p0"
-        )]
-        fn _vst4q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut i8);
-    }
-    _vst4q_f32(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v8i8.p0"
-        )]
-        fn _vst4_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8);
-    }
-    _vst4_s8(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v16i8.p0"
-        )]
-        fn _vst4q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8);
-    }
-    _vst4q_s8(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v4i16.p0"
-        )]
-        fn _vst4_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i8);
-    }
-    _vst4_s16(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v8i16.p0"
-        )]
-        fn _vst4q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i8);
-    }
-    _vst4q_s16(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v2i32.p0"
-        )]
-        fn _vst4_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i8);
-    }
-    _vst4_s32(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(st4))]
 pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v4i32.p0"
-        )]
-        fn _vst4q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i8);
-    }
-    _vst4q_s32(b.0, b.1, b.2, b.3, a as _)
+    crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(test, assert_instr(vst4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
@@ -70158,13 +64484,14 @@ pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(test, assert_instr(vst4, LANE = 0))]
 #[rustc_legacy_const_generics(2)]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
@@ -70186,12 +64513,13 @@ pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
@@ -70215,12 +64543,13 @@ pub unsafe fn vst4_lane_f16<const LANE: i32>(a: *mut f16, b: float16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
+#[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(st4, LANE = 0))]
-#[target_feature(enable = "neon,fp16")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "fp16"))]
 #[unstable(feature = "stdarch_neon_f16", issue = "136306")]
 #[cfg(not(target_arch = "arm64ec"))]
 pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
@@ -70244,7 +64573,7 @@ pub unsafe fn vst4q_lane_f16<const LANE: i32>(a: *mut f16, b: float16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -70270,7 +64599,7 @@ pub unsafe fn vst4_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -70296,7 +64625,7 @@ pub unsafe fn vst4q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -70322,7 +64651,7 @@ pub unsafe fn vst4_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -70348,7 +64677,7 @@ pub unsafe fn vst4_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -70374,7 +64703,7 @@ pub unsafe fn vst4q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -70400,7 +64729,7 @@ pub unsafe fn vst4_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
@@ -70426,7 +64755,7 @@ pub unsafe fn vst4q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -70454,7 +64783,7 @@ pub unsafe fn vst4_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -70482,7 +64811,7 @@ pub unsafe fn vst4q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -70503,7 +64832,7 @@ pub unsafe fn vst4_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -70531,7 +64860,7 @@ pub unsafe fn vst4_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -70559,7 +64888,7 @@ pub unsafe fn vst4q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -70587,7 +64916,7 @@ pub unsafe fn vst4_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_s32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
@@ -70615,7 +64944,7 @@ pub unsafe fn vst4q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70640,7 +64969,7 @@ pub unsafe fn vst4_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70665,7 +64994,7 @@ pub unsafe fn vst4_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70690,7 +65019,7 @@ pub unsafe fn vst4q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70715,7 +65044,7 @@ pub unsafe fn vst4_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70740,7 +65069,7 @@ pub unsafe fn vst4q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70765,7 +65094,7 @@ pub unsafe fn vst4_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70790,7 +65119,7 @@ pub unsafe fn vst4_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70815,7 +65144,7 @@ pub unsafe fn vst4q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[target_feature(enable = "neon,aes")]
@@ -70838,49 +65167,31 @@ pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[cfg(target_arch = "arm")]
 #[target_feature(enable = "neon,v7")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v1i64")]
-        fn _vst4_s64(
-            ptr: *mut i8,
-            a: int64x1_t,
-            b: int64x1_t,
-            c: int64x1_t,
-            d: int64x1_t,
-            size: i32,
-        );
-    }
-    _vst4_s64(a as _, b.0, b.1, b.2, b.3, 8)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg(not(target_arch = "arm"))]
 #[stable(feature = "neon_intrinsics", since = "1.59.0")]
 #[cfg_attr(test, assert_instr(nop))]
 pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) {
-    unsafe extern "unadjusted" {
-        #[cfg_attr(
-            any(target_arch = "aarch64", target_arch = "arm64ec"),
-            link_name = "llvm.aarch64.neon.st4.v1i64.p0"
-        )]
-        fn _vst4_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i8);
-    }
-    _vst4_s64(b.0, b.1, b.2, b.3, a as _)
+    core::ptr::write_unaligned(a.cast(), b)
 }
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70903,7 +65214,7 @@ pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70926,7 +65237,7 @@ pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70949,7 +65260,7 @@ pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70972,7 +65283,7 @@ pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -70995,7 +65306,7 @@ pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -71018,7 +65329,7 @@ pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_u32)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -71041,7 +65352,7 @@ pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -71064,7 +65375,7 @@ pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p8)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -71087,7 +65398,7 @@ pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -71110,7 +65421,7 @@ pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) {
 #[doc = "Store multiple 4-element structures from four registers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_p16)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -71133,7 +65444,7 @@ pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) {
 #[doc = "Store SIMD&FP register (immediate offset)"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vstrq_p128)"]
 #[doc = "## Safety"]
-#[doc = "  * Neon instrinsic unsafe"]
+#[doc = "  * Neon intrinsic unsafe"]
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -71163,7 +65474,14 @@ pub unsafe fn vstrq_p128(a: *mut p128, b: p128) {
     assert_instr(fsub)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     unsafe { simd_sub(a, b) }
@@ -71178,7 +65496,14 @@ pub fn vsub_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t {
     assert_instr(fsub)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vsubq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t {
     unsafe { simd_sub(a, b) }
@@ -71568,7 +65893,11 @@ pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -71580,8 +65909,8 @@ pub fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
-    let d: int8x8_t = vsubhn_s16(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    let d = vsubhn_s16(b, c);
+    vcombine_s8(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s32)"]
@@ -71590,7 +65919,11 @@ pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -71602,8 +65935,8 @@ pub fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
-    let d: int16x4_t = vsubhn_s32(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    let d = vsubhn_s32(b, c);
+    vcombine_s16(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_s64)"]
@@ -71612,7 +65945,11 @@ pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -71624,8 +65961,8 @@ pub fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
-    let d: int32x2_t = vsubhn_s64(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3]) }
+    let d = vsubhn_s64(b, c);
+    vcombine_s32(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u16)"]
@@ -71634,7 +65971,11 @@ pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -71646,8 +65987,8 @@ pub fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t {
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t {
-    let d: uint8x8_t = vsubhn_u16(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
+    let d = vsubhn_u16(b, c);
+    vcombine_u8(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u32)"]
@@ -71656,7 +65997,11 @@ pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -71668,8 +66013,8 @@ pub fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t {
-    let d: uint16x4_t = vsubhn_u32(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) }
+    let d = vsubhn_u32(b, c);
+    vcombine_u16(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_high_u64)"]
@@ -71678,7 +66023,11 @@ pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(subhn2)
 )]
 #[cfg_attr(
@@ -71690,8 +66039,8 @@ pub fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_
     unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
 )]
 pub fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t {
-    let d: uint32x2_t = vsubhn_u64(b, c);
-    unsafe { simd_shuffle!(a, d, [0, 1, 2, 3]) }
+    let d = vsubhn_u64(b, c);
+    vcombine_u32(a, d)
 }
 #[doc = "Subtract returning high narrow"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsubhn_s16)"]
@@ -72108,7 +66457,11 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sudot, LANE = 0)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -72122,11 +66475,9 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
 )]
 pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: uint32x2_t = transmute(c);
-        let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, transmute(c), b)
-    }
+    let c = vreinterpret_u32_u8(c);
+    let c = vdup_lane_u32::<LANE>(c);
+    vusdot_s32(a, vreinterpret_u8_u32(c), b)
 }
 #[doc = "Dot product index form with signed and unsigned integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
@@ -72135,7 +66486,11 @@ pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t)
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(sudot, LANE = 0)
 )]
 #[rustc_legacy_const_generics(3)]
@@ -72149,12 +66504,53 @@ pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t)
 )]
 pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: uint32x2_t = transmute(c);
-        let c: uint32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, transmute(c), b)
-    }
+    let c = vreinterpret_u32_u8(c);
+    let c = vdupq_lane_u32::<LANE>(c);
+    vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
+}
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sudot, LANE = 3)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub fn vsudot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdup_laneq_u32::<LANE>(c);
+    vusdot_s32(a, vreinterpret_u8_u32(c), b)
+}
+#[doc = "Dot product index form with signed and unsigned integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 1))]
+#[cfg_attr(
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
+    assert_instr(sudot, LANE = 3)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub fn vsudotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_u32_u8(c);
+    let c = vdupq_laneq_u32::<LANE>(c);
+    vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
 }
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"]
@@ -72185,7 +66581,6 @@ pub fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -72195,26 +66590,8 @@ pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vtbl1(transmute(a), transmute(b))) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vtbl1(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -72224,23 +66601,6 @@ pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vtbl1(transmute(a), transmute(b))) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vtbl1(transmute(a), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -72269,7 +66629,6 @@ pub fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -72279,28 +66638,8 @@ pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x8x2_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -72310,25 +66649,6 @@ pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl2_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x8x2_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -72357,7 +66677,6 @@ pub fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -72374,34 +66693,8 @@ pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x8x3_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vtbl3(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(b),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -72418,31 +66711,6 @@ pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl3_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x8x3_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vtbl3(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(b),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4)"]
 #[inline]
 #[target_feature(enable = "neon")]
@@ -72471,7 +66739,6 @@ pub fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t {
 #[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -72489,36 +66756,8 @@ pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t {
-    let mut a: uint8x8x4_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.3 = unsafe { simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vtbl4(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            transmute(b),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon")]
 #[cfg(target_arch = "arm")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
@@ -72535,33 +66774,6 @@ pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
         ))
     }
 }
-#[doc = "Table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl4_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon")]
-#[cfg(target_arch = "arm")]
-#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbl))]
-pub fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
-    let mut a: poly8x8x4_t = a;
-    a.0 = unsafe { simd_shuffle!(a.0, a.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.1 = unsafe { simd_shuffle!(a.1, a.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.2 = unsafe { simd_shuffle!(a.2, a.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    a.3 = unsafe { simd_shuffle!(a.3, a.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vtbl4(
-            transmute(a.0),
-            transmute(a.1),
-            transmute(a.2),
-            transmute(a.3),
-            transmute(b),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1)"]
 #[inline]
@@ -72589,7 +66801,6 @@ pub fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -72598,26 +66809,8 @@ pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
     unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -72626,23 +66819,6 @@ pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
     unsafe { transmute(vtbx1(transmute(a), transmute(b), transmute(c))) }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx1_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let b: poly8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vtbx1(transmute(a), transmute(b), transmute(c)));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2)"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
@@ -72669,7 +66845,6 @@ pub fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -72685,33 +66860,8 @@ pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x8x2_t = b;
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vtbx2(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -72727,30 +66877,6 @@ pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx2_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x8x2_t = b;
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vtbx2(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3)"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
@@ -72777,7 +66903,6 @@ pub fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -72794,35 +66919,8 @@ pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x8x3_t = b;
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vtbx3(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -72839,32 +66937,6 @@ pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx3_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x8x3_t = b;
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vtbx3(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4)"]
 #[inline]
 #[target_feature(enable = "neon,v7")]
@@ -72915,13 +66987,13 @@ pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
 #[cfg_attr(test, assert_instr(vtbx))]
 pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
     let mut b: int8x8x4_t = b;
-    let a: int8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.3 = unsafe { simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
     unsafe {
+        let a: int8x8_t = simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.0 = simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.1 = simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.2 = simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]);
+        b.3 = simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]);
+        let c: int8x8_t = simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]);
         let ret_val: int8x8_t = vtbx4(
             a,
             transmute(b.0),
@@ -72936,7 +67008,6 @@ pub fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t {
 #[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -72954,37 +67025,8 @@ pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
     }
 }
 #[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_u8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t {
-    let mut b: uint8x8x4_t = b;
-    let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.3 = unsafe { simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: uint8x8_t = transmute(vtbx4(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
-#[doc = "Extended table look-up"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
 #[inline]
-#[cfg(target_endian = "little")]
 #[target_feature(enable = "neon,v7")]
 #[cfg(target_arch = "arm")]
 #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
@@ -73001,34 +67043,6 @@ pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
         ))
     }
 }
-#[doc = "Extended table look-up"]
-#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbx4_p8)"]
-#[inline]
-#[cfg(target_endian = "big")]
-#[target_feature(enable = "neon,v7")]
-#[cfg(target_arch = "arm")]
-#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")]
-#[cfg_attr(test, assert_instr(vtbx))]
-pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
-    let mut b: poly8x8x4_t = b;
-    let a: poly8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.0 = unsafe { simd_shuffle!(b.0, b.0, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.1 = unsafe { simd_shuffle!(b.1, b.1, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.2 = unsafe { simd_shuffle!(b.2, b.2, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    b.3 = unsafe { simd_shuffle!(b.3, b.3, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) };
-    unsafe {
-        let ret_val: poly8x8_t = transmute(vtbx4(
-            transmute(a),
-            transmute(b.0),
-            transmute(b.1),
-            transmute(b.2),
-            transmute(b.3),
-            transmute(c),
-        ));
-        simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
-    }
-}
 #[doc = "Transpose elements"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn_f16)"]
 #[inline]
@@ -73043,7 +67057,14 @@ pub fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t {
     assert_instr(trn2)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
@@ -73066,7 +67087,14 @@ pub fn vtrn_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     assert_instr(trn2)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vtrnq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
@@ -74042,11 +68070,9 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
 )]
 pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = transmute(c);
-        let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
-        vusdot_s32(a, b, transmute(c))
-    }
+    let c = vreinterpret_s32_s8(c);
+    let c = vdup_lane_s32::<LANE>(c);
+    vusdot_s32(a, b, vreinterpret_s8_s32(c))
 }
 #[doc = "Dot product index form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
@@ -74069,12 +68095,45 @@ pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t)
 )]
 pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
     static_assert_uimm_bits!(LANE, 1);
-    unsafe {
-        let c: int32x2_t = transmute(c);
-        let c: int32x4_t =
-            simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
-        vusdotq_s32(a, b, transmute(c))
-    }
+    let c = vreinterpret_s32_s8(c);
+    let c = vdupq_lane_s32::<LANE>(c);
+    vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
+}
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot, LANE = 3)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdup_laneq_s32::<LANE>(c);
+    vusdot_s32(a, b, vreinterpret_s8_s32(c))
+}
+#[doc = "Dot product index form with unsigned and signed integers"]
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"]
+#[inline]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
+#[target_feature(enable = "neon,i8mm")]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 3))]
+#[cfg_attr(
+    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    assert_instr(usdot, LANE = 3)
+)]
+#[rustc_legacy_const_generics(3)]
+#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
+pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
+    static_assert_uimm_bits!(LANE, 2);
+    let c = vreinterpretq_s32_s8(c);
+    let c = vdupq_laneq_s32::<LANE>(c);
+    vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
 }
 #[doc = "Dot product vector form with unsigned and signed integers"]
 #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"]
@@ -74083,7 +68142,11 @@ pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(usdot)
 )]
 #[cfg_attr(
@@ -74112,7 +68175,11 @@ pub fn vusdot_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot))]
 #[cfg_attr(
-    all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
+    all(
+        test,
+        any(target_arch = "aarch64", target_arch = "arm64ec"),
+        target_endian = "little"
+    ),
     assert_instr(usdot)
 )]
 #[cfg_attr(
@@ -74177,7 +68244,14 @@ pub fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
     assert_instr(uzp2)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
@@ -74200,7 +68274,14 @@ pub fn vuzp_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     assert_instr(uzp2)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vuzpq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
@@ -74769,7 +68850,14 @@ pub fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t {
     assert_instr(zip2)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     unsafe {
@@ -74792,7 +68880,14 @@ pub fn vzip_f16(a: float16x4_t, b: float16x4_t) -> float16x4x2_t {
     assert_instr(zip2)
 )]
 #[target_feature(enable = "neon,fp16")]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 #[cfg(not(target_arch = "arm64ec"))]
 pub fn vzipq_f16(a: float16x8_t, b: float16x8_t) -> float16x8x2_t {
     unsafe {
diff --git a/crates/core_arch/src/arm_shared/neon/load_tests.rs b/crates/core_arch/src/arm_shared/neon/load_tests.rs
index bdf511ecf8..70a37f7c05 100644
--- a/crates/core_arch/src/arm_shared/neon/load_tests.rs
+++ b/crates/core_arch/src/arm_shared/neon/load_tests.rs
@@ -13,194 +13,213 @@ use crate::core_arch::aarch64::*;
 use crate::core_arch::simd::*;
 use std::mem;
 use stdarch_test::simd_test;
+
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_s8() {
+fn test_vld1_s8() {
     let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
     let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: i8x8 = transmute(vld1_s8(a[1..].as_ptr()));
+    let r = unsafe { i8x8::from(vld1_s8(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_s8() {
+fn test_vld1q_s8() {
     let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-    let r: i8x16 = transmute(vld1q_s8(a[1..].as_ptr()));
+    let r = unsafe { i8x16::from(vld1q_s8(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_s16() {
+fn test_vld1_s16() {
     let a: [i16; 5] = [0, 1, 2, 3, 4];
     let e = i16x4::new(1, 2, 3, 4);
-    let r: i16x4 = transmute(vld1_s16(a[1..].as_ptr()));
+    let r = unsafe { i16x4::from(vld1_s16(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_s16() {
+fn test_vld1q_s16() {
     let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
     let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: i16x8 = transmute(vld1q_s16(a[1..].as_ptr()));
+    let r = unsafe { i16x8::from(vld1q_s16(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_s32() {
+fn test_vld1_s32() {
     let a: [i32; 3] = [0, 1, 2];
     let e = i32x2::new(1, 2);
-    let r: i32x2 = transmute(vld1_s32(a[1..].as_ptr()));
+    let r = unsafe { i32x2::from(vld1_s32(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_s32() {
+fn test_vld1q_s32() {
     let a: [i32; 5] = [0, 1, 2, 3, 4];
     let e = i32x4::new(1, 2, 3, 4);
-    let r: i32x4 = transmute(vld1q_s32(a[1..].as_ptr()));
+    let r = unsafe { i32x4::from(vld1q_s32(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_s64() {
+fn test_vld1_s64() {
     let a: [i64; 2] = [0, 1];
     let e = i64x1::new(1);
-    let r: i64x1 = transmute(vld1_s64(a[1..].as_ptr()));
+    let r = unsafe { i64x1::from(vld1_s64(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_s64() {
+fn test_vld1q_s64() {
     let a: [i64; 3] = [0, 1, 2];
     let e = i64x2::new(1, 2);
-    let r: i64x2 = transmute(vld1q_s64(a[1..].as_ptr()));
+    let r = unsafe { i64x2::from(vld1q_s64(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_u8() {
+fn test_vld1_u8() {
     let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
     let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: u8x8 = transmute(vld1_u8(a[1..].as_ptr()));
+    let r = unsafe { u8x8::from(vld1_u8(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_u8() {
+fn test_vld1q_u8() {
     let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-    let r: u8x16 = transmute(vld1q_u8(a[1..].as_ptr()));
+    let r = unsafe { u8x16::from(vld1q_u8(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_u16() {
+fn test_vld1_u16() {
     let a: [u16; 5] = [0, 1, 2, 3, 4];
     let e = u16x4::new(1, 2, 3, 4);
-    let r: u16x4 = transmute(vld1_u16(a[1..].as_ptr()));
+    let r = unsafe { u16x4::from(vld1_u16(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_u16() {
+fn test_vld1q_u16() {
     let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
     let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: u16x8 = transmute(vld1q_u16(a[1..].as_ptr()));
+    let r = unsafe { u16x8::from(vld1q_u16(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_u32() {
+fn test_vld1_u32() {
     let a: [u32; 3] = [0, 1, 2];
     let e = u32x2::new(1, 2);
-    let r: u32x2 = transmute(vld1_u32(a[1..].as_ptr()));
+    let r = unsafe { u32x2::from(vld1_u32(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_u32() {
+fn test_vld1q_u32() {
     let a: [u32; 5] = [0, 1, 2, 3, 4];
     let e = u32x4::new(1, 2, 3, 4);
-    let r: u32x4 = transmute(vld1q_u32(a[1..].as_ptr()));
+    let r = unsafe { u32x4::from(vld1q_u32(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_u64() {
+fn test_vld1_u64() {
     let a: [u64; 2] = [0, 1];
     let e = u64x1::new(1);
-    let r: u64x1 = transmute(vld1_u64(a[1..].as_ptr()));
+    let r = unsafe { u64x1::from(vld1_u64(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_u64() {
+fn test_vld1q_u64() {
     let a: [u64; 3] = [0, 1, 2];
     let e = u64x2::new(1, 2);
-    let r: u64x2 = transmute(vld1q_u64(a[1..].as_ptr()));
+    let r = unsafe { u64x2::from(vld1q_u64(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_p8() {
+fn test_vld1_p8() {
     let a: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
     let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: u8x8 = transmute(vld1_p8(a[1..].as_ptr()));
+    let r = unsafe { u8x8::from(vld1_p8(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_p8() {
+fn test_vld1q_p8() {
     let a: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
     let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-    let r: u8x16 = transmute(vld1q_p8(a[1..].as_ptr()));
+    let r = unsafe { u8x16::from(vld1q_p8(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_p16() {
+fn test_vld1_p16() {
     let a: [p16; 5] = [0, 1, 2, 3, 4];
     let e = u16x4::new(1, 2, 3, 4);
-    let r: u16x4 = transmute(vld1_p16(a[1..].as_ptr()));
+    let r = unsafe { u16x4::from(vld1_p16(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_p16() {
+fn test_vld1q_p16() {
     let a: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
     let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-    let r: u16x8 = transmute(vld1q_p16(a[1..].as_ptr()));
+    let r = unsafe { u16x8::from(vld1q_p16(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon,aes")]
-unsafe fn test_vld1_p64() {
+fn test_vld1_p64() {
     let a: [p64; 2] = [0, 1];
     let e = u64x1::new(1);
-    let r: u64x1 = transmute(vld1_p64(a[1..].as_ptr()));
+    let r = unsafe { u64x1::from(vld1_p64(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon,aes")]
-unsafe fn test_vld1q_p64() {
+fn test_vld1q_p64() {
     let a: [p64; 3] = [0, 1, 2];
     let e = u64x2::new(1, 2);
-    let r: u64x2 = transmute(vld1q_p64(a[1..].as_ptr()));
+    let r = unsafe { u64x2::from(vld1q_p64(a[1..].as_ptr())) };
+    assert_eq!(r, e)
+}
+
+#[cfg(not(target_arch = "arm64ec"))]
+#[simd_test(enable = "neon,fp16")]
+fn test_vld1_f16() {
+    let a: [f16; 5] = [0., 1., 2., 3., 4.];
+    let e = f16x4::new(1., 2., 3., 4.);
+    let r = unsafe { f16x4::from(vld1_f16(a[1..].as_ptr())) };
+    assert_eq!(r, e)
+}
+
+#[cfg(not(target_arch = "arm64ec"))]
+#[simd_test(enable = "neon,fp16")]
+fn test_vld1q_f16() {
+    let a: [f16; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
+    let e = f16x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
+    let r = unsafe { f16x8::from(vld1q_f16(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1_f32() {
+fn test_vld1_f32() {
     let a: [f32; 3] = [0., 1., 2.];
     let e = f32x2::new(1., 2.);
-    let r: f32x2 = transmute(vld1_f32(a[1..].as_ptr()));
+    let r = unsafe { f32x2::from(vld1_f32(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vld1q_f32() {
+fn test_vld1q_f32() {
     let a: [f32; 5] = [0., 1., 2., 3., 4.];
     let e = f32x4::new(1., 2., 3., 4.);
-    let r: f32x4 = transmute(vld1q_f32(a[1..].as_ptr()));
+    let r = unsafe { f32x4::from(vld1q_f32(a[1..].as_ptr())) };
     assert_eq!(r, e)
 }
diff --git a/crates/core_arch/src/arm_shared/neon/mod.rs b/crates/core_arch/src/arm_shared/neon/mod.rs
index fbd1967c54..4cc7f64f2d 100644
--- a/crates/core_arch/src/arm_shared/neon/mod.rs
+++ b/crates/core_arch/src/arm_shared/neon/mod.rs
@@ -7,7 +7,7 @@ mod generated;
 #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
 pub use self::generated::*;
 
-use crate::{core_arch::simd::*, hint::unreachable_unchecked, intrinsics::simd::*, mem::transmute};
+use crate::{core_arch::simd::*, intrinsics::simd::*, mem::transmute};
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
@@ -104,7 +104,8 @@ types! {
 }
 
 types! {
-    #![unstable(feature = "stdarch_neon_f16", issue = "136306")]
+    #![cfg_attr(not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0"))]
+    #![cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
 
     /// Arm-specific 64-bit wide vector of four packed `f16`.
     pub struct float16x4_t(4 x pub(crate) f16);
@@ -747,19 +748,40 @@ pub struct uint32x4x4_t(
 /// Arm-specific type containing two `float16x4_t` vectors.
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 pub struct float16x4x2_t(pub float16x4_t, pub float16x4_t);
 
 /// Arm-specific type containing three `float16x4_t` vectors.
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 pub struct float16x4x3_t(pub float16x4_t, pub float16x4_t, pub float16x4_t);
 
 /// Arm-specific type containing four `float16x4_t` vectors.
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 pub struct float16x4x4_t(
     pub float16x4_t,
     pub float16x4_t,
@@ -770,19 +792,40 @@ pub struct float16x4x4_t(
 /// Arm-specific type containing two `float16x8_t` vectors.
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 pub struct float16x8x2_t(pub float16x8_t, pub float16x8_t);
 
 /// Arm-specific type containing three `float16x8_t` vectors.
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 pub struct float16x8x3_t(pub float16x8_t, pub float16x8_t, pub float16x8_t);
 
 /// Arm-specific type containing four `float16x8_t` vectors.
 #[repr(C)]
 #[derive(Copy, Clone, Debug)]
-#[unstable(feature = "stdarch_neon_f16", issue = "136306")]
+#[cfg_attr(
+    not(target_arch = "arm"),
+    stable(feature = "stdarch_neon_fp16", since = "1.94.0")
+)]
+#[cfg_attr(
+    target_arch = "arm",
+    unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
+)]
 pub struct float16x8x4_t(
     pub float16x8_t,
     pub float16x8_t,
@@ -1204,1268 +1247,861 @@ mod tests {
     use crate::core_arch::arm::*;
     use crate::core_arch::arm_shared::test_support::*;
     use crate::core_arch::simd::*;
-    use std::{mem::transmute, vec::Vec};
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_s8() {
+    fn test_vld1_lane_s8() {
         let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: i8 = 42;
         let e = i8x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: i8x8 = transmute(vld1_lane_s8::<7>(&elem, transmute(a)));
+        let r = unsafe { i8x8::from(vld1_lane_s8::<7>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_s8() {
+    fn test_vld1q_lane_s8() {
         let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let elem: i8 = 42;
         let e = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42);
-        let r: i8x16 = transmute(vld1q_lane_s8::<15>(&elem, transmute(a)));
+        let r = unsafe { i8x16::from(vld1q_lane_s8::<15>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_s16() {
+    fn test_vld1_lane_s16() {
         let a = i16x4::new(0, 1, 2, 3);
         let elem: i16 = 42;
         let e = i16x4::new(0, 1, 2, 42);
-        let r: i16x4 = transmute(vld1_lane_s16::<3>(&elem, transmute(a)));
+        let r = unsafe { i16x4::from(vld1_lane_s16::<3>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_s16() {
+    fn test_vld1q_lane_s16() {
         let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: i16 = 42;
         let e = i16x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: i16x8 = transmute(vld1q_lane_s16::<7>(&elem, transmute(a)));
+        let r = unsafe { i16x8::from(vld1q_lane_s16::<7>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_s32() {
+    fn test_vld1_lane_s32() {
         let a = i32x2::new(0, 1);
         let elem: i32 = 42;
         let e = i32x2::new(0, 42);
-        let r: i32x2 = transmute(vld1_lane_s32::<1>(&elem, transmute(a)));
+        let r = unsafe { i32x2::from(vld1_lane_s32::<1>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_s32() {
+    fn test_vld1q_lane_s32() {
         let a = i32x4::new(0, 1, 2, 3);
         let elem: i32 = 42;
         let e = i32x4::new(0, 1, 2, 42);
-        let r: i32x4 = transmute(vld1q_lane_s32::<3>(&elem, transmute(a)));
+        let r = unsafe { i32x4::from(vld1q_lane_s32::<3>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_s64() {
+    fn test_vld1_lane_s64() {
         let a = i64x1::new(0);
         let elem: i64 = 42;
         let e = i64x1::new(42);
-        let r: i64x1 = transmute(vld1_lane_s64::<0>(&elem, transmute(a)));
+        let r = unsafe { i64x1::from(vld1_lane_s64::<0>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_s64() {
+    fn test_vld1q_lane_s64() {
         let a = i64x2::new(0, 1);
         let elem: i64 = 42;
         let e = i64x2::new(0, 42);
-        let r: i64x2 = transmute(vld1q_lane_s64::<1>(&elem, transmute(a)));
+        let r = unsafe { i64x2::from(vld1q_lane_s64::<1>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_u8() {
+    fn test_vld1_lane_u8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: u8 = 42;
         let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: u8x8 = transmute(vld1_lane_u8::<7>(&elem, transmute(a)));
+        let r = unsafe { u8x8::from(vld1_lane_u8::<7>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_u8() {
+    fn test_vld1q_lane_u8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let elem: u8 = 42;
         let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42);
-        let r: u8x16 = transmute(vld1q_lane_u8::<15>(&elem, transmute(a)));
+        let r = unsafe { u8x16::from(vld1q_lane_u8::<15>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_u16() {
+    fn test_vld1_lane_u16() {
         let a = u16x4::new(0, 1, 2, 3);
         let elem: u16 = 42;
         let e = u16x4::new(0, 1, 2, 42);
-        let r: u16x4 = transmute(vld1_lane_u16::<3>(&elem, transmute(a)));
+        let r = unsafe { u16x4::from(vld1_lane_u16::<3>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_u16() {
+    fn test_vld1q_lane_u16() {
         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: u16 = 42;
         let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: u16x8 = transmute(vld1q_lane_u16::<7>(&elem, transmute(a)));
+        let r = unsafe { u16x8::from(vld1q_lane_u16::<7>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_u32() {
+    fn test_vld1_lane_u32() {
         let a = u32x2::new(0, 1);
         let elem: u32 = 42;
         let e = u32x2::new(0, 42);
-        let r: u32x2 = transmute(vld1_lane_u32::<1>(&elem, transmute(a)));
+        let r = unsafe { u32x2::from(vld1_lane_u32::<1>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_u32() {
+    fn test_vld1q_lane_u32() {
         let a = u32x4::new(0, 1, 2, 3);
         let elem: u32 = 42;
         let e = u32x4::new(0, 1, 2, 42);
-        let r: u32x4 = transmute(vld1q_lane_u32::<3>(&elem, transmute(a)));
+        let r = unsafe { u32x4::from(vld1q_lane_u32::<3>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_u64() {
+    fn test_vld1_lane_u64() {
         let a = u64x1::new(0);
         let elem: u64 = 42;
         let e = u64x1::new(42);
-        let r: u64x1 = transmute(vld1_lane_u64::<0>(&elem, transmute(a)));
+        let r = unsafe { u64x1::from(vld1_lane_u64::<0>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_u64() {
+    fn test_vld1q_lane_u64() {
         let a = u64x2::new(0, 1);
         let elem: u64 = 42;
         let e = u64x2::new(0, 42);
-        let r: u64x2 = transmute(vld1q_lane_u64::<1>(&elem, transmute(a)));
+        let r = unsafe { u64x2::from(vld1q_lane_u64::<1>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_p8() {
+    fn test_vld1_lane_p8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: p8 = 42;
         let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: u8x8 = transmute(vld1_lane_p8::<7>(&elem, transmute(a)));
+        let r = unsafe { u8x8::from(vld1_lane_p8::<7>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_p8() {
+    fn test_vld1q_lane_p8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let elem: p8 = 42;
         let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42);
-        let r: u8x16 = transmute(vld1q_lane_p8::<15>(&elem, transmute(a)));
+        let r = unsafe { u8x16::from(vld1q_lane_p8::<15>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_p16() {
+    fn test_vld1_lane_p16() {
         let a = u16x4::new(0, 1, 2, 3);
         let elem: p16 = 42;
         let e = u16x4::new(0, 1, 2, 42);
-        let r: u16x4 = transmute(vld1_lane_p16::<3>(&elem, transmute(a)));
+        let r = unsafe { u16x4::from(vld1_lane_p16::<3>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_p16() {
+    fn test_vld1q_lane_p16() {
         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let elem: p16 = 42;
         let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42);
-        let r: u16x8 = transmute(vld1q_lane_p16::<7>(&elem, transmute(a)));
+        let r = unsafe { u16x8::from(vld1q_lane_p16::<7>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1_lane_p64() {
+    fn test_vld1_lane_p64() {
         let a = u64x1::new(0);
         let elem: u64 = 42;
         let e = u64x1::new(42);
-        let r: u64x1 = transmute(vld1_lane_p64::<0>(&elem, transmute(a)));
+        let r = unsafe { u64x1::from(vld1_lane_p64::<0>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1q_lane_p64() {
+    fn test_vld1q_lane_p64() {
         let a = u64x2::new(0, 1);
         let elem: u64 = 42;
         let e = u64x2::new(0, 42);
-        let r: u64x2 = transmute(vld1q_lane_p64::<1>(&elem, transmute(a)));
+        let r = unsafe { u64x2::from(vld1q_lane_p64::<1>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_lane_f32() {
+    fn test_vld1_lane_f32() {
         let a = f32x2::new(0., 1.);
         let elem: f32 = 42.;
         let e = f32x2::new(0., 42.);
-        let r: f32x2 = transmute(vld1_lane_f32::<1>(&elem, transmute(a)));
+        let r = unsafe { f32x2::from(vld1_lane_f32::<1>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_lane_f32() {
+    fn test_vld1q_lane_f32() {
         let a = f32x4::new(0., 1., 2., 3.);
         let elem: f32 = 42.;
         let e = f32x4::new(0., 1., 2., 42.);
-        let r: f32x4 = transmute(vld1q_lane_f32::<3>(&elem, transmute(a)));
+        let r = unsafe { f32x4::from(vld1q_lane_f32::<3>(&elem, a.into())) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_s8() {
+    fn test_vld1_dup_s8() {
         let elem: i8 = 42;
         let e = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: i8x8 = transmute(vld1_dup_s8(&elem));
+        let r = unsafe { i8x8::from(vld1_dup_s8(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_s8() {
+    fn test_vld1q_dup_s8() {
         let elem: i8 = 42;
         let e = i8x16::new(
             42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
         );
-        let r: i8x16 = transmute(vld1q_dup_s8(&elem));
+        let r = unsafe { i8x16::from(vld1q_dup_s8(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_s16() {
+    fn test_vld1_dup_s16() {
         let elem: i16 = 42;
         let e = i16x4::new(42, 42, 42, 42);
-        let r: i16x4 = transmute(vld1_dup_s16(&elem));
+        let r = unsafe { i16x4::from(vld1_dup_s16(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_s16() {
+    fn test_vld1q_dup_s16() {
         let elem: i16 = 42;
         let e = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: i16x8 = transmute(vld1q_dup_s16(&elem));
+        let r = unsafe { i16x8::from(vld1q_dup_s16(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_s32() {
+    fn test_vld1_dup_s32() {
         let elem: i32 = 42;
         let e = i32x2::new(42, 42);
-        let r: i32x2 = transmute(vld1_dup_s32(&elem));
+        let r = unsafe { i32x2::from(vld1_dup_s32(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_s32() {
+    fn test_vld1q_dup_s32() {
         let elem: i32 = 42;
         let e = i32x4::new(42, 42, 42, 42);
-        let r: i32x4 = transmute(vld1q_dup_s32(&elem));
+        let r = unsafe { i32x4::from(vld1q_dup_s32(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_s64() {
+    fn test_vld1_dup_s64() {
         let elem: i64 = 42;
         let e = i64x1::new(42);
-        let r: i64x1 = transmute(vld1_dup_s64(&elem));
+        let r = unsafe { i64x1::from(vld1_dup_s64(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_s64() {
+    fn test_vld1q_dup_s64() {
         let elem: i64 = 42;
         let e = i64x2::new(42, 42);
-        let r: i64x2 = transmute(vld1q_dup_s64(&elem));
+        let r = unsafe { i64x2::from(vld1q_dup_s64(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_u8() {
+    fn test_vld1_dup_u8() {
         let elem: u8 = 42;
         let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: u8x8 = transmute(vld1_dup_u8(&elem));
+        let r = unsafe { u8x8::from(vld1_dup_u8(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_u8() {
+    fn test_vld1q_dup_u8() {
         let elem: u8 = 42;
         let e = u8x16::new(
             42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
         );
-        let r: u8x16 = transmute(vld1q_dup_u8(&elem));
+        let r = unsafe { u8x16::from(vld1q_dup_u8(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_u16() {
+    fn test_vld1_dup_u16() {
         let elem: u16 = 42;
         let e = u16x4::new(42, 42, 42, 42);
-        let r: u16x4 = transmute(vld1_dup_u16(&elem));
+        let r = unsafe { u16x4::from(vld1_dup_u16(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_u16() {
+    fn test_vld1q_dup_u16() {
         let elem: u16 = 42;
         let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: u16x8 = transmute(vld1q_dup_u16(&elem));
+        let r = unsafe { u16x8::from(vld1q_dup_u16(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_u32() {
+    fn test_vld1_dup_u32() {
         let elem: u32 = 42;
         let e = u32x2::new(42, 42);
-        let r: u32x2 = transmute(vld1_dup_u32(&elem));
+        let r = unsafe { u32x2::from(vld1_dup_u32(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_u32() {
+    fn test_vld1q_dup_u32() {
         let elem: u32 = 42;
         let e = u32x4::new(42, 42, 42, 42);
-        let r: u32x4 = transmute(vld1q_dup_u32(&elem));
+        let r = unsafe { u32x4::from(vld1q_dup_u32(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_u64() {
+    fn test_vld1_dup_u64() {
         let elem: u64 = 42;
         let e = u64x1::new(42);
-        let r: u64x1 = transmute(vld1_dup_u64(&elem));
+        let r = unsafe { u64x1::from(vld1_dup_u64(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_u64() {
+    fn test_vld1q_dup_u64() {
         let elem: u64 = 42;
         let e = u64x2::new(42, 42);
-        let r: u64x2 = transmute(vld1q_dup_u64(&elem));
+        let r = unsafe { u64x2::from(vld1q_dup_u64(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_p8() {
+    fn test_vld1_dup_p8() {
         let elem: p8 = 42;
         let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: u8x8 = transmute(vld1_dup_p8(&elem));
+        let r = unsafe { u8x8::from(vld1_dup_p8(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_p8() {
+    fn test_vld1q_dup_p8() {
         let elem: p8 = 42;
         let e = u8x16::new(
             42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
         );
-        let r: u8x16 = transmute(vld1q_dup_p8(&elem));
+        let r = unsafe { u8x16::from(vld1q_dup_p8(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_p16() {
+    fn test_vld1_dup_p16() {
         let elem: p16 = 42;
         let e = u16x4::new(42, 42, 42, 42);
-        let r: u16x4 = transmute(vld1_dup_p16(&elem));
+        let r = unsafe { u16x4::from(vld1_dup_p16(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_p16() {
+    fn test_vld1q_dup_p16() {
         let elem: p16 = 42;
         let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
-        let r: u16x8 = transmute(vld1q_dup_p16(&elem));
+        let r = unsafe { u16x8::from(vld1q_dup_p16(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1_dup_p64() {
+    fn test_vld1_dup_p64() {
         let elem: u64 = 42;
         let e = u64x1::new(42);
-        let r: u64x1 = transmute(vld1_dup_p64(&elem));
+        let r = unsafe { u64x1::from(vld1_dup_p64(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon,aes")]
-    unsafe fn test_vld1q_dup_p64() {
+    fn test_vld1q_dup_p64() {
         let elem: u64 = 42;
         let e = u64x2::new(42, 42);
-        let r: u64x2 = transmute(vld1q_dup_p64(&elem));
+        let r = unsafe { u64x2::from(vld1q_dup_p64(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1_dup_f32() {
+    fn test_vld1_dup_f32() {
         let elem: f32 = 42.;
         let e = f32x2::new(42., 42.);
-        let r: f32x2 = transmute(vld1_dup_f32(&elem));
+        let r = unsafe { f32x2::from(vld1_dup_f32(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vld1q_dup_f32() {
+    fn test_vld1q_dup_f32() {
         let elem: f32 = 42.;
         let e = f32x4::new(42., 42., 42., 42.);
-        let r: f32x4 = transmute(vld1q_dup_f32(&elem));
+        let r = unsafe { f32x4::from(vld1q_dup_f32(&elem)) };
         assert_eq!(r, e)
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_u8() {
-        let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = vget_lane_u8::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_u32() {
-        let v = i32x4::new(1, 2, 3, 4);
-        let r = vgetq_lane_u32::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_s32() {
-        let v = i32x4::new(1, 2, 3, 4);
-        let r = vgetq_lane_s32::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_u64() {
-        let v: u64 = 1;
-        let r = vget_lane_u64::<0>(transmute(v));
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_u16() {
-        let v = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = vgetq_lane_u16::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_s8() {
-        let v = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vget_lane_s8::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_s8::<4>(transmute(v));
-        assert_eq!(r, 4);
-        let r = vget_lane_s8::<5>(transmute(v));
-        assert_eq!(r, 5);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_p8() {
-        let v = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vget_lane_p8::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_p8::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vget_lane_p8::<5>(transmute(v));
-        assert_eq!(r, 5);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_p16() {
-        let v = u16x4::new(0, 1, 2, 3);
-        let r = vget_lane_p16::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_p16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vget_lane_p16::<0>(transmute(v));
-        assert_eq!(r, 0);
-        let r = vget_lane_p16::<1>(transmute(v));
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_s16() {
-        let v = i16x4::new(0, 1, 2, 3);
-        let r = vget_lane_s16::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_s16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vget_lane_s16::<0>(transmute(v));
-        assert_eq!(r, 0);
-        let r = vget_lane_s16::<1>(transmute(v));
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_u16() {
-        let v = u16x4::new(0, 1, 2, 3);
-        let r = vget_lane_u16::<2>(transmute(v));
-        assert_eq!(r, 2);
-        let r = vget_lane_u16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vget_lane_u16::<0>(transmute(v));
-        assert_eq!(r, 0);
-        let r = vget_lane_u16::<1>(transmute(v));
-        assert_eq!(r, 1);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_f32() {
-        let v = f32x2::new(0.0, 1.0);
-        let r = vget_lane_f32::<1>(transmute(v));
-        assert_eq!(r, 1.0);
-        let r = vget_lane_f32::<0>(transmute(v));
-        assert_eq!(r, 0.0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_s32() {
-        let v = i32x2::new(0, 1);
-        let r = vget_lane_s32::<1>(transmute(v));
-        assert_eq!(r, 1);
-        let r = vget_lane_s32::<0>(transmute(v));
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_u32() {
-        let v = u32x2::new(0, 1);
-        let r = vget_lane_u32::<1>(transmute(v));
-        assert_eq!(r, 1);
-        let r = vget_lane_u32::<0>(transmute(v));
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_s64() {
-        let v = i64x1::new(1);
-        let r = vget_lane_s64::<0>(transmute(v));
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_lane_p64() {
-        let v = u64x1::new(1);
-        let r = vget_lane_p64::<0>(transmute(v));
-        assert_eq!(r, 1);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_s8() {
-        let v = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r = vgetq_lane_s8::<7>(transmute(v));
-        assert_eq!(r, 7);
-        let r = vgetq_lane_s8::<13>(transmute(v));
-        assert_eq!(r, 13);
-        let r = vgetq_lane_s8::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_s8::<0>(transmute(v));
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_p8() {
-        let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r = vgetq_lane_p8::<7>(transmute(v));
-        assert_eq!(r, 7);
-        let r = vgetq_lane_p8::<13>(transmute(v));
-        assert_eq!(r, 13);
-        let r = vgetq_lane_p8::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_p8::<0>(transmute(v));
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_u8() {
-        let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let r = vgetq_lane_u8::<7>(transmute(v));
-        assert_eq!(r, 7);
-        let r = vgetq_lane_u8::<13>(transmute(v));
-        assert_eq!(r, 13);
-        let r = vgetq_lane_u8::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_u8::<0>(transmute(v));
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_s16() {
-        let v = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vgetq_lane_s16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_s16::<6>(transmute(v));
-        assert_eq!(r, 6);
-        let r = vgetq_lane_s16::<0>(transmute(v));
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_p16() {
-        let v = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = vgetq_lane_p16::<3>(transmute(v));
-        assert_eq!(r, 3);
-        let r = vgetq_lane_p16::<7>(transmute(v));
-        assert_eq!(r, 7);
-        let r = vgetq_lane_p16::<1>(transmute(v));
-        assert_eq!(r, 1);
-    }
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_f32() {
-        let v = f32x4::new(0.0, 1.0, 2.0, 3.0);
-        let r = vgetq_lane_f32::<3>(transmute(v));
-        assert_eq!(r, 3.0);
-        let r = vgetq_lane_f32::<0>(transmute(v));
-        assert_eq!(r, 0.0);
-        let r = vgetq_lane_f32::<2>(transmute(v));
-        assert_eq!(r, 2.0);
-        let r = vgetq_lane_f32::<1>(transmute(v));
-        assert_eq!(r, 1.0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_s64() {
-        let v = i64x2::new(0, 1);
-        let r = vgetq_lane_s64::<1>(transmute(v));
-        assert_eq!(r, 1);
-        let r = vgetq_lane_s64::<0>(transmute(v));
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_p64() {
-        let v = u64x2::new(0, 1);
-        let r = vgetq_lane_p64::<1>(transmute(v));
-        assert_eq!(r, 1);
-        let r = vgetq_lane_p64::<0>(transmute(v));
-        assert_eq!(r, 0);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vext_s64() {
+    fn test_vext_s64() {
         let a: i64x1 = i64x1::new(0);
         let b: i64x1 = i64x1::new(1);
         let e: i64x1 = i64x1::new(0);
-        let r: i64x1 = transmute(vext_s64::<0>(transmute(a), transmute(b)));
+        let r = unsafe { i64x1::from(vext_s64::<0>(a.into(), b.into())) };
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vext_u64() {
+    fn test_vext_u64() {
         let a: u64x1 = u64x1::new(0);
         let b: u64x1 = u64x1::new(1);
         let e: u64x1 = u64x1::new(0);
-        let r: u64x1 = transmute(vext_u64::<0>(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_s8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = i8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let r: i8x8 = transmute(vget_high_s8(transmute(a)));
+        let r = unsafe { u64x1::from(vext_u64::<0>(a.into(), b.into())) };
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_s16() {
-        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = i16x4::new(5, 6, 7, 8);
-        let r: i16x4 = transmute(vget_high_s16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_s32() {
-        let a = i32x4::new(1, 2, 3, 4);
-        let e = i32x2::new(3, 4);
-        let r: i32x2 = transmute(vget_high_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_s64() {
-        let a = i64x2::new(1, 2);
-        let e = i64x1::new(2);
-        let r: i64x1 = transmute(vget_high_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_u8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let r: u8x8 = transmute(vget_high_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_u16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(5, 6, 7, 8);
-        let r: u16x4 = transmute(vget_high_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_u32() {
-        let a = u32x4::new(1, 2, 3, 4);
-        let e = u32x2::new(3, 4);
-        let r: u32x2 = transmute(vget_high_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_u64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(2);
-        let r: u64x1 = transmute(vget_high_u64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_p8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16);
-        let r: u8x8 = transmute(vget_high_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_p16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(5, 6, 7, 8);
-        let r: u16x4 = transmute(vget_high_p16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_high_f32() {
-        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
-        let e = f32x2::new(3.0, 4.0);
-        let r: f32x2 = transmute(vget_high_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_s8() {
-        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: i8x8 = transmute(vget_low_s8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_s16() {
-        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = i16x4::new(1, 2, 3, 4);
-        let r: i16x4 = transmute(vget_low_s16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_s32() {
-        let a = i32x4::new(1, 2, 3, 4);
-        let e = i32x2::new(1, 2);
-        let r: i32x2 = transmute(vget_low_s32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_s64() {
-        let a = i64x2::new(1, 2);
-        let e = i64x1::new(1);
-        let r: i64x1 = transmute(vget_low_s64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_u8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u8x8 = transmute(vget_low_u8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_u16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(1, 2, 3, 4);
-        let r: u16x4 = transmute(vget_low_u16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_u32() {
-        let a = u32x4::new(1, 2, 3, 4);
-        let e = u32x2::new(1, 2);
-        let r: u32x2 = transmute(vget_low_u32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_u64() {
-        let a = u64x2::new(1, 2);
-        let e = u64x1::new(1);
-        let r: u64x1 = transmute(vget_low_u64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_p8() {
-        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
-        let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u8x8 = transmute(vget_low_p8(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_p16() {
-        let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let e = u16x4::new(1, 2, 3, 4);
-        let r: u16x4 = transmute(vget_low_p16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vget_low_f32() {
-        let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
-        let e = f32x2::new(1.0, 2.0);
-        let r: f32x2 = transmute(vget_low_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_s8() {
+    fn test_vdupq_n_s8() {
         let v: i8 = 42;
         let e = i8x16::new(
             42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
         );
-        let r: i8x16 = transmute(vdupq_n_s8(v));
+        let r = i8x16::from(vdupq_n_s8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_s16() {
+    fn test_vdupq_n_s16() {
         let v: i16 = 64;
         let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: i16x8 = transmute(vdupq_n_s16(v));
+        let r = i16x8::from(vdupq_n_s16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_s32() {
+    fn test_vdupq_n_s32() {
         let v: i32 = 64;
         let e = i32x4::new(64, 64, 64, 64);
-        let r: i32x4 = transmute(vdupq_n_s32(v));
+        let r = i32x4::from(vdupq_n_s32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_s64() {
+    fn test_vdupq_n_s64() {
         let v: i64 = 64;
         let e = i64x2::new(64, 64);
-        let r: i64x2 = transmute(vdupq_n_s64(v));
+        let r = i64x2::from(vdupq_n_s64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_u8() {
+    fn test_vdupq_n_u8() {
         let v: u8 = 64;
         let e = u8x16::new(
             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
         );
-        let r: u8x16 = transmute(vdupq_n_u8(v));
+        let r = u8x16::from(vdupq_n_u8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_u16() {
+    fn test_vdupq_n_u16() {
         let v: u16 = 64;
         let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u16x8 = transmute(vdupq_n_u16(v));
+        let r = u16x8::from(vdupq_n_u16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_u32() {
+    fn test_vdupq_n_u32() {
         let v: u32 = 64;
         let e = u32x4::new(64, 64, 64, 64);
-        let r: u32x4 = transmute(vdupq_n_u32(v));
+        let r = u32x4::from(vdupq_n_u32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_u64() {
+    fn test_vdupq_n_u64() {
         let v: u64 = 64;
         let e = u64x2::new(64, 64);
-        let r: u64x2 = transmute(vdupq_n_u64(v));
+        let r = u64x2::from(vdupq_n_u64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_p8() {
+    fn test_vdupq_n_p8() {
         let v: p8 = 64;
         let e = u8x16::new(
             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
         );
-        let r: u8x16 = transmute(vdupq_n_p8(v));
+        let r = u8x16::from(vdupq_n_p8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_p16() {
+    fn test_vdupq_n_p16() {
         let v: p16 = 64;
         let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u16x8 = transmute(vdupq_n_p16(v));
+        let r = u16x8::from(vdupq_n_p16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdupq_n_f32() {
+    fn test_vdupq_n_f32() {
         let v: f32 = 64.0;
         let e = f32x4::new(64.0, 64.0, 64.0, 64.0);
-        let r: f32x4 = transmute(vdupq_n_f32(v));
+        let r = f32x4::from(vdupq_n_f32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_s8() {
+    fn test_vdup_n_s8() {
         let v: i8 = 64;
         let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: i8x8 = transmute(vdup_n_s8(v));
+        let r = i8x8::from(vdup_n_s8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_s16() {
+    fn test_vdup_n_s16() {
         let v: i16 = 64;
         let e = i16x4::new(64, 64, 64, 64);
-        let r: i16x4 = transmute(vdup_n_s16(v));
+        let r = i16x4::from(vdup_n_s16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_s32() {
+    fn test_vdup_n_s32() {
         let v: i32 = 64;
         let e = i32x2::new(64, 64);
-        let r: i32x2 = transmute(vdup_n_s32(v));
+        let r = i32x2::from(vdup_n_s32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_s64() {
+    fn test_vdup_n_s64() {
         let v: i64 = 64;
         let e = i64x1::new(64);
-        let r: i64x1 = transmute(vdup_n_s64(v));
+        let r = i64x1::from(vdup_n_s64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_u8() {
+    fn test_vdup_n_u8() {
         let v: u8 = 64;
         let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u8x8 = transmute(vdup_n_u8(v));
+        let r = u8x8::from(vdup_n_u8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_u16() {
+    fn test_vdup_n_u16() {
         let v: u16 = 64;
         let e = u16x4::new(64, 64, 64, 64);
-        let r: u16x4 = transmute(vdup_n_u16(v));
+        let r = u16x4::from(vdup_n_u16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_u32() {
+    fn test_vdup_n_u32() {
         let v: u32 = 64;
         let e = u32x2::new(64, 64);
-        let r: u32x2 = transmute(vdup_n_u32(v));
+        let r = u32x2::from(vdup_n_u32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_u64() {
+    fn test_vdup_n_u64() {
         let v: u64 = 64;
         let e = u64x1::new(64);
-        let r: u64x1 = transmute(vdup_n_u64(v));
+        let r = u64x1::from(vdup_n_u64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_p8() {
+    fn test_vdup_n_p8() {
         let v: p8 = 64;
         let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u8x8 = transmute(vdup_n_p8(v));
+        let r = u8x8::from(vdup_n_p8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_p16() {
+    fn test_vdup_n_p16() {
         let v: p16 = 64;
         let e = u16x4::new(64, 64, 64, 64);
-        let r: u16x4 = transmute(vdup_n_p16(v));
+        let r = u16x4::from(vdup_n_p16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vdup_n_f32() {
+    fn test_vdup_n_f32() {
         let v: f32 = 64.0;
         let e = f32x2::new(64.0, 64.0);
-        let r: f32x2 = transmute(vdup_n_f32(v));
+        let r = f32x2::from(vdup_n_f32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vldrq_p128() {
+    fn test_vldrq_p128() {
         let v: [p128; 2] = [1, 2];
         let e: p128 = 2;
-        let r: p128 = vldrq_p128(v[1..].as_ptr());
+        let r: p128 = unsafe { vldrq_p128(v[1..].as_ptr()) };
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vstrq_p128() {
+    fn test_vstrq_p128() {
         let v: [p128; 2] = [1, 2];
         let e: p128 = 2;
         let mut r: p128 = 1;
-        vstrq_p128(&mut r, v[1]);
+        unsafe {
+            vstrq_p128(&mut r, v[1]);
+        }
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_s8() {
+    fn test_vmov_n_s8() {
         let v: i8 = 64;
         let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: i8x8 = transmute(vmov_n_s8(v));
+        let r = i8x8::from(vmov_n_s8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_s16() {
+    fn test_vmov_n_s16() {
         let v: i16 = 64;
         let e = i16x4::new(64, 64, 64, 64);
-        let r: i16x4 = transmute(vmov_n_s16(v));
+        let r = i16x4::from(vmov_n_s16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_s32() {
+    fn test_vmov_n_s32() {
         let v: i32 = 64;
         let e = i32x2::new(64, 64);
-        let r: i32x2 = transmute(vmov_n_s32(v));
+        let r = i32x2::from(vmov_n_s32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_s64() {
+    fn test_vmov_n_s64() {
         let v: i64 = 64;
         let e = i64x1::new(64);
-        let r: i64x1 = transmute(vmov_n_s64(v));
+        let r = i64x1::from(vmov_n_s64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_u8() {
+    fn test_vmov_n_u8() {
         let v: u8 = 64;
         let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u8x8 = transmute(vmov_n_u8(v));
+        let r = u8x8::from(vmov_n_u8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_u16() {
+    fn test_vmov_n_u16() {
         let v: u16 = 64;
         let e = u16x4::new(64, 64, 64, 64);
-        let r: u16x4 = transmute(vmov_n_u16(v));
+        let r = u16x4::from(vmov_n_u16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_u32() {
+    fn test_vmov_n_u32() {
         let v: u32 = 64;
         let e = u32x2::new(64, 64);
-        let r: u32x2 = transmute(vmov_n_u32(v));
+        let r = u32x2::from(vmov_n_u32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_u64() {
+    fn test_vmov_n_u64() {
         let v: u64 = 64;
         let e = u64x1::new(64);
-        let r: u64x1 = transmute(vmov_n_u64(v));
+        let r = u64x1::from(vmov_n_u64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_p8() {
+    fn test_vmov_n_p8() {
         let v: p8 = 64;
         let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u8x8 = transmute(vmov_n_p8(v));
+        let r = u8x8::from(vmov_n_p8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_p16() {
+    fn test_vmov_n_p16() {
         let v: p16 = 64;
         let e = u16x4::new(64, 64, 64, 64);
-        let r: u16x4 = transmute(vmov_n_p16(v));
+        let r = u16x4::from(vmov_n_p16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmov_n_f32() {
+    fn test_vmov_n_f32() {
         let v: f32 = 64.0;
         let e = f32x2::new(64.0, 64.0);
-        let r: f32x2 = transmute(vmov_n_f32(v));
+        let r = f32x2::from(vmov_n_f32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_s8() {
+    fn test_vmovq_n_s8() {
         let v: i8 = 64;
         let e = i8x16::new(
             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
         );
-        let r: i8x16 = transmute(vmovq_n_s8(v));
+        let r = i8x16::from(vmovq_n_s8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_s16() {
+    fn test_vmovq_n_s16() {
         let v: i16 = 64;
         let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: i16x8 = transmute(vmovq_n_s16(v));
+        let r = i16x8::from(vmovq_n_s16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_s32() {
+    fn test_vmovq_n_s32() {
         let v: i32 = 64;
         let e = i32x4::new(64, 64, 64, 64);
-        let r: i32x4 = transmute(vmovq_n_s32(v));
+        let r = i32x4::from(vmovq_n_s32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_s64() {
+    fn test_vmovq_n_s64() {
         let v: i64 = 64;
         let e = i64x2::new(64, 64);
-        let r: i64x2 = transmute(vmovq_n_s64(v));
+        let r = i64x2::from(vmovq_n_s64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_u8() {
+    fn test_vmovq_n_u8() {
         let v: u8 = 64;
         let e = u8x16::new(
             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
         );
-        let r: u8x16 = transmute(vmovq_n_u8(v));
+        let r = u8x16::from(vmovq_n_u8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_u16() {
+    fn test_vmovq_n_u16() {
         let v: u16 = 64;
         let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u16x8 = transmute(vmovq_n_u16(v));
+        let r = u16x8::from(vmovq_n_u16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_u32() {
+    fn test_vmovq_n_u32() {
         let v: u32 = 64;
         let e = u32x4::new(64, 64, 64, 64);
-        let r: u32x4 = transmute(vmovq_n_u32(v));
+        let r = u32x4::from(vmovq_n_u32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_u64() {
+    fn test_vmovq_n_u64() {
         let v: u64 = 64;
         let e = u64x2::new(64, 64);
-        let r: u64x2 = transmute(vmovq_n_u64(v));
+        let r = u64x2::from(vmovq_n_u64(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_p8() {
+    fn test_vmovq_n_p8() {
         let v: p8 = 64;
         let e = u8x16::new(
             64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
         );
-        let r: u8x16 = transmute(vmovq_n_p8(v));
+        let r = u8x16::from(vmovq_n_p8(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_p16() {
+    fn test_vmovq_n_p16() {
         let v: p16 = 64;
         let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64);
-        let r: u16x8 = transmute(vmovq_n_p16(v));
+        let r = u16x8::from(vmovq_n_p16(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovq_n_f32() {
+    fn test_vmovq_n_f32() {
         let v: f32 = 64.0;
         let e = f32x4::new(64.0, 64.0, 64.0, 64.0);
-        let r: f32x4 = transmute(vmovq_n_f32(v));
+        let r = f32x4::from(vmovq_n_f32(v));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vgetq_lane_u64() {
-        let v = i64x2::new(1, 2);
-        let r = vgetq_lane_u64::<1>(transmute(v));
-        assert_eq!(r, 2);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_s8() {
+    fn test_vadd_s8() {
         test_ari_s8(
             |i, j| vadd_s8(i, j),
             |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_s8() {
+    fn test_vaddq_s8() {
         testq_ari_s8(
             |i, j| vaddq_s8(i, j),
             |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 },
         );
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_s16() {
+    fn test_vadd_s16() {
         test_ari_s16(
             |i, j| vadd_s16(i, j),
             |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 },
         );
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_s16() {
+    fn test_vaddq_s16() {
         testq_ari_s16(
             |i, j| vaddq_s16(i, j),
             |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 },
         );
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_s32() {
+    fn test_vadd_s32() {
         test_ari_s32(
             |i, j| vadd_s32(i, j),
             |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 },
         );
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_s32() {
+    fn test_vaddq_s32() {
         testq_ari_s32(
             |i, j| vaddq_s32(i, j),
             |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 },
@@ -2473,42 +2109,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_u8() {
+    fn test_vadd_u8() {
         test_ari_u8(
             |i, j| vadd_u8(i, j),
             |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_u8() {
+    fn test_vaddq_u8() {
         testq_ari_u8(
             |i, j| vaddq_u8(i, j),
             |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_u16() {
+    fn test_vadd_u16() {
         test_ari_u16(
             |i, j| vadd_u16(i, j),
             |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_u16() {
+    fn test_vaddq_u16() {
         testq_ari_u16(
             |i, j| vaddq_u16(i, j),
             |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_u32() {
+    fn test_vadd_u32() {
         test_ari_u32(
             |i, j| vadd_u32(i, j),
             |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_u32() {
+    fn test_vaddq_u32() {
         testq_ari_u32(
             |i, j| vaddq_u32(i, j),
             |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 },
@@ -2516,142 +2157,77 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vadd_f32() {
+    fn test_vadd_f32() {
         test_ari_f32(|i, j| vadd_f32(i, j), |a: f32, b: f32| -> f32 { a + b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddq_f32() {
+    fn test_vaddq_f32() {
         testq_ari_f32(|i, j| vaddq_f32(i, j), |a: f32, b: f32| -> f32 { a + b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_s8() {
+    fn test_vaddl_s8() {
         let v = i8::MAX;
         let a = i8x8::new(v, v, v, v, v, v, v, v);
         let v = 2 * (v as i16);
         let e = i16x8::new(v, v, v, v, v, v, v, v);
-        let r: i16x8 = transmute(vaddl_s8(transmute(a), transmute(a)));
+        let r = i16x8::from(vaddl_s8(a.into(), a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_s16() {
+    fn test_vaddl_s16() {
         let v = i16::MAX;
         let a = i16x4::new(v, v, v, v);
         let v = 2 * (v as i32);
         let e = i32x4::new(v, v, v, v);
-        let r: i32x4 = transmute(vaddl_s16(transmute(a), transmute(a)));
+        let r = i32x4::from(vaddl_s16(a.into(), a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_s32() {
+    fn test_vaddl_s32() {
         let v = i32::MAX;
         let a = i32x2::new(v, v);
         let v = 2 * (v as i64);
         let e = i64x2::new(v, v);
-        let r: i64x2 = transmute(vaddl_s32(transmute(a), transmute(a)));
+        let r = i64x2::from(vaddl_s32(a.into(), a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_u8() {
+    fn test_vaddl_u8() {
         let v = u8::MAX;
         let a = u8x8::new(v, v, v, v, v, v, v, v);
         let v = 2 * (v as u16);
         let e = u16x8::new(v, v, v, v, v, v, v, v);
-        let r: u16x8 = transmute(vaddl_u8(transmute(a), transmute(a)));
+        let r = u16x8::from(vaddl_u8(a.into(), a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_u16() {
+    fn test_vaddl_u16() {
         let v = u16::MAX;
         let a = u16x4::new(v, v, v, v);
         let v = 2 * (v as u32);
         let e = u32x4::new(v, v, v, v);
-        let r: u32x4 = transmute(vaddl_u16(transmute(a), transmute(a)));
+        let r = u32x4::from(vaddl_u16(a.into(), a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_u32() {
+    fn test_vaddl_u32() {
         let v = u32::MAX;
         let a = u32x2::new(v, v);
         let v = 2 * (v as u64);
         let e = u64x2::new(v, v);
-        let r: u64x2 = transmute(vaddl_u32(transmute(a), transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_s8() {
-        let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let x = i8::MAX;
-        let b = i8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-        let x = x as i16;
-        let e = i16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
-        let r: i16x8 = transmute(vaddl_high_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_s16() {
-        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let x = i16::MAX;
-        let b = i16x8::new(x, x, x, x, x, x, x, x);
-        let x = x as i32;
-        let e = i32x4::new(x + 4, x + 5, x + 6, x + 7);
-        let r: i32x4 = transmute(vaddl_high_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_s32() {
-        let a = i32x4::new(0, 1, 2, 3);
-        let x = i32::MAX;
-        let b = i32x4::new(x, x, x, x);
-        let x = x as i64;
-        let e = i64x2::new(x + 2, x + 3);
-        let r: i64x2 = transmute(vaddl_high_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_u8() {
-        let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-        let x = u8::MAX;
-        let b = u8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
-        let x = x as u16;
-        let e = u16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15);
-        let r: u16x8 = transmute(vaddl_high_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_u16() {
-        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let x = u16::MAX;
-        let b = u16x8::new(x, x, x, x, x, x, x, x);
-        let x = x as u32;
-        let e = u32x4::new(x + 4, x + 5, x + 6, x + 7);
-        let r: u32x4 = transmute(vaddl_high_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddl_high_u32() {
-        let a = u32x4::new(0, 1, 2, 3);
-        let x = u32::MAX;
-        let b = u32x4::new(x, x, x, x);
-        let x = x as u64;
-        let e = u64x2::new(x + 2, x + 3);
-        let r: u64x2 = transmute(vaddl_high_u32(transmute(a), transmute(b)));
+        let r = u64x2::from(vaddl_u32(a.into(), a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_s8() {
+    fn test_vaddw_s8() {
         let x = i16::MAX;
         let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
         let y = i8::MAX;
@@ -2667,36 +2243,36 @@ mod tests {
             6 + y,
             7 + y,
         );
-        let r: i16x8 = transmute(vaddw_s8(transmute(a), transmute(b)));
+        let r = i16x8::from(vaddw_s8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_s16() {
+    fn test_vaddw_s16() {
         let x = i32::MAX;
         let a = i32x4::new(x, 1, 2, 3);
         let y = i16::MAX;
         let b = i16x4::new(y, y, y, y);
         let y = y as i32;
         let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r: i32x4 = transmute(vaddw_s16(transmute(a), transmute(b)));
+        let r = i32x4::from(vaddw_s16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_s32() {
+    fn test_vaddw_s32() {
         let x = i64::MAX;
         let a = i64x2::new(x, 1);
         let y = i32::MAX;
         let b = i32x2::new(y, y);
         let y = y as i64;
         let e = i64x2::new(x.wrapping_add(y), 1 + y);
-        let r: i64x2 = transmute(vaddw_s32(transmute(a), transmute(b)));
+        let r = i64x2::from(vaddw_s32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_u8() {
+    fn test_vaddw_u8() {
         let x = u16::MAX;
         let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
         let y = u8::MAX;
@@ -2712,255 +2288,165 @@ mod tests {
             6 + y,
             7 + y,
         );
-        let r: u16x8 = transmute(vaddw_u8(transmute(a), transmute(b)));
+        let r = u16x8::from(vaddw_u8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_u16() {
+    fn test_vaddw_u16() {
         let x = u32::MAX;
         let a = u32x4::new(x, 1, 2, 3);
         let y = u16::MAX;
         let b = u16x4::new(y, y, y, y);
         let y = y as u32;
         let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r: u32x4 = transmute(vaddw_u16(transmute(a), transmute(b)));
+        let r = u32x4::from(vaddw_u16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_u32() {
+    fn test_vaddw_u32() {
         let x = u64::MAX;
         let a = u64x2::new(x, 1);
         let y = u32::MAX;
         let b = u32x2::new(y, y);
         let y = y as u64;
         let e = u64x2::new(x.wrapping_add(y), 1 + y);
-        let r: u64x2 = transmute(vaddw_u32(transmute(a), transmute(b)));
+        let r = u64x2::from(vaddw_u32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_s8() {
-        let x = i16::MAX;
-        let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
-        let y = i8::MAX;
-        let b = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
-        let y = y as i16;
-        let e = i16x8::new(
-            x.wrapping_add(y),
-            1 + y,
-            2 + y,
-            3 + y,
-            4 + y,
-            5 + y,
-            6 + y,
-            7 + y,
-        );
-        let r: i16x8 = transmute(vaddw_high_s8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_s16() {
-        let x = i32::MAX;
-        let a = i32x4::new(x, 1, 2, 3);
-        let y = i16::MAX;
-        let b = i16x8::new(0, 0, 0, 0, y, y, y, y);
-        let y = y as i32;
-        let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r: i32x4 = transmute(vaddw_high_s16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_s32() {
-        let x = i64::MAX;
-        let a = i64x2::new(x, 1);
-        let y = i32::MAX;
-        let b = i32x4::new(0, 0, y, y);
-        let y = y as i64;
-        let e = i64x2::new(x.wrapping_add(y), 1 + y);
-        let r: i64x2 = transmute(vaddw_high_s32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_u8() {
-        let x = u16::MAX;
-        let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7);
-        let y = u8::MAX;
-        let b = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y);
-        let y = y as u16;
-        let e = u16x8::new(
-            x.wrapping_add(y),
-            1 + y,
-            2 + y,
-            3 + y,
-            4 + y,
-            5 + y,
-            6 + y,
-            7 + y,
-        );
-        let r: u16x8 = transmute(vaddw_high_u8(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_u16() {
-        let x = u32::MAX;
-        let a = u32x4::new(x, 1, 2, 3);
-        let y = u16::MAX;
-        let b = u16x8::new(0, 0, 0, 0, y, y, y, y);
-        let y = y as u32;
-        let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y);
-        let r: u32x4 = transmute(vaddw_high_u16(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vaddw_high_u32() {
-        let x = u64::MAX;
-        let a = u64x2::new(x, 1);
-        let y = u32::MAX;
-        let b = u32x4::new(0, 0, y, y);
-        let y = y as u64;
-        let e = u64x2::new(x.wrapping_add(y), 1 + y);
-        let r: u64x2 = transmute(vaddw_high_u32(transmute(a), transmute(b)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vmvn_s8() {
+    fn test_vmvn_s8() {
         let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let e = i8x8::new(-1, -2, -3, -4, -5, -6, -7, -8);
-        let r: i8x8 = transmute(vmvn_s8(transmute(a)));
+        let r = i8x8::from(vmvn_s8(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvnq_s8() {
+    fn test_vmvnq_s8() {
         let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e = i8x16::new(
             -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
         );
-        let r: i8x16 = transmute(vmvnq_s8(transmute(a)));
+        let r = i8x16::from(vmvnq_s8(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvn_s16() {
+    fn test_vmvn_s16() {
         let a = i16x4::new(0, 1, 2, 3);
         let e = i16x4::new(-1, -2, -3, -4);
-        let r: i16x4 = transmute(vmvn_s16(transmute(a)));
+        let r = i16x4::from(vmvn_s16(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvnq_s16() {
+    fn test_vmvnq_s16() {
         let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let e = i16x8::new(-1, -2, -3, -4, -5, -6, -7, -8);
-        let r: i16x8 = transmute(vmvnq_s16(transmute(a)));
+        let r = i16x8::from(vmvnq_s16(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvn_s32() {
+    fn test_vmvn_s32() {
         let a = i32x2::new(0, 1);
         let e = i32x2::new(-1, -2);
-        let r: i32x2 = transmute(vmvn_s32(transmute(a)));
+        let r = i32x2::from(vmvn_s32(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvnq_s32() {
+    fn test_vmvnq_s32() {
         let a = i32x4::new(0, 1, 2, 3);
         let e = i32x4::new(-1, -2, -3, -4);
-        let r: i32x4 = transmute(vmvnq_s32(transmute(a)));
+        let r = i32x4::from(vmvnq_s32(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvn_u8() {
+    fn test_vmvn_u8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248);
-        let r: u8x8 = transmute(vmvn_u8(transmute(a)));
+        let r = u8x8::from(vmvn_u8(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvnq_u8() {
+    fn test_vmvnq_u8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e = u8x16::new(
             255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240,
         );
-        let r: u8x16 = transmute(vmvnq_u8(transmute(a)));
+        let r = u8x16::from(vmvnq_u8(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvn_u16() {
+    fn test_vmvn_u16() {
         let a = u16x4::new(0, 1, 2, 3);
         let e = u16x4::new(65_535, 65_534, 65_533, 65_532);
-        let r: u16x4 = transmute(vmvn_u16(transmute(a)));
+        let r = u16x4::from(vmvn_u16(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvnq_u16() {
+    fn test_vmvnq_u16() {
         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let e = u16x8::new(
             65_535, 65_534, 65_533, 65_532, 65_531, 65_530, 65_529, 65_528,
         );
-        let r: u16x8 = transmute(vmvnq_u16(transmute(a)));
+        let r = u16x8::from(vmvnq_u16(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvn_u32() {
+    fn test_vmvn_u32() {
         let a = u32x2::new(0, 1);
         let e = u32x2::new(4_294_967_295, 4_294_967_294);
-        let r: u32x2 = transmute(vmvn_u32(transmute(a)));
+        let r = u32x2::from(vmvn_u32(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvnq_u32() {
+    fn test_vmvnq_u32() {
         let a = u32x4::new(0, 1, 2, 3);
         let e = u32x4::new(4_294_967_295, 4_294_967_294, 4_294_967_293, 4_294_967_292);
-        let r: u32x4 = transmute(vmvnq_u32(transmute(a)));
+        let r = u32x4::from(vmvnq_u32(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvn_p8() {
+    fn test_vmvn_p8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248);
-        let r: u8x8 = transmute(vmvn_p8(transmute(a)));
+        let r = u8x8::from(vmvn_p8(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmvnq_p8() {
+    fn test_vmvnq_p8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e = u8x16::new(
             255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240,
         );
-        let r: u8x16 = transmute(vmvnq_p8(transmute(a)));
+        let r = u8x16::from(vmvnq_p8(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbic_s8() {
+    fn test_vbic_s8() {
         let a = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7);
         let b = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
         let e = i8x8::new(0, -2, -2, -4, -4, -6, -6, -8);
-        let r: i8x8 = transmute(vbic_s8(transmute(a), transmute(b)));
+        let r = i8x8::from(vbic_s8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbicq_s8() {
+    fn test_vbicq_s8() {
         let a = i8x16::new(
             0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
         );
@@ -2968,138 +2454,138 @@ mod tests {
         let e = i8x16::new(
             0, -2, -2, -4, -4, -6, -6, -8, -8, -10, -10, -12, -12, -14, -14, -16,
         );
-        let r: i8x16 = transmute(vbicq_s8(transmute(a), transmute(b)));
+        let r = i8x16::from(vbicq_s8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbic_s16() {
+    fn test_vbic_s16() {
         let a = i16x4::new(0, -1, -2, -3);
         let b = i16x4::new(1, 1, 1, 1);
         let e = i16x4::new(0, -2, -2, -4);
-        let r: i16x4 = transmute(vbic_s16(transmute(a), transmute(b)));
+        let r = i16x4::from(vbic_s16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbicq_s16() {
+    fn test_vbicq_s16() {
         let a = i16x8::new(0, -1, -2, -3, -4, -5, -6, -7);
         let b = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
         let e = i16x8::new(0, -2, -2, -4, -4, -6, -6, -8);
-        let r: i16x8 = transmute(vbicq_s16(transmute(a), transmute(b)));
+        let r = i16x8::from(vbicq_s16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbic_s32() {
+    fn test_vbic_s32() {
         let a = i32x2::new(0, -1);
         let b = i32x2::new(1, 1);
         let e = i32x2::new(0, -2);
-        let r: i32x2 = transmute(vbic_s32(transmute(a), transmute(b)));
+        let r = i32x2::from(vbic_s32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbicq_s32() {
+    fn test_vbicq_s32() {
         let a = i32x4::new(0, -1, -2, -3);
         let b = i32x4::new(1, 1, 1, 1);
         let e = i32x4::new(0, -2, -2, -4);
-        let r: i32x4 = transmute(vbicq_s32(transmute(a), transmute(b)));
+        let r = i32x4::from(vbicq_s32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbic_s64() {
+    fn test_vbic_s64() {
         let a = i64x1::new(-1);
         let b = i64x1::new(1);
         let e = i64x1::new(-2);
-        let r: i64x1 = transmute(vbic_s64(transmute(a), transmute(b)));
+        let r = i64x1::from(vbic_s64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbicq_s64() {
+    fn test_vbicq_s64() {
         let a = i64x2::new(0, -1);
         let b = i64x2::new(1, 1);
         let e = i64x2::new(0, -2);
-        let r: i64x2 = transmute(vbicq_s64(transmute(a), transmute(b)));
+        let r = i64x2::from(vbicq_s64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbic_u8() {
+    fn test_vbic_u8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let b = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
         let e = u8x8::new(0, 0, 2, 2, 4, 4, 6, 6);
-        let r: u8x8 = transmute(vbic_u8(transmute(a), transmute(b)));
+        let r = u8x8::from(vbic_u8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbicq_u8() {
+    fn test_vbicq_u8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let e = u8x16::new(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
-        let r: u8x16 = transmute(vbicq_u8(transmute(a), transmute(b)));
+        let r = u8x16::from(vbicq_u8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbic_u16() {
+    fn test_vbic_u16() {
         let a = u16x4::new(0, 1, 2, 3);
         let b = u16x4::new(1, 1, 1, 1);
         let e = u16x4::new(0, 0, 2, 2);
-        let r: u16x4 = transmute(vbic_u16(transmute(a), transmute(b)));
+        let r = u16x4::from(vbic_u16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbicq_u16() {
+    fn test_vbicq_u16() {
         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let b = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
         let e = u16x8::new(0, 0, 2, 2, 4, 4, 6, 6);
-        let r: u16x8 = transmute(vbicq_u16(transmute(a), transmute(b)));
+        let r = u16x8::from(vbicq_u16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbic_u32() {
+    fn test_vbic_u32() {
         let a = u32x2::new(0, 1);
         let b = u32x2::new(1, 1);
         let e = u32x2::new(0, 0);
-        let r: u32x2 = transmute(vbic_u32(transmute(a), transmute(b)));
+        let r = u32x2::from(vbic_u32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbicq_u32() {
+    fn test_vbicq_u32() {
         let a = u32x4::new(0, 1, 2, 3);
         let b = u32x4::new(1, 1, 1, 1);
         let e = u32x4::new(0, 0, 2, 2);
-        let r: u32x4 = transmute(vbicq_u32(transmute(a), transmute(b)));
+        let r = u32x4::from(vbicq_u32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbic_u64() {
+    fn test_vbic_u64() {
         let a = u64x1::new(1);
         let b = u64x1::new(1);
         let e = u64x1::new(0);
-        let r: u64x1 = transmute(vbic_u64(transmute(a), transmute(b)));
+        let r = u64x1::from(vbic_u64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbicq_u64() {
+    fn test_vbicq_u64() {
         let a = u64x2::new(0, 1);
         let b = u64x2::new(1, 1);
         let e = u64x2::new(0, 0);
-        let r: u64x2 = transmute(vbicq_u64(transmute(a), transmute(b)));
+        let r = u64x2::from(vbicq_u64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_s8() {
+    fn test_vbsl_s8() {
         let a = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, 0, u8::MAX, 0);
         let b = i8x8::new(
             i8::MAX,
@@ -3131,38 +2617,42 @@ mod tests {
             i8::MAX,
             i8::MIN,
         );
-        let r: i8x8 = transmute(vbsl_s8(transmute(a), transmute(b), transmute(c)));
+        let r = i8x8::from(vbsl_s8(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_s16() {
+    fn test_vbsl_s16() {
         let a = u16x4::new(u16::MAX, 0, 1, 2);
         let b = i16x4::new(i16::MAX, i16::MAX, i16::MAX, i16::MAX);
         let c = i16x4::new(i16::MIN, i16::MIN, i16::MIN, i16::MIN);
         let e = i16x4::new(i16::MAX, i16::MIN, i16::MIN | 1, i16::MIN | 2);
-        let r: i16x4 = transmute(vbsl_s16(transmute(a), transmute(b), transmute(c)));
+        let r = i16x4::from(vbsl_s16(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_s32() {
+    fn test_vbsl_s32() {
         let a = u32x2::new(u32::MAX, 1);
         let b = i32x2::new(i32::MAX, i32::MAX);
         let c = i32x2::new(i32::MIN, i32::MIN);
         let e = i32x2::new(i32::MAX, i32::MIN | 1);
-        let r: i32x2 = transmute(vbsl_s32(transmute(a), transmute(b), transmute(c)));
+        let r = i32x2::from(vbsl_s32(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_s64() {
+    fn test_vbsl_s64() {
         let a = u64x1::new(1);
         let b = i64x1::new(i64::MAX);
         let c = i64x1::new(i64::MIN);
         let e = i64x1::new(i64::MIN | 1);
-        let r: i64x1 = transmute(vbsl_s64(transmute(a), transmute(b), transmute(c)));
+        let r = i64x1::from(vbsl_s64(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_u8() {
+    fn test_vbsl_u8() {
         let a = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, 0, u8::MAX, 0);
         let b = u8x8::new(
             u8::MAX,
@@ -3185,47 +2675,52 @@ mod tests {
             u8::MIN,
         );
         let e = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, u8::MIN, u8::MAX, u8::MIN);
-        let r: u8x8 = transmute(vbsl_u8(transmute(a), transmute(b), transmute(c)));
+        let r = u8x8::from(vbsl_u8(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_u16() {
+    fn test_vbsl_u16() {
         let a = u16x4::new(u16::MAX, 0, 1, 2);
         let b = u16x4::new(u16::MAX, u16::MAX, u16::MAX, u16::MAX);
         let c = u16x4::new(u16::MIN, u16::MIN, u16::MIN, u16::MIN);
         let e = u16x4::new(u16::MAX, 0, 1, 2);
-        let r: u16x4 = transmute(vbsl_u16(transmute(a), transmute(b), transmute(c)));
+        let r = u16x4::from(vbsl_u16(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_u32() {
+    fn test_vbsl_u32() {
         let a = u32x2::new(u32::MAX, 2);
         let b = u32x2::new(u32::MAX, u32::MAX);
         let c = u32x2::new(u32::MIN, u32::MIN);
         let e = u32x2::new(u32::MAX, 2);
-        let r: u32x2 = transmute(vbsl_u32(transmute(a), transmute(b), transmute(c)));
+        let r = u32x2::from(vbsl_u32(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_u64() {
+    fn test_vbsl_u64() {
         let a = u64x1::new(2);
         let b = u64x1::new(u64::MAX);
         let c = u64x1::new(u64::MIN);
         let e = u64x1::new(2);
-        let r: u64x1 = transmute(vbsl_u64(transmute(a), transmute(b), transmute(c)));
+        let r = u64x1::from(vbsl_u64(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_f32() {
+    fn test_vbsl_f32() {
         let a = u32x2::new(1, 0x80000000);
         let b = f32x2::new(8388609f32, -1.23f32);
         let c = f32x2::new(2097152f32, 2.34f32);
         let e = f32x2::new(2097152.25f32, -2.34f32);
-        let r: f32x2 = transmute(vbsl_f32(transmute(a), transmute(b), transmute(c)));
+        let r = f32x2::from(vbsl_f32(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_p8() {
+    fn test_vbsl_p8() {
         let a = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, 0, u8::MAX, 0);
         let b = u8x8::new(
             u8::MAX,
@@ -3248,20 +2743,22 @@ mod tests {
             u8::MIN,
         );
         let e = u8x8::new(u8::MAX, 1, u8::MAX, 2, u8::MAX, u8::MIN, u8::MAX, u8::MIN);
-        let r: u8x8 = transmute(vbsl_p8(transmute(a), transmute(b), transmute(c)));
+        let r = u8x8::from(vbsl_p8(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbsl_p16() {
+    fn test_vbsl_p16() {
         let a = u16x4::new(u16::MAX, 0, 1, 2);
         let b = u16x4::new(u16::MAX, u16::MAX, u16::MAX, u16::MAX);
         let c = u16x4::new(u16::MIN, u16::MIN, u16::MIN, u16::MIN);
         let e = u16x4::new(u16::MAX, 0, 1, 2);
-        let r: u16x4 = transmute(vbsl_p16(transmute(a), transmute(b), transmute(c)));
+        let r = u16x4::from(vbsl_p16(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_s8() {
+    fn test_vbslq_s8() {
         let a = u8x16::new(
             u8::MAX,
             1,
@@ -3334,11 +2831,12 @@ mod tests {
             i8::MAX,
             i8::MIN,
         );
-        let r: i8x16 = transmute(vbslq_s8(transmute(a), transmute(b), transmute(c)));
+        let r = i8x16::from(vbslq_s8(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_s16() {
+    fn test_vbslq_s16() {
         let a = u16x8::new(u16::MAX, 1, u16::MAX, 2, u16::MAX, 0, u16::MAX, 0);
         let b = i16x8::new(
             i16::MAX,
@@ -3370,29 +2868,32 @@ mod tests {
             i16::MAX,
             i16::MIN,
         );
-        let r: i16x8 = transmute(vbslq_s16(transmute(a), transmute(b), transmute(c)));
+        let r = i16x8::from(vbslq_s16(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_s32() {
+    fn test_vbslq_s32() {
         let a = u32x4::new(u32::MAX, 1, u32::MAX, 2);
         let b = i32x4::new(i32::MAX, i32::MAX, i32::MAX, i32::MAX);
         let c = i32x4::new(i32::MIN, i32::MIN, i32::MIN, i32::MIN);
         let e = i32x4::new(i32::MAX, i32::MIN | 1, i32::MAX, i32::MIN | 2);
-        let r: i32x4 = transmute(vbslq_s32(transmute(a), transmute(b), transmute(c)));
+        let r = i32x4::from(vbslq_s32(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_s64() {
+    fn test_vbslq_s64() {
         let a = u64x2::new(u64::MAX, 1);
         let b = i64x2::new(i64::MAX, i64::MAX);
         let c = i64x2::new(i64::MIN, i64::MIN);
         let e = i64x2::new(i64::MAX, i64::MIN | 1);
-        let r: i64x2 = transmute(vbslq_s64(transmute(a), transmute(b), transmute(c)));
+        let r = i64x2::from(vbslq_s64(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_u8() {
+    fn test_vbslq_u8() {
         let a = u8x16::new(
             u8::MAX,
             1,
@@ -3465,11 +2966,12 @@ mod tests {
             u8::MAX,
             u8::MIN,
         );
-        let r: u8x16 = transmute(vbslq_u8(transmute(a), transmute(b), transmute(c)));
+        let r = u8x16::from(vbslq_u8(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_u16() {
+    fn test_vbslq_u16() {
         let a = u16x8::new(u16::MAX, 1, u16::MAX, 2, u16::MAX, 0, u16::MAX, 0);
         let b = u16x8::new(
             u16::MAX,
@@ -3501,38 +3003,42 @@ mod tests {
             u16::MAX,
             u16::MIN,
         );
-        let r: u16x8 = transmute(vbslq_u16(transmute(a), transmute(b), transmute(c)));
+        let r = u16x8::from(vbslq_u16(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_u32() {
+    fn test_vbslq_u32() {
         let a = u32x4::new(u32::MAX, 1, u32::MAX, 2);
         let b = u32x4::new(u32::MAX, u32::MAX, u32::MAX, u32::MAX);
         let c = u32x4::new(u32::MIN, u32::MIN, u32::MIN, u32::MIN);
         let e = u32x4::new(u32::MAX, 1, u32::MAX, 2);
-        let r: u32x4 = transmute(vbslq_u32(transmute(a), transmute(b), transmute(c)));
+        let r = u32x4::from(vbslq_u32(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_u64() {
+    fn test_vbslq_u64() {
         let a = u64x2::new(u64::MAX, 1);
         let b = u64x2::new(u64::MAX, u64::MAX);
         let c = u64x2::new(u64::MIN, u64::MIN);
         let e = u64x2::new(u64::MAX, 1);
-        let r: u64x2 = transmute(vbslq_u64(transmute(a), transmute(b), transmute(c)));
+        let r = u64x2::from(vbslq_u64(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_f32() {
+    fn test_vbslq_f32() {
         let a = u32x4::new(u32::MAX, 0, 1, 0x80000000);
         let b = f32x4::new(-1.23f32, -1.23f32, 8388609f32, -1.23f32);
         let c = f32x4::new(2.34f32, 2.34f32, 2097152f32, 2.34f32);
         let e = f32x4::new(-1.23f32, 2.34f32, 2097152.25f32, -2.34f32);
-        let r: f32x4 = transmute(vbslq_f32(transmute(a), transmute(b), transmute(c)));
+        let r = f32x4::from(vbslq_f32(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_p8() {
+    fn test_vbslq_p8() {
         let a = u8x16::new(
             u8::MAX,
             1,
@@ -3605,11 +3111,12 @@ mod tests {
             u8::MAX,
             u8::MIN,
         );
-        let r: u8x16 = transmute(vbslq_p8(transmute(a), transmute(b), transmute(c)));
+        let r = u8x16::from(vbslq_p8(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vbslq_p16() {
+    fn test_vbslq_p16() {
         let a = u16x8::new(u16::MAX, 1, u16::MAX, 2, u16::MAX, 0, u16::MAX, 0);
         let b = u16x8::new(
             u16::MAX,
@@ -3641,21 +3148,21 @@ mod tests {
             u16::MAX,
             u16::MIN,
         );
-        let r: u16x8 = transmute(vbslq_p16(transmute(a), transmute(b), transmute(c)));
+        let r = u16x8::from(vbslq_p16(a.into(), b.into(), c.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorn_s8() {
+    fn test_vorn_s8() {
         let a = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7);
         let b = i8x8::new(-2, -2, -2, -2, -2, -2, -2, -2);
         let e = i8x8::new(1, -1, -1, -3, -3, -5, -5, -7);
-        let r: i8x8 = transmute(vorn_s8(transmute(a), transmute(b)));
+        let r = i8x8::from(vorn_s8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vornq_s8() {
+    fn test_vornq_s8() {
         let a = i8x16::new(
             0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15,
         );
@@ -3665,475 +3172,522 @@ mod tests {
         let e = i8x16::new(
             1, -1, -1, -3, -3, -5, -5, -7, -7, -9, -9, -11, -11, -13, -13, -15,
         );
-        let r: i8x16 = transmute(vornq_s8(transmute(a), transmute(b)));
+        let r = i8x16::from(vornq_s8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorn_s16() {
+    fn test_vorn_s16() {
         let a = i16x4::new(0, -1, -2, -3);
         let b = i16x4::new(-2, -2, -2, -2);
         let e = i16x4::new(1, -1, -1, -3);
-        let r: i16x4 = transmute(vorn_s16(transmute(a), transmute(b)));
+        let r = i16x4::from(vorn_s16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vornq_s16() {
+    fn test_vornq_s16() {
         let a = i16x8::new(0, -1, -2, -3, -4, -5, -6, -7);
         let b = i16x8::new(-2, -2, -2, -2, -2, -2, -2, -2);
         let e = i16x8::new(1, -1, -1, -3, -3, -5, -5, -7);
-        let r: i16x8 = transmute(vornq_s16(transmute(a), transmute(b)));
+        let r = i16x8::from(vornq_s16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorn_s32() {
+    fn test_vorn_s32() {
         let a = i32x2::new(0, -1);
         let b = i32x2::new(-2, -2);
         let e = i32x2::new(1, -1);
-        let r: i32x2 = transmute(vorn_s32(transmute(a), transmute(b)));
+        let r = i32x2::from(vorn_s32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vornq_s32() {
+    fn test_vornq_s32() {
         let a = i32x4::new(0, -1, -2, -3);
         let b = i32x4::new(-2, -2, -2, -2);
         let e = i32x4::new(1, -1, -1, -3);
-        let r: i32x4 = transmute(vornq_s32(transmute(a), transmute(b)));
+        let r = i32x4::from(vornq_s32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorn_s64() {
+    fn test_vorn_s64() {
         let a = i64x1::new(0);
         let b = i64x1::new(-2);
         let e = i64x1::new(1);
-        let r: i64x1 = transmute(vorn_s64(transmute(a), transmute(b)));
+        let r = i64x1::from(vorn_s64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vornq_s64() {
+    fn test_vornq_s64() {
         let a = i64x2::new(0, -1);
         let b = i64x2::new(-2, -2);
         let e = i64x2::new(1, -1);
-        let r: i64x2 = transmute(vornq_s64(transmute(a), transmute(b)));
+        let r = i64x2::from(vornq_s64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorn_u8() {
+    fn test_vorn_u8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let t = u8::MAX - 1;
         let b = u8x8::new(t, t, t, t, t, t, t, t);
         let e = u8x8::new(1, 1, 3, 3, 5, 5, 7, 7);
-        let r: u8x8 = transmute(vorn_u8(transmute(a), transmute(b)));
+        let r = u8x8::from(vorn_u8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vornq_u8() {
+    fn test_vornq_u8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let t = u8::MAX - 1;
         let b = u8x16::new(t, t, t, t, t, t, t, t, t, t, t, t, t, t, t, t);
         let e = u8x16::new(1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
-        let r: u8x16 = transmute(vornq_u8(transmute(a), transmute(b)));
+        let r = u8x16::from(vornq_u8(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorn_u16() {
+    fn test_vorn_u16() {
         let a = u16x4::new(0, 1, 2, 3);
         let t = u16::MAX - 1;
         let b = u16x4::new(t, t, t, t);
         let e = u16x4::new(1, 1, 3, 3);
-        let r: u16x4 = transmute(vorn_u16(transmute(a), transmute(b)));
+        let r = u16x4::from(vorn_u16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vornq_u16() {
+    fn test_vornq_u16() {
         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let t = u16::MAX - 1;
         let b = u16x8::new(t, t, t, t, t, t, t, t);
         let e = u16x8::new(1, 1, 3, 3, 5, 5, 7, 7);
-        let r: u16x8 = transmute(vornq_u16(transmute(a), transmute(b)));
+        let r = u16x8::from(vornq_u16(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorn_u32() {
+    fn test_vorn_u32() {
         let a = u32x2::new(0, 1);
         let t = u32::MAX - 1;
         let b = u32x2::new(t, t);
         let e = u32x2::new(1, 1);
-        let r: u32x2 = transmute(vorn_u32(transmute(a), transmute(b)));
+        let r = u32x2::from(vorn_u32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vornq_u32() {
+    fn test_vornq_u32() {
         let a = u32x4::new(0, 1, 2, 3);
         let t = u32::MAX - 1;
         let b = u32x4::new(t, t, t, t);
         let e = u32x4::new(1, 1, 3, 3);
-        let r: u32x4 = transmute(vornq_u32(transmute(a), transmute(b)));
+        let r = u32x4::from(vornq_u32(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorn_u64() {
+    fn test_vorn_u64() {
         let a = u64x1::new(0);
         let t = u64::MAX - 1;
         let b = u64x1::new(t);
         let e = u64x1::new(1);
-        let r: u64x1 = transmute(vorn_u64(transmute(a), transmute(b)));
+        let r = u64x1::from(vorn_u64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vornq_u64() {
+    fn test_vornq_u64() {
         let a = u64x2::new(0, 1);
         let t = u64::MAX - 1;
         let b = u64x2::new(t, t);
         let e = u64x2::new(1, 1);
-        let r: u64x2 = transmute(vornq_u64(transmute(a), transmute(b)));
+        let r = u64x2::from(vornq_u64(a.into(), b.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_s16() {
+    fn test_vmovn_s16() {
         let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: i8x8 = transmute(vmovn_s16(transmute(a)));
+        let r = i8x8::from(vmovn_s16(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_s32() {
+    fn test_vmovn_s32() {
         let a = i32x4::new(1, 2, 3, 4);
         let e = i16x4::new(1, 2, 3, 4);
-        let r: i16x4 = transmute(vmovn_s32(transmute(a)));
+        let r = i16x4::from(vmovn_s32(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_s64() {
+    fn test_vmovn_s64() {
         let a = i64x2::new(1, 2);
         let e = i32x2::new(1, 2);
-        let r: i32x2 = transmute(vmovn_s64(transmute(a)));
+        let r = i32x2::from(vmovn_s64(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_u16() {
+    fn test_vmovn_u16() {
         let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u8x8 = transmute(vmovn_u16(transmute(a)));
+        let r = u8x8::from(vmovn_u16(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_u32() {
+    fn test_vmovn_u32() {
         let a = u32x4::new(1, 2, 3, 4);
         let e = u16x4::new(1, 2, 3, 4);
-        let r: u16x4 = transmute(vmovn_u32(transmute(a)));
+        let r = u16x4::from(vmovn_u32(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovn_u64() {
+    fn test_vmovn_u64() {
         let a = u64x2::new(1, 2);
         let e = u32x2::new(1, 2);
-        let r: u32x2 = transmute(vmovn_u64(transmute(a)));
+        let r = u32x2::from(vmovn_u64(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_s8() {
+    fn test_vmovl_s8() {
         let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: i16x8 = transmute(vmovl_s8(transmute(a)));
+        let r = i16x8::from(vmovl_s8(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_s16() {
+    fn test_vmovl_s16() {
         let e = i32x4::new(1, 2, 3, 4);
         let a = i16x4::new(1, 2, 3, 4);
-        let r: i32x4 = transmute(vmovl_s16(transmute(a)));
+        let r = i32x4::from(vmovl_s16(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_s32() {
+    fn test_vmovl_s32() {
         let e = i64x2::new(1, 2);
         let a = i32x2::new(1, 2);
-        let r: i64x2 = transmute(vmovl_s32(transmute(a)));
+        let r = i64x2::from(vmovl_s32(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_u8() {
+    fn test_vmovl_u8() {
         let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
-        let r: u16x8 = transmute(vmovl_u8(transmute(a)));
+        let r = u16x8::from(vmovl_u8(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_u16() {
+    fn test_vmovl_u16() {
         let e = u32x4::new(1, 2, 3, 4);
         let a = u16x4::new(1, 2, 3, 4);
-        let r: u32x4 = transmute(vmovl_u16(transmute(a)));
+        let r = u32x4::from(vmovl_u16(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmovl_u32() {
+    fn test_vmovl_u32() {
         let e = u64x2::new(1, 2);
         let a = u32x2::new(1, 2);
-        let r: u64x2 = transmute(vmovl_u32(transmute(a)));
+        let r = u64x2::from(vmovl_u32(a.into()));
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vand_s8() {
+    fn test_vand_s8() {
         test_bit_s8(|i, j| vand_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vandq_s8() {
+    fn test_vandq_s8() {
         testq_bit_s8(|i, j| vandq_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vand_s16() {
+    fn test_vand_s16() {
         test_bit_s16(|i, j| vand_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vandq_s16() {
+    fn test_vandq_s16() {
         testq_bit_s16(|i, j| vandq_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vand_s32() {
+    fn test_vand_s32() {
         test_bit_s32(|i, j| vand_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vandq_s32() {
+    fn test_vandq_s32() {
         testq_bit_s32(|i, j| vandq_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vand_s64() {
+    fn test_vand_s64() {
         test_bit_s64(|i, j| vand_s64(i, j), |a: i64, b: i64| -> i64 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vandq_s64() {
+    fn test_vandq_s64() {
         testq_bit_s64(|i, j| vandq_s64(i, j), |a: i64, b: i64| -> i64 { a & b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vand_u8() {
+    fn test_vand_u8() {
         test_bit_u8(|i, j| vand_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vandq_u8() {
+    fn test_vandq_u8() {
         testq_bit_u8(|i, j| vandq_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vand_u16() {
+    fn test_vand_u16() {
         test_bit_u16(|i, j| vand_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vandq_u16() {
+    fn test_vandq_u16() {
         testq_bit_u16(|i, j| vandq_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vand_u32() {
+    fn test_vand_u32() {
         test_bit_u32(|i, j| vand_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vandq_u32() {
+    fn test_vandq_u32() {
         testq_bit_u32(|i, j| vandq_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vand_u64() {
+    fn test_vand_u64() {
         test_bit_u64(|i, j| vand_u64(i, j), |a: u64, b: u64| -> u64 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vandq_u64() {
+    fn test_vandq_u64() {
         testq_bit_u64(|i, j| vandq_u64(i, j), |a: u64, b: u64| -> u64 { a & b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorr_s8() {
+    fn test_vorr_s8() {
         test_bit_s8(|i, j| vorr_s8(i, j), |a: i8, b: i8| -> i8 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorrq_s8() {
+    fn test_vorrq_s8() {
         testq_bit_s8(|i, j| vorrq_s8(i, j), |a: i8, b: i8| -> i8 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorr_s16() {
+    fn test_vorr_s16() {
         test_bit_s16(|i, j| vorr_s16(i, j), |a: i16, b: i16| -> i16 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorrq_s16() {
+    fn test_vorrq_s16() {
         testq_bit_s16(|i, j| vorrq_s16(i, j), |a: i16, b: i16| -> i16 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorr_s32() {
+    fn test_vorr_s32() {
         test_bit_s32(|i, j| vorr_s32(i, j), |a: i32, b: i32| -> i32 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorrq_s32() {
+    fn test_vorrq_s32() {
         testq_bit_s32(|i, j| vorrq_s32(i, j), |a: i32, b: i32| -> i32 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorr_s64() {
+    fn test_vorr_s64() {
         test_bit_s64(|i, j| vorr_s64(i, j), |a: i64, b: i64| -> i64 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorrq_s64() {
+    fn test_vorrq_s64() {
         testq_bit_s64(|i, j| vorrq_s64(i, j), |a: i64, b: i64| -> i64 { a | b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorr_u8() {
+    fn test_vorr_u8() {
         test_bit_u8(|i, j| vorr_u8(i, j), |a: u8, b: u8| -> u8 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorrq_u8() {
+    fn test_vorrq_u8() {
         testq_bit_u8(|i, j| vorrq_u8(i, j), |a: u8, b: u8| -> u8 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorr_u16() {
+    fn test_vorr_u16() {
         test_bit_u16(|i, j| vorr_u16(i, j), |a: u16, b: u16| -> u16 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorrq_u16() {
+    fn test_vorrq_u16() {
         testq_bit_u16(|i, j| vorrq_u16(i, j), |a: u16, b: u16| -> u16 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorr_u32() {
+    fn test_vorr_u32() {
         test_bit_u32(|i, j| vorr_u32(i, j), |a: u32, b: u32| -> u32 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorrq_u32() {
+    fn test_vorrq_u32() {
         testq_bit_u32(|i, j| vorrq_u32(i, j), |a: u32, b: u32| -> u32 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorr_u64() {
+    fn test_vorr_u64() {
         test_bit_u64(|i, j| vorr_u64(i, j), |a: u64, b: u64| -> u64 { a | b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vorrq_u64() {
+    fn test_vorrq_u64() {
         testq_bit_u64(|i, j| vorrq_u64(i, j), |a: u64, b: u64| -> u64 { a | b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_veor_s8() {
+    fn test_veor_s8() {
         test_bit_s8(|i, j| veor_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veorq_s8() {
+    fn test_veorq_s8() {
         testq_bit_s8(|i, j| veorq_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veor_s16() {
+    fn test_veor_s16() {
         test_bit_s16(|i, j| veor_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veorq_s16() {
+    fn test_veorq_s16() {
         testq_bit_s16(|i, j| veorq_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veor_s32() {
+    fn test_veor_s32() {
         test_bit_s32(|i, j| veor_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veorq_s32() {
+    fn test_veorq_s32() {
         testq_bit_s32(|i, j| veorq_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veor_s64() {
+    fn test_veor_s64() {
         test_bit_s64(|i, j| veor_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veorq_s64() {
+    fn test_veorq_s64() {
         testq_bit_s64(|i, j| veorq_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_veor_u8() {
+    fn test_veor_u8() {
         test_bit_u8(|i, j| veor_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veorq_u8() {
+    fn test_veorq_u8() {
         testq_bit_u8(|i, j| veorq_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veor_u16() {
+    fn test_veor_u16() {
         test_bit_u16(|i, j| veor_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veorq_u16() {
+    fn test_veorq_u16() {
         testq_bit_u16(|i, j| veorq_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veor_u32() {
+    fn test_veor_u32() {
         test_bit_u32(|i, j| veor_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veorq_u32() {
+    fn test_veorq_u32() {
         testq_bit_u32(|i, j| veorq_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veor_u64() {
+    fn test_veor_u64() {
         test_bit_u64(|i, j| veor_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_veorq_u64() {
+    fn test_veorq_u64() {
         testq_bit_u64(|i, j| veorq_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_s8() {
+    fn test_vceq_s8() {
         test_cmp_s8(
             |i, j| vceq_s8(i, j),
             |a: i8, b: i8| -> u8 { if a == b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_s8() {
+    fn test_vceqq_s8() {
         testq_cmp_s8(
             |i, j| vceqq_s8(i, j),
             |a: i8, b: i8| -> u8 { if a == b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_s16() {
+    fn test_vceq_s16() {
         test_cmp_s16(
             |i, j| vceq_s16(i, j),
             |a: i16, b: i16| -> u16 { if a == b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_s16() {
+    fn test_vceqq_s16() {
         testq_cmp_s16(
             |i, j| vceqq_s16(i, j),
             |a: i16, b: i16| -> u16 { if a == b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_s32() {
+    fn test_vceq_s32() {
         test_cmp_s32(
             |i, j| vceq_s32(i, j),
             |a: i32, b: i32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_s32() {
+    fn test_vceqq_s32() {
         testq_cmp_s32(
             |i, j| vceqq_s32(i, j),
             |a: i32, b: i32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } },
@@ -4141,42 +3695,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_u8() {
+    fn test_vceq_u8() {
         test_cmp_u8(
             |i, j| vceq_u8(i, j),
             |a: u8, b: u8| -> u8 { if a == b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_u8() {
+    fn test_vceqq_u8() {
         testq_cmp_u8(
             |i, j| vceqq_u8(i, j),
             |a: u8, b: u8| -> u8 { if a == b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_u16() {
+    fn test_vceq_u16() {
         test_cmp_u16(
             |i, j| vceq_u16(i, j),
             |a: u16, b: u16| -> u16 { if a == b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_u16() {
+    fn test_vceqq_u16() {
         testq_cmp_u16(
             |i, j| vceqq_u16(i, j),
             |a: u16, b: u16| -> u16 { if a == b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_u32() {
+    fn test_vceq_u32() {
         test_cmp_u32(
             |i, j| vceq_u32(i, j),
             |a: u32, b: u32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_u32() {
+    fn test_vceqq_u32() {
         testq_cmp_u32(
             |i, j| vceqq_u32(i, j),
             |a: u32, b: u32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } },
@@ -4184,14 +3743,15 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceq_f32() {
+    fn test_vceq_f32() {
         test_cmp_f32(
             |i, j| vcge_f32(i, j),
             |a: f32, b: f32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vceqq_f32() {
+    fn test_vceqq_f32() {
         testq_cmp_f32(
             |i, j| vcgeq_f32(i, j),
             |a: f32, b: f32| -> u32 { if a == b { 0xFFFFFFFF } else { 0 } },
@@ -4199,42 +3759,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_s8() {
+    fn test_vcgt_s8() {
         test_cmp_s8(
             |i, j| vcgt_s8(i, j),
             |a: i8, b: i8| -> u8 { if a > b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_s8() {
+    fn test_vcgtq_s8() {
         testq_cmp_s8(
             |i, j| vcgtq_s8(i, j),
             |a: i8, b: i8| -> u8 { if a > b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_s16() {
+    fn test_vcgt_s16() {
         test_cmp_s16(
             |i, j| vcgt_s16(i, j),
             |a: i16, b: i16| -> u16 { if a > b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_s16() {
+    fn test_vcgtq_s16() {
         testq_cmp_s16(
             |i, j| vcgtq_s16(i, j),
             |a: i16, b: i16| -> u16 { if a > b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_s32() {
+    fn test_vcgt_s32() {
         test_cmp_s32(
             |i, j| vcgt_s32(i, j),
             |a: i32, b: i32| -> u32 { if a > b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_s32() {
+    fn test_vcgtq_s32() {
         testq_cmp_s32(
             |i, j| vcgtq_s32(i, j),
             |a: i32, b: i32| -> u32 { if a > b { 0xFFFFFFFF } else { 0 } },
@@ -4242,42 +3807,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_u8() {
+    fn test_vcgt_u8() {
         test_cmp_u8(
             |i, j| vcgt_u8(i, j),
             |a: u8, b: u8| -> u8 { if a > b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_u8() {
+    fn test_vcgtq_u8() {
         testq_cmp_u8(
             |i, j| vcgtq_u8(i, j),
             |a: u8, b: u8| -> u8 { if a > b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_u16() {
+    fn test_vcgt_u16() {
         test_cmp_u16(
             |i, j| vcgt_u16(i, j),
             |a: u16, b: u16| -> u16 { if a > b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_u16() {
+    fn test_vcgtq_u16() {
         testq_cmp_u16(
             |i, j| vcgtq_u16(i, j),
             |a: u16, b: u16| -> u16 { if a > b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_u32() {
+    fn test_vcgt_u32() {
         test_cmp_u32(
             |i, j| vcgt_u32(i, j),
             |a: u32, b: u32| -> u32 { if a > b { 0xFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_u32() {
+    fn test_vcgtq_u32() {
         testq_cmp_u32(
             |i, j| vcgtq_u32(i, j),
             |a: u32, b: u32| -> u32 { if a > b { 0xFFFFFFFF } else { 0 } },
@@ -4285,14 +3855,15 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgt_f32() {
+    fn test_vcgt_f32() {
         test_cmp_f32(
             |i, j| vcgt_f32(i, j),
             |a: f32, b: f32| -> u32 { if a > b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgtq_f32() {
+    fn test_vcgtq_f32() {
         testq_cmp_f32(
             |i, j| vcgtq_f32(i, j),
             |a: f32, b: f32| -> u32 { if a > b { 0xFFFFFFFF } else { 0 } },
@@ -4300,42 +3871,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_s8() {
+    fn test_vclt_s8() {
         test_cmp_s8(
             |i, j| vclt_s8(i, j),
             |a: i8, b: i8| -> u8 { if a < b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_s8() {
+    fn test_vcltq_s8() {
         testq_cmp_s8(
             |i, j| vcltq_s8(i, j),
             |a: i8, b: i8| -> u8 { if a < b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_s16() {
+    fn test_vclt_s16() {
         test_cmp_s16(
             |i, j| vclt_s16(i, j),
             |a: i16, b: i16| -> u16 { if a < b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_s16() {
+    fn test_vcltq_s16() {
         testq_cmp_s16(
             |i, j| vcltq_s16(i, j),
             |a: i16, b: i16| -> u16 { if a < b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_s32() {
+    fn test_vclt_s32() {
         test_cmp_s32(
             |i, j| vclt_s32(i, j),
             |a: i32, b: i32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_s32() {
+    fn test_vcltq_s32() {
         testq_cmp_s32(
             |i, j| vcltq_s32(i, j),
             |a: i32, b: i32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } },
@@ -4343,42 +3919,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_u8() {
+    fn test_vclt_u8() {
         test_cmp_u8(
             |i, j| vclt_u8(i, j),
             |a: u8, b: u8| -> u8 { if a < b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_u8() {
+    fn test_vcltq_u8() {
         testq_cmp_u8(
             |i, j| vcltq_u8(i, j),
             |a: u8, b: u8| -> u8 { if a < b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_u16() {
+    fn test_vclt_u16() {
         test_cmp_u16(
             |i, j| vclt_u16(i, j),
             |a: u16, b: u16| -> u16 { if a < b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_u16() {
+    fn test_vcltq_u16() {
         testq_cmp_u16(
             |i, j| vcltq_u16(i, j),
             |a: u16, b: u16| -> u16 { if a < b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_u32() {
+    fn test_vclt_u32() {
         test_cmp_u32(
             |i, j| vclt_u32(i, j),
             |a: u32, b: u32| -> u32 { if a < b { 0xFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_u32() {
+    fn test_vcltq_u32() {
         testq_cmp_u32(
             |i, j| vcltq_u32(i, j),
             |a: u32, b: u32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } },
@@ -4386,14 +3967,15 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vclt_f32() {
+    fn test_vclt_f32() {
         test_cmp_f32(
             |i, j| vclt_f32(i, j),
             |a: f32, b: f32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcltq_f32() {
+    fn test_vcltq_f32() {
         testq_cmp_f32(
             |i, j| vcltq_f32(i, j),
             |a: f32, b: f32| -> u32 { if a < b { 0xFFFFFFFF } else { 0 } },
@@ -4401,42 +3983,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_s8() {
+    fn test_vcle_s8() {
         test_cmp_s8(
             |i, j| vcle_s8(i, j),
             |a: i8, b: i8| -> u8 { if a <= b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_s8() {
+    fn test_vcleq_s8() {
         testq_cmp_s8(
             |i, j| vcleq_s8(i, j),
             |a: i8, b: i8| -> u8 { if a <= b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_s16() {
+    fn test_vcle_s16() {
         test_cmp_s16(
             |i, j| vcle_s16(i, j),
             |a: i16, b: i16| -> u16 { if a <= b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_s16() {
+    fn test_vcleq_s16() {
         testq_cmp_s16(
             |i, j| vcleq_s16(i, j),
             |a: i16, b: i16| -> u16 { if a <= b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_s32() {
+    fn test_vcle_s32() {
         test_cmp_s32(
             |i, j| vcle_s32(i, j),
             |a: i32, b: i32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_s32() {
+    fn test_vcleq_s32() {
         testq_cmp_s32(
             |i, j| vcleq_s32(i, j),
             |a: i32, b: i32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } },
@@ -4444,42 +4031,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_u8() {
+    fn test_vcle_u8() {
         test_cmp_u8(
             |i, j| vcle_u8(i, j),
             |a: u8, b: u8| -> u8 { if a <= b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_u8() {
+    fn test_vcleq_u8() {
         testq_cmp_u8(
             |i, j| vcleq_u8(i, j),
             |a: u8, b: u8| -> u8 { if a <= b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_u16() {
+    fn test_vcle_u16() {
         test_cmp_u16(
             |i, j| vcle_u16(i, j),
             |a: u16, b: u16| -> u16 { if a <= b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_u16() {
+    fn test_vcleq_u16() {
         testq_cmp_u16(
             |i, j| vcleq_u16(i, j),
             |a: u16, b: u16| -> u16 { if a <= b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_u32() {
+    fn test_vcle_u32() {
         test_cmp_u32(
             |i, j| vcle_u32(i, j),
             |a: u32, b: u32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_u32() {
+    fn test_vcleq_u32() {
         testq_cmp_u32(
             |i, j| vcleq_u32(i, j),
             |a: u32, b: u32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } },
@@ -4487,14 +4079,15 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcle_f32() {
+    fn test_vcle_f32() {
         test_cmp_f32(
             |i, j| vcle_f32(i, j),
             |a: f32, b: f32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcleq_f32() {
+    fn test_vcleq_f32() {
         testq_cmp_f32(
             |i, j| vcleq_f32(i, j),
             |a: f32, b: f32| -> u32 { if a <= b { 0xFFFFFFFF } else { 0 } },
@@ -4502,42 +4095,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_s8() {
+    fn test_vcge_s8() {
         test_cmp_s8(
             |i, j| vcge_s8(i, j),
             |a: i8, b: i8| -> u8 { if a >= b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_s8() {
+    fn test_vcgeq_s8() {
         testq_cmp_s8(
             |i, j| vcgeq_s8(i, j),
             |a: i8, b: i8| -> u8 { if a >= b { 0xFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_s16() {
+    fn test_vcge_s16() {
         test_cmp_s16(
             |i, j| vcge_s16(i, j),
             |a: i16, b: i16| -> u16 { if a >= b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_s16() {
+    fn test_vcgeq_s16() {
         testq_cmp_s16(
             |i, j| vcgeq_s16(i, j),
             |a: i16, b: i16| -> u16 { if a >= b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_s32() {
+    fn test_vcge_s32() {
         test_cmp_s32(
             |i, j| vcge_s32(i, j),
             |a: i32, b: i32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_s32() {
+    fn test_vcgeq_s32() {
         testq_cmp_s32(
             |i, j| vcgeq_s32(i, j),
             |a: i32, b: i32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } },
@@ -4545,42 +4143,45 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_u8() {
+    fn test_vcge_u8() {
         test_cmp_u8(
             |i, j| vcge_u8(i, j),
             |a: u8, b: u8| -> u8 { if a >= b { 0xFF } else { 0 } },
         );
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_u8() {
+    fn test_vcgeq_u8() {
         testq_cmp_u8(
             |i, j| vcgeq_u8(i, j),
             |a: u8, b: u8| -> u8 { if a >= b { 0xFF } else { 0 } },
         );
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_u16() {
+    fn test_vcge_u16() {
         test_cmp_u16(
             |i, j| vcge_u16(i, j),
             |a: u16, b: u16| -> u16 { if a >= b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_u16() {
+    fn test_vcgeq_u16() {
         testq_cmp_u16(
             |i, j| vcgeq_u16(i, j),
             |a: u16, b: u16| -> u16 { if a >= b { 0xFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_u32() {
+    fn test_vcge_u32() {
         test_cmp_u32(
             |i, j| vcge_u32(i, j),
             |a: u32, b: u32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_u32() {
+    fn test_vcgeq_u32() {
         testq_cmp_u32(
             |i, j| vcgeq_u32(i, j),
             |a: u32, b: u32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } },
@@ -4588,14 +4189,14 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcge_f32() {
+    fn test_vcge_f32() {
         test_cmp_f32(
             |i, j| vcge_f32(i, j),
             |a: f32, b: f32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } },
         );
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vcgeq_f32() {
+    fn test_vcgeq_f32() {
         testq_cmp_f32(
             |i, j| vcgeq_f32(i, j),
             |a: f32, b: f32| -> u32 { if a >= b { 0xFFFFFFFF } else { 0 } },
@@ -4603,42 +4204,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsub_s8() {
+    fn test_vqsub_s8() {
         test_ari_s8(
             |i, j| vqsub_s8(i, j),
             |a: i8, b: i8| -> i8 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubq_s8() {
+    fn test_vqsubq_s8() {
         testq_ari_s8(
             |i, j| vqsubq_s8(i, j),
             |a: i8, b: i8| -> i8 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsub_s16() {
+    fn test_vqsub_s16() {
         test_ari_s16(
             |i, j| vqsub_s16(i, j),
             |a: i16, b: i16| -> i16 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubq_s16() {
+    fn test_vqsubq_s16() {
         testq_ari_s16(
             |i, j| vqsubq_s16(i, j),
             |a: i16, b: i16| -> i16 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsub_s32() {
+    fn test_vqsub_s32() {
         test_ari_s32(
             |i, j| vqsub_s32(i, j),
             |a: i32, b: i32| -> i32 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubq_s32() {
+    fn test_vqsubq_s32() {
         testq_ari_s32(
             |i, j| vqsubq_s32(i, j),
             |a: i32, b: i32| -> i32 { a.saturating_sub(b) },
@@ -4646,42 +4252,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsub_u8() {
+    fn test_vqsub_u8() {
         test_ari_u8(
             |i, j| vqsub_u8(i, j),
             |a: u8, b: u8| -> u8 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubq_u8() {
+    fn test_vqsubq_u8() {
         testq_ari_u8(
             |i, j| vqsubq_u8(i, j),
             |a: u8, b: u8| -> u8 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsub_u16() {
+    fn test_vqsub_u16() {
         test_ari_u16(
             |i, j| vqsub_u16(i, j),
             |a: u16, b: u16| -> u16 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubq_u16() {
+    fn test_vqsubq_u16() {
         testq_ari_u16(
             |i, j| vqsubq_u16(i, j),
             |a: u16, b: u16| -> u16 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsub_u32() {
+    fn test_vqsub_u32() {
         test_ari_u32(
             |i, j| vqsub_u32(i, j),
             |a: u32, b: u32| -> u32 { a.saturating_sub(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqsubq_u32() {
+    fn test_vqsubq_u32() {
         testq_ari_u32(
             |i, j| vqsubq_u32(i, j),
             |a: u32, b: u32| -> u32 { a.saturating_sub(b) },
@@ -4689,142 +4300,166 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhadd_s8() {
+    fn test_vhadd_s8() {
         test_ari_s8(|i, j| vhadd_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhaddq_s8() {
+    fn test_vhaddq_s8() {
         testq_ari_s8(|i, j| vhaddq_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhadd_s16() {
+    fn test_vhadd_s16() {
         test_ari_s16(|i, j| vhadd_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhaddq_s16() {
+    fn test_vhaddq_s16() {
         testq_ari_s16(|i, j| vhaddq_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhadd_s32() {
+    fn test_vhadd_s32() {
         test_ari_s32(|i, j| vhadd_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhaddq_s32() {
+    fn test_vhaddq_s32() {
         testq_ari_s32(|i, j| vhaddq_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhadd_u8() {
+    fn test_vhadd_u8() {
         test_ari_u8(|i, j| vhadd_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhaddq_u8() {
+    fn test_vhaddq_u8() {
         testq_ari_u8(|i, j| vhaddq_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhadd_u16() {
+    fn test_vhadd_u16() {
         test_ari_u16(|i, j| vhadd_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhaddq_u16() {
+    fn test_vhaddq_u16() {
         testq_ari_u16(|i, j| vhaddq_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhadd_u32() {
+    fn test_vhadd_u32() {
         test_ari_u32(|i, j| vhadd_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhaddq_u32() {
+    fn test_vhaddq_u32() {
         testq_ari_u32(|i, j| vhaddq_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhadd_s8() {
+    fn test_vrhadd_s8() {
         test_ari_s8(|i, j| vrhadd_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhaddq_s8() {
+    fn test_vrhaddq_s8() {
         testq_ari_s8(|i, j| vrhaddq_s8(i, j), |a: i8, b: i8| -> i8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhadd_s16() {
+    fn test_vrhadd_s16() {
         test_ari_s16(|i, j| vrhadd_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhaddq_s16() {
+    fn test_vrhaddq_s16() {
         testq_ari_s16(|i, j| vrhaddq_s16(i, j), |a: i16, b: i16| -> i16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhadd_s32() {
+    fn test_vrhadd_s32() {
         test_ari_s32(|i, j| vrhadd_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhaddq_s32() {
+    fn test_vrhaddq_s32() {
         testq_ari_s32(|i, j| vrhaddq_s32(i, j), |a: i32, b: i32| -> i32 { a & b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhadd_u8() {
+    fn test_vrhadd_u8() {
         test_ari_u8(|i, j| vrhadd_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhaddq_u8() {
+    fn test_vrhaddq_u8() {
         testq_ari_u8(|i, j| vrhaddq_u8(i, j), |a: u8, b: u8| -> u8 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhadd_u16() {
+    fn test_vrhadd_u16() {
         test_ari_u16(|i, j| vrhadd_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhaddq_u16() {
+    fn test_vrhaddq_u16() {
         testq_ari_u16(|i, j| vrhaddq_u16(i, j), |a: u16, b: u16| -> u16 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhadd_u32() {
+    fn test_vrhadd_u32() {
         test_ari_u32(|i, j| vrhadd_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrhaddq_u32() {
+    fn test_vrhaddq_u32() {
         testq_ari_u32(|i, j| vrhaddq_u32(i, j), |a: u32, b: u32| -> u32 { a & b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqadd_s8() {
+    fn test_vqadd_s8() {
         test_ari_s8(
             |i, j| vqadd_s8(i, j),
             |a: i8, b: i8| -> i8 { a.saturating_add(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddq_s8() {
+    fn test_vqaddq_s8() {
         testq_ari_s8(
             |i, j| vqaddq_s8(i, j),
             |a: i8, b: i8| -> i8 { a.saturating_add(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqadd_s16() {
+    fn test_vqadd_s16() {
         test_ari_s16(
             |i, j| vqadd_s16(i, j),
             |a: i16, b: i16| -> i16 { a.saturating_add(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddq_s16() {
+    fn test_vqaddq_s16() {
         testq_ari_s16(
             |i, j| vqaddq_s16(i, j),
             |a: i16, b: i16| -> i16 { a.saturating_add(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqadd_s32() {
+    fn test_vqadd_s32() {
         test_ari_s32(
             |i, j| vqadd_s32(i, j),
             |a: i32, b: i32| -> i32 { a.saturating_add(b) },
         );
     }
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddq_s32() {
+    fn test_vqaddq_s32() {
         testq_ari_s32(
             |i, j| vqaddq_s32(i, j),
             |a: i32, b: i32| -> i32 { a.saturating_add(b) },
@@ -4832,42 +4467,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqadd_u8() {
+    fn test_vqadd_u8() {
         test_ari_u8(
             |i, j| vqadd_u8(i, j),
             |a: u8, b: u8| -> u8 { a.saturating_add(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddq_u8() {
+    fn test_vqaddq_u8() {
         testq_ari_u8(
             |i, j| vqaddq_u8(i, j),
             |a: u8, b: u8| -> u8 { a.saturating_add(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqadd_u16() {
+    fn test_vqadd_u16() {
         test_ari_u16(
             |i, j| vqadd_u16(i, j),
             |a: u16, b: u16| -> u16 { a.saturating_add(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddq_u16() {
+    fn test_vqaddq_u16() {
         testq_ari_u16(
             |i, j| vqaddq_u16(i, j),
             |a: u16, b: u16| -> u16 { a.saturating_add(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqadd_u32() {
+    fn test_vqadd_u32() {
         test_ari_u32(
             |i, j| vqadd_u32(i, j),
             |a: u32, b: u32| -> u32 { a.saturating_add(b) },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vqaddq_u32() {
+    fn test_vqaddq_u32() {
         testq_ari_u32(
             |i, j| vqaddq_u32(i, j),
             |a: u32, b: u32| -> u32 { a.saturating_add(b) },
@@ -4875,42 +4515,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_s8() {
+    fn test_vmul_s8() {
         test_ari_s8(
             |i, j| vmul_s8(i, j),
             |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_s8() {
+    fn test_vmulq_s8() {
         testq_ari_s8(
             |i, j| vmulq_s8(i, j),
             |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_s16() {
+    fn test_vmul_s16() {
         test_ari_s16(
             |i, j| vmul_s16(i, j),
             |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_s16() {
+    fn test_vmulq_s16() {
         testq_ari_s16(
             |i, j| vmulq_s16(i, j),
             |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_s32() {
+    fn test_vmul_s32() {
         test_ari_s32(
             |i, j| vmul_s32(i, j),
             |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_s32() {
+    fn test_vmulq_s32() {
         testq_ari_s32(
             |i, j| vmulq_s32(i, j),
             |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 },
@@ -4918,42 +4563,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_u8() {
+    fn test_vmul_u8() {
         test_ari_u8(
             |i, j| vmul_u8(i, j),
             |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_u8() {
+    fn test_vmulq_u8() {
         testq_ari_u8(
             |i, j| vmulq_u8(i, j),
             |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_u16() {
+    fn test_vmul_u16() {
         test_ari_u16(
             |i, j| vmul_u16(i, j),
             |a: u16, b: u16| -> u16 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_u16() {
+    fn test_vmulq_u16() {
         testq_ari_u16(
             |i, j| vmulq_u16(i, j),
             |a: u16, b: u16| -> u16 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_u32() {
+    fn test_vmul_u32() {
         test_ari_u32(
             |i, j| vmul_u32(i, j),
             |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_u32() {
+    fn test_vmulq_u32() {
         testq_ari_u32(
             |i, j| vmulq_u32(i, j),
             |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 },
@@ -4961,110 +4611,127 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmul_f32() {
+    fn test_vmul_f32() {
         test_ari_f32(|i, j| vmul_f32(i, j), |a: f32, b: f32| -> f32 { a * b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vmulq_f32() {
+    fn test_vmulq_f32() {
         testq_ari_f32(|i, j| vmulq_f32(i, j), |a: f32, b: f32| -> f32 { a * b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_s8() {
+    fn test_vsub_s8() {
         test_ari_s8(|i, j| vsub_s8(i, j), |a: i8, b: i8| -> i8 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_s8() {
+    fn test_vsubq_s8() {
         testq_ari_s8(|i, j| vsubq_s8(i, j), |a: i8, b: i8| -> i8 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_s16() {
+    fn test_vsub_s16() {
         test_ari_s16(|i, j| vsub_s16(i, j), |a: i16, b: i16| -> i16 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_s16() {
+    fn test_vsubq_s16() {
         testq_ari_s16(|i, j| vsubq_s16(i, j), |a: i16, b: i16| -> i16 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_s32() {
+    fn test_vsub_s32() {
         test_ari_s32(|i, j| vsub_s32(i, j), |a: i32, b: i32| -> i32 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_s32() {
+    fn test_vsubq_s32() {
         testq_ari_s32(|i, j| vsubq_s32(i, j), |a: i32, b: i32| -> i32 { a - b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_u8() {
+    fn test_vsub_u8() {
         test_ari_u8(|i, j| vsub_u8(i, j), |a: u8, b: u8| -> u8 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_u8() {
+    fn test_vsubq_u8() {
         testq_ari_u8(|i, j| vsubq_u8(i, j), |a: u8, b: u8| -> u8 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_u16() {
+    fn test_vsub_u16() {
         test_ari_u16(|i, j| vsub_u16(i, j), |a: u16, b: u16| -> u16 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_u16() {
+    fn test_vsubq_u16() {
         testq_ari_u16(|i, j| vsubq_u16(i, j), |a: u16, b: u16| -> u16 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_u32() {
+    fn test_vsub_u32() {
         test_ari_u32(|i, j| vsub_u32(i, j), |a: u32, b: u32| -> u32 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_u32() {
+    fn test_vsubq_u32() {
         testq_ari_u32(|i, j| vsubq_u32(i, j), |a: u32, b: u32| -> u32 { a - b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsub_f32() {
+    fn test_vsub_f32() {
         test_ari_f32(|i, j| vsub_f32(i, j), |a: f32, b: f32| -> f32 { a - b });
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vsubq_f32() {
+    fn test_vsubq_f32() {
         testq_ari_f32(|i, j| vsubq_f32(i, j), |a: f32, b: f32| -> f32 { a - b });
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_s8() {
+    fn test_vhsub_s8() {
         test_ari_s8(
             |i, j| vhsub_s8(i, j),
             |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_s8() {
+    fn test_vhsubq_s8() {
         testq_ari_s8(
             |i, j| vhsubq_s8(i, j),
             |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_s16() {
+    fn test_vhsub_s16() {
         test_ari_s16(
             |i, j| vhsub_s16(i, j),
             |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_s16() {
+    fn test_vhsubq_s16() {
         testq_ari_s16(
             |i, j| vhsubq_s16(i, j),
             |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_s32() {
+    fn test_vhsub_s32() {
         test_ari_s32(
             |i, j| vhsub_s32(i, j),
             |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_s32() {
+    fn test_vhsubq_s32() {
         testq_ari_s32(
             |i, j| vhsubq_s32(i, j),
             |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 },
@@ -5072,42 +4739,47 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_u8() {
+    fn test_vhsub_u8() {
         test_ari_u8(
             |i, j| vhsub_u8(i, j),
             |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_u8() {
+    fn test_vhsubq_u8() {
         testq_ari_u8(
             |i, j| vhsubq_u8(i, j),
             |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_u16() {
+    fn test_vhsub_u16() {
         test_ari_u16(
             |i, j| vhsub_u16(i, j),
             |a: u16, b: u16| -> u16 { (((a as u16) - (b as u16)) / 2) as u16 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_u16() {
+    fn test_vhsubq_u16() {
         testq_ari_u16(
             |i, j| vhsubq_u16(i, j),
             |a: u16, b: u16| -> u16 { (((a as u16) - (b as u16)) / 2) as u16 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsub_u32() {
+    fn test_vhsub_u32() {
         test_ari_u32(
             |i, j| vhsub_u32(i, j),
             |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 },
         );
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vhsubq_u32() {
+    fn test_vhsubq_u32() {
         testq_ari_u32(
             |i, j| vhsubq_u32(i, j),
             |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 },
@@ -5115,411 +4787,545 @@ mod tests {
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaba_s8() {
+    fn test_vaba_s8() {
         let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
         let c = i8x8::new(10, 9, 8, 7, 6, 5, 4, 3);
-        let r: i8x8 = transmute(vaba_s8(transmute(a), transmute(b), transmute(c)));
+        let r = i8x8::from(vaba_s8(a.into(), b.into(), c.into()));
         let e = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10);
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaba_s16() {
+    fn test_vaba_s16() {
         let a = i16x4::new(1, 2, 3, 4);
         let b = i16x4::new(1, 1, 1, 1);
         let c = i16x4::new(10, 9, 8, 7);
-        let r: i16x4 = transmute(vaba_s16(transmute(a), transmute(b), transmute(c)));
+        let r = i16x4::from(vaba_s16(a.into(), b.into(), c.into()));
         let e = i16x4::new(10, 10, 10, 10);
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaba_s32() {
+    fn test_vaba_s32() {
         let a = i32x2::new(1, 2);
         let b = i32x2::new(1, 1);
         let c = i32x2::new(10, 9);
-        let r: i32x2 = transmute(vaba_s32(transmute(a), transmute(b), transmute(c)));
+        let r = i32x2::from(vaba_s32(a.into(), b.into(), c.into()));
         let e = i32x2::new(10, 10);
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaba_u8() {
+    fn test_vaba_u8() {
         let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
         let c = u8x8::new(10, 9, 8, 7, 6, 5, 4, 3);
-        let r: u8x8 = transmute(vaba_u8(transmute(a), transmute(b), transmute(c)));
+        let r = u8x8::from(vaba_u8(a.into(), b.into(), c.into()));
         let e = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10);
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaba_u16() {
+    fn test_vaba_u16() {
         let a = u16x4::new(1, 2, 3, 4);
         let b = u16x4::new(1, 1, 1, 1);
         let c = u16x4::new(10, 9, 8, 7);
-        let r: u16x4 = transmute(vaba_u16(transmute(a), transmute(b), transmute(c)));
+        let r = u16x4::from(vaba_u16(a.into(), b.into(), c.into()));
         let e = u16x4::new(10, 10, 10, 10);
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vaba_u32() {
+    fn test_vaba_u32() {
         let a = u32x2::new(1, 2);
         let b = u32x2::new(1, 1);
         let c = u32x2::new(10, 9);
-        let r: u32x2 = transmute(vaba_u32(transmute(a), transmute(b), transmute(c)));
+        let r = u32x2::from(vaba_u32(a.into(), b.into(), c.into()));
         let e = u32x2::new(10, 10);
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vabaq_s8() {
+    fn test_vabaq_s8() {
         let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2);
         let b = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let c = i8x16::new(10, 9, 8, 7, 6, 5, 4, 3, 12, 13, 14, 15, 16, 17, 18, 19);
-        let r: i8x16 = transmute(vabaq_s8(transmute(a), transmute(b), transmute(c)));
+        let r = i8x16::from(vabaq_s8(a.into(), b.into(), c.into()));
         let e = i8x16::new(
             10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20,
         );
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vabaq_s16() {
+    fn test_vabaq_s16() {
         let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
         let c = i16x8::new(10, 9, 8, 7, 6, 5, 4, 3);
-        let r: i16x8 = transmute(vabaq_s16(transmute(a), transmute(b), transmute(c)));
+        let r = i16x8::from(vabaq_s16(a.into(), b.into(), c.into()));
         let e = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10);
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vabaq_s32() {
+    fn test_vabaq_s32() {
         let a = i32x4::new(1, 2, 3, 4);
         let b = i32x4::new(1, 1, 1, 1);
         let c = i32x4::new(10, 9, 8, 7);
-        let r: i32x4 = transmute(vabaq_s32(transmute(a), transmute(b), transmute(c)));
+        let r = i32x4::from(vabaq_s32(a.into(), b.into(), c.into()));
         let e = i32x4::new(10, 10, 10, 10);
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vabaq_u8() {
+    fn test_vabaq_u8() {
         let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2);
         let b = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let c = u8x16::new(10, 9, 8, 7, 6, 5, 4, 3, 12, 13, 14, 15, 16, 17, 18, 19);
-        let r: u8x16 = transmute(vabaq_u8(transmute(a), transmute(b), transmute(c)));
+        let r = u8x16::from(vabaq_u8(a.into(), b.into(), c.into()));
         let e = u8x16::new(
             10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20,
         );
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vabaq_u16() {
+    fn test_vabaq_u16() {
         let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
         let b = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
         let c = u16x8::new(10, 9, 8, 7, 6, 5, 4, 3);
-        let r: u16x8 = transmute(vabaq_u16(transmute(a), transmute(b), transmute(c)));
+        let r = u16x8::from(vabaq_u16(a.into(), b.into(), c.into()));
         let e = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10);
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vabaq_u32() {
+    fn test_vabaq_u32() {
         let a = u32x4::new(1, 2, 3, 4);
         let b = u32x4::new(1, 1, 1, 1);
         let c = u32x4::new(10, 9, 8, 7);
-        let r: u32x4 = transmute(vabaq_u32(transmute(a), transmute(b), transmute(c)));
+        let r = u32x4::from(vabaq_u32(a.into(), b.into(), c.into()));
         let e = u32x4::new(10, 10, 10, 10);
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev16_s8() {
+    fn test_vrev16_s8() {
         let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = i8x8::new(1, 0, 3, 2, 5, 4, 7, 6);
-        let e: i8x8 = transmute(vrev16_s8(transmute(a)));
+        let e = i8x8::from(vrev16_s8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev16q_s8() {
+    fn test_vrev16q_s8() {
         let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = i8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-        let e: i8x16 = transmute(vrev16q_s8(transmute(a)));
+        let e = i8x16::from(vrev16q_s8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev16_u8() {
+    fn test_vrev16_u8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = u8x8::new(1, 0, 3, 2, 5, 4, 7, 6);
-        let e: u8x8 = transmute(vrev16_u8(transmute(a)));
+        let e = u8x8::from(vrev16_u8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev16q_u8() {
+    fn test_vrev16q_u8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = u8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-        let e: u8x16 = transmute(vrev16q_u8(transmute(a)));
+        let e = u8x16::from(vrev16q_u8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev16_p8() {
-        let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = i8x8::new(1, 0, 3, 2, 5, 4, 7, 6);
-        let e: i8x8 = transmute(vrev16_p8(transmute(a)));
+    fn test_vrev16_p8() {
+        let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r = u8x8::new(1, 0, 3, 2, 5, 4, 7, 6);
+        let e = u8x8::from(vrev16_p8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev16q_p8() {
+    fn test_vrev16q_p8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = u8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-        let e: u8x16 = transmute(vrev16q_p8(transmute(a)));
+        let e = u8x16::from(vrev16q_p8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32_s8() {
+    fn test_vrev32_s8() {
         let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = i8x8::new(3, 2, 1, 0, 7, 6, 5, 4);
-        let e: i8x8 = transmute(vrev32_s8(transmute(a)));
+        let e = i8x8::from(vrev32_s8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32q_s8() {
+    fn test_vrev32q_s8() {
         let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = i8x16::new(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
-        let e: i8x16 = transmute(vrev32q_s8(transmute(a)));
+        let e = i8x16::from(vrev32q_s8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32_u8() {
+    fn test_vrev32_u8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = u8x8::new(3, 2, 1, 0, 7, 6, 5, 4);
-        let e: u8x8 = transmute(vrev32_u8(transmute(a)));
+        let e = u8x8::from(vrev32_u8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32q_u8() {
+    fn test_vrev32q_u8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = u8x16::new(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
-        let e: u8x16 = transmute(vrev32q_u8(transmute(a)));
+        let e = u8x16::from(vrev32q_u8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32_s16() {
+    fn test_vrev32_s16() {
         let a = i16x4::new(0, 1, 2, 3);
         let r = i16x4::new(1, 0, 3, 2);
-        let e: i16x4 = transmute(vrev32_s16(transmute(a)));
+        let e = i16x4::from(vrev32_s16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32q_s16() {
+    fn test_vrev32q_s16() {
         let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
-        let e: i16x8 = transmute(vrev32q_s16(transmute(a)));
+        let e = i16x8::from(vrev32q_s16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32_p16() {
-        let a = i16x4::new(0, 1, 2, 3);
-        let r = i16x4::new(1, 0, 3, 2);
-        let e: i16x4 = transmute(vrev32_p16(transmute(a)));
+    fn test_vrev32_p16() {
+        let a = u16x4::new(0, 1, 2, 3);
+        let r = u16x4::new(1, 0, 3, 2);
+        let e = u16x4::from(vrev32_p16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32q_p16() {
-        let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
-        let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
-        let e: i16x8 = transmute(vrev32q_p16(transmute(a)));
+    fn test_vrev32q_p16() {
+        let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r = u16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
+        let e = u16x8::from(vrev32q_p16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32_u16() {
+    fn test_vrev32_u16() {
         let a = u16x4::new(0, 1, 2, 3);
         let r = u16x4::new(1, 0, 3, 2);
-        let e: u16x4 = transmute(vrev32_u16(transmute(a)));
+        let e = u16x4::from(vrev32_u16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32q_u16() {
+    fn test_vrev32q_u16() {
         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = u16x8::new(1, 0, 3, 2, 5, 4, 7, 6);
-        let e: u16x8 = transmute(vrev32q_u16(transmute(a)));
+        let e = u16x8::from(vrev32q_u16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32_p8() {
+    fn test_vrev32_p8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = u8x8::new(3, 2, 1, 0, 7, 6, 5, 4);
-        let e: u8x8 = transmute(vrev32_p8(transmute(a)));
+        let e = u8x8::from(vrev32_p8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev32q_p8() {
+    fn test_vrev32q_p8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = u8x16::new(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
-        let e: u8x16 = transmute(vrev32q_p8(transmute(a)));
+        let e = u8x16::from(vrev32q_p8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64_s8() {
+    fn test_vrev64_s8() {
         let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = i8x8::new(7, 6, 5, 4, 3, 2, 1, 0);
-        let e: i8x8 = transmute(vrev64_s8(transmute(a)));
+        let e = i8x8::from(vrev64_s8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64q_s8() {
+    fn test_vrev64q_s8() {
         let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = i8x16::new(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
-        let e: i8x16 = transmute(vrev64q_s8(transmute(a)));
+        let e = i8x16::from(vrev64q_s8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64_s16() {
+    fn test_vrev64_s16() {
         let a = i16x4::new(0, 1, 2, 3);
         let r = i16x4::new(3, 2, 1, 0);
-        let e: i16x4 = transmute(vrev64_s16(transmute(a)));
+        let e = i16x4::from(vrev64_s16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64q_s16() {
+    fn test_vrev64q_s16() {
         let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = i16x8::new(3, 2, 1, 0, 7, 6, 5, 4);
-        let e: i16x8 = transmute(vrev64q_s16(transmute(a)));
+        let e = i16x8::from(vrev64q_s16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64_s32() {
+    fn test_vrev64_s32() {
         let a = i32x2::new(0, 1);
         let r = i32x2::new(1, 0);
-        let e: i32x2 = transmute(vrev64_s32(transmute(a)));
+        let e = i32x2::from(vrev64_s32(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64q_s32() {
+    fn test_vrev64q_s32() {
         let a = i32x4::new(0, 1, 2, 3);
         let r = i32x4::new(1, 0, 3, 2);
-        let e: i32x4 = transmute(vrev64q_s32(transmute(a)));
+        let e = i32x4::from(vrev64q_s32(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64_u8() {
+    fn test_vrev64_u8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = u8x8::new(7, 6, 5, 4, 3, 2, 1, 0);
-        let e: u8x8 = transmute(vrev64_u8(transmute(a)));
+        let e = u8x8::from(vrev64_u8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64q_u8() {
+    fn test_vrev64q_u8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = u8x16::new(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
-        let e: u8x16 = transmute(vrev64q_u8(transmute(a)));
+        let e = u8x16::from(vrev64q_u8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64_u16() {
+    fn test_vrev64_u16() {
         let a = u16x4::new(0, 1, 2, 3);
         let r = u16x4::new(3, 2, 1, 0);
-        let e: u16x4 = transmute(vrev64_u16(transmute(a)));
+        let e = u16x4::from(vrev64_u16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64q_u16() {
+    fn test_vrev64q_u16() {
         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = u16x8::new(3, 2, 1, 0, 7, 6, 5, 4);
-        let e: u16x8 = transmute(vrev64q_u16(transmute(a)));
+        let e = u16x8::from(vrev64q_u16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64_u32() {
+    fn test_vrev64_u32() {
         let a = u32x2::new(0, 1);
         let r = u32x2::new(1, 0);
-        let e: u32x2 = transmute(vrev64_u32(transmute(a)));
+        let e = u32x2::from(vrev64_u32(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64q_u32() {
+    fn test_vrev64q_u32() {
         let a = u32x4::new(0, 1, 2, 3);
         let r = u32x4::new(1, 0, 3, 2);
-        let e: u32x4 = transmute(vrev64q_u32(transmute(a)));
+        let e = u32x4::from(vrev64q_u32(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64_f32() {
+    fn test_vrev64_f32() {
         let a = f32x2::new(1.0, 2.0);
         let r = f32x2::new(2.0, 1.0);
-        let e: f32x2 = transmute(vrev64_f32(transmute(a)));
+        let e = f32x2::from(vrev64_f32(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64q_f32() {
+    fn test_vrev64q_f32() {
         let a = f32x4::new(1.0, 2.0, -2.0, -1.0);
         let r = f32x4::new(2.0, 1.0, -1.0, -2.0);
-        let e: f32x4 = transmute(vrev64q_f32(transmute(a)));
+        let e = f32x4::from(vrev64q_f32(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64_p8() {
+    fn test_vrev64_p8() {
         let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = u8x8::new(7, 6, 5, 4, 3, 2, 1, 0);
-        let e: u8x8 = transmute(vrev64_p8(transmute(a)));
+        let e = u8x8::from(vrev64_p8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64q_p8() {
+    fn test_vrev64q_p8() {
         let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = u8x16::new(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8);
-        let e: u8x16 = transmute(vrev64q_p8(transmute(a)));
+        let e = u8x16::from(vrev64q_p8(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64_p16() {
+    fn test_vrev64_p16() {
         let a = u16x4::new(0, 1, 2, 3);
         let r = u16x4::new(3, 2, 1, 0);
-        let e: u16x4 = transmute(vrev64_p16(transmute(a)));
+        let e = u16x4::from(vrev64_p16(a.into()));
         assert_eq!(r, e);
     }
+
     #[simd_test(enable = "neon")]
-    unsafe fn test_vrev64q_p16() {
+    fn test_vrev64q_p16() {
         let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
         let r = u16x8::new(3, 2, 1, 0, 7, 6, 5, 4);
-        let e: u16x8 = transmute(vrev64q_p16(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    macro_rules! test_vcombine {
-        ($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => {
-            #[allow(unused_assignments)]
-            #[simd_test(enable = "neon")]
-            unsafe fn $test_id() {
-                let a = [$($a),*];
-                let b = [$($b),*];
-                let e = [$($a),* $(, $b)*];
-                let c = $fn_id(transmute(a), transmute(b));
-                let mut d = e;
-                d = transmute(c);
-                assert_eq!(d, e);
-            }
-        }
+        let e = u16x8::from(vrev64q_p16(a.into()));
+        assert_eq!(r, e);
     }
 
-    test_vcombine!(test_vcombine_s8 => vcombine_s8([3_i8, -4, 5, -6, 7, 8, 9, 10], [13_i8, -14, 15, -16, 17, 18, 19, 110]));
-    test_vcombine!(test_vcombine_u8 => vcombine_u8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110]));
-    test_vcombine!(test_vcombine_p8 => vcombine_p8([3_u8, 4, 5, 6, 7, 8, 9, 10], [13_u8, 14, 15, 16, 17, 18, 19, 110]));
-
-    test_vcombine!(test_vcombine_s16 => vcombine_s16([3_i16, -4, 5, -6], [13_i16, -14, 15, -16]));
-    test_vcombine!(test_vcombine_u16 => vcombine_u16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
-    test_vcombine!(test_vcombine_p16 => vcombine_p16([3_u16, 4, 5, 6], [13_u16, 14, 15, 16]));
     #[cfg(not(target_arch = "arm64ec"))]
     mod fp16 {
         use super::*;
-        test_vcombine!(test_vcombine_f16 => vcombine_f16([3_f16, 4., 5., 6.],
-        [13_f16, 14., 15., 16.]));
-    }
+        #[simd_test(enable = "neon,fp16")]
+        fn test_vld1_lane_f16() {
+            let a = f16x4::new(0., 1., 2., 3.);
+            let elem: f16 = 42.;
+            let e = f16x4::new(0., 1., 2., 42.);
+            let r = unsafe { f16x4::from(vld1_lane_f16::<3>(&elem, a.into())) };
+            assert_eq!(r, e)
+        }
 
-    test_vcombine!(test_vcombine_s32 => vcombine_s32([3_i32, -4], [13_i32, -14]));
-    test_vcombine!(test_vcombine_u32 => vcombine_u32([3_u32, 4], [13_u32, 14]));
-    // note: poly32x4 does not exist, and neither does vcombine_p32
-    test_vcombine!(test_vcombine_f32 => vcombine_f32([3_f32, -4.], [13_f32, -14.]));
+        #[simd_test(enable = "neon,fp16")]
+        fn test_vld1q_lane_f16() {
+            let a = f16x8::new(0., 1., 2., 3., 4., 5., 6., 7.);
+            let elem: f16 = 42.;
+            let e = f16x8::new(0., 1., 2., 3., 4., 5., 6., 42.);
+            let r = unsafe { f16x8::from(vld1q_lane_f16::<7>(&elem, a.into())) };
+            assert_eq!(r, e)
+        }
 
-    test_vcombine!(test_vcombine_s64 => vcombine_s64([-3_i64], [13_i64]));
-    test_vcombine!(test_vcombine_u64 => vcombine_u64([3_u64], [13_u64]));
-    test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64]));
-    #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))]
-    test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64]));
+        #[simd_test(enable = "neon,fp16")]
+        fn test_vld1_dup_f16() {
+            let elem: f16 = 42.;
+            let e = f16x4::new(42., 42., 42., 42.);
+            let r = unsafe { f16x4::from(vld1_dup_f16(&elem)) };
+            assert_eq!(r, e)
+        }
+
+        #[simd_test(enable = "neon,fp16")]
+        fn test_vld1q_dup_f16() {
+            let elem: f16 = 42.;
+            let e = f16x8::new(42., 42., 42., 42., 42., 42., 42., 42.);
+            let r = unsafe { f16x8::from(vld1q_dup_f16(&elem)) };
+            assert_eq!(r, e)
+        }
+    }
+
+    macro_rules! lane_wide_store_load_roundtrip {
+        ($elem_ty:ty, $len:expr, $idx:expr, $vec_ty:ty, $store:ident, $load:ident) => {
+            let vals: [$elem_ty; $len] = crate::array::from_fn(|i| i as $elem_ty);
+            let a: $vec_ty = transmute(vals);
+            let mut tmp = [0 as $elem_ty; 4];
+            $store::<$idx>(tmp.as_mut_ptr().cast(), a);
+            let r: $vec_ty = $load::<$idx>(tmp.as_ptr().cast(), a);
+            let out: [$elem_ty; $len] = transmute(r);
+            assert_eq!(out, vals);
+        };
+    }
+
+    macro_rules! lane_wide_store_load_roundtrip_neon {
+        ($( $name:ident $args:tt);* $(;)?) => {
+            $(
+                #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
+                #[simd_test(enable = "neon")]
+                unsafe fn $name() {
+                    lane_wide_store_load_roundtrip! $args;
+                }
+            )*
+        };
+    }
+
+    macro_rules! lane_wide_store_load_roundtrip_fp16 {
+        ($( $name:ident $args:tt);* $(;)?) => {
+            $(
+                #[cfg_attr(miri, ignore)] // uses unsupported vendor intrinsics
+                #[simd_test(enable = "neon,fp16")]
+                #[cfg(not(target_arch = "arm64ec"))]
+                unsafe fn $name() {
+                    lane_wide_store_load_roundtrip! $args;
+                }
+            )*
+        };
+    }
+
+    lane_wide_store_load_roundtrip_neon! {
+        test_vld2_lane_s8(i8, 16, 7, int8x8x2_t, vst2_lane_s8, vld2_lane_s8);
+        test_vld3_lane_s8(i8, 24, 7, int8x8x3_t, vst3_lane_s8, vld3_lane_s8);
+        test_vld4_lane_s8(i8, 32, 7, int8x8x4_t, vst4_lane_s8, vld4_lane_s8);
+
+        test_vld2_lane_u8(u8, 16, 7, uint8x8x2_t, vst2_lane_u8, vld2_lane_u8);
+        test_vld3_lane_u8(u8, 24, 7, uint8x8x3_t, vst3_lane_u8, vld3_lane_u8);
+        test_vld4_lane_u8(u8, 32, 7, uint8x8x4_t, vst4_lane_u8, vld4_lane_u8);
+
+        test_vld2_lane_s16(i16, 8, 3, int16x4x2_t, vst2_lane_s16, vld2_lane_s16);
+        test_vld3_lane_s16(i16, 12, 3, int16x4x3_t, vst3_lane_s16, vld3_lane_s16);
+        test_vld4_lane_s16(i16, 16, 3, int16x4x4_t, vst4_lane_s16, vld4_lane_s16);
+        test_vld2q_lane_s16(i16, 16, 7, int16x8x2_t, vst2q_lane_s16, vld2q_lane_s16);
+        test_vld3q_lane_s16(i16, 24, 7, int16x8x3_t, vst3q_lane_s16, vld3q_lane_s16);
+        test_vld4q_lane_s16(i16, 32, 7, int16x8x4_t, vst4q_lane_s16, vld4q_lane_s16);
+
+        test_vld2_lane_u16(u16, 8, 3, uint16x4x2_t, vst2_lane_u16, vld2_lane_u16);
+        test_vld3_lane_u16(u16, 12, 3, uint16x4x3_t, vst3_lane_u16, vld3_lane_u16);
+        test_vld4_lane_u16(u16, 16, 3, uint16x4x4_t, vst4_lane_u16, vld4_lane_u16);
+        test_vld2q_lane_u16(u16, 16, 7, uint16x8x2_t, vst2q_lane_u16, vld2q_lane_u16);
+        test_vld3q_lane_u16(u16, 24, 7, uint16x8x3_t, vst3q_lane_u16, vld3q_lane_u16);
+        test_vld4q_lane_u16(u16, 32, 7, uint16x8x4_t, vst4q_lane_u16, vld4q_lane_u16);
+
+        test_vld2_lane_s32(i32, 4, 1, int32x2x2_t, vst2_lane_s32, vld2_lane_s32);
+        test_vld3_lane_s32(i32, 6, 1, int32x2x3_t, vst3_lane_s32, vld3_lane_s32);
+        test_vld4_lane_s32(i32, 8, 1, int32x2x4_t, vst4_lane_s32, vld4_lane_s32);
+        test_vld2q_lane_s32(i32, 8, 3, int32x4x2_t, vst2q_lane_s32, vld2q_lane_s32);
+        test_vld3q_lane_s32(i32, 12, 3, int32x4x3_t, vst3q_lane_s32, vld3q_lane_s32);
+        test_vld4q_lane_s32(i32, 16, 3, int32x4x4_t, vst4q_lane_s32, vld4q_lane_s32);
+
+        test_vld2_lane_u32(u32, 4, 1, uint32x2x2_t, vst2_lane_u32, vld2_lane_u32);
+        test_vld3_lane_u32(u32, 6, 1, uint32x2x3_t, vst3_lane_u32, vld3_lane_u32);
+        test_vld4_lane_u32(u32, 8, 1, uint32x2x4_t, vst4_lane_u32, vld4_lane_u32);
+        test_vld2q_lane_u32(u32, 8, 3, uint32x4x2_t, vst2q_lane_u32, vld2q_lane_u32);
+        test_vld3q_lane_u32(u32, 12, 3, uint32x4x3_t, vst3q_lane_u32, vld3q_lane_u32);
+        test_vld4q_lane_u32(u32, 16, 3, uint32x4x4_t, vst4q_lane_u32, vld4q_lane_u32);
+
+        test_vld2_lane_f32(f32, 4, 1, float32x2x2_t, vst2_lane_f32, vld2_lane_f32);
+        test_vld3_lane_f32(f32, 6, 1, float32x2x3_t, vst3_lane_f32, vld3_lane_f32);
+        test_vld4_lane_f32(f32, 8, 1, float32x2x4_t, vst4_lane_f32, vld4_lane_f32);
+        test_vld2q_lane_f32(f32, 8, 3, float32x4x2_t, vst2q_lane_f32, vld2q_lane_f32);
+        test_vld3q_lane_f32(f32, 12, 3, float32x4x3_t, vst3q_lane_f32, vld3q_lane_f32);
+        test_vld4q_lane_f32(f32, 16, 3, float32x4x4_t, vst4q_lane_f32, vld4q_lane_f32);
+    }
+
+    lane_wide_store_load_roundtrip_fp16! {
+        test_vld2_lane_f16(f16, 8, 3, float16x4x2_t, vst2_lane_f16, vld2_lane_f16);
+        test_vld3_lane_f16(f16, 12, 3, float16x4x3_t, vst3_lane_f16, vld3_lane_f16);
+        test_vld4_lane_f16(f16, 16, 3, float16x4x4_t, vst4_lane_f16, vld4_lane_f16);
+        test_vld2q_lane_f16(f16, 16, 7, float16x8x2_t, vst2q_lane_f16, vld2q_lane_f16);
+        test_vld3q_lane_f16(f16, 24, 7, float16x8x3_t, vst3q_lane_f16, vld3q_lane_f16);
+        test_vld4q_lane_f16(f16, 32, 7, float16x8x4_t, vst4q_lane_f16, vld4q_lane_f16);
+    }
 }
 
 #[cfg(all(test, target_arch = "arm"))]
diff --git a/crates/core_arch/src/arm_shared/neon/store_tests.rs b/crates/core_arch/src/arm_shared/neon/store_tests.rs
index 6b5d4a19ad..6eb60e4c78 100644
--- a/crates/core_arch/src/arm_shared/neon/store_tests.rs
+++ b/crates/core_arch/src/arm_shared/neon/store_tests.rs
@@ -14,11 +14,13 @@ use crate::core_arch::simd::*;
 use stdarch_test::simd_test;
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_s8() {
+fn test_vst1_s8() {
     let mut vals = [0_i8; 9];
     let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
 
-    vst1_s8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_s8(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -32,11 +34,13 @@ unsafe fn test_vst1_s8() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_s8() {
+fn test_vst1q_s8() {
     let mut vals = [0_i8; 17];
     let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
 
-    vst1q_s8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_s8(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -58,11 +62,13 @@ unsafe fn test_vst1q_s8() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_s16() {
+fn test_vst1_s16() {
     let mut vals = [0_i16; 5];
     let a = i16x4::new(1, 2, 3, 4);
 
-    vst1_s16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_s16(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -72,11 +78,13 @@ unsafe fn test_vst1_s16() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_s16() {
+fn test_vst1q_s16() {
     let mut vals = [0_i16; 9];
     let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
 
-    vst1q_s16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_s16(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -90,11 +98,13 @@ unsafe fn test_vst1q_s16() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_s32() {
+fn test_vst1_s32() {
     let mut vals = [0_i32; 3];
     let a = i32x2::new(1, 2);
 
-    vst1_s32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_s32(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -102,11 +112,13 @@ unsafe fn test_vst1_s32() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_s32() {
+fn test_vst1q_s32() {
     let mut vals = [0_i32; 5];
     let a = i32x4::new(1, 2, 3, 4);
 
-    vst1q_s32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_s32(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -116,22 +128,26 @@ unsafe fn test_vst1q_s32() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_s64() {
+fn test_vst1_s64() {
     let mut vals = [0_i64; 2];
     let a = i64x1::new(1);
 
-    vst1_s64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_s64(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_s64() {
+fn test_vst1q_s64() {
     let mut vals = [0_i64; 3];
     let a = i64x2::new(1, 2);
 
-    vst1q_s64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_s64(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -139,11 +155,13 @@ unsafe fn test_vst1q_s64() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_u8() {
+fn test_vst1_u8() {
     let mut vals = [0_u8; 9];
     let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
 
-    vst1_u8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_u8(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -157,11 +175,13 @@ unsafe fn test_vst1_u8() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_u8() {
+fn test_vst1q_u8() {
     let mut vals = [0_u8; 17];
     let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
 
-    vst1q_u8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_u8(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -183,11 +203,13 @@ unsafe fn test_vst1q_u8() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_u16() {
+fn test_vst1_u16() {
     let mut vals = [0_u16; 5];
     let a = u16x4::new(1, 2, 3, 4);
 
-    vst1_u16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_u16(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -197,11 +219,13 @@ unsafe fn test_vst1_u16() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_u16() {
+fn test_vst1q_u16() {
     let mut vals = [0_u16; 9];
     let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
 
-    vst1q_u16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_u16(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -215,11 +239,13 @@ unsafe fn test_vst1q_u16() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_u32() {
+fn test_vst1_u32() {
     let mut vals = [0_u32; 3];
     let a = u32x2::new(1, 2);
 
-    vst1_u32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_u32(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -227,11 +253,13 @@ unsafe fn test_vst1_u32() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_u32() {
+fn test_vst1q_u32() {
     let mut vals = [0_u32; 5];
     let a = u32x4::new(1, 2, 3, 4);
 
-    vst1q_u32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_u32(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -241,22 +269,26 @@ unsafe fn test_vst1q_u32() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_u64() {
+fn test_vst1_u64() {
     let mut vals = [0_u64; 2];
     let a = u64x1::new(1);
 
-    vst1_u64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_u64(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_u64() {
+fn test_vst1q_u64() {
     let mut vals = [0_u64; 3];
     let a = u64x2::new(1, 2);
 
-    vst1q_u64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_u64(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -264,11 +296,13 @@ unsafe fn test_vst1q_u64() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_p8() {
+fn test_vst1_p8() {
     let mut vals = [0_u8; 9];
     let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
 
-    vst1_p8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_p8(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -282,11 +316,13 @@ unsafe fn test_vst1_p8() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_p8() {
+fn test_vst1q_p8() {
     let mut vals = [0_u8; 17];
     let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
 
-    vst1q_p8(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_p8(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -308,11 +344,13 @@ unsafe fn test_vst1q_p8() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_p16() {
+fn test_vst1_p16() {
     let mut vals = [0_u16; 5];
     let a = u16x4::new(1, 2, 3, 4);
 
-    vst1_p16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_p16(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -322,11 +360,13 @@ unsafe fn test_vst1_p16() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_p16() {
+fn test_vst1q_p16() {
     let mut vals = [0_u16; 9];
     let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
 
-    vst1q_p16(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_p16(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
@@ -340,34 +380,78 @@ unsafe fn test_vst1q_p16() {
 }
 
 #[simd_test(enable = "neon,aes")]
-unsafe fn test_vst1_p64() {
+fn test_vst1_p64() {
     let mut vals = [0_u64; 2];
     let a = u64x1::new(1);
 
-    vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_p64(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
 }
 
 #[simd_test(enable = "neon,aes")]
-unsafe fn test_vst1q_p64() {
+fn test_vst1q_p64() {
     let mut vals = [0_u64; 3];
     let a = u64x2::new(1, 2);
 
-    vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_p64(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0);
     assert_eq!(vals[1], 1);
     assert_eq!(vals[2], 2);
 }
 
+#[cfg(not(target_arch = "arm64ec"))]
+#[simd_test(enable = "neon,fp16")]
+fn test_vst1_f16() {
+    let mut vals = [0_f16; 5];
+    let a = f16x4::new(1., 2., 3., 4.);
+
+    unsafe {
+        vst1_f16(vals[1..].as_mut_ptr(), a.into());
+    }
+
+    assert_eq!(vals[0], 0.);
+    assert_eq!(vals[1], 1.);
+    assert_eq!(vals[2], 2.);
+    assert_eq!(vals[3], 3.);
+    assert_eq!(vals[4], 4.);
+}
+
+#[cfg(not(target_arch = "arm64ec"))]
+#[simd_test(enable = "neon,fp16")]
+fn test_vst1q_f16() {
+    let mut vals = [0_f16; 9];
+    let a = f16x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
+
+    unsafe {
+        vst1q_f16(vals[1..].as_mut_ptr(), a.into());
+    }
+
+    assert_eq!(vals[0], 0.);
+    assert_eq!(vals[1], 1.);
+    assert_eq!(vals[2], 2.);
+    assert_eq!(vals[3], 3.);
+    assert_eq!(vals[4], 4.);
+    assert_eq!(vals[5], 5.);
+    assert_eq!(vals[6], 6.);
+    assert_eq!(vals[7], 7.);
+    assert_eq!(vals[8], 8.);
+}
+
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1_f32() {
+fn test_vst1_f32() {
     let mut vals = [0_f32; 3];
     let a = f32x2::new(1., 2.);
 
-    vst1_f32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1_f32(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0.);
     assert_eq!(vals[1], 1.);
@@ -375,11 +459,13 @@ unsafe fn test_vst1_f32() {
 }
 
 #[simd_test(enable = "neon")]
-unsafe fn test_vst1q_f32() {
+fn test_vst1q_f32() {
     let mut vals = [0_f32; 5];
     let a = f32x4::new(1., 2., 3., 4.);
 
-    vst1q_f32(vals[1..].as_mut_ptr(), transmute(a));
+    unsafe {
+        vst1q_f32(vals[1..].as_mut_ptr(), a.into());
+    }
 
     assert_eq!(vals[0], 0.);
     assert_eq!(vals[1], 1.);
diff --git a/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs b/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs
index 9403855f00..1e0333444b 100644
--- a/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs
+++ b/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs
@@ -21,19 +21,19 @@ macro_rules! test_vtbl {
     ) => {
         #[cfg(target_endian = "little")]
         #[simd_test(enable = "neon")]
-        unsafe fn $test_name() {
+        fn $test_name() {
             // create table as array, and transmute it to
             // arm's table type
-            let table: $table_t = mem::transmute([$($table_v),*]);
+            let table: $table_t = unsafe { mem::transmute([$($table_v),*]) };
 
             // For each control vector, perform a table lookup and
             // verify the result:
             $(
                 {
-                    let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]);
-                    let result = $fn_id(table, mem::transmute(ctrl));
-                    let result: $ctrl_t = mem::transmute(result);
-                    let expected: $ctrl_t = mem::transmute([$($exp_v),*]);
+                    let ctrl: $ctrl_t = unsafe { mem::transmute([$($ctrl_v),*]) };
+                    let result = $fn_id(table, unsafe { mem::transmute(ctrl) });
+                    let result: $ctrl_t = unsafe { mem::transmute(result) };
+                    let expected: $ctrl_t = unsafe { mem::transmute([$($exp_v),*]) };
                     assert_eq!(result, expected);
                 }
             )*
@@ -171,20 +171,19 @@ macro_rules! test_vtbx {
     ) => {
         #[cfg(target_endian = "little")]
         #[simd_test(enable = "neon")]
-        unsafe fn $test_name() {
+        fn $test_name() {
             // create table as array, and transmute it to
             // arm's table type
-            let table: $table_t = mem::transmute([$($table_v),*]);
-            let ext: $ext_t = mem::transmute([$($ext_v),*]);
-
+            let table: $table_t = unsafe { mem::transmute([$($table_v),*]) };
+            let ext: $ext_t = unsafe { mem::transmute([$($ext_v),*]) };
             // For each control vector, perform a table lookup and
             // verify the result:
             $(
                 {
-                    let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]);
-                    let result = $fn_id(ext, table, mem::transmute(ctrl));
-                    let result: $ctrl_t = mem::transmute(result);
-                    let expected: $ctrl_t = mem::transmute([$($exp_v),*]);
+                    let ctrl: $ctrl_t = unsafe { mem::transmute([$($ctrl_v),*]) };
+                    let result = $fn_id(ext, table, unsafe { mem::transmute(ctrl) });
+                    let result: $ctrl_t = unsafe { mem::transmute(result) };
+                    let expected: $ctrl_t = unsafe { mem::transmute([$($exp_v),*]) };
                     assert_eq!(result, expected);
                 }
             )*
diff --git a/crates/core_arch/src/arm_shared/test_support.rs b/crates/core_arch/src/arm_shared/test_support.rs
index e2828f8556..8117b81cd9 100644
--- a/crates/core_arch/src/arm_shared/test_support.rs
+++ b/crates/core_arch/src/arm_shared/test_support.rs
@@ -111,13 +111,13 @@ macro_rules! V_f32 {
 
 macro_rules! to64 {
     ($t : ident) => {
-        |v: $t| -> u64 { transmute(v) }
+        |v: $t| -> u64 { unsafe { transmute(v) } }
     };
 }
 
 macro_rules! to128 {
     ($t : ident) => {
-        |v: $t| -> u128 { transmute(v) }
+        |v: $t| -> u128 { unsafe { transmute(v) } }
     };
 }
 
@@ -158,9 +158,7 @@ pub(crate) fn test<T, U, V, W, X>(
 macro_rules! gen_test_fn {
     ($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => {
         pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) {
-            unsafe {
-                test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun)
-            };
+            test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun);
         }
     };
 }
diff --git a/crates/core_arch/src/core_arch_docs.md b/crates/core_arch/src/core_arch_docs.md
index 6aea2b4618..9b52fb2af1 100644
--- a/crates/core_arch/src/core_arch_docs.md
+++ b/crates/core_arch/src/core_arch_docs.md
@@ -185,6 +185,8 @@ others at:
 * [`x86_64`]
 * [`arm`]
 * [`aarch64`]
+* [`amdgpu`]
+* [`hexagon`]
 * [`riscv32`]
 * [`riscv64`]
 * [`mips`]
@@ -201,6 +203,8 @@ others at:
 [`x86_64`]: ../../core/arch/x86_64/index.html
 [`arm`]: ../../core/arch/arm/index.html
 [`aarch64`]: ../../core/arch/aarch64/index.html
+[`amdgpu`]: ../../core/arch/amdgpu/index.html
+[`hexagon`]: ../../core/arch/hexagon/index.html
 [`riscv32`]: ../../core/arch/riscv32/index.html
 [`riscv64`]: ../../core/arch/riscv64/index.html
 [`mips`]: ../../core/arch/mips/index.html
diff --git a/crates/core_arch/src/hexagon/mod.rs b/crates/core_arch/src/hexagon/mod.rs
new file mode 100644
index 0000000000..c973f7dc62
--- /dev/null
+++ b/crates/core_arch/src/hexagon/mod.rs
@@ -0,0 +1,38 @@
+//! Hexagon architecture intrinsics
+//!
+//! This module contains intrinsics for the Qualcomm Hexagon DSP architecture,
+//! including scalar operations and the Hexagon Vector Extensions (HVX).
+//!
+//! ## Scalar Intrinsics
+//!
+//! The [`scalar`] module provides intrinsics for scalar DSP operations including
+//! arithmetic, multiply, shift, saturate, compare, and floating-point operations.
+//!
+//! ## HVX Vector Intrinsics
+//!
+//! HVX is a wide SIMD architecture designed for high-performance signal processing,
+//! machine learning, and image processing workloads.
+//!
+//! HVX supports two vector length modes:
+//! - 64-byte mode (512-bit vectors): Use the [`v64`] module
+//! - 128-byte mode (1024-bit vectors): Use the [`v128`] module
+//!
+//! Both modules are available unconditionally, but require the appropriate
+//! target features to actually use the intrinsics:
+//! - For 64-byte mode: `-C target-feature=+hvx-length64b`
+//! - For 128-byte mode: `-C target-feature=+hvx-length128b`
+//!
+//! Note that HVX v66 and later default to 128-byte mode, while earlier versions
+//! (v60-v65) default to 64-byte mode.
+
+/// Scalar intrinsics for Hexagon DSP operations
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub mod scalar;
+
+/// HVX intrinsics for 64-byte vector mode (512-bit vectors)
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub mod v64;
+
+/// HVX intrinsics for 128-byte vector mode (1024-bit vectors)
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub mod v128;
diff --git a/crates/core_arch/src/hexagon/scalar.rs b/crates/core_arch/src/hexagon/scalar.rs
new file mode 100644
index 0000000000..c906ec5166
--- /dev/null
+++ b/crates/core_arch/src/hexagon/scalar.rs
@@ -0,0 +1,12181 @@
+//! Hexagon scalar intrinsics
+//!
+//! This module provides intrinsics for scalar (non-HVX) Hexagon DSP operations,
+//! including arithmetic, multiply, shift, saturate, compare, and floating-point
+//! operations.
+//!
+//! [Hexagon V68 Programmer's Reference Manual](https://docs.qualcomm.com/doc/80-N2040-45)
+//!
+//! ## Naming Convention
+//!
+//! Function names preserve the original Q6 naming case because the convention
+//! uses case to distinguish register types:
+//! - `P` (uppercase) = 64-bit register pair (`Word64`)
+//! - `p` (lowercase) = predicate register (`Byte`)
+//!
+//! For example, `Q6_P_and_PP` operates on 64-bit pairs while `Q6_p_and_pp`
+//! operates on predicate registers.
+//!
+//! ## Architecture Versions
+//!
+//! Most scalar intrinsics are available on all Hexagon architectures.
+//! Some intrinsics require specific architecture versions (v60, v62, v65,
+//! v66, v67, v68, or v67+audio) and carry
+//! `#[target_feature(enable = "v68")]` (or the appropriate version).
+//! Enable these with `-C target-feature=+v68` or by setting the target CPU
+//! via `-C target-cpu=hexagonv68`.
+//!
+//! Each version includes all features from previous versions.
+
+#![allow(non_snake_case)]
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+// LLVM intrinsic declarations for Hexagon scalar operations
+#[allow(improper_ctypes)]
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.hexagon.A2.abs"]
+    fn hexagon_A2_abs(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.absp"]
+    fn hexagon_A2_absp(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.abssat"]
+    fn hexagon_A2_abssat(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.add"]
+    fn hexagon_A2_add(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.h16.hh"]
+    fn hexagon_A2_addh_h16_hh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.h16.hl"]
+    fn hexagon_A2_addh_h16_hl(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.h16.lh"]
+    fn hexagon_A2_addh_h16_lh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.h16.ll"]
+    fn hexagon_A2_addh_h16_ll(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.h16.sat.hh"]
+    fn hexagon_A2_addh_h16_sat_hh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.h16.sat.hl"]
+    fn hexagon_A2_addh_h16_sat_hl(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.h16.sat.lh"]
+    fn hexagon_A2_addh_h16_sat_lh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.h16.sat.ll"]
+    fn hexagon_A2_addh_h16_sat_ll(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.l16.hl"]
+    fn hexagon_A2_addh_l16_hl(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.l16.ll"]
+    fn hexagon_A2_addh_l16_ll(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.l16.sat.hl"]
+    fn hexagon_A2_addh_l16_sat_hl(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addh.l16.sat.ll"]
+    fn hexagon_A2_addh_l16_sat_ll(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addi"]
+    fn hexagon_A2_addi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addp"]
+    fn hexagon_A2_addp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.addpsat"]
+    fn hexagon_A2_addpsat(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.addsat"]
+    fn hexagon_A2_addsat(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.addsp"]
+    fn hexagon_A2_addsp(_: i32, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.and"]
+    fn hexagon_A2_and(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.andir"]
+    fn hexagon_A2_andir(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.andp"]
+    fn hexagon_A2_andp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.aslh"]
+    fn hexagon_A2_aslh(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.asrh"]
+    fn hexagon_A2_asrh(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.combine.hh"]
+    fn hexagon_A2_combine_hh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.combine.hl"]
+    fn hexagon_A2_combine_hl(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.combine.lh"]
+    fn hexagon_A2_combine_lh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.combine.ll"]
+    fn hexagon_A2_combine_ll(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.combineii"]
+    fn hexagon_A2_combineii(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A2.combinew"]
+    fn hexagon_A2_combinew(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A2.max"]
+    fn hexagon_A2_max(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.maxp"]
+    fn hexagon_A2_maxp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.maxu"]
+    fn hexagon_A2_maxu(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.maxup"]
+    fn hexagon_A2_maxup(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.min"]
+    fn hexagon_A2_min(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.minp"]
+    fn hexagon_A2_minp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.minu"]
+    fn hexagon_A2_minu(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.minup"]
+    fn hexagon_A2_minup(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.neg"]
+    fn hexagon_A2_neg(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.negp"]
+    fn hexagon_A2_negp(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.negsat"]
+    fn hexagon_A2_negsat(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.not"]
+    fn hexagon_A2_not(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.notp"]
+    fn hexagon_A2_notp(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.or"]
+    fn hexagon_A2_or(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.orir"]
+    fn hexagon_A2_orir(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.orp"]
+    fn hexagon_A2_orp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.roundsat"]
+    fn hexagon_A2_roundsat(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.sat"]
+    fn hexagon_A2_sat(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.satb"]
+    fn hexagon_A2_satb(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.sath"]
+    fn hexagon_A2_sath(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.satub"]
+    fn hexagon_A2_satub(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.satuh"]
+    fn hexagon_A2_satuh(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.sub"]
+    fn hexagon_A2_sub(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.h16.hh"]
+    fn hexagon_A2_subh_h16_hh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.h16.hl"]
+    fn hexagon_A2_subh_h16_hl(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.h16.lh"]
+    fn hexagon_A2_subh_h16_lh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.h16.ll"]
+    fn hexagon_A2_subh_h16_ll(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.h16.sat.hh"]
+    fn hexagon_A2_subh_h16_sat_hh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.h16.sat.hl"]
+    fn hexagon_A2_subh_h16_sat_hl(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.h16.sat.lh"]
+    fn hexagon_A2_subh_h16_sat_lh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.h16.sat.ll"]
+    fn hexagon_A2_subh_h16_sat_ll(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.l16.hl"]
+    fn hexagon_A2_subh_l16_hl(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.l16.ll"]
+    fn hexagon_A2_subh_l16_ll(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.l16.sat.hl"]
+    fn hexagon_A2_subh_l16_sat_hl(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subh.l16.sat.ll"]
+    fn hexagon_A2_subh_l16_sat_ll(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subp"]
+    fn hexagon_A2_subp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.subri"]
+    fn hexagon_A2_subri(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.subsat"]
+    fn hexagon_A2_subsat(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.svaddh"]
+    fn hexagon_A2_svaddh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.svaddhs"]
+    fn hexagon_A2_svaddhs(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.svadduhs"]
+    fn hexagon_A2_svadduhs(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.svavgh"]
+    fn hexagon_A2_svavgh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.svavghs"]
+    fn hexagon_A2_svavghs(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.svnavgh"]
+    fn hexagon_A2_svnavgh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.svsubh"]
+    fn hexagon_A2_svsubh(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.svsubhs"]
+    fn hexagon_A2_svsubhs(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.svsubuhs"]
+    fn hexagon_A2_svsubuhs(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.swiz"]
+    fn hexagon_A2_swiz(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.sxtb"]
+    fn hexagon_A2_sxtb(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.sxth"]
+    fn hexagon_A2_sxth(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.sxtw"]
+    fn hexagon_A2_sxtw(_: i32) -> i64;
+    #[link_name = "llvm.hexagon.A2.tfr"]
+    fn hexagon_A2_tfr(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.tfrih"]
+    fn hexagon_A2_tfrih(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.tfril"]
+    fn hexagon_A2_tfril(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.tfrp"]
+    fn hexagon_A2_tfrp(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.tfrpi"]
+    fn hexagon_A2_tfrpi(_: i32) -> i64;
+    #[link_name = "llvm.hexagon.A2.tfrsi"]
+    fn hexagon_A2_tfrsi(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.vabsh"]
+    fn hexagon_A2_vabsh(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vabshsat"]
+    fn hexagon_A2_vabshsat(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vabsw"]
+    fn hexagon_A2_vabsw(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vabswsat"]
+    fn hexagon_A2_vabswsat(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vaddb.map"]
+    fn hexagon_A2_vaddb_map(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vaddh"]
+    fn hexagon_A2_vaddh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vaddhs"]
+    fn hexagon_A2_vaddhs(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vaddub"]
+    fn hexagon_A2_vaddub(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vaddubs"]
+    fn hexagon_A2_vaddubs(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vadduhs"]
+    fn hexagon_A2_vadduhs(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vaddw"]
+    fn hexagon_A2_vaddw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vaddws"]
+    fn hexagon_A2_vaddws(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavgh"]
+    fn hexagon_A2_vavgh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavghcr"]
+    fn hexagon_A2_vavghcr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavghr"]
+    fn hexagon_A2_vavghr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavgub"]
+    fn hexagon_A2_vavgub(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavgubr"]
+    fn hexagon_A2_vavgubr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavguh"]
+    fn hexagon_A2_vavguh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavguhr"]
+    fn hexagon_A2_vavguhr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavguw"]
+    fn hexagon_A2_vavguw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavguwr"]
+    fn hexagon_A2_vavguwr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavgw"]
+    fn hexagon_A2_vavgw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavgwcr"]
+    fn hexagon_A2_vavgwcr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vavgwr"]
+    fn hexagon_A2_vavgwr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vcmpbeq"]
+    fn hexagon_A2_vcmpbeq(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.vcmpbgtu"]
+    fn hexagon_A2_vcmpbgtu(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.vcmpheq"]
+    fn hexagon_A2_vcmpheq(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.vcmphgt"]
+    fn hexagon_A2_vcmphgt(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.vcmphgtu"]
+    fn hexagon_A2_vcmphgtu(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.vcmpweq"]
+    fn hexagon_A2_vcmpweq(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.vcmpwgt"]
+    fn hexagon_A2_vcmpwgt(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.vcmpwgtu"]
+    fn hexagon_A2_vcmpwgtu(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A2.vconj"]
+    fn hexagon_A2_vconj(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vmaxb"]
+    fn hexagon_A2_vmaxb(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vmaxh"]
+    fn hexagon_A2_vmaxh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vmaxub"]
+    fn hexagon_A2_vmaxub(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vmaxuh"]
+    fn hexagon_A2_vmaxuh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vmaxuw"]
+    fn hexagon_A2_vmaxuw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vmaxw"]
+    fn hexagon_A2_vmaxw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vminb"]
+    fn hexagon_A2_vminb(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vminh"]
+    fn hexagon_A2_vminh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vminub"]
+    fn hexagon_A2_vminub(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vminuh"]
+    fn hexagon_A2_vminuh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vminuw"]
+    fn hexagon_A2_vminuw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vminw"]
+    fn hexagon_A2_vminw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vnavgh"]
+    fn hexagon_A2_vnavgh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vnavghcr"]
+    fn hexagon_A2_vnavghcr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vnavghr"]
+    fn hexagon_A2_vnavghr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vnavgw"]
+    fn hexagon_A2_vnavgw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vnavgwcr"]
+    fn hexagon_A2_vnavgwcr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vnavgwr"]
+    fn hexagon_A2_vnavgwr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vraddub"]
+    fn hexagon_A2_vraddub(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vraddub.acc"]
+    fn hexagon_A2_vraddub_acc(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vrsadub"]
+    fn hexagon_A2_vrsadub(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vrsadub.acc"]
+    fn hexagon_A2_vrsadub_acc(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vsubb.map"]
+    fn hexagon_A2_vsubb_map(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vsubh"]
+    fn hexagon_A2_vsubh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vsubhs"]
+    fn hexagon_A2_vsubhs(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vsubub"]
+    fn hexagon_A2_vsubub(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vsububs"]
+    fn hexagon_A2_vsububs(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vsubuhs"]
+    fn hexagon_A2_vsubuhs(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vsubw"]
+    fn hexagon_A2_vsubw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.vsubws"]
+    fn hexagon_A2_vsubws(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.xor"]
+    fn hexagon_A2_xor(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.xorp"]
+    fn hexagon_A2_xorp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A2.zxtb"]
+    fn hexagon_A2_zxtb(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A2.zxth"]
+    fn hexagon_A2_zxth(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.andn"]
+    fn hexagon_A4_andn(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.andnp"]
+    fn hexagon_A4_andnp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A4.bitsplit"]
+    fn hexagon_A4_bitsplit(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.bitspliti"]
+    fn hexagon_A4_bitspliti(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.boundscheck"]
+    fn hexagon_A4_boundscheck(_: i32, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmpbeq"]
+    fn hexagon_A4_cmpbeq(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmpbeqi"]
+    fn hexagon_A4_cmpbeqi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmpbgt"]
+    fn hexagon_A4_cmpbgt(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmpbgti"]
+    fn hexagon_A4_cmpbgti(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmpbgtu"]
+    fn hexagon_A4_cmpbgtu(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmpbgtui"]
+    fn hexagon_A4_cmpbgtui(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmpheq"]
+    fn hexagon_A4_cmpheq(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmpheqi"]
+    fn hexagon_A4_cmpheqi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmphgt"]
+    fn hexagon_A4_cmphgt(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmphgti"]
+    fn hexagon_A4_cmphgti(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmphgtu"]
+    fn hexagon_A4_cmphgtu(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cmphgtui"]
+    fn hexagon_A4_cmphgtui(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.combineir"]
+    fn hexagon_A4_combineir(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.combineri"]
+    fn hexagon_A4_combineri(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.cround.ri"]
+    fn hexagon_A4_cround_ri(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.cround.rr"]
+    fn hexagon_A4_cround_rr(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.modwrapu"]
+    fn hexagon_A4_modwrapu(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.orn"]
+    fn hexagon_A4_orn(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.ornp"]
+    fn hexagon_A4_ornp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A4.rcmpeq"]
+    fn hexagon_A4_rcmpeq(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.rcmpeqi"]
+    fn hexagon_A4_rcmpeqi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.rcmpneq"]
+    fn hexagon_A4_rcmpneq(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.rcmpneqi"]
+    fn hexagon_A4_rcmpneqi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.round.ri"]
+    fn hexagon_A4_round_ri(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.round.ri.sat"]
+    fn hexagon_A4_round_ri_sat(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.round.rr"]
+    fn hexagon_A4_round_rr(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.round.rr.sat"]
+    fn hexagon_A4_round_rr_sat(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.tlbmatch"]
+    fn hexagon_A4_tlbmatch(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmpbeq.any"]
+    fn hexagon_A4_vcmpbeq_any(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmpbeqi"]
+    fn hexagon_A4_vcmpbeqi(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmpbgt"]
+    fn hexagon_A4_vcmpbgt(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmpbgti"]
+    fn hexagon_A4_vcmpbgti(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmpbgtui"]
+    fn hexagon_A4_vcmpbgtui(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmpheqi"]
+    fn hexagon_A4_vcmpheqi(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmphgti"]
+    fn hexagon_A4_vcmphgti(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmphgtui"]
+    fn hexagon_A4_vcmphgtui(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmpweqi"]
+    fn hexagon_A4_vcmpweqi(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmpwgti"]
+    fn hexagon_A4_vcmpwgti(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vcmpwgtui"]
+    fn hexagon_A4_vcmpwgtui(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A4.vrmaxh"]
+    fn hexagon_A4_vrmaxh(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.vrmaxuh"]
+    fn hexagon_A4_vrmaxuh(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.vrmaxuw"]
+    fn hexagon_A4_vrmaxuw(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.vrmaxw"]
+    fn hexagon_A4_vrmaxw(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.vrminh"]
+    fn hexagon_A4_vrminh(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.vrminuh"]
+    fn hexagon_A4_vrminuh(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.vrminuw"]
+    fn hexagon_A4_vrminuw(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A4.vrminw"]
+    fn hexagon_A4_vrminw(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A5.vaddhubs"]
+    fn hexagon_A5_vaddhubs(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.C2.all8"]
+    fn hexagon_C2_all8(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.and"]
+    fn hexagon_C2_and(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.andn"]
+    fn hexagon_C2_andn(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.any8"]
+    fn hexagon_C2_any8(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.bitsclr"]
+    fn hexagon_C2_bitsclr(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.bitsclri"]
+    fn hexagon_C2_bitsclri(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.bitsset"]
+    fn hexagon_C2_bitsset(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpeq"]
+    fn hexagon_C2_cmpeq(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpeqi"]
+    fn hexagon_C2_cmpeqi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpeqp"]
+    fn hexagon_C2_cmpeqp(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpgei"]
+    fn hexagon_C2_cmpgei(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpgeui"]
+    fn hexagon_C2_cmpgeui(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpgt"]
+    fn hexagon_C2_cmpgt(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpgti"]
+    fn hexagon_C2_cmpgti(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpgtp"]
+    fn hexagon_C2_cmpgtp(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpgtu"]
+    fn hexagon_C2_cmpgtu(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpgtui"]
+    fn hexagon_C2_cmpgtui(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpgtup"]
+    fn hexagon_C2_cmpgtup(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmplt"]
+    fn hexagon_C2_cmplt(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.cmpltu"]
+    fn hexagon_C2_cmpltu(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.mask"]
+    fn hexagon_C2_mask(_: i32) -> i64;
+    #[link_name = "llvm.hexagon.C2.mux"]
+    fn hexagon_C2_mux(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.muxii"]
+    fn hexagon_C2_muxii(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.muxir"]
+    fn hexagon_C2_muxir(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.muxri"]
+    fn hexagon_C2_muxri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.not"]
+    fn hexagon_C2_not(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.or"]
+    fn hexagon_C2_or(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.orn"]
+    fn hexagon_C2_orn(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.pxfer.map"]
+    fn hexagon_C2_pxfer_map(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.tfrpr"]
+    fn hexagon_C2_tfrpr(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.tfrrp"]
+    fn hexagon_C2_tfrrp(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.vitpack"]
+    fn hexagon_C2_vitpack(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C2.vmux"]
+    fn hexagon_C2_vmux(_: i32, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.C2.xor"]
+    fn hexagon_C2_xor(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.and.and"]
+    fn hexagon_C4_and_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.and.andn"]
+    fn hexagon_C4_and_andn(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.and.or"]
+    fn hexagon_C4_and_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.and.orn"]
+    fn hexagon_C4_and_orn(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.cmplte"]
+    fn hexagon_C4_cmplte(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.cmpltei"]
+    fn hexagon_C4_cmpltei(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.cmplteu"]
+    fn hexagon_C4_cmplteu(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.cmplteui"]
+    fn hexagon_C4_cmplteui(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.cmpneq"]
+    fn hexagon_C4_cmpneq(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.cmpneqi"]
+    fn hexagon_C4_cmpneqi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.fastcorner9"]
+    fn hexagon_C4_fastcorner9(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.fastcorner9.not"]
+    fn hexagon_C4_fastcorner9_not(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.nbitsclr"]
+    fn hexagon_C4_nbitsclr(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.nbitsclri"]
+    fn hexagon_C4_nbitsclri(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.nbitsset"]
+    fn hexagon_C4_nbitsset(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.or.and"]
+    fn hexagon_C4_or_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.or.andn"]
+    fn hexagon_C4_or_andn(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.or.or"]
+    fn hexagon_C4_or_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.C4.or.orn"]
+    fn hexagon_C4_or_orn(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.F2.conv.d2df"]
+    fn hexagon_F2_conv_d2df(_: i64) -> f64;
+    #[link_name = "llvm.hexagon.F2.conv.d2sf"]
+    fn hexagon_F2_conv_d2sf(_: i64) -> f32;
+    #[link_name = "llvm.hexagon.F2.conv.df2d"]
+    fn hexagon_F2_conv_df2d(_: f64) -> i64;
+    #[link_name = "llvm.hexagon.F2.conv.df2d.chop"]
+    fn hexagon_F2_conv_df2d_chop(_: f64) -> i64;
+    #[link_name = "llvm.hexagon.F2.conv.df2sf"]
+    fn hexagon_F2_conv_df2sf(_: f64) -> f32;
+    #[link_name = "llvm.hexagon.F2.conv.df2ud"]
+    fn hexagon_F2_conv_df2ud(_: f64) -> i64;
+    #[link_name = "llvm.hexagon.F2.conv.df2ud.chop"]
+    fn hexagon_F2_conv_df2ud_chop(_: f64) -> i64;
+    #[link_name = "llvm.hexagon.F2.conv.df2uw"]
+    fn hexagon_F2_conv_df2uw(_: f64) -> i32;
+    #[link_name = "llvm.hexagon.F2.conv.df2uw.chop"]
+    fn hexagon_F2_conv_df2uw_chop(_: f64) -> i32;
+    #[link_name = "llvm.hexagon.F2.conv.df2w"]
+    fn hexagon_F2_conv_df2w(_: f64) -> i32;
+    #[link_name = "llvm.hexagon.F2.conv.df2w.chop"]
+    fn hexagon_F2_conv_df2w_chop(_: f64) -> i32;
+    #[link_name = "llvm.hexagon.F2.conv.sf2d"]
+    fn hexagon_F2_conv_sf2d(_: f32) -> i64;
+    #[link_name = "llvm.hexagon.F2.conv.sf2d.chop"]
+    fn hexagon_F2_conv_sf2d_chop(_: f32) -> i64;
+    #[link_name = "llvm.hexagon.F2.conv.sf2df"]
+    fn hexagon_F2_conv_sf2df(_: f32) -> f64;
+    #[link_name = "llvm.hexagon.F2.conv.sf2ud"]
+    fn hexagon_F2_conv_sf2ud(_: f32) -> i64;
+    #[link_name = "llvm.hexagon.F2.conv.sf2ud.chop"]
+    fn hexagon_F2_conv_sf2ud_chop(_: f32) -> i64;
+    #[link_name = "llvm.hexagon.F2.conv.sf2uw"]
+    fn hexagon_F2_conv_sf2uw(_: f32) -> i32;
+    #[link_name = "llvm.hexagon.F2.conv.sf2uw.chop"]
+    fn hexagon_F2_conv_sf2uw_chop(_: f32) -> i32;
+    #[link_name = "llvm.hexagon.F2.conv.sf2w"]
+    fn hexagon_F2_conv_sf2w(_: f32) -> i32;
+    #[link_name = "llvm.hexagon.F2.conv.sf2w.chop"]
+    fn hexagon_F2_conv_sf2w_chop(_: f32) -> i32;
+    #[link_name = "llvm.hexagon.F2.conv.ud2df"]
+    fn hexagon_F2_conv_ud2df(_: i64) -> f64;
+    #[link_name = "llvm.hexagon.F2.conv.ud2sf"]
+    fn hexagon_F2_conv_ud2sf(_: i64) -> f32;
+    #[link_name = "llvm.hexagon.F2.conv.uw2df"]
+    fn hexagon_F2_conv_uw2df(_: i32) -> f64;
+    #[link_name = "llvm.hexagon.F2.conv.uw2sf"]
+    fn hexagon_F2_conv_uw2sf(_: i32) -> f32;
+    #[link_name = "llvm.hexagon.F2.conv.w2df"]
+    fn hexagon_F2_conv_w2df(_: i32) -> f64;
+    #[link_name = "llvm.hexagon.F2.conv.w2sf"]
+    fn hexagon_F2_conv_w2sf(_: i32) -> f32;
+    #[link_name = "llvm.hexagon.F2.dfclass"]
+    fn hexagon_F2_dfclass(_: f64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.F2.dfcmpeq"]
+    fn hexagon_F2_dfcmpeq(_: f64, _: f64) -> i32;
+    #[link_name = "llvm.hexagon.F2.dfcmpge"]
+    fn hexagon_F2_dfcmpge(_: f64, _: f64) -> i32;
+    #[link_name = "llvm.hexagon.F2.dfcmpgt"]
+    fn hexagon_F2_dfcmpgt(_: f64, _: f64) -> i32;
+    #[link_name = "llvm.hexagon.F2.dfcmpuo"]
+    fn hexagon_F2_dfcmpuo(_: f64, _: f64) -> i32;
+    #[link_name = "llvm.hexagon.F2.dfimm.n"]
+    fn hexagon_F2_dfimm_n(_: i32) -> f64;
+    #[link_name = "llvm.hexagon.F2.dfimm.p"]
+    fn hexagon_F2_dfimm_p(_: i32) -> f64;
+    #[link_name = "llvm.hexagon.F2.sfadd"]
+    fn hexagon_F2_sfadd(_: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sfclass"]
+    fn hexagon_F2_sfclass(_: f32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.F2.sfcmpeq"]
+    fn hexagon_F2_sfcmpeq(_: f32, _: f32) -> i32;
+    #[link_name = "llvm.hexagon.F2.sfcmpge"]
+    fn hexagon_F2_sfcmpge(_: f32, _: f32) -> i32;
+    #[link_name = "llvm.hexagon.F2.sfcmpgt"]
+    fn hexagon_F2_sfcmpgt(_: f32, _: f32) -> i32;
+    #[link_name = "llvm.hexagon.F2.sfcmpuo"]
+    fn hexagon_F2_sfcmpuo(_: f32, _: f32) -> i32;
+    #[link_name = "llvm.hexagon.F2.sffixupd"]
+    fn hexagon_F2_sffixupd(_: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sffixupn"]
+    fn hexagon_F2_sffixupn(_: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sffixupr"]
+    fn hexagon_F2_sffixupr(_: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sffma"]
+    fn hexagon_F2_sffma(_: f32, _: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sffma.lib"]
+    fn hexagon_F2_sffma_lib(_: f32, _: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sffma.sc"]
+    fn hexagon_F2_sffma_sc(_: f32, _: f32, _: f32, _: i32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sffms"]
+    fn hexagon_F2_sffms(_: f32, _: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sffms.lib"]
+    fn hexagon_F2_sffms_lib(_: f32, _: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sfimm.n"]
+    fn hexagon_F2_sfimm_n(_: i32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sfimm.p"]
+    fn hexagon_F2_sfimm_p(_: i32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sfmax"]
+    fn hexagon_F2_sfmax(_: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sfmin"]
+    fn hexagon_F2_sfmin(_: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sfmpy"]
+    fn hexagon_F2_sfmpy(_: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.F2.sfsub"]
+    fn hexagon_F2_sfsub(_: f32, _: f32) -> f32;
+    #[link_name = "llvm.hexagon.M2.acci"]
+    fn hexagon_M2_acci(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.accii"]
+    fn hexagon_M2_accii(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.cmaci.s0"]
+    fn hexagon_M2_cmaci_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmacr.s0"]
+    fn hexagon_M2_cmacr_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmacs.s0"]
+    fn hexagon_M2_cmacs_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmacs.s1"]
+    fn hexagon_M2_cmacs_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmacsc.s0"]
+    fn hexagon_M2_cmacsc_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmacsc.s1"]
+    fn hexagon_M2_cmacsc_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmpyi.s0"]
+    fn hexagon_M2_cmpyi_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmpyr.s0"]
+    fn hexagon_M2_cmpyr_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmpyrs.s0"]
+    fn hexagon_M2_cmpyrs_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.cmpyrs.s1"]
+    fn hexagon_M2_cmpyrs_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.cmpyrsc.s0"]
+    fn hexagon_M2_cmpyrsc_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.cmpyrsc.s1"]
+    fn hexagon_M2_cmpyrsc_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.cmpys.s0"]
+    fn hexagon_M2_cmpys_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmpys.s1"]
+    fn hexagon_M2_cmpys_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmpysc.s0"]
+    fn hexagon_M2_cmpysc_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cmpysc.s1"]
+    fn hexagon_M2_cmpysc_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cnacs.s0"]
+    fn hexagon_M2_cnacs_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cnacs.s1"]
+    fn hexagon_M2_cnacs_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cnacsc.s0"]
+    fn hexagon_M2_cnacsc_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.cnacsc.s1"]
+    fn hexagon_M2_cnacsc_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.dpmpyss.acc.s0"]
+    fn hexagon_M2_dpmpyss_acc_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.dpmpyss.nac.s0"]
+    fn hexagon_M2_dpmpyss_nac_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.dpmpyss.rnd.s0"]
+    fn hexagon_M2_dpmpyss_rnd_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.dpmpyss.s0"]
+    fn hexagon_M2_dpmpyss_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.dpmpyuu.acc.s0"]
+    fn hexagon_M2_dpmpyuu_acc_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.dpmpyuu.nac.s0"]
+    fn hexagon_M2_dpmpyuu_nac_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.dpmpyuu.s0"]
+    fn hexagon_M2_dpmpyuu_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.hmmpyh.rs1"]
+    fn hexagon_M2_hmmpyh_rs1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.hmmpyh.s1"]
+    fn hexagon_M2_hmmpyh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.hmmpyl.rs1"]
+    fn hexagon_M2_hmmpyl_rs1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.hmmpyl.s1"]
+    fn hexagon_M2_hmmpyl_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.maci"]
+    fn hexagon_M2_maci(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.macsin"]
+    fn hexagon_M2_macsin(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.macsip"]
+    fn hexagon_M2_macsip(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mmachs.rs0"]
+    fn hexagon_M2_mmachs_rs0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmachs.rs1"]
+    fn hexagon_M2_mmachs_rs1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmachs.s0"]
+    fn hexagon_M2_mmachs_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmachs.s1"]
+    fn hexagon_M2_mmachs_s1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmacls.rs0"]
+    fn hexagon_M2_mmacls_rs0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmacls.rs1"]
+    fn hexagon_M2_mmacls_rs1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmacls.s0"]
+    fn hexagon_M2_mmacls_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmacls.s1"]
+    fn hexagon_M2_mmacls_s1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmacuhs.rs0"]
+    fn hexagon_M2_mmacuhs_rs0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmacuhs.rs1"]
+    fn hexagon_M2_mmacuhs_rs1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmacuhs.s0"]
+    fn hexagon_M2_mmacuhs_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmacuhs.s1"]
+    fn hexagon_M2_mmacuhs_s1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmaculs.rs0"]
+    fn hexagon_M2_mmaculs_rs0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmaculs.rs1"]
+    fn hexagon_M2_mmaculs_rs1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmaculs.s0"]
+    fn hexagon_M2_mmaculs_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmaculs.s1"]
+    fn hexagon_M2_mmaculs_s1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyh.rs0"]
+    fn hexagon_M2_mmpyh_rs0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyh.rs1"]
+    fn hexagon_M2_mmpyh_rs1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyh.s0"]
+    fn hexagon_M2_mmpyh_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyh.s1"]
+    fn hexagon_M2_mmpyh_s1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyl.rs0"]
+    fn hexagon_M2_mmpyl_rs0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyl.rs1"]
+    fn hexagon_M2_mmpyl_rs1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyl.s0"]
+    fn hexagon_M2_mmpyl_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyl.s1"]
+    fn hexagon_M2_mmpyl_s1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyuh.rs0"]
+    fn hexagon_M2_mmpyuh_rs0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyuh.rs1"]
+    fn hexagon_M2_mmpyuh_rs1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyuh.s0"]
+    fn hexagon_M2_mmpyuh_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyuh.s1"]
+    fn hexagon_M2_mmpyuh_s1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyul.rs0"]
+    fn hexagon_M2_mmpyul_rs0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyul.rs1"]
+    fn hexagon_M2_mmpyul_rs1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyul.s0"]
+    fn hexagon_M2_mmpyul_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mmpyul.s1"]
+    fn hexagon_M2_mmpyul_s1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.hh.s0"]
+    fn hexagon_M2_mpy_acc_hh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.hh.s1"]
+    fn hexagon_M2_mpy_acc_hh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.hl.s0"]
+    fn hexagon_M2_mpy_acc_hl_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.hl.s1"]
+    fn hexagon_M2_mpy_acc_hl_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.lh.s0"]
+    fn hexagon_M2_mpy_acc_lh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.lh.s1"]
+    fn hexagon_M2_mpy_acc_lh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.ll.s0"]
+    fn hexagon_M2_mpy_acc_ll_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.ll.s1"]
+    fn hexagon_M2_mpy_acc_ll_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.sat.hh.s0"]
+    fn hexagon_M2_mpy_acc_sat_hh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.sat.hh.s1"]
+    fn hexagon_M2_mpy_acc_sat_hh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.sat.hl.s0"]
+    fn hexagon_M2_mpy_acc_sat_hl_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.sat.hl.s1"]
+    fn hexagon_M2_mpy_acc_sat_hl_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.sat.lh.s0"]
+    fn hexagon_M2_mpy_acc_sat_lh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.sat.lh.s1"]
+    fn hexagon_M2_mpy_acc_sat_lh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.sat.ll.s0"]
+    fn hexagon_M2_mpy_acc_sat_ll_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.acc.sat.ll.s1"]
+    fn hexagon_M2_mpy_acc_sat_ll_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.hh.s0"]
+    fn hexagon_M2_mpy_hh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.hh.s1"]
+    fn hexagon_M2_mpy_hh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.hl.s0"]
+    fn hexagon_M2_mpy_hl_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.hl.s1"]
+    fn hexagon_M2_mpy_hl_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.lh.s0"]
+    fn hexagon_M2_mpy_lh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.lh.s1"]
+    fn hexagon_M2_mpy_lh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.ll.s0"]
+    fn hexagon_M2_mpy_ll_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.ll.s1"]
+    fn hexagon_M2_mpy_ll_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.hh.s0"]
+    fn hexagon_M2_mpy_nac_hh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.hh.s1"]
+    fn hexagon_M2_mpy_nac_hh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.hl.s0"]
+    fn hexagon_M2_mpy_nac_hl_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.hl.s1"]
+    fn hexagon_M2_mpy_nac_hl_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.lh.s0"]
+    fn hexagon_M2_mpy_nac_lh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.lh.s1"]
+    fn hexagon_M2_mpy_nac_lh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.ll.s0"]
+    fn hexagon_M2_mpy_nac_ll_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.ll.s1"]
+    fn hexagon_M2_mpy_nac_ll_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.sat.hh.s0"]
+    fn hexagon_M2_mpy_nac_sat_hh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.sat.hh.s1"]
+    fn hexagon_M2_mpy_nac_sat_hh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.sat.hl.s0"]
+    fn hexagon_M2_mpy_nac_sat_hl_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.sat.hl.s1"]
+    fn hexagon_M2_mpy_nac_sat_hl_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.sat.lh.s0"]
+    fn hexagon_M2_mpy_nac_sat_lh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.sat.lh.s1"]
+    fn hexagon_M2_mpy_nac_sat_lh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.sat.ll.s0"]
+    fn hexagon_M2_mpy_nac_sat_ll_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.nac.sat.ll.s1"]
+    fn hexagon_M2_mpy_nac_sat_ll_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.rnd.hh.s0"]
+    fn hexagon_M2_mpy_rnd_hh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.rnd.hh.s1"]
+    fn hexagon_M2_mpy_rnd_hh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.rnd.hl.s0"]
+    fn hexagon_M2_mpy_rnd_hl_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.rnd.hl.s1"]
+    fn hexagon_M2_mpy_rnd_hl_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.rnd.lh.s0"]
+    fn hexagon_M2_mpy_rnd_lh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.rnd.lh.s1"]
+    fn hexagon_M2_mpy_rnd_lh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.rnd.ll.s0"]
+    fn hexagon_M2_mpy_rnd_ll_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.rnd.ll.s1"]
+    fn hexagon_M2_mpy_rnd_ll_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.hh.s0"]
+    fn hexagon_M2_mpy_sat_hh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.hh.s1"]
+    fn hexagon_M2_mpy_sat_hh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.hl.s0"]
+    fn hexagon_M2_mpy_sat_hl_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.hl.s1"]
+    fn hexagon_M2_mpy_sat_hl_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.lh.s0"]
+    fn hexagon_M2_mpy_sat_lh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.lh.s1"]
+    fn hexagon_M2_mpy_sat_lh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.ll.s0"]
+    fn hexagon_M2_mpy_sat_ll_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.ll.s1"]
+    fn hexagon_M2_mpy_sat_ll_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.rnd.hh.s0"]
+    fn hexagon_M2_mpy_sat_rnd_hh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.rnd.hh.s1"]
+    fn hexagon_M2_mpy_sat_rnd_hh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.rnd.hl.s0"]
+    fn hexagon_M2_mpy_sat_rnd_hl_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.rnd.hl.s1"]
+    fn hexagon_M2_mpy_sat_rnd_hl_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.rnd.lh.s0"]
+    fn hexagon_M2_mpy_sat_rnd_lh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.rnd.lh.s1"]
+    fn hexagon_M2_mpy_sat_rnd_lh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.rnd.ll.s0"]
+    fn hexagon_M2_mpy_sat_rnd_ll_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.sat.rnd.ll.s1"]
+    fn hexagon_M2_mpy_sat_rnd_ll_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.up"]
+    fn hexagon_M2_mpy_up(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.up.s1"]
+    fn hexagon_M2_mpy_up_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpy.up.s1.sat"]
+    fn hexagon_M2_mpy_up_s1_sat(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyd.acc.hh.s0"]
+    fn hexagon_M2_mpyd_acc_hh_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.acc.hh.s1"]
+    fn hexagon_M2_mpyd_acc_hh_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.acc.hl.s0"]
+    fn hexagon_M2_mpyd_acc_hl_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.acc.hl.s1"]
+    fn hexagon_M2_mpyd_acc_hl_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.acc.lh.s0"]
+    fn hexagon_M2_mpyd_acc_lh_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.acc.lh.s1"]
+    fn hexagon_M2_mpyd_acc_lh_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.acc.ll.s0"]
+    fn hexagon_M2_mpyd_acc_ll_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.acc.ll.s1"]
+    fn hexagon_M2_mpyd_acc_ll_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.hh.s0"]
+    fn hexagon_M2_mpyd_hh_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.hh.s1"]
+    fn hexagon_M2_mpyd_hh_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.hl.s0"]
+    fn hexagon_M2_mpyd_hl_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.hl.s1"]
+    fn hexagon_M2_mpyd_hl_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.lh.s0"]
+    fn hexagon_M2_mpyd_lh_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.lh.s1"]
+    fn hexagon_M2_mpyd_lh_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.ll.s0"]
+    fn hexagon_M2_mpyd_ll_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.ll.s1"]
+    fn hexagon_M2_mpyd_ll_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.nac.hh.s0"]
+    fn hexagon_M2_mpyd_nac_hh_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.nac.hh.s1"]
+    fn hexagon_M2_mpyd_nac_hh_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.nac.hl.s0"]
+    fn hexagon_M2_mpyd_nac_hl_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.nac.hl.s1"]
+    fn hexagon_M2_mpyd_nac_hl_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.nac.lh.s0"]
+    fn hexagon_M2_mpyd_nac_lh_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.nac.lh.s1"]
+    fn hexagon_M2_mpyd_nac_lh_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.nac.ll.s0"]
+    fn hexagon_M2_mpyd_nac_ll_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.nac.ll.s1"]
+    fn hexagon_M2_mpyd_nac_ll_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.rnd.hh.s0"]
+    fn hexagon_M2_mpyd_rnd_hh_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.rnd.hh.s1"]
+    fn hexagon_M2_mpyd_rnd_hh_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.rnd.hl.s0"]
+    fn hexagon_M2_mpyd_rnd_hl_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.rnd.hl.s1"]
+    fn hexagon_M2_mpyd_rnd_hl_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.rnd.lh.s0"]
+    fn hexagon_M2_mpyd_rnd_lh_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.rnd.lh.s1"]
+    fn hexagon_M2_mpyd_rnd_lh_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.rnd.ll.s0"]
+    fn hexagon_M2_mpyd_rnd_ll_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyd.rnd.ll.s1"]
+    fn hexagon_M2_mpyd_rnd_ll_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyi"]
+    fn hexagon_M2_mpyi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpysmi"]
+    fn hexagon_M2_mpysmi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpysu.up"]
+    fn hexagon_M2_mpysu_up(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.acc.hh.s0"]
+    fn hexagon_M2_mpyu_acc_hh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.acc.hh.s1"]
+    fn hexagon_M2_mpyu_acc_hh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.acc.hl.s0"]
+    fn hexagon_M2_mpyu_acc_hl_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.acc.hl.s1"]
+    fn hexagon_M2_mpyu_acc_hl_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.acc.lh.s0"]
+    fn hexagon_M2_mpyu_acc_lh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.acc.lh.s1"]
+    fn hexagon_M2_mpyu_acc_lh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.acc.ll.s0"]
+    fn hexagon_M2_mpyu_acc_ll_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.acc.ll.s1"]
+    fn hexagon_M2_mpyu_acc_ll_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.hh.s0"]
+    fn hexagon_M2_mpyu_hh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.hh.s1"]
+    fn hexagon_M2_mpyu_hh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.hl.s0"]
+    fn hexagon_M2_mpyu_hl_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.hl.s1"]
+    fn hexagon_M2_mpyu_hl_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.lh.s0"]
+    fn hexagon_M2_mpyu_lh_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.lh.s1"]
+    fn hexagon_M2_mpyu_lh_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.ll.s0"]
+    fn hexagon_M2_mpyu_ll_s0(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.ll.s1"]
+    fn hexagon_M2_mpyu_ll_s1(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.nac.hh.s0"]
+    fn hexagon_M2_mpyu_nac_hh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.nac.hh.s1"]
+    fn hexagon_M2_mpyu_nac_hh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.nac.hl.s0"]
+    fn hexagon_M2_mpyu_nac_hl_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.nac.hl.s1"]
+    fn hexagon_M2_mpyu_nac_hl_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.nac.lh.s0"]
+    fn hexagon_M2_mpyu_nac_lh_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.nac.lh.s1"]
+    fn hexagon_M2_mpyu_nac_lh_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.nac.ll.s0"]
+    fn hexagon_M2_mpyu_nac_ll_s0(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.nac.ll.s1"]
+    fn hexagon_M2_mpyu_nac_ll_s1(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyu.up"]
+    fn hexagon_M2_mpyu_up(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.mpyud.acc.hh.s0"]
+    fn hexagon_M2_mpyud_acc_hh_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.acc.hh.s1"]
+    fn hexagon_M2_mpyud_acc_hh_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.acc.hl.s0"]
+    fn hexagon_M2_mpyud_acc_hl_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.acc.hl.s1"]
+    fn hexagon_M2_mpyud_acc_hl_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.acc.lh.s0"]
+    fn hexagon_M2_mpyud_acc_lh_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.acc.lh.s1"]
+    fn hexagon_M2_mpyud_acc_lh_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.acc.ll.s0"]
+    fn hexagon_M2_mpyud_acc_ll_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.acc.ll.s1"]
+    fn hexagon_M2_mpyud_acc_ll_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.hh.s0"]
+    fn hexagon_M2_mpyud_hh_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.hh.s1"]
+    fn hexagon_M2_mpyud_hh_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.hl.s0"]
+    fn hexagon_M2_mpyud_hl_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.hl.s1"]
+    fn hexagon_M2_mpyud_hl_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.lh.s0"]
+    fn hexagon_M2_mpyud_lh_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.lh.s1"]
+    fn hexagon_M2_mpyud_lh_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.ll.s0"]
+    fn hexagon_M2_mpyud_ll_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.ll.s1"]
+    fn hexagon_M2_mpyud_ll_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.nac.hh.s0"]
+    fn hexagon_M2_mpyud_nac_hh_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.nac.hh.s1"]
+    fn hexagon_M2_mpyud_nac_hh_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.nac.hl.s0"]
+    fn hexagon_M2_mpyud_nac_hl_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.nac.hl.s1"]
+    fn hexagon_M2_mpyud_nac_hl_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.nac.lh.s0"]
+    fn hexagon_M2_mpyud_nac_lh_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.nac.lh.s1"]
+    fn hexagon_M2_mpyud_nac_lh_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.nac.ll.s0"]
+    fn hexagon_M2_mpyud_nac_ll_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyud.nac.ll.s1"]
+    fn hexagon_M2_mpyud_nac_ll_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.mpyui"]
+    fn hexagon_M2_mpyui(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.nacci"]
+    fn hexagon_M2_nacci(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.naccii"]
+    fn hexagon_M2_naccii(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.subacc"]
+    fn hexagon_M2_subacc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.vabsdiffh"]
+    fn hexagon_M2_vabsdiffh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vabsdiffw"]
+    fn hexagon_M2_vabsdiffw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vcmac.s0.sat.i"]
+    fn hexagon_M2_vcmac_s0_sat_i(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vcmac.s0.sat.r"]
+    fn hexagon_M2_vcmac_s0_sat_r(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vcmpy.s0.sat.i"]
+    fn hexagon_M2_vcmpy_s0_sat_i(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vcmpy.s0.sat.r"]
+    fn hexagon_M2_vcmpy_s0_sat_r(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vcmpy.s1.sat.i"]
+    fn hexagon_M2_vcmpy_s1_sat_i(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vcmpy.s1.sat.r"]
+    fn hexagon_M2_vcmpy_s1_sat_r(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vdmacs.s0"]
+    fn hexagon_M2_vdmacs_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vdmacs.s1"]
+    fn hexagon_M2_vdmacs_s1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vdmpyrs.s0"]
+    fn hexagon_M2_vdmpyrs_s0(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M2.vdmpyrs.s1"]
+    fn hexagon_M2_vdmpyrs_s1(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M2.vdmpys.s0"]
+    fn hexagon_M2_vdmpys_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vdmpys.s1"]
+    fn hexagon_M2_vdmpys_s1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmac2"]
+    fn hexagon_M2_vmac2(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmac2es"]
+    fn hexagon_M2_vmac2es(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmac2es.s0"]
+    fn hexagon_M2_vmac2es_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmac2es.s1"]
+    fn hexagon_M2_vmac2es_s1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmac2s.s0"]
+    fn hexagon_M2_vmac2s_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmac2s.s1"]
+    fn hexagon_M2_vmac2s_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmac2su.s0"]
+    fn hexagon_M2_vmac2su_s0(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmac2su.s1"]
+    fn hexagon_M2_vmac2su_s1(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmpy2es.s0"]
+    fn hexagon_M2_vmpy2es_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmpy2es.s1"]
+    fn hexagon_M2_vmpy2es_s1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmpy2s.s0"]
+    fn hexagon_M2_vmpy2s_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmpy2s.s0pack"]
+    fn hexagon_M2_vmpy2s_s0pack(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.vmpy2s.s1"]
+    fn hexagon_M2_vmpy2s_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmpy2s.s1pack"]
+    fn hexagon_M2_vmpy2s_s1pack(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.vmpy2su.s0"]
+    fn hexagon_M2_vmpy2su_s0(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vmpy2su.s1"]
+    fn hexagon_M2_vmpy2su_s1(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vraddh"]
+    fn hexagon_M2_vraddh(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M2.vradduh"]
+    fn hexagon_M2_vradduh(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M2.vrcmaci.s0"]
+    fn hexagon_M2_vrcmaci_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmaci.s0c"]
+    fn hexagon_M2_vrcmaci_s0c(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmacr.s0"]
+    fn hexagon_M2_vrcmacr_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmacr.s0c"]
+    fn hexagon_M2_vrcmacr_s0c(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmpyi.s0"]
+    fn hexagon_M2_vrcmpyi_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmpyi.s0c"]
+    fn hexagon_M2_vrcmpyi_s0c(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmpyr.s0"]
+    fn hexagon_M2_vrcmpyr_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmpyr.s0c"]
+    fn hexagon_M2_vrcmpyr_s0c(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmpys.acc.s1"]
+    fn hexagon_M2_vrcmpys_acc_s1(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmpys.s1"]
+    fn hexagon_M2_vrcmpys_s1(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrcmpys.s1rp"]
+    fn hexagon_M2_vrcmpys_s1rp(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M2.vrmac.s0"]
+    fn hexagon_M2_vrmac_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.vrmpy.s0"]
+    fn hexagon_M2_vrmpy_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M2.xor.xacc"]
+    fn hexagon_M2_xor_xacc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.and.and"]
+    fn hexagon_M4_and_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.and.andn"]
+    fn hexagon_M4_and_andn(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.and.or"]
+    fn hexagon_M4_and_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.and.xor"]
+    fn hexagon_M4_and_xor(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.cmpyi.wh"]
+    fn hexagon_M4_cmpyi_wh(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.cmpyi.whc"]
+    fn hexagon_M4_cmpyi_whc(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.cmpyr.wh"]
+    fn hexagon_M4_cmpyr_wh(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.cmpyr.whc"]
+    fn hexagon_M4_cmpyr_whc(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.mac.up.s1.sat"]
+    fn hexagon_M4_mac_up_s1_sat(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.mpyri.addi"]
+    fn hexagon_M4_mpyri_addi(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.mpyri.addr"]
+    fn hexagon_M4_mpyri_addr(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.mpyri.addr.u2"]
+    fn hexagon_M4_mpyri_addr_u2(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.mpyrr.addi"]
+    fn hexagon_M4_mpyrr_addi(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.mpyrr.addr"]
+    fn hexagon_M4_mpyrr_addr(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.nac.up.s1.sat"]
+    fn hexagon_M4_nac_up_s1_sat(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.or.and"]
+    fn hexagon_M4_or_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.or.andn"]
+    fn hexagon_M4_or_andn(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.or.or"]
+    fn hexagon_M4_or_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.or.xor"]
+    fn hexagon_M4_or_xor(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.pmpyw"]
+    fn hexagon_M4_pmpyw(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M4.pmpyw.acc"]
+    fn hexagon_M4_pmpyw_acc(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M4.vpmpyh"]
+    fn hexagon_M4_vpmpyh(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M4.vpmpyh.acc"]
+    fn hexagon_M4_vpmpyh_acc(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M4.vrmpyeh.acc.s0"]
+    fn hexagon_M4_vrmpyeh_acc_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M4.vrmpyeh.acc.s1"]
+    fn hexagon_M4_vrmpyeh_acc_s1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M4.vrmpyeh.s0"]
+    fn hexagon_M4_vrmpyeh_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M4.vrmpyeh.s1"]
+    fn hexagon_M4_vrmpyeh_s1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M4.vrmpyoh.acc.s0"]
+    fn hexagon_M4_vrmpyoh_acc_s0(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M4.vrmpyoh.acc.s1"]
+    fn hexagon_M4_vrmpyoh_acc_s1(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M4.vrmpyoh.s0"]
+    fn hexagon_M4_vrmpyoh_s0(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M4.vrmpyoh.s1"]
+    fn hexagon_M4_vrmpyoh_s1(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M4.xor.and"]
+    fn hexagon_M4_xor_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.xor.andn"]
+    fn hexagon_M4_xor_andn(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.xor.or"]
+    fn hexagon_M4_xor_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M4.xor.xacc"]
+    fn hexagon_M4_xor_xacc(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M5.vdmacbsu"]
+    fn hexagon_M5_vdmacbsu(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M5.vdmpybsu"]
+    fn hexagon_M5_vdmpybsu(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M5.vmacbsu"]
+    fn hexagon_M5_vmacbsu(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M5.vmacbuu"]
+    fn hexagon_M5_vmacbuu(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M5.vmpybsu"]
+    fn hexagon_M5_vmpybsu(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M5.vmpybuu"]
+    fn hexagon_M5_vmpybuu(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.M5.vrmacbsu"]
+    fn hexagon_M5_vrmacbsu(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M5.vrmacbuu"]
+    fn hexagon_M5_vrmacbuu(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M5.vrmpybsu"]
+    fn hexagon_M5_vrmpybsu(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M5.vrmpybuu"]
+    fn hexagon_M5_vrmpybuu(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.addasl.rrri"]
+    fn hexagon_S2_addasl_rrri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.i.p"]
+    fn hexagon_S2_asl_i_p(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.i.p.acc"]
+    fn hexagon_S2_asl_i_p_acc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.i.p.and"]
+    fn hexagon_S2_asl_i_p_and(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.i.p.nac"]
+    fn hexagon_S2_asl_i_p_nac(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.i.p.or"]
+    fn hexagon_S2_asl_i_p_or(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.i.p.xacc"]
+    fn hexagon_S2_asl_i_p_xacc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.i.r"]
+    fn hexagon_S2_asl_i_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.i.r.acc"]
+    fn hexagon_S2_asl_i_r_acc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.i.r.and"]
+    fn hexagon_S2_asl_i_r_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.i.r.nac"]
+    fn hexagon_S2_asl_i_r_nac(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.i.r.or"]
+    fn hexagon_S2_asl_i_r_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.i.r.sat"]
+    fn hexagon_S2_asl_i_r_sat(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.i.r.xacc"]
+    fn hexagon_S2_asl_i_r_xacc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.i.vh"]
+    fn hexagon_S2_asl_i_vh(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.i.vw"]
+    fn hexagon_S2_asl_i_vw(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.r.p"]
+    fn hexagon_S2_asl_r_p(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.r.p.acc"]
+    fn hexagon_S2_asl_r_p_acc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.r.p.and"]
+    fn hexagon_S2_asl_r_p_and(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.r.p.nac"]
+    fn hexagon_S2_asl_r_p_nac(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.r.p.or"]
+    fn hexagon_S2_asl_r_p_or(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.r.p.xor"]
+    fn hexagon_S2_asl_r_p_xor(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.r.r"]
+    fn hexagon_S2_asl_r_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.r.r.acc"]
+    fn hexagon_S2_asl_r_r_acc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.r.r.and"]
+    fn hexagon_S2_asl_r_r_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.r.r.nac"]
+    fn hexagon_S2_asl_r_r_nac(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.r.r.or"]
+    fn hexagon_S2_asl_r_r_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.r.r.sat"]
+    fn hexagon_S2_asl_r_r_sat(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asl.r.vh"]
+    fn hexagon_S2_asl_r_vh(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asl.r.vw"]
+    fn hexagon_S2_asl_r_vw(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.i.p"]
+    fn hexagon_S2_asr_i_p(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.i.p.acc"]
+    fn hexagon_S2_asr_i_p_acc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.i.p.and"]
+    fn hexagon_S2_asr_i_p_and(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.i.p.nac"]
+    fn hexagon_S2_asr_i_p_nac(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.i.p.or"]
+    fn hexagon_S2_asr_i_p_or(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.i.p.rnd"]
+    fn hexagon_S2_asr_i_p_rnd(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.i.p.rnd.goodsyntax"]
+    fn hexagon_S2_asr_i_p_rnd_goodsyntax(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.i.r"]
+    fn hexagon_S2_asr_i_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.i.r.acc"]
+    fn hexagon_S2_asr_i_r_acc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.i.r.and"]
+    fn hexagon_S2_asr_i_r_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.i.r.nac"]
+    fn hexagon_S2_asr_i_r_nac(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.i.r.or"]
+    fn hexagon_S2_asr_i_r_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.i.r.rnd"]
+    fn hexagon_S2_asr_i_r_rnd(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.i.r.rnd.goodsyntax"]
+    fn hexagon_S2_asr_i_r_rnd_goodsyntax(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.i.svw.trun"]
+    fn hexagon_S2_asr_i_svw_trun(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.i.vh"]
+    fn hexagon_S2_asr_i_vh(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.i.vw"]
+    fn hexagon_S2_asr_i_vw(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.r.p"]
+    fn hexagon_S2_asr_r_p(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.r.p.acc"]
+    fn hexagon_S2_asr_r_p_acc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.r.p.and"]
+    fn hexagon_S2_asr_r_p_and(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.r.p.nac"]
+    fn hexagon_S2_asr_r_p_nac(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.r.p.or"]
+    fn hexagon_S2_asr_r_p_or(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.r.p.xor"]
+    fn hexagon_S2_asr_r_p_xor(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.r.r"]
+    fn hexagon_S2_asr_r_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.r.r.acc"]
+    fn hexagon_S2_asr_r_r_acc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.r.r.and"]
+    fn hexagon_S2_asr_r_r_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.r.r.nac"]
+    fn hexagon_S2_asr_r_r_nac(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.r.r.or"]
+    fn hexagon_S2_asr_r_r_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.r.r.sat"]
+    fn hexagon_S2_asr_r_r_sat(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.r.svw.trun"]
+    fn hexagon_S2_asr_r_svw_trun(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.asr.r.vh"]
+    fn hexagon_S2_asr_r_vh(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.asr.r.vw"]
+    fn hexagon_S2_asr_r_vw(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.brev"]
+    fn hexagon_S2_brev(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.brevp"]
+    fn hexagon_S2_brevp(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.cl0"]
+    fn hexagon_S2_cl0(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.cl0p"]
+    fn hexagon_S2_cl0p(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.cl1"]
+    fn hexagon_S2_cl1(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.cl1p"]
+    fn hexagon_S2_cl1p(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.clb"]
+    fn hexagon_S2_clb(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.clbnorm"]
+    fn hexagon_S2_clbnorm(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.clbp"]
+    fn hexagon_S2_clbp(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.clrbit.i"]
+    fn hexagon_S2_clrbit_i(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.clrbit.r"]
+    fn hexagon_S2_clrbit_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.ct0"]
+    fn hexagon_S2_ct0(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.ct0p"]
+    fn hexagon_S2_ct0p(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.ct1"]
+    fn hexagon_S2_ct1(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.ct1p"]
+    fn hexagon_S2_ct1p(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.deinterleave"]
+    fn hexagon_S2_deinterleave(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.extractu"]
+    fn hexagon_S2_extractu(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.extractu.rp"]
+    fn hexagon_S2_extractu_rp(_: i32, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.extractup"]
+    fn hexagon_S2_extractup(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.extractup.rp"]
+    fn hexagon_S2_extractup_rp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.insert"]
+    fn hexagon_S2_insert(_: i32, _: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.insert.rp"]
+    fn hexagon_S2_insert_rp(_: i32, _: i32, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.insertp"]
+    fn hexagon_S2_insertp(_: i64, _: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.insertp.rp"]
+    fn hexagon_S2_insertp_rp(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.interleave"]
+    fn hexagon_S2_interleave(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.lfsp"]
+    fn hexagon_S2_lfsp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsl.r.p"]
+    fn hexagon_S2_lsl_r_p(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsl.r.p.acc"]
+    fn hexagon_S2_lsl_r_p_acc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsl.r.p.and"]
+    fn hexagon_S2_lsl_r_p_and(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsl.r.p.nac"]
+    fn hexagon_S2_lsl_r_p_nac(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsl.r.p.or"]
+    fn hexagon_S2_lsl_r_p_or(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsl.r.p.xor"]
+    fn hexagon_S2_lsl_r_p_xor(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsl.r.r"]
+    fn hexagon_S2_lsl_r_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsl.r.r.acc"]
+    fn hexagon_S2_lsl_r_r_acc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsl.r.r.and"]
+    fn hexagon_S2_lsl_r_r_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsl.r.r.nac"]
+    fn hexagon_S2_lsl_r_r_nac(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsl.r.r.or"]
+    fn hexagon_S2_lsl_r_r_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsl.r.vh"]
+    fn hexagon_S2_lsl_r_vh(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsl.r.vw"]
+    fn hexagon_S2_lsl_r_vw(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.i.p"]
+    fn hexagon_S2_lsr_i_p(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.i.p.acc"]
+    fn hexagon_S2_lsr_i_p_acc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.i.p.and"]
+    fn hexagon_S2_lsr_i_p_and(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.i.p.nac"]
+    fn hexagon_S2_lsr_i_p_nac(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.i.p.or"]
+    fn hexagon_S2_lsr_i_p_or(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.i.p.xacc"]
+    fn hexagon_S2_lsr_i_p_xacc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.i.r"]
+    fn hexagon_S2_lsr_i_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.i.r.acc"]
+    fn hexagon_S2_lsr_i_r_acc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.i.r.and"]
+    fn hexagon_S2_lsr_i_r_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.i.r.nac"]
+    fn hexagon_S2_lsr_i_r_nac(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.i.r.or"]
+    fn hexagon_S2_lsr_i_r_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.i.r.xacc"]
+    fn hexagon_S2_lsr_i_r_xacc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.i.vh"]
+    fn hexagon_S2_lsr_i_vh(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.i.vw"]
+    fn hexagon_S2_lsr_i_vw(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.r.p"]
+    fn hexagon_S2_lsr_r_p(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.r.p.acc"]
+    fn hexagon_S2_lsr_r_p_acc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.r.p.and"]
+    fn hexagon_S2_lsr_r_p_and(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.r.p.nac"]
+    fn hexagon_S2_lsr_r_p_nac(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.r.p.or"]
+    fn hexagon_S2_lsr_r_p_or(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.r.p.xor"]
+    fn hexagon_S2_lsr_r_p_xor(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.r.r"]
+    fn hexagon_S2_lsr_r_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.r.r.acc"]
+    fn hexagon_S2_lsr_r_r_acc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.r.r.and"]
+    fn hexagon_S2_lsr_r_r_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.r.r.nac"]
+    fn hexagon_S2_lsr_r_r_nac(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.r.r.or"]
+    fn hexagon_S2_lsr_r_r_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.lsr.r.vh"]
+    fn hexagon_S2_lsr_r_vh(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.lsr.r.vw"]
+    fn hexagon_S2_lsr_r_vw(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.packhl"]
+    fn hexagon_S2_packhl(_: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.parityp"]
+    fn hexagon_S2_parityp(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.setbit.i"]
+    fn hexagon_S2_setbit_i(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.setbit.r"]
+    fn hexagon_S2_setbit_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.shuffeb"]
+    fn hexagon_S2_shuffeb(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.shuffeh"]
+    fn hexagon_S2_shuffeh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.shuffob"]
+    fn hexagon_S2_shuffob(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.shuffoh"]
+    fn hexagon_S2_shuffoh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.svsathb"]
+    fn hexagon_S2_svsathb(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.svsathub"]
+    fn hexagon_S2_svsathub(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.tableidxb.goodsyntax"]
+    fn hexagon_S2_tableidxb_goodsyntax(_: i32, _: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.tableidxd.goodsyntax"]
+    fn hexagon_S2_tableidxd_goodsyntax(_: i32, _: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.tableidxh.goodsyntax"]
+    fn hexagon_S2_tableidxh_goodsyntax(_: i32, _: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.tableidxw.goodsyntax"]
+    fn hexagon_S2_tableidxw_goodsyntax(_: i32, _: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.togglebit.i"]
+    fn hexagon_S2_togglebit_i(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.togglebit.r"]
+    fn hexagon_S2_togglebit_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.tstbit.i"]
+    fn hexagon_S2_tstbit_i(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.tstbit.r"]
+    fn hexagon_S2_tstbit_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.valignib"]
+    fn hexagon_S2_valignib(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.valignrb"]
+    fn hexagon_S2_valignrb(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vcnegh"]
+    fn hexagon_S2_vcnegh(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vcrotate"]
+    fn hexagon_S2_vcrotate(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vrcnegh"]
+    fn hexagon_S2_vrcnegh(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vrndpackwh"]
+    fn hexagon_S2_vrndpackwh(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.vrndpackwhs"]
+    fn hexagon_S2_vrndpackwhs(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.vsathb"]
+    fn hexagon_S2_vsathb(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.vsathb.nopack"]
+    fn hexagon_S2_vsathb_nopack(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.vsathub"]
+    fn hexagon_S2_vsathub(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.vsathub.nopack"]
+    fn hexagon_S2_vsathub_nopack(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.vsatwh"]
+    fn hexagon_S2_vsatwh(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.vsatwh.nopack"]
+    fn hexagon_S2_vsatwh_nopack(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.vsatwuh"]
+    fn hexagon_S2_vsatwuh(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.vsatwuh.nopack"]
+    fn hexagon_S2_vsatwuh_nopack(_: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.vsplatrb"]
+    fn hexagon_S2_vsplatrb(_: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.vsplatrh"]
+    fn hexagon_S2_vsplatrh(_: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vspliceib"]
+    fn hexagon_S2_vspliceib(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vsplicerb"]
+    fn hexagon_S2_vsplicerb(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vsxtbh"]
+    fn hexagon_S2_vsxtbh(_: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vsxthw"]
+    fn hexagon_S2_vsxthw(_: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vtrunehb"]
+    fn hexagon_S2_vtrunehb(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.vtrunewh"]
+    fn hexagon_S2_vtrunewh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.vtrunohb"]
+    fn hexagon_S2_vtrunohb(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S2.vtrunowh"]
+    fn hexagon_S2_vtrunowh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S2.vzxtbh"]
+    fn hexagon_S2_vzxtbh(_: i32) -> i64;
+    #[link_name = "llvm.hexagon.S2.vzxthw"]
+    fn hexagon_S2_vzxthw(_: i32) -> i64;
+    #[link_name = "llvm.hexagon.S4.addaddi"]
+    fn hexagon_S4_addaddi(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.addi.asl.ri"]
+    fn hexagon_S4_addi_asl_ri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.addi.lsr.ri"]
+    fn hexagon_S4_addi_lsr_ri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.andi.asl.ri"]
+    fn hexagon_S4_andi_asl_ri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.andi.lsr.ri"]
+    fn hexagon_S4_andi_lsr_ri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.clbaddi"]
+    fn hexagon_S4_clbaddi(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.clbpaddi"]
+    fn hexagon_S4_clbpaddi(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.clbpnorm"]
+    fn hexagon_S4_clbpnorm(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S4.extract"]
+    fn hexagon_S4_extract(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.extract.rp"]
+    fn hexagon_S4_extract_rp(_: i32, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.S4.extractp"]
+    fn hexagon_S4_extractp(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S4.extractp.rp"]
+    fn hexagon_S4_extractp_rp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S4.lsli"]
+    fn hexagon_S4_lsli(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.ntstbit.i"]
+    fn hexagon_S4_ntstbit_i(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.ntstbit.r"]
+    fn hexagon_S4_ntstbit_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.or.andi"]
+    fn hexagon_S4_or_andi(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.or.andix"]
+    fn hexagon_S4_or_andix(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.or.ori"]
+    fn hexagon_S4_or_ori(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.ori.asl.ri"]
+    fn hexagon_S4_ori_asl_ri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.ori.lsr.ri"]
+    fn hexagon_S4_ori_lsr_ri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.parity"]
+    fn hexagon_S4_parity(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.subaddi"]
+    fn hexagon_S4_subaddi(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.subi.asl.ri"]
+    fn hexagon_S4_subi_asl_ri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.subi.lsr.ri"]
+    fn hexagon_S4_subi_lsr_ri(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S4.vrcrotate"]
+    fn hexagon_S4_vrcrotate(_: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S4.vrcrotate.acc"]
+    fn hexagon_S4_vrcrotate_acc(_: i64, _: i64, _: i32, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S4.vxaddsubh"]
+    fn hexagon_S4_vxaddsubh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S4.vxaddsubhr"]
+    fn hexagon_S4_vxaddsubhr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S4.vxaddsubw"]
+    fn hexagon_S4_vxaddsubw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S4.vxsubaddh"]
+    fn hexagon_S4_vxsubaddh(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S4.vxsubaddhr"]
+    fn hexagon_S4_vxsubaddhr(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S4.vxsubaddw"]
+    fn hexagon_S4_vxsubaddw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S5.asrhub.rnd.sat.goodsyntax"]
+    fn hexagon_S5_asrhub_rnd_sat_goodsyntax(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S5.asrhub.sat"]
+    fn hexagon_S5_asrhub_sat(_: i64, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S5.popcountp"]
+    fn hexagon_S5_popcountp(_: i64) -> i32;
+    #[link_name = "llvm.hexagon.S5.vasrhrnd.goodsyntax"]
+    fn hexagon_S5_vasrhrnd_goodsyntax(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.Y2.dccleana"]
+    fn hexagon_Y2_dccleana(_: i32);
+    #[link_name = "llvm.hexagon.Y2.dccleaninva"]
+    fn hexagon_Y2_dccleaninva(_: i32);
+    #[link_name = "llvm.hexagon.Y2.dcfetch"]
+    fn hexagon_Y2_dcfetch(_: i32);
+    #[link_name = "llvm.hexagon.Y2.dcinva"]
+    fn hexagon_Y2_dcinva(_: i32);
+    #[link_name = "llvm.hexagon.Y2.dczeroa"]
+    fn hexagon_Y2_dczeroa(_: i32);
+    #[link_name = "llvm.hexagon.Y4.l2fetch"]
+    fn hexagon_Y4_l2fetch(_: i32, _: i32);
+    #[link_name = "llvm.hexagon.Y5.l2fetch"]
+    fn hexagon_Y5_l2fetch(_: i32, _: i64);
+    #[link_name = "llvm.hexagon.S6.rol.i.p"]
+    fn hexagon_S6_rol_i_p(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S6.rol.i.p.acc"]
+    fn hexagon_S6_rol_i_p_acc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S6.rol.i.p.and"]
+    fn hexagon_S6_rol_i_p_and(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S6.rol.i.p.nac"]
+    fn hexagon_S6_rol_i_p_nac(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S6.rol.i.p.or"]
+    fn hexagon_S6_rol_i_p_or(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S6.rol.i.p.xacc"]
+    fn hexagon_S6_rol_i_p_xacc(_: i64, _: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.S6.rol.i.r"]
+    fn hexagon_S6_rol_i_r(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S6.rol.i.r.acc"]
+    fn hexagon_S6_rol_i_r_acc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S6.rol.i.r.and"]
+    fn hexagon_S6_rol_i_r_and(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S6.rol.i.r.nac"]
+    fn hexagon_S6_rol_i_r_nac(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S6.rol.i.r.or"]
+    fn hexagon_S6_rol_i_r_or(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S6.rol.i.r.xacc"]
+    fn hexagon_S6_rol_i_r_xacc(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.M6.vabsdiffb"]
+    fn hexagon_M6_vabsdiffb(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M6.vabsdiffub"]
+    fn hexagon_M6_vabsdiffub(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S6.vsplatrbp"]
+    fn hexagon_S6_vsplatrbp(_: i32) -> i64;
+    #[link_name = "llvm.hexagon.S6.vtrunehb.ppp"]
+    fn hexagon_S6_vtrunehb_ppp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.S6.vtrunohb.ppp"]
+    fn hexagon_S6_vtrunohb_ppp(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.A6.vcmpbeq.notany"]
+    fn hexagon_A6_vcmpbeq_notany(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.F2.dfadd"]
+    fn hexagon_F2_dfadd(_: f64, _: f64) -> f64;
+    #[link_name = "llvm.hexagon.F2.dfsub"]
+    fn hexagon_F2_dfsub(_: f64, _: f64) -> f64;
+    #[link_name = "llvm.hexagon.M2.mnaci"]
+    fn hexagon_M2_mnaci(_: i32, _: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.S2.mask"]
+    fn hexagon_S2_mask(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A7.clip"]
+    fn hexagon_A7_clip(_: i32, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.A7.croundd.ri"]
+    fn hexagon_A7_croundd_ri(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A7.croundd.rr"]
+    fn hexagon_A7_croundd_rr(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.A7.vclip"]
+    fn hexagon_A7_vclip(_: i64, _: i32) -> i64;
+    #[link_name = "llvm.hexagon.F2.dfmax"]
+    fn hexagon_F2_dfmax(_: f64, _: f64) -> f64;
+    #[link_name = "llvm.hexagon.F2.dfmin"]
+    fn hexagon_F2_dfmin(_: f64, _: f64) -> f64;
+    #[link_name = "llvm.hexagon.F2.dfmpyfix"]
+    fn hexagon_F2_dfmpyfix(_: f64, _: f64) -> f64;
+    #[link_name = "llvm.hexagon.F2.dfmpyhh"]
+    fn hexagon_F2_dfmpyhh(_: f64, _: f64, _: f64) -> f64;
+    #[link_name = "llvm.hexagon.F2.dfmpylh"]
+    fn hexagon_F2_dfmpylh(_: f64, _: f64, _: f64) -> f64;
+    #[link_name = "llvm.hexagon.F2.dfmpyll"]
+    fn hexagon_F2_dfmpyll(_: f64, _: f64) -> f64;
+    #[link_name = "llvm.hexagon.M7.dcmpyiw"]
+    fn hexagon_M7_dcmpyiw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.dcmpyiw.acc"]
+    fn hexagon_M7_dcmpyiw_acc(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.dcmpyiwc"]
+    fn hexagon_M7_dcmpyiwc(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.dcmpyiwc.acc"]
+    fn hexagon_M7_dcmpyiwc_acc(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.dcmpyrw"]
+    fn hexagon_M7_dcmpyrw(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.dcmpyrw.acc"]
+    fn hexagon_M7_dcmpyrw_acc(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.dcmpyrwc"]
+    fn hexagon_M7_dcmpyrwc(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.dcmpyrwc.acc"]
+    fn hexagon_M7_dcmpyrwc_acc(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.vdmpy"]
+    fn hexagon_M7_vdmpy(_: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.vdmpy.acc"]
+    fn hexagon_M7_vdmpy_acc(_: i64, _: i64, _: i64) -> i64;
+    #[link_name = "llvm.hexagon.M7.wcmpyiw"]
+    fn hexagon_M7_wcmpyiw(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M7.wcmpyiw.rnd"]
+    fn hexagon_M7_wcmpyiw_rnd(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M7.wcmpyiwc"]
+    fn hexagon_M7_wcmpyiwc(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M7.wcmpyiwc.rnd"]
+    fn hexagon_M7_wcmpyiwc_rnd(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M7.wcmpyrw"]
+    fn hexagon_M7_wcmpyrw(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M7.wcmpyrw.rnd"]
+    fn hexagon_M7_wcmpyrw_rnd(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M7.wcmpyrwc"]
+    fn hexagon_M7_wcmpyrwc(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.M7.wcmpyrwc.rnd"]
+    fn hexagon_M7_wcmpyrwc_rnd(_: i64, _: i64) -> i32;
+    #[link_name = "llvm.hexagon.Y6.dmlink"]
+    fn hexagon_Y6_dmlink(_: i32, _: i32);
+    #[link_name = "llvm.hexagon.Y6.dmpause"]
+    fn hexagon_Y6_dmpause() -> i32;
+    #[link_name = "llvm.hexagon.Y6.dmpoll"]
+    fn hexagon_Y6_dmpoll() -> i32;
+    #[link_name = "llvm.hexagon.Y6.dmresume"]
+    fn hexagon_Y6_dmresume(_: i32);
+    #[link_name = "llvm.hexagon.Y6.dmstart"]
+    fn hexagon_Y6_dmstart(_: i32);
+    #[link_name = "llvm.hexagon.Y6.dmwait"]
+    fn hexagon_Y6_dmwait() -> i32;
+}
+
+/// `Rd32=abs(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(abs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_abs_R(rs: i32) -> i32 {
+    hexagon_A2_abs(rs)
+}
+
+/// `Rdd32=abs(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(abs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_abs_P(rss: i64) -> i64 {
+    hexagon_A2_absp(rss)
+}
+
+/// `Rd32=abs(Rs32):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(abs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_abs_R_sat(rs: i32) -> i32 {
+    hexagon_A2_abssat(rs)
+}
+
+/// `Rd32=add(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_add(rs, rt)
+}
+
+/// `Rd32=add(Rt32.h,Rs32.h):<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RhRh_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_h16_hh(rt, rs)
+}
+
+/// `Rd32=add(Rt32.h,Rs32.l):<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RhRl_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_h16_hl(rt, rs)
+}
+
+/// `Rd32=add(Rt32.l,Rs32.h):<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RlRh_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_h16_lh(rt, rs)
+}
+
+/// `Rd32=add(Rt32.l,Rs32.l):<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RlRl_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_h16_ll(rt, rs)
+}
+
+/// `Rd32=add(Rt32.h,Rs32.h):sat:<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RhRh_sat_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_h16_sat_hh(rt, rs)
+}
+
+/// `Rd32=add(Rt32.h,Rs32.l):sat:<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RhRl_sat_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_h16_sat_hl(rt, rs)
+}
+
+/// `Rd32=add(Rt32.l,Rs32.h):sat:<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RlRh_sat_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_h16_sat_lh(rt, rs)
+}
+
+/// `Rd32=add(Rt32.l,Rs32.l):sat:<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RlRl_sat_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_h16_sat_ll(rt, rs)
+}
+
+/// `Rd32=add(Rt32.l,Rs32.h)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RlRh(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_l16_hl(rt, rs)
+}
+
+/// `Rd32=add(Rt32.l,Rs32.l)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RlRl(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_l16_ll(rt, rs)
+}
+
+/// `Rd32=add(Rt32.l,Rs32.h):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RlRh_sat(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_l16_sat_hl(rt, rs)
+}
+
+/// `Rd32=add(Rt32.l,Rs32.l):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RlRl_sat(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_addh_l16_sat_ll(rt, rs)
+}
+
+/// `Rd32=add(Rs32,#s16)`
+///
+/// Instruction Type: ALU32_ADDI
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(add, IS16 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RI<const IS16: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS16, 16);
+    hexagon_A2_addi(rs, IS16)
+}
+
+/// `Rdd32=add(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_add_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_addp(rss, rtt)
+}
+
+/// `Rdd32=add(Rss32,Rtt32):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_add_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_addpsat(rss, rtt)
+}
+
+/// `Rd32=add(Rs32,Rt32):sat`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_RR_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_addsat(rs, rt)
+}
+
+/// `Rdd32=add(Rs32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_add_RP(rs: i32, rtt: i64) -> i64 {
+    hexagon_A2_addsp(rs, rtt)
+}
+
+/// `Rd32=and(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_and_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_and(rs, rt)
+}
+
+/// `Rd32=and(Rs32,#s10)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(and, IS10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_and_RI<const IS10: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_A2_andir(rs, IS10)
+}
+
+/// `Rdd32=and(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_and_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_andp(rss, rtt)
+}
+
+/// `Rd32=aslh(Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(aslh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_aslh_R(rs: i32) -> i32 {
+    hexagon_A2_aslh(rs)
+}
+
+/// `Rd32=asrh(Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asrh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asrh_R(rs: i32) -> i32 {
+    hexagon_A2_asrh(rs)
+}
+
+/// `Rd32=combine(Rt32.h,Rs32.h)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(combine))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_combine_RhRh(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_combine_hh(rt, rs)
+}
+
+/// `Rd32=combine(Rt32.h,Rs32.l)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(combine))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_combine_RhRl(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_combine_hl(rt, rs)
+}
+
+/// `Rd32=combine(Rt32.l,Rs32.h)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(combine))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_combine_RlRh(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_combine_lh(rt, rs)
+}
+
+/// `Rd32=combine(Rt32.l,Rs32.l)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(combine))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_combine_RlRl(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_combine_ll(rt, rs)
+}
+
+/// `Rdd32=combine(#s8,#S8)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 1)]
+#[cfg_attr(test, assert_instr(combine, IS8 = 0, IS8_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_combine_II<const IS8: i32, const IS8_2: i32>() -> i64 {
+    static_assert_simm_bits!(IS8, 8);
+    static_assert_simm_bits!(IS8_2, 8);
+    hexagon_A2_combineii(IS8, IS8_2)
+}
+
+/// `Rdd32=combine(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(combine))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_combine_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_A2_combinew(rs, rt)
+}
+
+/// `Rd32=max(Rs32,Rt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(max))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_max_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_max(rs, rt)
+}
+
+/// `Rdd32=max(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(max))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_max_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_maxp(rss, rtt)
+}
+
+/// `Rd32=maxu(Rs32,Rt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(maxu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_maxu_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_maxu(rs, rt)
+}
+
+/// `Rdd32=maxu(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(maxu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_maxu_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_maxup(rss, rtt)
+}
+
+/// `Rd32=min(Rt32,Rs32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(min))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_min_RR(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_min(rt, rs)
+}
+
+/// `Rdd32=min(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(min))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_min_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_minp(rtt, rss)
+}
+
+/// `Rd32=minu(Rt32,Rs32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(minu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_minu_RR(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_minu(rt, rs)
+}
+
+/// `Rdd32=minu(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(minu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_minu_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_minup(rtt, rss)
+}
+
+/// `Rd32=neg(Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(neg))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_neg_R(rs: i32) -> i32 {
+    hexagon_A2_neg(rs)
+}
+
+/// `Rdd32=neg(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(neg))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_neg_P(rss: i64) -> i64 {
+    hexagon_A2_negp(rss)
+}
+
+/// `Rd32=neg(Rs32):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(neg))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_neg_R_sat(rs: i32) -> i32 {
+    hexagon_A2_negsat(rs)
+}
+
+/// `Rd32=not(Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(not))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_not_R(rs: i32) -> i32 {
+    hexagon_A2_not(rs)
+}
+
+/// `Rdd32=not(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(not))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_not_P(rss: i64) -> i64 {
+    hexagon_A2_notp(rss)
+}
+
+/// `Rd32=or(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_or_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_or(rs, rt)
+}
+
+/// `Rd32=or(Rs32,#s10)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(or, IS10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_or_RI<const IS10: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_A2_orir(rs, IS10)
+}
+
+/// `Rdd32=or(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_or_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_orp(rss, rtt)
+}
+
+/// `Rd32=round(Rss32):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(round))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_round_P_sat(rss: i64) -> i32 {
+    hexagon_A2_roundsat(rss)
+}
+
+/// `Rd32=sat(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sat_P(rss: i64) -> i32 {
+    hexagon_A2_sat(rss)
+}
+
+/// `Rd32=satb(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(satb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_satb_R(rs: i32) -> i32 {
+    hexagon_A2_satb(rs)
+}
+
+/// `Rd32=sath(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sath))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sath_R(rs: i32) -> i32 {
+    hexagon_A2_sath(rs)
+}
+
+/// `Rd32=satub(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(satub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_satub_R(rs: i32) -> i32 {
+    hexagon_A2_satub(rs)
+}
+
+/// `Rd32=satuh(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(satuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_satuh_R(rs: i32) -> i32 {
+    hexagon_A2_satuh(rs)
+}
+
+/// `Rd32=sub(Rt32,Rs32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RR(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_sub(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.h,Rs32.h):<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RhRh_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_h16_hh(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.h,Rs32.l):<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RhRl_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_h16_hl(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.l,Rs32.h):<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RlRh_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_h16_lh(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.l,Rs32.l):<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RlRl_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_h16_ll(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.h,Rs32.h):sat:<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RhRh_sat_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_h16_sat_hh(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.h,Rs32.l):sat:<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RhRl_sat_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_h16_sat_hl(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.l,Rs32.h):sat:<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RlRh_sat_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_h16_sat_lh(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.l,Rs32.l):sat:<<16`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RlRl_sat_s16(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_h16_sat_ll(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.l,Rs32.h)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RlRh(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_l16_hl(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.l,Rs32.l)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RlRl(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_l16_ll(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.l,Rs32.h):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RlRh_sat(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_l16_sat_hl(rt, rs)
+}
+
+/// `Rd32=sub(Rt32.l,Rs32.l):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RlRl_sat(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subh_l16_sat_ll(rt, rs)
+}
+
+/// `Rdd32=sub(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_sub_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_subp(rtt, rss)
+}
+
+/// `Rd32=sub(#s10,Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[cfg_attr(test, assert_instr(sub, IS10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_IR<const IS10: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_A2_subri(IS10, rs)
+}
+
+/// `Rd32=sub(Rt32,Rs32):sat`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_RR_sat(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_subsat(rt, rs)
+}
+
+/// `Rd32=vaddh(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vaddh_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_svaddh(rs, rt)
+}
+
+/// `Rd32=vaddh(Rs32,Rt32):sat`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vaddh_RR_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_svaddhs(rs, rt)
+}
+
+/// `Rd32=vadduh(Rs32,Rt32):sat`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vadduh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vadduh_RR_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_svadduhs(rs, rt)
+}
+
+/// `Rd32=vavgh(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vavgh_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_svavgh(rs, rt)
+}
+
+/// `Rd32=vavgh(Rs32,Rt32):rnd`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vavgh_RR_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_svavghs(rs, rt)
+}
+
+/// `Rd32=vnavgh(Rt32,Rs32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vnavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vnavgh_RR(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_svnavgh(rt, rs)
+}
+
+/// `Rd32=vsubh(Rt32,Rs32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsubh_RR(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_svsubh(rt, rs)
+}
+
+/// `Rd32=vsubh(Rt32,Rs32):sat`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsubh_RR_sat(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_svsubhs(rt, rs)
+}
+
+/// `Rd32=vsubuh(Rt32,Rs32):sat`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsubuh_RR_sat(rt: i32, rs: i32) -> i32 {
+    hexagon_A2_svsubuhs(rt, rs)
+}
+
+/// `Rd32=swiz(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(swiz))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_swiz_R(rs: i32) -> i32 {
+    hexagon_A2_swiz(rs)
+}
+
+/// `Rd32=sxtb(Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sxtb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sxtb_R(rs: i32) -> i32 {
+    hexagon_A2_sxtb(rs)
+}
+
+/// `Rd32=sxth(Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sxth))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sxth_R(rs: i32) -> i32 {
+    hexagon_A2_sxth(rs)
+}
+
+/// `Rdd32=sxtw(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sxtw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_sxtw_R(rs: i32) -> i64 {
+    hexagon_A2_sxtw(rs)
+}
+
+/// `Rd32=Rs32`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_equals_R(rs: i32) -> i32 {
+    hexagon_A2_tfr(rs)
+}
+
+/// `Rx32.h=#u16`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Rh_equals_I<const IU16: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU16, 16);
+    hexagon_A2_tfrih(rx, IU16 as i32)
+}
+
+/// `Rx32.l=#u16`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Rl_equals_I<const IU16: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU16, 16);
+    hexagon_A2_tfril(rx, IU16 as i32)
+}
+
+/// `Rdd32=Rss32`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_equals_P(rss: i64) -> i64 {
+    hexagon_A2_tfrp(rss)
+}
+
+/// `Rdd32=#s8`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_equals_I<const IS8: i32>() -> i64 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A2_tfrpi(IS8)
+}
+
+/// `Rd32=#s16`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_equals_I<const IS16: i32>() -> i32 {
+    static_assert_simm_bits!(IS16, 16);
+    hexagon_A2_tfrsi(IS16)
+}
+
+/// `Rdd32=vabsh(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vabsh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vabsh_P(rss: i64) -> i64 {
+    hexagon_A2_vabsh(rss)
+}
+
+/// `Rdd32=vabsh(Rss32):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vabsh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vabsh_P_sat(rss: i64) -> i64 {
+    hexagon_A2_vabshsat(rss)
+}
+
+/// `Rdd32=vabsw(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vabsw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vabsw_P(rss: i64) -> i64 {
+    hexagon_A2_vabsw(rss)
+}
+
+/// `Rdd32=vabsw(Rss32):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vabsw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vabsw_P_sat(rss: i64) -> i64 {
+    hexagon_A2_vabswsat(rss)
+}
+
+/// `Rdd32=vaddb(Rss32,Rtt32)`
+///
+/// Instruction Type: MAPPING
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaddb_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vaddb_map(rss, rtt)
+}
+
+/// `Rdd32=vaddh(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaddh_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vaddh(rss, rtt)
+}
+
+/// `Rdd32=vaddh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaddh_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vaddhs(rss, rtt)
+}
+
+/// `Rdd32=vaddub(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaddub_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vaddub(rss, rtt)
+}
+
+/// `Rdd32=vaddub(Rss32,Rtt32):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaddub_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vaddubs(rss, rtt)
+}
+
+/// `Rdd32=vadduh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vadduh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vadduh_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vadduhs(rss, rtt)
+}
+
+/// `Rdd32=vaddw(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaddw_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vaddw(rss, rtt)
+}
+
+/// `Rdd32=vaddw(Rss32,Rtt32):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaddw_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vaddws(rss, rtt)
+}
+
+/// `Rdd32=vavgh(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavgh_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavgh(rss, rtt)
+}
+
+/// `Rdd32=vavgh(Rss32,Rtt32):crnd`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavgh_PP_crnd(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavghcr(rss, rtt)
+}
+
+/// `Rdd32=vavgh(Rss32,Rtt32):rnd`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavgh_PP_rnd(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavghr(rss, rtt)
+}
+
+/// `Rdd32=vavgub(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavgub_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavgub(rss, rtt)
+}
+
+/// `Rdd32=vavgub(Rss32,Rtt32):rnd`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavgub_PP_rnd(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavgubr(rss, rtt)
+}
+
+/// `Rdd32=vavguh(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavguh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavguh_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavguh(rss, rtt)
+}
+
+/// `Rdd32=vavguh(Rss32,Rtt32):rnd`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavguh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavguh_PP_rnd(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavguhr(rss, rtt)
+}
+
+/// `Rdd32=vavguw(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavguw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavguw_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavguw(rss, rtt)
+}
+
+/// `Rdd32=vavguw(Rss32,Rtt32):rnd`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavguw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavguw_PP_rnd(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavguwr(rss, rtt)
+}
+
+/// `Rdd32=vavgw(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavgw_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavgw(rss, rtt)
+}
+
+/// `Rdd32=vavgw(Rss32,Rtt32):crnd`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavgw_PP_crnd(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavgwcr(rss, rtt)
+}
+
+/// `Rdd32=vavgw(Rss32,Rtt32):rnd`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vavgw_PP_rnd(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vavgwr(rss, rtt)
+}
+
+/// `Pd4=vcmpb.eq(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpb_eq_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A2_vcmpbeq(rss, rtt)
+}
+
+/// `Pd4=vcmpb.gtu(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpb_gtu_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A2_vcmpbgtu(rss, rtt)
+}
+
+/// `Pd4=vcmph.eq(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmph))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmph_eq_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A2_vcmpheq(rss, rtt)
+}
+
+/// `Pd4=vcmph.gt(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmph))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmph_gt_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A2_vcmphgt(rss, rtt)
+}
+
+/// `Pd4=vcmph.gtu(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmph))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmph_gtu_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A2_vcmphgtu(rss, rtt)
+}
+
+/// `Pd4=vcmpw.eq(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpw_eq_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A2_vcmpweq(rss, rtt)
+}
+
+/// `Pd4=vcmpw.gt(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpw_gt_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A2_vcmpwgt(rss, rtt)
+}
+
+/// `Pd4=vcmpw.gtu(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpw_gtu_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A2_vcmpwgtu(rss, rtt)
+}
+
+/// `Rdd32=vconj(Rss32):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vconj))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vconj_P_sat(rss: i64) -> i64 {
+    hexagon_A2_vconj(rss)
+}
+
+/// `Rdd32=vmaxb(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmaxb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmaxb_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vmaxb(rtt, rss)
+}
+
+/// `Rdd32=vmaxh(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmaxh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmaxh_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vmaxh(rtt, rss)
+}
+
+/// `Rdd32=vmaxub(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmaxub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmaxub_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vmaxub(rtt, rss)
+}
+
+/// `Rdd32=vmaxuh(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmaxuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmaxuh_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vmaxuh(rtt, rss)
+}
+
+/// `Rdd32=vmaxuw(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmaxuw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmaxuw_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vmaxuw(rtt, rss)
+}
+
+/// `Rdd32=vmaxw(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmaxw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmaxw_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vmaxw(rtt, rss)
+}
+
+/// `Rdd32=vminb(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vminb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vminb_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vminb(rtt, rss)
+}
+
+/// `Rdd32=vminh(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vminh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vminh_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vminh(rtt, rss)
+}
+
+/// `Rdd32=vminub(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vminub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vminub_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vminub(rtt, rss)
+}
+
+/// `Rdd32=vminuh(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vminuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vminuh_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vminuh(rtt, rss)
+}
+
+/// `Rdd32=vminuw(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vminuw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vminuw_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vminuw(rtt, rss)
+}
+
+/// `Rdd32=vminw(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vminw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vminw_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vminw(rtt, rss)
+}
+
+/// `Rdd32=vnavgh(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vnavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vnavgh_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vnavgh(rtt, rss)
+}
+
+/// `Rdd32=vnavgh(Rtt32,Rss32):crnd:sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vnavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vnavgh_PP_crnd_sat(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vnavghcr(rtt, rss)
+}
+
+/// `Rdd32=vnavgh(Rtt32,Rss32):rnd:sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vnavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vnavgh_PP_rnd_sat(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vnavghr(rtt, rss)
+}
+
+/// `Rdd32=vnavgw(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vnavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vnavgw_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vnavgw(rtt, rss)
+}
+
+/// `Rdd32=vnavgw(Rtt32,Rss32):crnd:sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vnavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vnavgw_PP_crnd_sat(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vnavgwcr(rtt, rss)
+}
+
+/// `Rdd32=vnavgw(Rtt32,Rss32):rnd:sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vnavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vnavgw_PP_rnd_sat(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vnavgwr(rtt, rss)
+}
+
+/// `Rdd32=vraddub(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vraddub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vraddub_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vraddub(rss, rtt)
+}
+
+/// `Rxx32+=vraddub(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vraddub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vraddubacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vraddub_acc(rxx, rss, rtt)
+}
+
+/// `Rdd32=vrsadub(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrsadub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrsadub_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vrsadub(rss, rtt)
+}
+
+/// `Rxx32+=vrsadub(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrsadub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrsadubacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vrsadub_acc(rxx, rss, rtt)
+}
+
+/// `Rdd32=vsubb(Rss32,Rtt32)`
+///
+/// Instruction Type: MAPPING
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsubb_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_vsubb_map(rss, rtt)
+}
+
+/// `Rdd32=vsubh(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsubh_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vsubh(rtt, rss)
+}
+
+/// `Rdd32=vsubh(Rtt32,Rss32):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsubh_PP_sat(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vsubhs(rtt, rss)
+}
+
+/// `Rdd32=vsubub(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsubub_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vsubub(rtt, rss)
+}
+
+/// `Rdd32=vsubub(Rtt32,Rss32):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsubub_PP_sat(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vsububs(rtt, rss)
+}
+
+/// `Rdd32=vsubuh(Rtt32,Rss32):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsubuh_PP_sat(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vsubuhs(rtt, rss)
+}
+
+/// `Rdd32=vsubw(Rtt32,Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsubw_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vsubw(rtt, rss)
+}
+
+/// `Rdd32=vsubw(Rtt32,Rss32):sat`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsubw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsubw_PP_sat(rtt: i64, rss: i64) -> i64 {
+    hexagon_A2_vsubws(rtt, rss)
+}
+
+/// `Rd32=xor(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(xor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_xor_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A2_xor(rs, rt)
+}
+
+/// `Rdd32=xor(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(xor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_xor_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_A2_xorp(rss, rtt)
+}
+
+/// `Rd32=zxtb(Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(zxtb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_zxtb_R(rs: i32) -> i32 {
+    hexagon_A2_zxtb(rs)
+}
+
+/// `Rd32=zxth(Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(zxth))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_zxth_R(rs: i32) -> i32 {
+    hexagon_A2_zxth(rs)
+}
+
+/// `Rd32=and(Rt32,~Rs32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_and_RnR(rt: i32, rs: i32) -> i32 {
+    hexagon_A4_andn(rt, rs)
+}
+
+/// `Rdd32=and(Rtt32,~Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_and_PnP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A4_andnp(rtt, rss)
+}
+
+/// `Rdd32=bitsplit(Rs32,Rt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(bitsplit))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_bitsplit_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_A4_bitsplit(rs, rt)
+}
+
+/// `Rdd32=bitsplit(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(bitsplit, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_bitsplit_RI<const IU5: u32>(rs: i32) -> i64 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_A4_bitspliti(rs, IU5 as i32)
+}
+
+/// `Pd4=boundscheck(Rs32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(boundscheck))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_boundscheck_RP(rs: i32, rtt: i64) -> i32 {
+    hexagon_A4_boundscheck(rs, rtt)
+}
+
+/// `Pd4=cmpb.eq(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmpb_eq_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_cmpbeq(rs, rt)
+}
+
+/// `Pd4=cmpb.eq(Rs32,#u8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmpb, IU8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmpb_eq_RI<const IU8: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    hexagon_A4_cmpbeqi(rs, IU8 as i32)
+}
+
+/// `Pd4=cmpb.gt(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmpb_gt_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_cmpbgt(rs, rt)
+}
+
+/// `Pd4=cmpb.gt(Rs32,#s8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmpb, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmpb_gt_RI<const IS8: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_cmpbgti(rs, IS8)
+}
+
+/// `Pd4=cmpb.gtu(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmpb_gtu_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_cmpbgtu(rs, rt)
+}
+
+/// `Pd4=cmpb.gtu(Rs32,#u7)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmpb, IU7 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmpb_gtu_RI<const IU7: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU7, 7);
+    hexagon_A4_cmpbgtui(rs, IU7 as i32)
+}
+
+/// `Pd4=cmph.eq(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmph))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmph_eq_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_cmpheq(rs, rt)
+}
+
+/// `Pd4=cmph.eq(Rs32,#s8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmph, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmph_eq_RI<const IS8: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_cmpheqi(rs, IS8)
+}
+
+/// `Pd4=cmph.gt(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmph))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmph_gt_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_cmphgt(rs, rt)
+}
+
+/// `Pd4=cmph.gt(Rs32,#s8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmph, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmph_gt_RI<const IS8: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_cmphgti(rs, IS8)
+}
+
+/// `Pd4=cmph.gtu(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmph))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmph_gtu_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_cmphgtu(rs, rt)
+}
+
+/// `Pd4=cmph.gtu(Rs32,#u7)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmph, IU7 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmph_gtu_RI<const IU7: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU7, 7);
+    hexagon_A4_cmphgtui(rs, IU7 as i32)
+}
+
+/// `Rdd32=combine(#s8,Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[cfg_attr(test, assert_instr(combine, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_combine_IR<const IS8: i32>(rs: i32) -> i64 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_combineir(IS8, rs)
+}
+
+/// `Rdd32=combine(Rs32,#s8)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(combine, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_combine_RI<const IS8: i32>(rs: i32) -> i64 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_combineri(rs, IS8)
+}
+
+/// `Rd32=cround(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cround, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cround_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_A4_cround_ri(rs, IU5 as i32)
+}
+
+/// `Rd32=cround(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cround))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cround_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_cround_rr(rs, rt)
+}
+
+/// `Rd32=modwrap(Rs32,Rt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(modwrap))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_modwrap_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_modwrapu(rs, rt)
+}
+
+/// `Rd32=or(Rt32,~Rs32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_or_RnR(rt: i32, rs: i32) -> i32 {
+    hexagon_A4_orn(rt, rs)
+}
+
+/// `Rdd32=or(Rtt32,~Rss32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_or_PnP(rtt: i64, rss: i64) -> i64 {
+    hexagon_A4_ornp(rtt, rss)
+}
+
+/// `Rd32=cmp.eq(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmp_eq_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_rcmpeq(rs, rt)
+}
+
+/// `Rd32=cmp.eq(Rs32,#s8)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmp, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmp_eq_RI<const IS8: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_rcmpeqi(rs, IS8)
+}
+
+/// `Rd32=!cmp.eq(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_not_cmp_eq_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_rcmpneq(rs, rt)
+}
+
+/// `Rd32=!cmp.eq(Rs32,#s8)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_not_cmp_eq_RI<const IS8: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_rcmpneqi(rs, IS8)
+}
+
+/// `Rd32=round(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(round, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_round_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_A4_round_ri(rs, IU5 as i32)
+}
+
+/// `Rd32=round(Rs32,#u5):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(round, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_round_RI_sat<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_A4_round_ri_sat(rs, IU5 as i32)
+}
+
+/// `Rd32=round(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(round))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_round_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_round_rr(rs, rt)
+}
+
+/// `Rd32=round(Rs32,Rt32):sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(round))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_round_RR_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_A4_round_rr_sat(rs, rt)
+}
+
+/// `Pd4=tlbmatch(Rss32,Rt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(tlbmatch))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_tlbmatch_PR(rss: i64, rt: i32) -> i32 {
+    hexagon_A4_tlbmatch(rss, rt)
+}
+
+/// `Pd4=any8(vcmpb.eq(Rss32,Rtt32))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(any8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_any8_vcmpb_eq_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A4_vcmpbeq_any(rss, rtt)
+}
+
+/// `Pd4=vcmpb.eq(Rss32,#u8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vcmpb, IU8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpb_eq_PI<const IU8: u32>(rss: i64) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    hexagon_A4_vcmpbeqi(rss, IU8 as i32)
+}
+
+/// `Pd4=vcmpb.gt(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpb_gt_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A4_vcmpbgt(rss, rtt)
+}
+
+/// `Pd4=vcmpb.gt(Rss32,#s8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vcmpb, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpb_gt_PI<const IS8: i32>(rss: i64) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_vcmpbgti(rss, IS8)
+}
+
+/// `Pd4=vcmpb.gtu(Rss32,#u7)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vcmpb, IU7 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpb_gtu_PI<const IU7: u32>(rss: i64) -> i32 {
+    static_assert_uimm_bits!(IU7, 7);
+    hexagon_A4_vcmpbgtui(rss, IU7 as i32)
+}
+
+/// `Pd4=vcmph.eq(Rss32,#s8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vcmph, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmph_eq_PI<const IS8: i32>(rss: i64) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_vcmpheqi(rss, IS8)
+}
+
+/// `Pd4=vcmph.gt(Rss32,#s8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vcmph, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmph_gt_PI<const IS8: i32>(rss: i64) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_vcmphgti(rss, IS8)
+}
+
+/// `Pd4=vcmph.gtu(Rss32,#u7)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vcmph, IU7 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmph_gtu_PI<const IU7: u32>(rss: i64) -> i32 {
+    static_assert_uimm_bits!(IU7, 7);
+    hexagon_A4_vcmphgtui(rss, IU7 as i32)
+}
+
+/// `Pd4=vcmpw.eq(Rss32,#s8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vcmpw, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpw_eq_PI<const IS8: i32>(rss: i64) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_vcmpweqi(rss, IS8)
+}
+
+/// `Pd4=vcmpw.gt(Rss32,#s8)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vcmpw, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpw_gt_PI<const IS8: i32>(rss: i64) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_A4_vcmpwgti(rss, IS8)
+}
+
+/// `Pd4=vcmpw.gtu(Rss32,#u7)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vcmpw, IU7 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_vcmpw_gtu_PI<const IU7: u32>(rss: i64) -> i32 {
+    static_assert_uimm_bits!(IU7, 7);
+    hexagon_A4_vcmpwgtui(rss, IU7 as i32)
+}
+
+/// `Rxx32=vrmaxh(Rss32,Ru32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmaxh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmaxh_PR(rxx: i64, rss: i64, ru: i32) -> i64 {
+    hexagon_A4_vrmaxh(rxx, rss, ru)
+}
+
+/// `Rxx32=vrmaxuh(Rss32,Ru32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmaxuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmaxuh_PR(rxx: i64, rss: i64, ru: i32) -> i64 {
+    hexagon_A4_vrmaxuh(rxx, rss, ru)
+}
+
+/// `Rxx32=vrmaxuw(Rss32,Ru32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmaxuw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmaxuw_PR(rxx: i64, rss: i64, ru: i32) -> i64 {
+    hexagon_A4_vrmaxuw(rxx, rss, ru)
+}
+
+/// `Rxx32=vrmaxw(Rss32,Ru32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmaxw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmaxw_PR(rxx: i64, rss: i64, ru: i32) -> i64 {
+    hexagon_A4_vrmaxw(rxx, rss, ru)
+}
+
+/// `Rxx32=vrminh(Rss32,Ru32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrminh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrminh_PR(rxx: i64, rss: i64, ru: i32) -> i64 {
+    hexagon_A4_vrminh(rxx, rss, ru)
+}
+
+/// `Rxx32=vrminuh(Rss32,Ru32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrminuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrminuh_PR(rxx: i64, rss: i64, ru: i32) -> i64 {
+    hexagon_A4_vrminuh(rxx, rss, ru)
+}
+
+/// `Rxx32=vrminuw(Rss32,Ru32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrminuw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrminuw_PR(rxx: i64, rss: i64, ru: i32) -> i64 {
+    hexagon_A4_vrminuw(rxx, rss, ru)
+}
+
+/// `Rxx32=vrminw(Rss32,Ru32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrminw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrminw_PR(rxx: i64, rss: i64, ru: i32) -> i64 {
+    hexagon_A4_vrminw(rxx, rss, ru)
+}
+
+/// `Rd32=vaddhub(Rss32,Rtt32):sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaddhub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vaddhub_PP_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_A5_vaddhubs(rss, rtt)
+}
+
+/// `Pd4=all8(Ps4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(all8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_all8_p(ps: i32) -> i32 {
+    hexagon_C2_all8(ps)
+}
+
+/// `Pd4=and(Pt4,Ps4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_and_pp(pt: i32, ps: i32) -> i32 {
+    hexagon_C2_and(pt, ps)
+}
+
+/// `Pd4=and(Pt4,!Ps4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_and_pnp(pt: i32, ps: i32) -> i32 {
+    hexagon_C2_andn(pt, ps)
+}
+
+/// `Pd4=any8(Ps4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(any8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_any8_p(ps: i32) -> i32 {
+    hexagon_C2_any8(ps)
+}
+
+/// `Pd4=bitsclr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(bitsclr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_bitsclr_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C2_bitsclr(rs, rt)
+}
+
+/// `Pd4=bitsclr(Rs32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(bitsclr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_bitsclr_RI<const IU6: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_C2_bitsclri(rs, IU6 as i32)
+}
+
+/// `Pd4=bitsset(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(bitsset))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_bitsset_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C2_bitsset(rs, rt)
+}
+
+/// `Pd4=cmp.eq(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_eq_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C2_cmpeq(rs, rt)
+}
+
+/// `Pd4=cmp.eq(Rs32,#s10)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmp, IS10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_eq_RI<const IS10: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_C2_cmpeqi(rs, IS10)
+}
+
+/// `Pd4=cmp.eq(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_eq_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_C2_cmpeqp(rss, rtt)
+}
+
+/// `Pd4=cmp.ge(Rs32,#s8)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmp, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_ge_RI<const IS8: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_C2_cmpgei(rs, IS8)
+}
+
+/// `Pd4=cmp.geu(Rs32,#u8)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmp, IU8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_geu_RI<const IU8: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    hexagon_C2_cmpgeui(rs, IU8 as i32)
+}
+
+/// `Pd4=cmp.gt(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_gt_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C2_cmpgt(rs, rt)
+}
+
+/// `Pd4=cmp.gt(Rs32,#s10)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmp, IS10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_gt_RI<const IS10: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_C2_cmpgti(rs, IS10)
+}
+
+/// `Pd4=cmp.gt(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_gt_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_C2_cmpgtp(rss, rtt)
+}
+
+/// `Pd4=cmp.gtu(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_gtu_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C2_cmpgtu(rs, rt)
+}
+
+/// `Pd4=cmp.gtu(Rs32,#u9)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cmp, IU9 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_gtu_RI<const IU9: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU9, 9);
+    hexagon_C2_cmpgtui(rs, IU9 as i32)
+}
+
+/// `Pd4=cmp.gtu(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_gtu_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_C2_cmpgtup(rss, rtt)
+}
+
+/// `Pd4=cmp.lt(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_lt_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C2_cmplt(rs, rt)
+}
+
+/// `Pd4=cmp.ltu(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_cmp_ltu_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C2_cmpltu(rs, rt)
+}
+
+/// `Rdd32=mask(Pt4)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mask))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mask_p(pt: i32) -> i64 {
+    hexagon_C2_mask(pt)
+}
+
+/// `Rd32=mux(Pu4,Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mux))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mux_pRR(pu: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_C2_mux(pu, rs, rt)
+}
+
+/// `Rd32=mux(Pu4,#s8,#S8)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1, 2)]
+#[cfg_attr(test, assert_instr(mux, IS8 = 0, IS8_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mux_pII<const IS8: i32, const IS8_2: i32>(pu: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    static_assert_simm_bits!(IS8_2, 8);
+    hexagon_C2_muxii(pu, IS8, IS8_2)
+}
+
+/// `Rd32=mux(Pu4,Rs32,#s8)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(mux, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mux_pRI<const IS8: i32>(pu: i32, rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_C2_muxir(pu, rs, IS8)
+}
+
+/// `Rd32=mux(Pu4,#s8,Rs32)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(mux, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mux_pIR<const IS8: i32>(pu: i32, rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_C2_muxri(pu, IS8, rs)
+}
+
+/// `Pd4=not(Ps4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(not))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_p(ps: i32) -> i32 {
+    hexagon_C2_not(ps)
+}
+
+/// `Pd4=or(Pt4,Ps4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_or_pp(pt: i32, ps: i32) -> i32 {
+    hexagon_C2_or(pt, ps)
+}
+
+/// `Pd4=or(Pt4,!Ps4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_or_pnp(pt: i32, ps: i32) -> i32 {
+    hexagon_C2_orn(pt, ps)
+}
+
+/// `Pd4=Ps4`
+///
+/// Instruction Type: MAPPING
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_equals_p(ps: i32) -> i32 {
+    hexagon_C2_pxfer_map(ps)
+}
+
+/// `Rd32=Ps4`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_equals_p(ps: i32) -> i32 {
+    hexagon_C2_tfrpr(ps)
+}
+
+/// `Pd4=Rs32`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_equals_R(rs: i32) -> i32 {
+    hexagon_C2_tfrrp(rs)
+}
+
+/// `Rd32=vitpack(Ps4,Pt4)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vitpack))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vitpack_pp(ps: i32, pt: i32) -> i32 {
+    hexagon_C2_vitpack(ps, pt)
+}
+
+/// `Rdd32=vmux(Pu4,Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmux))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmux_pPP(pu: i32, rss: i64, rtt: i64) -> i64 {
+    hexagon_C2_vmux(pu, rss, rtt)
+}
+
+/// `Pd4=xor(Ps4,Pt4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(xor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_xor_pp(ps: i32, pt: i32) -> i32 {
+    hexagon_C2_xor(ps, pt)
+}
+
+/// `Pd4=and(Ps4,and(Pt4,Pu4))`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_and_and_ppp(ps: i32, pt: i32, pu: i32) -> i32 {
+    hexagon_C4_and_and(ps, pt, pu)
+}
+
+/// `Pd4=and(Ps4,and(Pt4,!Pu4))`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_and_and_ppnp(ps: i32, pt: i32, pu: i32) -> i32 {
+    hexagon_C4_and_andn(ps, pt, pu)
+}
+
+/// `Pd4=and(Ps4,or(Pt4,Pu4))`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_and_or_ppp(ps: i32, pt: i32, pu: i32) -> i32 {
+    hexagon_C4_and_or(ps, pt, pu)
+}
+
+/// `Pd4=and(Ps4,or(Pt4,!Pu4))`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_and_or_ppnp(ps: i32, pt: i32, pu: i32) -> i32 {
+    hexagon_C4_and_orn(ps, pt, pu)
+}
+
+/// `Pd4=!cmp.gt(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_cmp_gt_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C4_cmplte(rs, rt)
+}
+
+/// `Pd4=!cmp.gt(Rs32,#s10)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_cmp_gt_RI<const IS10: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_C4_cmpltei(rs, IS10)
+}
+
+/// `Pd4=!cmp.gtu(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_cmp_gtu_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C4_cmplteu(rs, rt)
+}
+
+/// `Pd4=!cmp.gtu(Rs32,#u9)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_cmp_gtu_RI<const IU9: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU9, 9);
+    hexagon_C4_cmplteui(rs, IU9 as i32)
+}
+
+/// `Pd4=!cmp.eq(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_cmp_eq_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C4_cmpneq(rs, rt)
+}
+
+/// `Pd4=!cmp.eq(Rs32,#s10)`
+///
+/// Instruction Type: ALU32_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_cmp_eq_RI<const IS10: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_C4_cmpneqi(rs, IS10)
+}
+
+/// `Pd4=fastcorner9(Ps4,Pt4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(fastcorner9))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_fastcorner9_pp(ps: i32, pt: i32) -> i32 {
+    hexagon_C4_fastcorner9(ps, pt)
+}
+
+/// `Pd4=!fastcorner9(Ps4,Pt4)`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_fastcorner9_pp(ps: i32, pt: i32) -> i32 {
+    hexagon_C4_fastcorner9_not(ps, pt)
+}
+
+/// `Pd4=!bitsclr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_bitsclr_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C4_nbitsclr(rs, rt)
+}
+
+/// `Pd4=!bitsclr(Rs32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_bitsclr_RI<const IU6: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_C4_nbitsclri(rs, IU6 as i32)
+}
+
+/// `Pd4=!bitsset(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_bitsset_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_C4_nbitsset(rs, rt)
+}
+
+/// `Pd4=or(Ps4,and(Pt4,Pu4))`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_or_and_ppp(ps: i32, pt: i32, pu: i32) -> i32 {
+    hexagon_C4_or_and(ps, pt, pu)
+}
+
+/// `Pd4=or(Ps4,and(Pt4,!Pu4))`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_or_and_ppnp(ps: i32, pt: i32, pu: i32) -> i32 {
+    hexagon_C4_or_andn(ps, pt, pu)
+}
+
+/// `Pd4=or(Ps4,or(Pt4,Pu4))`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_or_or_ppp(ps: i32, pt: i32, pu: i32) -> i32 {
+    hexagon_C4_or_or(ps, pt, pu)
+}
+
+/// `Pd4=or(Ps4,or(Pt4,!Pu4))`
+///
+/// Instruction Type: CR
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_or_or_ppnp(ps: i32, pt: i32, pu: i32) -> i32 {
+    hexagon_C4_or_orn(ps, pt, pu)
+}
+
+/// `Rdd32=convert_d2df(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_d2df))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_d2df_P(rss: i64) -> f64 {
+    hexagon_F2_conv_d2df(rss)
+}
+
+/// `Rd32=convert_d2sf(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_d2sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_d2sf_P(rss: i64) -> f32 {
+    hexagon_F2_conv_d2sf(rss)
+}
+
+/// `Rdd32=convert_df2d(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_df2d))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_df2d_P(rss: f64) -> i64 {
+    hexagon_F2_conv_df2d(rss)
+}
+
+/// `Rdd32=convert_df2d(Rss32):chop`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_df2d))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_df2d_P_chop(rss: f64) -> i64 {
+    hexagon_F2_conv_df2d_chop(rss)
+}
+
+/// `Rd32=convert_df2sf(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_df2sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_df2sf_P(rss: f64) -> f32 {
+    hexagon_F2_conv_df2sf(rss)
+}
+
+/// `Rdd32=convert_df2ud(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_df2ud))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_df2ud_P(rss: f64) -> i64 {
+    hexagon_F2_conv_df2ud(rss)
+}
+
+/// `Rdd32=convert_df2ud(Rss32):chop`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_df2ud))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_df2ud_P_chop(rss: f64) -> i64 {
+    hexagon_F2_conv_df2ud_chop(rss)
+}
+
+/// `Rd32=convert_df2uw(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_df2uw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_df2uw_P(rss: f64) -> i32 {
+    hexagon_F2_conv_df2uw(rss)
+}
+
+/// `Rd32=convert_df2uw(Rss32):chop`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_df2uw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_df2uw_P_chop(rss: f64) -> i32 {
+    hexagon_F2_conv_df2uw_chop(rss)
+}
+
+/// `Rd32=convert_df2w(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_df2w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_df2w_P(rss: f64) -> i32 {
+    hexagon_F2_conv_df2w(rss)
+}
+
+/// `Rd32=convert_df2w(Rss32):chop`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_df2w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_df2w_P_chop(rss: f64) -> i32 {
+    hexagon_F2_conv_df2w_chop(rss)
+}
+
+/// `Rdd32=convert_sf2d(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_sf2d))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_sf2d_R(rs: f32) -> i64 {
+    hexagon_F2_conv_sf2d(rs)
+}
+
+/// `Rdd32=convert_sf2d(Rs32):chop`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_sf2d))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_sf2d_R_chop(rs: f32) -> i64 {
+    hexagon_F2_conv_sf2d_chop(rs)
+}
+
+/// `Rdd32=convert_sf2df(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_sf2df))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_sf2df_R(rs: f32) -> f64 {
+    hexagon_F2_conv_sf2df(rs)
+}
+
+/// `Rdd32=convert_sf2ud(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_sf2ud))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_sf2ud_R(rs: f32) -> i64 {
+    hexagon_F2_conv_sf2ud(rs)
+}
+
+/// `Rdd32=convert_sf2ud(Rs32):chop`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_sf2ud))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_sf2ud_R_chop(rs: f32) -> i64 {
+    hexagon_F2_conv_sf2ud_chop(rs)
+}
+
+/// `Rd32=convert_sf2uw(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_sf2uw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_sf2uw_R(rs: f32) -> i32 {
+    hexagon_F2_conv_sf2uw(rs)
+}
+
+/// `Rd32=convert_sf2uw(Rs32):chop`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_sf2uw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_sf2uw_R_chop(rs: f32) -> i32 {
+    hexagon_F2_conv_sf2uw_chop(rs)
+}
+
+/// `Rd32=convert_sf2w(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_sf2w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_sf2w_R(rs: f32) -> i32 {
+    hexagon_F2_conv_sf2w(rs)
+}
+
+/// `Rd32=convert_sf2w(Rs32):chop`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_sf2w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_sf2w_R_chop(rs: f32) -> i32 {
+    hexagon_F2_conv_sf2w_chop(rs)
+}
+
+/// `Rdd32=convert_ud2df(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_ud2df))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_ud2df_P(rss: i64) -> f64 {
+    hexagon_F2_conv_ud2df(rss)
+}
+
+/// `Rd32=convert_ud2sf(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_ud2sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_ud2sf_P(rss: i64) -> f32 {
+    hexagon_F2_conv_ud2sf(rss)
+}
+
+/// `Rdd32=convert_uw2df(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_uw2df))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_uw2df_R(rs: i32) -> f64 {
+    hexagon_F2_conv_uw2df(rs)
+}
+
+/// `Rd32=convert_uw2sf(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_uw2sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_uw2sf_R(rs: i32) -> f32 {
+    hexagon_F2_conv_uw2sf(rs)
+}
+
+/// `Rdd32=convert_w2df(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_w2df))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_convert_w2df_R(rs: i32) -> f64 {
+    hexagon_F2_conv_w2df(rs)
+}
+
+/// `Rd32=convert_w2sf(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(convert_w2sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_convert_w2sf_R(rs: i32) -> f32 {
+    hexagon_F2_conv_w2sf(rs)
+}
+
+/// `Pd4=dfclass(Rss32,#u5)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(dfclass, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_dfclass_PI<const IU5: u32>(rss: f64) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_F2_dfclass(rss, IU5 as i32)
+}
+
+/// `Pd4=dfcmp.eq(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(dfcmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_dfcmp_eq_PP(rss: f64, rtt: f64) -> i32 {
+    hexagon_F2_dfcmpeq(rss, rtt)
+}
+
+/// `Pd4=dfcmp.ge(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(dfcmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_dfcmp_ge_PP(rss: f64, rtt: f64) -> i32 {
+    hexagon_F2_dfcmpge(rss, rtt)
+}
+
+/// `Pd4=dfcmp.gt(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(dfcmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_dfcmp_gt_PP(rss: f64, rtt: f64) -> i32 {
+    hexagon_F2_dfcmpgt(rss, rtt)
+}
+
+/// `Pd4=dfcmp.uo(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(dfcmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_dfcmp_uo_PP(rss: f64, rtt: f64) -> i32 {
+    hexagon_F2_dfcmpuo(rss, rtt)
+}
+
+/// `Rdd32=dfmake(#u10):neg`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[cfg_attr(test, assert_instr(dfmake, IU10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfmake_I_neg<const IU10: u32>() -> f64 {
+    static_assert_uimm_bits!(IU10, 10);
+    hexagon_F2_dfimm_n(IU10 as i32)
+}
+
+/// `Rdd32=dfmake(#u10):pos`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[cfg_attr(test, assert_instr(dfmake, IU10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfmake_I_pos<const IU10: u32>() -> f64 {
+    static_assert_uimm_bits!(IU10, 10);
+    hexagon_F2_dfimm_p(IU10 as i32)
+}
+
+/// `Rd32=sfadd(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfadd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfadd_RR(rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sfadd(rs, rt)
+}
+
+/// `Pd4=sfclass(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(sfclass, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_sfclass_RI<const IU5: u32>(rs: f32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_F2_sfclass(rs, IU5 as i32)
+}
+
+/// `Pd4=sfcmp.eq(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfcmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_sfcmp_eq_RR(rs: f32, rt: f32) -> i32 {
+    hexagon_F2_sfcmpeq(rs, rt)
+}
+
+/// `Pd4=sfcmp.ge(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfcmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_sfcmp_ge_RR(rs: f32, rt: f32) -> i32 {
+    hexagon_F2_sfcmpge(rs, rt)
+}
+
+/// `Pd4=sfcmp.gt(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfcmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_sfcmp_gt_RR(rs: f32, rt: f32) -> i32 {
+    hexagon_F2_sfcmpgt(rs, rt)
+}
+
+/// `Pd4=sfcmp.uo(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfcmp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_sfcmp_uo_RR(rs: f32, rt: f32) -> i32 {
+    hexagon_F2_sfcmpuo(rs, rt)
+}
+
+/// `Rd32=sffixupd(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sffixupd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sffixupd_RR(rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sffixupd(rs, rt)
+}
+
+/// `Rd32=sffixupn(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sffixupn))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sffixupn_RR(rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sffixupn(rs, rt)
+}
+
+/// `Rd32=sffixupr(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sffixupr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sffixupr_R(rs: f32) -> f32 {
+    hexagon_F2_sffixupr(rs)
+}
+
+/// `Rx32+=sfmpy(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmpyacc_RR(rx: f32, rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sffma(rx, rs, rt)
+}
+
+/// `Rx32+=sfmpy(Rs32,Rt32):lib`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmpyacc_RR_lib(rx: f32, rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sffma_lib(rx, rs, rt)
+}
+
+/// `Rx32+=sfmpy(Rs32,Rt32,Pu4):scale`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmpyacc_RRp_scale(rx: f32, rs: f32, rt: f32, pu: i32) -> f32 {
+    hexagon_F2_sffma_sc(rx, rs, rt, pu)
+}
+
+/// `Rx32-=sfmpy(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmpynac_RR(rx: f32, rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sffms(rx, rs, rt)
+}
+
+/// `Rx32-=sfmpy(Rs32,Rt32):lib`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmpynac_RR_lib(rx: f32, rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sffms_lib(rx, rs, rt)
+}
+
+/// `Rd32=sfmake(#u10):neg`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[cfg_attr(test, assert_instr(sfmake, IU10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmake_I_neg<const IU10: u32>() -> f32 {
+    static_assert_uimm_bits!(IU10, 10);
+    hexagon_F2_sfimm_n(IU10 as i32)
+}
+
+/// `Rd32=sfmake(#u10):pos`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[cfg_attr(test, assert_instr(sfmake, IU10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmake_I_pos<const IU10: u32>() -> f32 {
+    static_assert_uimm_bits!(IU10, 10);
+    hexagon_F2_sfimm_p(IU10 as i32)
+}
+
+/// `Rd32=sfmax(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfmax))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmax_RR(rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sfmax(rs, rt)
+}
+
+/// `Rd32=sfmin(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfmin))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmin_RR(rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sfmin(rs, rt)
+}
+
+/// `Rd32=sfmpy(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfmpy_RR(rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sfmpy(rs, rt)
+}
+
+/// `Rd32=sfsub(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sfsub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sfsub_RR(rs: f32, rt: f32) -> f32 {
+    hexagon_F2_sfsub(rs, rt)
+}
+
+/// `Rx32+=add(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_addacc_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_acci(rx, rs, rt)
+}
+
+/// `Rx32+=add(Rs32,#s8)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(add, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_addacc_RI<const IS8: i32>(rx: i32, rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_M2_accii(rx, rs, IS8)
+}
+
+/// `Rxx32+=cmpyi(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyiacc_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmaci_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=cmpyr(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpyr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyracc_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmacr_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=cmpy(Rs32,Rt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyacc_RR_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmacs_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=cmpy(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyacc_RR_s1_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmacs_s1(rxx, rs, rt)
+}
+
+/// `Rxx32+=cmpy(Rs32,Rt32*):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyacc_RR_conj_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmacsc_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=cmpy(Rs32,Rt32*):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyacc_RR_conj_s1_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmacsc_s1(rxx, rs, rt)
+}
+
+/// `Rdd32=cmpyi(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyi_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmpyi_s0(rs, rt)
+}
+
+/// `Rdd32=cmpyr(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpyr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyr_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmpyr_s0(rs, rt)
+}
+
+/// `Rd32=cmpy(Rs32,Rt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpy_RR_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_cmpyrs_s0(rs, rt)
+}
+
+/// `Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpy_RR_s1_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_cmpyrs_s1(rs, rt)
+}
+
+/// `Rd32=cmpy(Rs32,Rt32*):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpy_RR_conj_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_cmpyrsc_s0(rs, rt)
+}
+
+/// `Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpy_RR_conj_s1_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_cmpyrsc_s1(rs, rt)
+}
+
+/// `Rdd32=cmpy(Rs32,Rt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpy_RR_sat(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmpys_s0(rs, rt)
+}
+
+/// `Rdd32=cmpy(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpy_RR_s1_sat(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmpys_s1(rs, rt)
+}
+
+/// `Rdd32=cmpy(Rs32,Rt32*):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpy_RR_conj_sat(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmpysc_s0(rs, rt)
+}
+
+/// `Rdd32=cmpy(Rs32,Rt32*):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpy_RR_conj_s1_sat(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cmpysc_s1(rs, rt)
+}
+
+/// `Rxx32-=cmpy(Rs32,Rt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpynac_RR_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cnacs_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=cmpy(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpynac_RR_s1_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cnacs_s1(rxx, rs, rt)
+}
+
+/// `Rxx32-=cmpy(Rs32,Rt32*):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpynac_RR_conj_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cnacsc_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=cmpy(Rs32,Rt32*):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpynac_RR_conj_s1_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_cnacsc_s1(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpy(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyacc_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_dpmpyss_acc_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpy(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpynac_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_dpmpyss_nac_s0(rxx, rs, rt)
+}
+
+/// `Rd32=mpy(Rs32,Rt32):rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RR_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_dpmpyss_rnd_s0(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_dpmpyss_s0(rs, rt)
+}
+
+/// `Rxx32+=mpyu(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyuacc_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_dpmpyuu_acc_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpyu(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyunac_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_dpmpyuu_nac_s0(rxx, rs, rt)
+}
+
+/// `Rdd32=mpyu(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyu_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_dpmpyuu_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32,Rt32.h):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RRh_s1_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_hmmpyh_rs1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32,Rt32.h):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RRh_s1_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_hmmpyh_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32,Rt32.l):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RRl_s1_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_hmmpyl_rs1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32,Rt32.l):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RRl_s1_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_hmmpyl_s1(rs, rt)
+}
+
+/// `Rx32+=mpyi(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyiacc_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_maci(rx, rs, rt)
+}
+
+/// `Rx32-=mpyi(Rs32,#u8)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(mpyi, IU8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyinac_RI<const IU8: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    hexagon_M2_macsin(rx, rs, IU8 as i32)
+}
+
+/// `Rx32+=mpyi(Rs32,#u8)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(mpyi, IU8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyiacc_RI<const IU8: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    hexagon_M2_macsip(rx, rs, IU8 as i32)
+}
+
+/// `Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywohacc_PP_rnd_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmachs_rs0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywohacc_PP_s1_rnd_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmachs_rs1(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpywoh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywohacc_PP_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmachs_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywohacc_PP_s1_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmachs_s1(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywehacc_PP_rnd_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmacls_rs0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywehacc_PP_s1_rnd_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmacls_rs1(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyweh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywehacc_PP_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmacls_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywehacc_PP_s1_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmacls_s1(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywouh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywouhacc_PP_rnd_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmacuhs_rs0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywouh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywouhacc_PP_s1_rnd_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmacuhs_rs1(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpywouh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywouh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywouhacc_PP_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmacuhs_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywouh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywouhacc_PP_s1_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmacuhs_s1(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweuhacc_PP_rnd_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmaculs_rs0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweuhacc_PP_s1_rnd_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmaculs_rs1(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyweuh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweuhacc_PP_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmaculs_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweuhacc_PP_s1_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmaculs_s1(rxx, rss, rtt)
+}
+
+/// `Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywoh_PP_rnd_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyh_rs0(rss, rtt)
+}
+
+/// `Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywoh_PP_s1_rnd_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyh_rs1(rss, rtt)
+}
+
+/// `Rdd32=vmpywoh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywoh_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyh_s0(rss, rtt)
+}
+
+/// `Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywoh_PP_s1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyh_s1(rss, rtt)
+}
+
+/// `Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweh_PP_rnd_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyl_rs0(rss, rtt)
+}
+
+/// `Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweh_PP_s1_rnd_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyl_rs1(rss, rtt)
+}
+
+/// `Rdd32=vmpyweh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweh_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyl_s0(rss, rtt)
+}
+
+/// `Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweh_PP_s1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyl_s1(rss, rtt)
+}
+
+/// `Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywouh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywouh_PP_rnd_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyuh_rs0(rss, rtt)
+}
+
+/// `Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywouh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywouh_PP_s1_rnd_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyuh_rs1(rss, rtt)
+}
+
+/// `Rdd32=vmpywouh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywouh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywouh_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyuh_s0(rss, rtt)
+}
+
+/// `Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpywouh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpywouh_PP_s1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyuh_s1(rss, rtt)
+}
+
+/// `Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweuh_PP_rnd_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyul_rs0(rss, rtt)
+}
+
+/// `Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweuh_PP_s1_rnd_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyul_rs1(rss, rtt)
+}
+
+/// `Rdd32=vmpyweuh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweuh_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyul_s0(rss, rtt)
+}
+
+/// `Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyweuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyweuh_PP_s1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_mmpyul_s1(rss, rtt)
+}
+
+/// `Rx32+=mpy(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RhRh(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_hh_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RhRh_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_hh_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RhRl(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_hl_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RhRl_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_hl_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RlRh(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_lh_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RlRh_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_lh_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RlRl(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_ll_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RlRl_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_ll_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.h,Rt32.h):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RhRh_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_sat_hh_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.h,Rt32.h):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RhRh_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_sat_hh_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.h,Rt32.l):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RhRl_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_sat_hl_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.h,Rt32.l):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RhRl_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_sat_hl_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.l,Rt32.h):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RlRh_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_sat_lh_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.l,Rt32.h):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RlRh_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_sat_lh_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.l,Rt32.l):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RlRl_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_sat_ll_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpy(Rs32.l,Rt32.l):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RlRl_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_acc_sat_ll_s1(rx, rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRh(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_hh_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRh_s1(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_hh_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRl(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_hl_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRl_s1(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_hl_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRh(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_lh_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRh_s1(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_lh_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRl(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_ll_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRl_s1(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_ll_s1(rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RhRh(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_hh_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RhRh_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_hh_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RhRl(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_hl_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RhRl_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_hl_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RlRh(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_lh_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RlRh_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_lh_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RlRl(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_ll_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RlRl_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_ll_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.h,Rt32.h):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RhRh_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_sat_hh_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.h,Rt32.h):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RhRh_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_sat_hh_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.h,Rt32.l):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RhRl_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_sat_hl_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.h,Rt32.l):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RhRl_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_sat_hl_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.l,Rt32.h):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RlRh_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_sat_lh_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.l,Rt32.h):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RlRh_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_sat_lh_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.l,Rt32.l):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RlRl_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_sat_ll_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpy(Rs32.l,Rt32.l):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RlRl_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_nac_sat_ll_s1(rx, rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.h):rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRh_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_rnd_hh_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRh_s1_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_rnd_hh_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.l):rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRl_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_rnd_hl_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRl_s1_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_rnd_hl_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.h):rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRh_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_rnd_lh_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRh_s1_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_rnd_lh_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.l):rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRl_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_rnd_ll_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRl_s1_rnd(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_rnd_ll_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.h):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRh_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_hh_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.h):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRh_s1_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_hh_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.l):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRl_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_hl_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.l):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRl_s1_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_hl_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.h):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRh_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_lh_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.h):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRh_s1_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_lh_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.l):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRl_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_ll_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.l):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRl_s1_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_ll_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.h):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRh_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_rnd_hh_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRh_s1_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_rnd_hh_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.l):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRl_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_rnd_hl_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RhRl_s1_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_rnd_hl_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.h):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRh_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_rnd_lh_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRh_s1_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_rnd_lh_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.l):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRl_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_rnd_ll_s0(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RlRl_s1_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_sat_rnd_ll_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_up(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32,Rt32):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RR_s1(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_up_s1(rs, rt)
+}
+
+/// `Rd32=mpy(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpy_RR_s1_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpy_up_s1_sat(rs, rt)
+}
+
+/// `Rxx32+=mpy(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyacc_RhRh(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_acc_hh_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpy(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyacc_RhRh_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_acc_hh_s1(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpy(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyacc_RhRl(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_acc_hl_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpy(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyacc_RhRl_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_acc_hl_s1(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpy(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyacc_RlRh(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_acc_lh_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpy(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyacc_RlRh_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_acc_lh_s1(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpy(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyacc_RlRl(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_acc_ll_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpy(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyacc_RlRl_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_acc_ll_s1(rxx, rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RhRh(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_hh_s0(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RhRh_s1(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_hh_s1(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RhRl(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_hl_s0(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RhRl_s1(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_hl_s1(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RlRh(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_lh_s0(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RlRh_s1(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_lh_s1(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RlRl(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_ll_s0(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RlRl_s1(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_ll_s1(rs, rt)
+}
+
+/// `Rxx32-=mpy(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpynac_RhRh(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_nac_hh_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpy(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpynac_RhRh_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_nac_hh_s1(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpy(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpynac_RhRl(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_nac_hl_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpy(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpynac_RhRl_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_nac_hl_s1(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpy(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpynac_RlRh(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_nac_lh_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpy(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpynac_RlRh_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_nac_lh_s1(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpy(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpynac_RlRl(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_nac_ll_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpy(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpynac_RlRl_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_nac_ll_s1(rxx, rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.h,Rt32.h):rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RhRh_rnd(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_rnd_hh_s0(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.h,Rt32.h):<<1:rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RhRh_s1_rnd(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_rnd_hh_s1(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.h,Rt32.l):rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RhRl_rnd(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_rnd_hl_s0(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.h,Rt32.l):<<1:rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RhRl_s1_rnd(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_rnd_hl_s1(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.l,Rt32.h):rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RlRh_rnd(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_rnd_lh_s0(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.l,Rt32.h):<<1:rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RlRh_s1_rnd(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_rnd_lh_s1(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.l,Rt32.l):rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RlRl_rnd(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_rnd_ll_s0(rs, rt)
+}
+
+/// `Rdd32=mpy(Rs32.l,Rt32.l):<<1:rnd`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpy_RlRl_s1_rnd(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyd_rnd_ll_s1(rs, rt)
+}
+
+/// `Rd32=mpyi(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyi_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyi(rs, rt)
+}
+
+/// `Rd32=mpyi(Rs32,#m9)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyi_RI(rs: i32, im9: i32) -> i32 {
+    hexagon_M2_mpysmi(rs, im9)
+}
+
+/// `Rd32=mpysu(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpysu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpysu_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpysu_up(rs, rt)
+}
+
+/// `Rx32+=mpyu(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyuacc_RhRh(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_acc_hh_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpyu(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyuacc_RhRh_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_acc_hh_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpyu(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyuacc_RhRl(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_acc_hl_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpyu(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyuacc_RhRl_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_acc_hl_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpyu(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyuacc_RlRh(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_acc_lh_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpyu(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyuacc_RlRh_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_acc_lh_s1(rx, rs, rt)
+}
+
+/// `Rx32+=mpyu(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyuacc_RlRl(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_acc_ll_s0(rx, rs, rt)
+}
+
+/// `Rx32+=mpyu(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyuacc_RlRl_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_acc_ll_s1(rx, rs, rt)
+}
+
+/// `Rd32=mpyu(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyu_RhRh(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_hh_s0(rs, rt)
+}
+
+/// `Rd32=mpyu(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyu_RhRh_s1(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_hh_s1(rs, rt)
+}
+
+/// `Rd32=mpyu(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyu_RhRl(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_hl_s0(rs, rt)
+}
+
+/// `Rd32=mpyu(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyu_RhRl_s1(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_hl_s1(rs, rt)
+}
+
+/// `Rd32=mpyu(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyu_RlRh(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_lh_s0(rs, rt)
+}
+
+/// `Rd32=mpyu(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyu_RlRh_s1(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_lh_s1(rs, rt)
+}
+
+/// `Rd32=mpyu(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyu_RlRl(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_ll_s0(rs, rt)
+}
+
+/// `Rd32=mpyu(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyu_RlRl_s1(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_ll_s1(rs, rt)
+}
+
+/// `Rx32-=mpyu(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyunac_RhRh(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_nac_hh_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpyu(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyunac_RhRh_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_nac_hh_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpyu(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyunac_RhRl(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_nac_hl_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpyu(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyunac_RhRl_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_nac_hl_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpyu(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyunac_RlRh(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_nac_lh_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpyu(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyunac_RlRh_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_nac_lh_s1(rx, rs, rt)
+}
+
+/// `Rx32-=mpyu(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyunac_RlRl(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_nac_ll_s0(rx, rs, rt)
+}
+
+/// `Rx32-=mpyu(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyunac_RlRl_s1(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_nac_ll_s1(rx, rs, rt)
+}
+
+/// `Rd32=mpyu(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyu_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyu_up(rs, rt)
+}
+
+/// `Rxx32+=mpyu(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyuacc_RhRh(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_acc_hh_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpyu(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyuacc_RhRh_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_acc_hh_s1(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpyu(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyuacc_RhRl(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_acc_hl_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpyu(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyuacc_RhRl_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_acc_hl_s1(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpyu(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyuacc_RlRh(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_acc_lh_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpyu(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyuacc_RlRh_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_acc_lh_s1(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpyu(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyuacc_RlRl(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_acc_ll_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=mpyu(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyuacc_RlRl_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_acc_ll_s1(rxx, rs, rt)
+}
+
+/// `Rdd32=mpyu(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyu_RhRh(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_hh_s0(rs, rt)
+}
+
+/// `Rdd32=mpyu(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyu_RhRh_s1(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_hh_s1(rs, rt)
+}
+
+/// `Rdd32=mpyu(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyu_RhRl(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_hl_s0(rs, rt)
+}
+
+/// `Rdd32=mpyu(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyu_RhRl_s1(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_hl_s1(rs, rt)
+}
+
+/// `Rdd32=mpyu(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyu_RlRh(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_lh_s0(rs, rt)
+}
+
+/// `Rdd32=mpyu(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyu_RlRh_s1(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_lh_s1(rs, rt)
+}
+
+/// `Rdd32=mpyu(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyu_RlRl(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_ll_s0(rs, rt)
+}
+
+/// `Rdd32=mpyu(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyu_RlRl_s1(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_ll_s1(rs, rt)
+}
+
+/// `Rxx32-=mpyu(Rs32.h,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyunac_RhRh(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_nac_hh_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpyu(Rs32.h,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyunac_RhRh_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_nac_hh_s1(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpyu(Rs32.h,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyunac_RhRl(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_nac_hl_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpyu(Rs32.h,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyunac_RhRl_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_nac_hl_s1(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpyu(Rs32.l,Rt32.h)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyunac_RlRh(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_nac_lh_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpyu(Rs32.l,Rt32.h):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyunac_RlRh_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_nac_lh_s1(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpyu(Rs32.l,Rt32.l)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyunac_RlRl(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_nac_ll_s0(rxx, rs, rt)
+}
+
+/// `Rxx32-=mpyu(Rs32.l,Rt32.l):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_mpyunac_RlRl_s1(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_mpyud_nac_ll_s1(rxx, rs, rt)
+}
+
+/// `Rd32=mpyui(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpyui))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyui_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mpyui(rs, rt)
+}
+
+/// `Rx32-=add(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_addnac_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_nacci(rx, rs, rt)
+}
+
+/// `Rx32-=add(Rs32,#s8)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(add, IS8 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_addnac_RI<const IS8: i32>(rx: i32, rs: i32) -> i32 {
+    static_assert_simm_bits!(IS8, 8);
+    hexagon_M2_naccii(rx, rs, IS8)
+}
+
+/// `Rx32+=sub(Rt32,Rs32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(sub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_subacc_RR(rx: i32, rt: i32, rs: i32) -> i32 {
+    hexagon_M2_subacc(rx, rt, rs)
+}
+
+/// `Rdd32=vabsdiffh(Rtt32,Rss32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vabsdiffh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vabsdiffh_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_M2_vabsdiffh(rtt, rss)
+}
+
+/// `Rdd32=vabsdiffw(Rtt32,Rss32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vabsdiffw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vabsdiffw_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_M2_vabsdiffw(rtt, rss)
+}
+
+/// `Rxx32+=vcmpyi(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vcmpyiacc_PP_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vcmac_s0_sat_i(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vcmpyr(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpyr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vcmpyracc_PP_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vcmac_s0_sat_r(rxx, rss, rtt)
+}
+
+/// `Rdd32=vcmpyi(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vcmpyi_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vcmpy_s0_sat_i(rss, rtt)
+}
+
+/// `Rdd32=vcmpyr(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpyr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vcmpyr_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vcmpy_s0_sat_r(rss, rtt)
+}
+
+/// `Rdd32=vcmpyi(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vcmpyi_PP_s1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vcmpy_s1_sat_i(rss, rtt)
+}
+
+/// `Rdd32=vcmpyr(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcmpyr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vcmpyr_PP_s1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vcmpy_s1_sat_r(rss, rtt)
+}
+
+/// `Rxx32+=vdmpy(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vdmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vdmpyacc_PP_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vdmacs_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vdmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vdmpyacc_PP_s1_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vdmacs_s1(rxx, rss, rtt)
+}
+
+/// `Rd32=vdmpy(Rss32,Rtt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vdmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vdmpy_PP_rnd_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M2_vdmpyrs_s0(rss, rtt)
+}
+
+/// `Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vdmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vdmpy_PP_s1_rnd_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M2_vdmpyrs_s1(rss, rtt)
+}
+
+/// `Rdd32=vdmpy(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vdmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vdmpy_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vdmpys_s0(rss, rtt)
+}
+
+/// `Rdd32=vdmpy(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vdmpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vdmpy_PP_s1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vdmpys_s1(rss, rtt)
+}
+
+/// `Rxx32+=vmpyh(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyhacc_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_vmac2(rxx, rs, rt)
+}
+
+/// `Rxx32+=vmpyeh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyehacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vmac2es(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyeh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyehacc_PP_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vmac2es_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyehacc_PP_s1_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vmac2es_s1(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vmpyh(Rs32,Rt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyhacc_RR_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_vmac2s_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=vmpyh(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyhacc_RR_s1_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_vmac2s_s1(rxx, rs, rt)
+}
+
+/// `Rxx32+=vmpyhsu(Rs32,Rt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyhsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyhsuacc_RR_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_vmac2su_s0(rxx, rs, rt)
+}
+
+/// `Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyhsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyhsuacc_RR_s1_sat(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M2_vmac2su_s1(rxx, rs, rt)
+}
+
+/// `Rdd32=vmpyeh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyeh_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vmpy2es_s0(rss, rtt)
+}
+
+/// `Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyeh_PP_s1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vmpy2es_s1(rss, rtt)
+}
+
+/// `Rdd32=vmpyh(Rs32,Rt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyh_RR_sat(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_vmpy2s_s0(rs, rt)
+}
+
+/// `Rd32=vmpyh(Rs32,Rt32):rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vmpyh_RR_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_vmpy2s_s0pack(rs, rt)
+}
+
+/// `Rdd32=vmpyh(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyh_RR_s1_sat(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_vmpy2s_s1(rs, rt)
+}
+
+/// `Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vmpyh_RR_s1_rnd_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_M2_vmpy2s_s1pack(rs, rt)
+}
+
+/// `Rdd32=vmpyhsu(Rs32,Rt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyhsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyhsu_RR_sat(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_vmpy2su_s0(rs, rt)
+}
+
+/// `Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpyhsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpyhsu_RR_s1_sat(rs: i32, rt: i32) -> i64 {
+    hexagon_M2_vmpy2su_s1(rs, rt)
+}
+
+/// `Rd32=vraddh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vraddh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vraddh_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_M2_vraddh(rss, rtt)
+}
+
+/// `Rd32=vradduh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vradduh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vradduh_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_M2_vradduh(rss, rtt)
+}
+
+/// `Rxx32+=vrcmpyi(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpyiacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrcmaci_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vrcmpyi(Rss32,Rtt32*)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpyiacc_PP_conj(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrcmaci_s0c(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vrcmpyr(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpyr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpyracc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrcmacr_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vrcmpyr(Rss32,Rtt32*)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpyr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpyracc_PP_conj(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrcmacr_s0c(rxx, rss, rtt)
+}
+
+/// `Rdd32=vrcmpyi(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpyi_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrcmpyi_s0(rss, rtt)
+}
+
+/// `Rdd32=vrcmpyi(Rss32,Rtt32*)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpyi_PP_conj(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrcmpyi_s0c(rss, rtt)
+}
+
+/// `Rdd32=vrcmpyr(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpyr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpyr_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrcmpyr_s0(rss, rtt)
+}
+
+/// `Rdd32=vrcmpyr(Rss32,Rtt32*)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpyr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpyr_PP_conj(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrcmpyr_s0c(rss, rtt)
+}
+
+/// `Rxx32+=vrcmpys(Rss32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpys))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpysacc_PR_s1_sat(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_M2_vrcmpys_acc_s1(rxx, rss, rt)
+}
+
+/// `Rdd32=vrcmpys(Rss32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpys))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcmpys_PR_s1_sat(rss: i64, rt: i32) -> i64 {
+    hexagon_M2_vrcmpys_s1(rss, rt)
+}
+
+/// `Rd32=vrcmpys(Rss32,Rt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcmpys))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vrcmpys_PR_s1_rnd_sat(rss: i64, rt: i32) -> i32 {
+    hexagon_M2_vrcmpys_s1rp(rss, rt)
+}
+
+/// `Rxx32+=vrmpyh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpyhacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrmac_s0(rxx, rss, rtt)
+}
+
+/// `Rdd32=vrmpyh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpyh_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M2_vrmpy_s0(rss, rtt)
+}
+
+/// `Rx32^=xor(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(xor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_xorxacc_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_xor_xacc(rx, rs, rt)
+}
+
+/// `Rx32&=and(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_andand_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_and_and(rx, rs, rt)
+}
+
+/// `Rx32&=and(Rs32,~Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_andand_RnR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_and_andn(rx, rs, rt)
+}
+
+/// `Rx32&=or(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_orand_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_and_or(rx, rs, rt)
+}
+
+/// `Rx32&=xor(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(xor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_xorand_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_and_xor(rx, rs, rt)
+}
+
+/// `Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpyiwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyiwh_PR_s1_rnd_sat(rss: i64, rt: i32) -> i32 {
+    hexagon_M4_cmpyi_wh(rss, rt)
+}
+
+/// `Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpyiwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyiwh_PR_conj_s1_rnd_sat(rss: i64, rt: i32) -> i32 {
+    hexagon_M4_cmpyi_whc(rss, rt)
+}
+
+/// `Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpyrwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyrwh_PR_s1_rnd_sat(rss: i64, rt: i32) -> i32 {
+    hexagon_M4_cmpyr_wh(rss, rt)
+}
+
+/// `Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cmpyrwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyrwh_PR_conj_s1_rnd_sat(rss: i64, rt: i32) -> i32 {
+    hexagon_M4_cmpyr_whc(rss, rt)
+}
+
+/// `Rx32+=mpy(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyacc_RR_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_mac_up_s1_sat(rx, rs, rt)
+}
+
+/// `Rd32=add(#u6,mpyi(Rs32,#U6))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 2)]
+#[cfg_attr(test, assert_instr(add, IU6 = 0, IU6_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_mpyi_IRI<const IU6: u32, const IU6_2: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU6, 6);
+    static_assert_uimm_bits!(IU6_2, 6);
+    hexagon_M4_mpyri_addi(IU6 as i32, rs, IU6_2 as i32)
+}
+
+/// `Rd32=add(Ru32,mpyi(Rs32,#u6))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(add, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_mpyi_RRI<const IU6: u32>(ru: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_M4_mpyri_addr(ru, rs, IU6 as i32)
+}
+
+/// `Rd32=add(Ru32,mpyi(#u6:2,Rs32))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(add, IU6_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_mpyi_RIR<const IU6_2: u32>(ru: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU6_2, 6);
+    hexagon_M4_mpyri_addr_u2(ru, IU6_2 as i32, rs)
+}
+
+/// `Rd32=add(#u6,mpyi(Rs32,Rt32))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[cfg_attr(test, assert_instr(add, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_mpyi_IRR<const IU6: u32>(rs: i32, rt: i32) -> i32 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_M4_mpyrr_addi(IU6 as i32, rs, rt)
+}
+
+/// `Ry32=add(Ru32,mpyi(Ry32,Rs32))`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_mpyi_RRR(ru: i32, ry: i32, rs: i32) -> i32 {
+    hexagon_M4_mpyrr_addr(ru, ry, rs)
+}
+
+/// `Rx32-=mpy(Rs32,Rt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(mpy))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpynac_RR_s1_sat(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_nac_up_s1_sat(rx, rs, rt)
+}
+
+/// `Rx32|=and(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_andor_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_or_and(rx, rs, rt)
+}
+
+/// `Rx32|=and(Rs32,~Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_andor_RnR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_or_andn(rx, rs, rt)
+}
+
+/// `Rx32|=or(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_oror_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_or_or(rx, rs, rt)
+}
+
+/// `Rx32|=xor(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(xor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_xoror_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_or_xor(rx, rs, rt)
+}
+
+/// `Rdd32=pmpyw(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(pmpyw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_pmpyw_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_M4_pmpyw(rs, rt)
+}
+
+/// `Rxx32^=pmpyw(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(pmpyw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_pmpywxacc_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M4_pmpyw_acc(rxx, rs, rt)
+}
+
+/// `Rdd32=vpmpyh(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vpmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vpmpyh_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_M4_vpmpyh(rs, rt)
+}
+
+/// `Rxx32^=vpmpyh(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vpmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vpmpyhxacc_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M4_vpmpyh_acc(rxx, rs, rt)
+}
+
+/// `Rxx32+=vrmpyweh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpywehacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M4_vrmpyeh_acc_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vrmpyweh(Rss32,Rtt32):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpywehacc_PP_s1(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M4_vrmpyeh_acc_s1(rxx, rss, rtt)
+}
+
+/// `Rdd32=vrmpyweh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpyweh_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M4_vrmpyeh_s0(rss, rtt)
+}
+
+/// `Rdd32=vrmpyweh(Rss32,Rtt32):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpyweh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpyweh_PP_s1(rss: i64, rtt: i64) -> i64 {
+    hexagon_M4_vrmpyeh_s1(rss, rtt)
+}
+
+/// `Rxx32+=vrmpywoh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpywohacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M4_vrmpyoh_acc_s0(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vrmpywoh(Rss32,Rtt32):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpywohacc_PP_s1(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M4_vrmpyoh_acc_s1(rxx, rss, rtt)
+}
+
+/// `Rdd32=vrmpywoh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpywoh_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M4_vrmpyoh_s0(rss, rtt)
+}
+
+/// `Rdd32=vrmpywoh(Rss32,Rtt32):<<1`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpywoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpywoh_PP_s1(rss: i64, rtt: i64) -> i64 {
+    hexagon_M4_vrmpyoh_s1(rss, rtt)
+}
+
+/// `Rx32^=and(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_andxacc_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_xor_and(rx, rs, rt)
+}
+
+/// `Rx32^=and(Rs32,~Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(and))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_andxacc_RnR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_xor_andn(rx, rs, rt)
+}
+
+/// `Rx32^=or(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(or))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_orxacc_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M4_xor_or(rx, rs, rt)
+}
+
+/// `Rxx32^=xor(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(xor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_xorxacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M4_xor_xacc(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vdmpybsu(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vdmpybsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vdmpybsuacc_PP_sat(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M5_vdmacbsu(rxx, rss, rtt)
+}
+
+/// `Rdd32=vdmpybsu(Rss32,Rtt32):sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vdmpybsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vdmpybsu_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_M5_vdmpybsu(rss, rtt)
+}
+
+/// `Rxx32+=vmpybsu(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpybsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpybsuacc_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M5_vmacbsu(rxx, rs, rt)
+}
+
+/// `Rxx32+=vmpybu(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpybu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpybuacc_RR(rxx: i64, rs: i32, rt: i32) -> i64 {
+    hexagon_M5_vmacbuu(rxx, rs, rt)
+}
+
+/// `Rdd32=vmpybsu(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpybsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpybsu_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_M5_vmpybsu(rs, rt)
+}
+
+/// `Rdd32=vmpybu(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vmpybu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vmpybu_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_M5_vmpybuu(rs, rt)
+}
+
+/// `Rxx32+=vrmpybsu(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpybsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpybsuacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M5_vrmacbsu(rxx, rss, rtt)
+}
+
+/// `Rxx32+=vrmpybu(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpybu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpybuacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M5_vrmacbuu(rxx, rss, rtt)
+}
+
+/// `Rdd32=vrmpybsu(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpybsu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpybsu_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M5_vrmpybsu(rss, rtt)
+}
+
+/// `Rdd32=vrmpybu(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrmpybu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrmpybu_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M5_vrmpybuu(rss, rtt)
+}
+
+/// `Rd32=addasl(Rt32,Rs32,#u3)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(addasl, IU3 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_addasl_RRI<const IU3: u32>(rt: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU3, 3);
+    hexagon_S2_addasl_rrri(rt, rs, IU3 as i32)
+}
+
+/// `Rdd32=asl(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(asl, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asl_PI<const IU6: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asl_i_p(rss, IU6 as i32)
+}
+
+/// `Rxx32+=asl(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_aslacc_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asl_i_p_acc(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32&=asl(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asland_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asl_i_p_and(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32-=asl(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_aslnac_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asl_i_p_nac(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32|=asl(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_aslor_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asl_i_p_or(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32^=asl(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_aslxacc_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asl_i_p_xacc(rxx, rss, IU6 as i32)
+}
+
+/// `Rd32=asl(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(asl, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asl_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asl_i_r(rs, IU5 as i32)
+}
+
+/// `Rx32+=asl(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_aslacc_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asl_i_r_acc(rx, rs, IU5 as i32)
+}
+
+/// `Rx32&=asl(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asland_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asl_i_r_and(rx, rs, IU5 as i32)
+}
+
+/// `Rx32-=asl(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_aslnac_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asl_i_r_nac(rx, rs, IU5 as i32)
+}
+
+/// `Rx32|=asl(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_aslor_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asl_i_r_or(rx, rs, IU5 as i32)
+}
+
+/// `Rd32=asl(Rs32,#u5):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(asl, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asl_RI_sat<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asl_i_r_sat(rs, IU5 as i32)
+}
+
+/// `Rx32^=asl(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asl, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_aslxacc_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asl_i_r_xacc(rx, rs, IU5 as i32)
+}
+
+/// `Rdd32=vaslh(Rss32,#u4)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vaslh, IU4 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaslh_PI<const IU4: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU4, 4);
+    hexagon_S2_asl_i_vh(rss, IU4 as i32)
+}
+
+/// `Rdd32=vaslw(Rss32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vaslw, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaslw_PI<const IU5: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asl_i_vw(rss, IU5 as i32)
+}
+
+/// `Rdd32=asl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asl_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asl_r_p(rss, rt)
+}
+
+/// `Rxx32+=asl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_aslacc_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asl_r_p_acc(rxx, rss, rt)
+}
+
+/// `Rxx32&=asl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asland_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asl_r_p_and(rxx, rss, rt)
+}
+
+/// `Rxx32-=asl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_aslnac_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asl_r_p_nac(rxx, rss, rt)
+}
+
+/// `Rxx32|=asl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_aslor_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asl_r_p_or(rxx, rss, rt)
+}
+
+/// `Rxx32^=asl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_aslxacc_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asl_r_p_xor(rxx, rss, rt)
+}
+
+/// `Rd32=asl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asl_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asl_r_r(rs, rt)
+}
+
+/// `Rx32+=asl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_aslacc_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asl_r_r_acc(rx, rs, rt)
+}
+
+/// `Rx32&=asl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asland_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asl_r_r_and(rx, rs, rt)
+}
+
+/// `Rx32-=asl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_aslnac_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asl_r_r_nac(rx, rs, rt)
+}
+
+/// `Rx32|=asl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_aslor_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asl_r_r_or(rx, rs, rt)
+}
+
+/// `Rd32=asl(Rs32,Rt32):sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asl_RR_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asl_r_r_sat(rs, rt)
+}
+
+/// `Rdd32=vaslh(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaslh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaslh_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asl_r_vh(rss, rt)
+}
+
+/// `Rdd32=vaslw(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vaslw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vaslw_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asl_r_vw(rss, rt)
+}
+
+/// `Rdd32=asr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(asr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asr_PI<const IU6: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asr_i_p(rss, IU6 as i32)
+}
+
+/// `Rxx32+=asr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asracc_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asr_i_p_acc(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32&=asr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asrand_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asr_i_p_and(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32-=asr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asrnac_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asr_i_p_nac(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32|=asr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asror_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asr_i_p_or(rxx, rss, IU6 as i32)
+}
+
+/// `Rdd32=asr(Rss32,#u6):rnd`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(asr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asr_PI_rnd<const IU6: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asr_i_p_rnd(rss, IU6 as i32)
+}
+
+/// `Rdd32=asrrnd(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(asrrnd, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asrrnd_PI<const IU6: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_asr_i_p_rnd_goodsyntax(rss, IU6 as i32)
+}
+
+/// `Rd32=asr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(asr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asr_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asr_i_r(rs, IU5 as i32)
+}
+
+/// `Rx32+=asr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asracc_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asr_i_r_acc(rx, rs, IU5 as i32)
+}
+
+/// `Rx32&=asr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asrand_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asr_i_r_and(rx, rs, IU5 as i32)
+}
+
+/// `Rx32-=asr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asrnac_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asr_i_r_nac(rx, rs, IU5 as i32)
+}
+
+/// `Rx32|=asr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(asr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asror_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asr_i_r_or(rx, rs, IU5 as i32)
+}
+
+/// `Rd32=asr(Rs32,#u5):rnd`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(asr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asr_RI_rnd<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asr_i_r_rnd(rs, IU5 as i32)
+}
+
+/// `Rd32=asrrnd(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(asrrnd, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asrrnd_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asr_i_r_rnd_goodsyntax(rs, IU5 as i32)
+}
+
+/// `Rd32=vasrw(Rss32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vasrw, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vasrw_PI<const IU5: u32>(rss: i64) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asr_i_svw_trun(rss, IU5 as i32)
+}
+
+/// `Rdd32=vasrh(Rss32,#u4)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vasrh, IU4 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vasrh_PI<const IU4: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU4, 4);
+    hexagon_S2_asr_i_vh(rss, IU4 as i32)
+}
+
+/// `Rdd32=vasrw(Rss32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vasrw, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vasrw_PI<const IU5: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_asr_i_vw(rss, IU5 as i32)
+}
+
+/// `Rdd32=asr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asr_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asr_r_p(rss, rt)
+}
+
+/// `Rxx32+=asr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asracc_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asr_r_p_acc(rxx, rss, rt)
+}
+
+/// `Rxx32&=asr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asrand_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asr_r_p_and(rxx, rss, rt)
+}
+
+/// `Rxx32-=asr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asrnac_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asr_r_p_nac(rxx, rss, rt)
+}
+
+/// `Rxx32|=asr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asror_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asr_r_p_or(rxx, rss, rt)
+}
+
+/// `Rxx32^=asr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_asrxacc_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asr_r_p_xor(rxx, rss, rt)
+}
+
+/// `Rd32=asr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asr_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asr_r_r(rs, rt)
+}
+
+/// `Rx32+=asr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asracc_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asr_r_r_acc(rx, rs, rt)
+}
+
+/// `Rx32&=asr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asrand_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asr_r_r_and(rx, rs, rt)
+}
+
+/// `Rx32-=asr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asrnac_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asr_r_r_nac(rx, rs, rt)
+}
+
+/// `Rx32|=asr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asror_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asr_r_r_or(rx, rs, rt)
+}
+
+/// `Rd32=asr(Rs32,Rt32):sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(asr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_asr_RR_sat(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_asr_r_r_sat(rs, rt)
+}
+
+/// `Rd32=vasrw(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vasrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vasrw_PR(rss: i64, rt: i32) -> i32 {
+    hexagon_S2_asr_r_svw_trun(rss, rt)
+}
+
+/// `Rdd32=vasrh(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vasrh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vasrh_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asr_r_vh(rss, rt)
+}
+
+/// `Rdd32=vasrw(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vasrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vasrw_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_asr_r_vw(rss, rt)
+}
+
+/// `Rd32=brev(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(brev))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_brev_R(rs: i32) -> i32 {
+    hexagon_S2_brev(rs)
+}
+
+/// `Rdd32=brev(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(brev))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_brev_P(rss: i64) -> i64 {
+    hexagon_S2_brevp(rss)
+}
+
+/// `Rd32=cl0(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cl0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cl0_R(rs: i32) -> i32 {
+    hexagon_S2_cl0(rs)
+}
+
+/// `Rd32=cl0(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cl0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cl0_P(rss: i64) -> i32 {
+    hexagon_S2_cl0p(rss)
+}
+
+/// `Rd32=cl1(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cl1))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cl1_R(rs: i32) -> i32 {
+    hexagon_S2_cl1(rs)
+}
+
+/// `Rd32=cl1(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(cl1))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cl1_P(rss: i64) -> i32 {
+    hexagon_S2_cl1p(rss)
+}
+
+/// `Rd32=clb(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(clb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_clb_R(rs: i32) -> i32 {
+    hexagon_S2_clb(rs)
+}
+
+/// `Rd32=normamt(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(normamt))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_normamt_R(rs: i32) -> i32 {
+    hexagon_S2_clbnorm(rs)
+}
+
+/// `Rd32=clb(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(clb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_clb_P(rss: i64) -> i32 {
+    hexagon_S2_clbp(rss)
+}
+
+/// `Rd32=clrbit(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(clrbit, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_clrbit_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_clrbit_i(rs, IU5 as i32)
+}
+
+/// `Rd32=clrbit(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(clrbit))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_clrbit_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_clrbit_r(rs, rt)
+}
+
+/// `Rd32=ct0(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(ct0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_ct0_R(rs: i32) -> i32 {
+    hexagon_S2_ct0(rs)
+}
+
+/// `Rd32=ct0(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(ct0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_ct0_P(rss: i64) -> i32 {
+    hexagon_S2_ct0p(rss)
+}
+
+/// `Rd32=ct1(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(ct1))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_ct1_R(rs: i32) -> i32 {
+    hexagon_S2_ct1(rs)
+}
+
+/// `Rd32=ct1(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(ct1))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_ct1_P(rss: i64) -> i32 {
+    hexagon_S2_ct1p(rss)
+}
+
+/// `Rdd32=deinterleave(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(deinterleave))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_deinterleave_P(rss: i64) -> i64 {
+    hexagon_S2_deinterleave(rss)
+}
+
+/// `Rd32=extractu(Rs32,#u5,#U5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1, 2)]
+#[cfg_attr(test, assert_instr(extractu, IU5 = 0, IU5_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_extractu_RII<const IU5: u32, const IU5_2: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    static_assert_uimm_bits!(IU5_2, 5);
+    hexagon_S2_extractu(rs, IU5 as i32, IU5_2 as i32)
+}
+
+/// `Rd32=extractu(Rs32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(extractu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_extractu_RP(rs: i32, rtt: i64) -> i32 {
+    hexagon_S2_extractu_rp(rs, rtt)
+}
+
+/// `Rdd32=extractu(Rss32,#u6,#U6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1, 2)]
+#[cfg_attr(test, assert_instr(extractu, IU6 = 0, IU6_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_extractu_PII<const IU6: u32, const IU6_2: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    static_assert_uimm_bits!(IU6_2, 6);
+    hexagon_S2_extractup(rss, IU6 as i32, IU6_2 as i32)
+}
+
+/// `Rdd32=extractu(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(extractu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_extractu_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_S2_extractup_rp(rss, rtt)
+}
+
+/// `Rx32=insert(Rs32,#u5,#U5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2, 3)]
+#[cfg_attr(test, assert_instr(insert, IU5 = 0, IU5_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_insert_RII<const IU5: u32, const IU5_2: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    static_assert_uimm_bits!(IU5_2, 5);
+    hexagon_S2_insert(rx, rs, IU5 as i32, IU5_2 as i32)
+}
+
+/// `Rx32=insert(Rs32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(insert))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_insert_RP(rx: i32, rs: i32, rtt: i64) -> i32 {
+    hexagon_S2_insert_rp(rx, rs, rtt)
+}
+
+/// `Rxx32=insert(Rss32,#u6,#U6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2, 3)]
+#[cfg_attr(test, assert_instr(insert, IU6 = 0, IU6_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_insert_PII<const IU6: u32, const IU6_2: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    static_assert_uimm_bits!(IU6_2, 6);
+    hexagon_S2_insertp(rxx, rss, IU6 as i32, IU6_2 as i32)
+}
+
+/// `Rxx32=insert(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(insert))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_insert_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_S2_insertp_rp(rxx, rss, rtt)
+}
+
+/// `Rdd32=interleave(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(interleave))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_interleave_P(rss: i64) -> i64 {
+    hexagon_S2_interleave(rss)
+}
+
+/// `Rdd32=lfs(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lfs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lfs_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_S2_lfsp(rss, rtt)
+}
+
+/// `Rdd32=lsl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsl_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsl_r_p(rss, rt)
+}
+
+/// `Rxx32+=lsl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lslacc_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsl_r_p_acc(rxx, rss, rt)
+}
+
+/// `Rxx32&=lsl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsland_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsl_r_p_and(rxx, rss, rt)
+}
+
+/// `Rxx32-=lsl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lslnac_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsl_r_p_nac(rxx, rss, rt)
+}
+
+/// `Rxx32|=lsl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lslor_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsl_r_p_or(rxx, rss, rt)
+}
+
+/// `Rxx32^=lsl(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lslxacc_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsl_r_p_xor(rxx, rss, rt)
+}
+
+/// `Rd32=lsl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsl_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsl_r_r(rs, rt)
+}
+
+/// `Rx32+=lsl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lslacc_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsl_r_r_acc(rx, rs, rt)
+}
+
+/// `Rx32&=lsl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsland_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsl_r_r_and(rx, rs, rt)
+}
+
+/// `Rx32-=lsl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lslnac_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsl_r_r_nac(rx, rs, rt)
+}
+
+/// `Rx32|=lsl(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lslor_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsl_r_r_or(rx, rs, rt)
+}
+
+/// `Rdd32=vlslh(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vlslh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vlslh_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsl_r_vh(rss, rt)
+}
+
+/// `Rdd32=vlslw(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vlslw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vlslw_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsl_r_vw(rss, rt)
+}
+
+/// `Rdd32=lsr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(lsr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsr_PI<const IU6: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_lsr_i_p(rss, IU6 as i32)
+}
+
+/// `Rxx32+=lsr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsracc_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_lsr_i_p_acc(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32&=lsr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsrand_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_lsr_i_p_and(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32-=lsr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsrnac_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_lsr_i_p_nac(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32|=lsr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsror_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_lsr_i_p_or(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32^=lsr(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsrxacc_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S2_lsr_i_p_xacc(rxx, rss, IU6 as i32)
+}
+
+/// `Rd32=lsr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(lsr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsr_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_lsr_i_r(rs, IU5 as i32)
+}
+
+/// `Rx32+=lsr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsracc_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_lsr_i_r_acc(rx, rs, IU5 as i32)
+}
+
+/// `Rx32&=lsr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsrand_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_lsr_i_r_and(rx, rs, IU5 as i32)
+}
+
+/// `Rx32-=lsr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsrnac_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_lsr_i_r_nac(rx, rs, IU5 as i32)
+}
+
+/// `Rx32|=lsr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsror_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_lsr_i_r_or(rx, rs, IU5 as i32)
+}
+
+/// `Rx32^=lsr(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(lsr, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsrxacc_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_lsr_i_r_xacc(rx, rs, IU5 as i32)
+}
+
+/// `Rdd32=vlsrh(Rss32,#u4)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vlsrh, IU4 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vlsrh_PI<const IU4: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU4, 4);
+    hexagon_S2_lsr_i_vh(rss, IU4 as i32)
+}
+
+/// `Rdd32=vlsrw(Rss32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vlsrw, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vlsrw_PI<const IU5: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_lsr_i_vw(rss, IU5 as i32)
+}
+
+/// `Rdd32=lsr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsr_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsr_r_p(rss, rt)
+}
+
+/// `Rxx32+=lsr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsracc_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsr_r_p_acc(rxx, rss, rt)
+}
+
+/// `Rxx32&=lsr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsrand_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsr_r_p_and(rxx, rss, rt)
+}
+
+/// `Rxx32-=lsr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsrnac_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsr_r_p_nac(rxx, rss, rt)
+}
+
+/// `Rxx32|=lsr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsror_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsr_r_p_or(rxx, rss, rt)
+}
+
+/// `Rxx32^=lsr(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_lsrxacc_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsr_r_p_xor(rxx, rss, rt)
+}
+
+/// `Rd32=lsr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsr_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsr_r_r(rs, rt)
+}
+
+/// `Rx32+=lsr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsracc_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsr_r_r_acc(rx, rs, rt)
+}
+
+/// `Rx32&=lsr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsrand_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsr_r_r_and(rx, rs, rt)
+}
+
+/// `Rx32-=lsr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsrnac_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsr_r_r_nac(rx, rs, rt)
+}
+
+/// `Rx32|=lsr(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(lsr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsror_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_S2_lsr_r_r_or(rx, rs, rt)
+}
+
+/// `Rdd32=vlsrh(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vlsrh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vlsrh_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsr_r_vh(rss, rt)
+}
+
+/// `Rdd32=vlsrw(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vlsrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vlsrw_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_lsr_r_vw(rss, rt)
+}
+
+/// `Rdd32=packhl(Rs32,Rt32)`
+///
+/// Instruction Type: ALU32_3op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(packhl))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_packhl_RR(rs: i32, rt: i32) -> i64 {
+    hexagon_S2_packhl(rs, rt)
+}
+
+/// `Rd32=parity(Rss32,Rtt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(parity))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_parity_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_S2_parityp(rss, rtt)
+}
+
+/// `Rd32=setbit(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(setbit, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_setbit_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_setbit_i(rs, IU5 as i32)
+}
+
+/// `Rd32=setbit(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(setbit))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_setbit_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_setbit_r(rs, rt)
+}
+
+/// `Rdd32=shuffeb(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(shuffeb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_shuffeb_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_S2_shuffeb(rss, rtt)
+}
+
+/// `Rdd32=shuffeh(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(shuffeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_shuffeh_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_S2_shuffeh(rss, rtt)
+}
+
+/// `Rdd32=shuffob(Rtt32,Rss32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(shuffob))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_shuffob_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_S2_shuffob(rtt, rss)
+}
+
+/// `Rdd32=shuffoh(Rtt32,Rss32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(shuffoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_shuffoh_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_S2_shuffoh(rtt, rss)
+}
+
+/// `Rd32=vsathb(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsathb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsathb_R(rs: i32) -> i32 {
+    hexagon_S2_svsathb(rs)
+}
+
+/// `Rd32=vsathub(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsathub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsathub_R(rs: i32) -> i32 {
+    hexagon_S2_svsathub(rs)
+}
+
+/// `Rx32=tableidxb(Rs32,#u4,#U5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(2, 3)]
+#[cfg_attr(test, assert_instr(tableidxb, IU4 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_tableidxb_RII<const IU4: u32, const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU4, 4);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_tableidxb_goodsyntax(rx, rs, IU4 as i32, IU5 as i32)
+}
+
+/// `Rx32=tableidxd(Rs32,#u4,#U5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(2, 3)]
+#[cfg_attr(test, assert_instr(tableidxd, IU4 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_tableidxd_RII<const IU4: u32, const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU4, 4);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_tableidxd_goodsyntax(rx, rs, IU4 as i32, IU5 as i32)
+}
+
+/// `Rx32=tableidxh(Rs32,#u4,#U5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(2, 3)]
+#[cfg_attr(test, assert_instr(tableidxh, IU4 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_tableidxh_RII<const IU4: u32, const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU4, 4);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_tableidxh_goodsyntax(rx, rs, IU4 as i32, IU5 as i32)
+}
+
+/// `Rx32=tableidxw(Rs32,#u4,#U5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(2, 3)]
+#[cfg_attr(test, assert_instr(tableidxw, IU4 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_tableidxw_RII<const IU4: u32, const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU4, 4);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_tableidxw_goodsyntax(rx, rs, IU4 as i32, IU5 as i32)
+}
+
+/// `Rd32=togglebit(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(togglebit, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_togglebit_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_togglebit_i(rs, IU5 as i32)
+}
+
+/// `Rd32=togglebit(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(togglebit))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_togglebit_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_togglebit_r(rs, rt)
+}
+
+/// `Pd4=tstbit(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(tstbit, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_tstbit_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S2_tstbit_i(rs, IU5 as i32)
+}
+
+/// `Pd4=tstbit(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(tstbit))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_tstbit_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S2_tstbit_r(rs, rt)
+}
+
+/// `Rdd32=valignb(Rtt32,Rss32,#u3)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(valignb, IU3 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_valignb_PPI<const IU3: u32>(rtt: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU3, 3);
+    hexagon_S2_valignib(rtt, rss, IU3 as i32)
+}
+
+/// `Rdd32=valignb(Rtt32,Rss32,Pu4)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(valignb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_valignb_PPp(rtt: i64, rss: i64, pu: i32) -> i64 {
+    hexagon_S2_valignrb(rtt, rss, pu)
+}
+
+/// `Rdd32=vcnegh(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcnegh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vcnegh_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_vcnegh(rss, rt)
+}
+
+/// `Rdd32=vcrotate(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vcrotate))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vcrotate_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_S2_vcrotate(rss, rt)
+}
+
+/// `Rxx32+=vrcnegh(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrcnegh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcneghacc_PR(rxx: i64, rss: i64, rt: i32) -> i64 {
+    hexagon_S2_vrcnegh(rxx, rss, rt)
+}
+
+/// `Rd32=vrndwh(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrndwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vrndwh_P(rss: i64) -> i32 {
+    hexagon_S2_vrndpackwh(rss)
+}
+
+/// `Rd32=vrndwh(Rss32):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vrndwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vrndwh_P_sat(rss: i64) -> i32 {
+    hexagon_S2_vrndpackwhs(rss)
+}
+
+/// `Rd32=vsathb(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsathb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsathb_P(rss: i64) -> i32 {
+    hexagon_S2_vsathb(rss)
+}
+
+/// `Rdd32=vsathb(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsathb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsathb_P(rss: i64) -> i64 {
+    hexagon_S2_vsathb_nopack(rss)
+}
+
+/// `Rd32=vsathub(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsathub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsathub_P(rss: i64) -> i32 {
+    hexagon_S2_vsathub(rss)
+}
+
+/// `Rdd32=vsathub(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsathub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsathub_P(rss: i64) -> i64 {
+    hexagon_S2_vsathub_nopack(rss)
+}
+
+/// `Rd32=vsatwh(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsatwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsatwh_P(rss: i64) -> i32 {
+    hexagon_S2_vsatwh(rss)
+}
+
+/// `Rdd32=vsatwh(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsatwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsatwh_P(rss: i64) -> i64 {
+    hexagon_S2_vsatwh_nopack(rss)
+}
+
+/// `Rd32=vsatwuh(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsatwuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsatwuh_P(rss: i64) -> i32 {
+    hexagon_S2_vsatwuh(rss)
+}
+
+/// `Rdd32=vsatwuh(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsatwuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsatwuh_P(rss: i64) -> i64 {
+    hexagon_S2_vsatwuh_nopack(rss)
+}
+
+/// `Rd32=vsplatb(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsplatb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vsplatb_R(rs: i32) -> i32 {
+    hexagon_S2_vsplatrb(rs)
+}
+
+/// `Rdd32=vsplath(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsplath))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsplath_R(rs: i32) -> i64 {
+    hexagon_S2_vsplatrh(rs)
+}
+
+/// `Rdd32=vspliceb(Rss32,Rtt32,#u3)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(vspliceb, IU3 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vspliceb_PPI<const IU3: u32>(rss: i64, rtt: i64) -> i64 {
+    static_assert_uimm_bits!(IU3, 3);
+    hexagon_S2_vspliceib(rss, rtt, IU3 as i32)
+}
+
+/// `Rdd32=vspliceb(Rss32,Rtt32,Pu4)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vspliceb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vspliceb_PPp(rss: i64, rtt: i64, pu: i32) -> i64 {
+    hexagon_S2_vsplicerb(rss, rtt, pu)
+}
+
+/// `Rdd32=vsxtbh(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsxtbh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsxtbh_R(rs: i32) -> i64 {
+    hexagon_S2_vsxtbh(rs)
+}
+
+/// `Rdd32=vsxthw(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vsxthw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsxthw_R(rs: i32) -> i64 {
+    hexagon_S2_vsxthw(rs)
+}
+
+/// `Rd32=vtrunehb(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vtrunehb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vtrunehb_P(rss: i64) -> i32 {
+    hexagon_S2_vtrunehb(rss)
+}
+
+/// `Rdd32=vtrunewh(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vtrunewh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vtrunewh_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_S2_vtrunewh(rss, rtt)
+}
+
+/// `Rd32=vtrunohb(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vtrunohb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vtrunohb_P(rss: i64) -> i32 {
+    hexagon_S2_vtrunohb(rss)
+}
+
+/// `Rdd32=vtrunowh(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vtrunowh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vtrunowh_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_S2_vtrunowh(rss, rtt)
+}
+
+/// `Rdd32=vzxtbh(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vzxtbh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vzxtbh_R(rs: i32) -> i64 {
+    hexagon_S2_vzxtbh(rs)
+}
+
+/// `Rdd32=vzxthw(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vzxthw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vzxthw_R(rs: i32) -> i64 {
+    hexagon_S2_vzxthw(rs)
+}
+
+/// `Rd32=add(Rs32,add(Ru32,#s6))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(add, IS6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_add_RRI<const IS6: i32>(rs: i32, ru: i32) -> i32 {
+    static_assert_simm_bits!(IS6, 6);
+    hexagon_S4_addaddi(rs, ru, IS6)
+}
+
+/// `Rx32=add(#u8,asl(Rx32,#U5))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 2)]
+#[cfg_attr(test, assert_instr(add, IU8 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_asl_IRI<const IU8: u32, const IU5: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S4_addi_asl_ri(IU8 as i32, rx, IU5 as i32)
+}
+
+/// `Rx32=add(#u8,lsr(Rx32,#U5))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 2)]
+#[cfg_attr(test, assert_instr(add, IU8 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_lsr_IRI<const IU8: u32, const IU5: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S4_addi_lsr_ri(IU8 as i32, rx, IU5 as i32)
+}
+
+/// `Rx32=and(#u8,asl(Rx32,#U5))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 2)]
+#[cfg_attr(test, assert_instr(and, IU8 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_and_asl_IRI<const IU8: u32, const IU5: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S4_andi_asl_ri(IU8 as i32, rx, IU5 as i32)
+}
+
+/// `Rx32=and(#u8,lsr(Rx32,#U5))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 2)]
+#[cfg_attr(test, assert_instr(and, IU8 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_and_lsr_IRI<const IU8: u32, const IU5: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S4_andi_lsr_ri(IU8 as i32, rx, IU5 as i32)
+}
+
+/// `Rd32=add(clb(Rs32),#s6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(add, IS6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_clb_RI<const IS6: i32>(rs: i32) -> i32 {
+    static_assert_simm_bits!(IS6, 6);
+    hexagon_S4_clbaddi(rs, IS6)
+}
+
+/// `Rd32=add(clb(Rss32),#s6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(add, IS6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_clb_PI<const IS6: i32>(rss: i64) -> i32 {
+    static_assert_simm_bits!(IS6, 6);
+    hexagon_S4_clbpaddi(rss, IS6)
+}
+
+/// `Rd32=normamt(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(normamt))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_normamt_P(rss: i64) -> i32 {
+    hexagon_S4_clbpnorm(rss)
+}
+
+/// `Rd32=extract(Rs32,#u5,#U5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1, 2)]
+#[cfg_attr(test, assert_instr(extract, IU5 = 0, IU5_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_extract_RII<const IU5: u32, const IU5_2: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    static_assert_uimm_bits!(IU5_2, 5);
+    hexagon_S4_extract(rs, IU5 as i32, IU5_2 as i32)
+}
+
+/// `Rd32=extract(Rs32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(extract))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_extract_RP(rs: i32, rtt: i64) -> i32 {
+    hexagon_S4_extract_rp(rs, rtt)
+}
+
+/// `Rdd32=extract(Rss32,#u6,#U6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1, 2)]
+#[cfg_attr(test, assert_instr(extract, IU6 = 0, IU6_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_extract_PII<const IU6: u32, const IU6_2: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    static_assert_uimm_bits!(IU6_2, 6);
+    hexagon_S4_extractp(rss, IU6 as i32, IU6_2 as i32)
+}
+
+/// `Rdd32=extract(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(extract))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_extract_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_S4_extractp_rp(rss, rtt)
+}
+
+/// `Rd32=lsl(#s6,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0)]
+#[cfg_attr(test, assert_instr(lsl, IS6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_lsl_IR<const IS6: i32>(rt: i32) -> i32 {
+    static_assert_simm_bits!(IS6, 6);
+    hexagon_S4_lsli(IS6, rt)
+}
+
+/// `Pd4=!tstbit(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_tstbit_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S4_ntstbit_i(rs, IU5 as i32)
+}
+
+/// `Pd4=!tstbit(Rs32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_tstbit_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S4_ntstbit_r(rs, rt)
+}
+
+/// `Rx32|=and(Rs32,#s10)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(and, IS10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_andor_RI<const IS10: i32>(rx: i32, rs: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_S4_or_andi(rx, rs, IS10)
+}
+
+/// `Rx32=or(Ru32,and(Rx32,#s10))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(or, IS10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_or_and_RRI<const IS10: i32>(ru: i32, rx: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_S4_or_andix(ru, rx, IS10)
+}
+
+/// `Rx32|=or(Rs32,#s10)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(or, IS10 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_oror_RI<const IS10: i32>(rx: i32, rs: i32) -> i32 {
+    static_assert_simm_bits!(IS10, 10);
+    hexagon_S4_or_ori(rx, rs, IS10)
+}
+
+/// `Rx32=or(#u8,asl(Rx32,#U5))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 2)]
+#[cfg_attr(test, assert_instr(or, IU8 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_or_asl_IRI<const IU8: u32, const IU5: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S4_ori_asl_ri(IU8 as i32, rx, IU5 as i32)
+}
+
+/// `Rx32=or(#u8,lsr(Rx32,#U5))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 2)]
+#[cfg_attr(test, assert_instr(or, IU8 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_or_lsr_IRI<const IU8: u32, const IU5: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S4_ori_lsr_ri(IU8 as i32, rx, IU5 as i32)
+}
+
+/// `Rd32=parity(Rs32,Rt32)`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(parity))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_parity_RR(rs: i32, rt: i32) -> i32 {
+    hexagon_S4_parity(rs, rt)
+}
+
+/// `Rd32=add(Rs32,sub(#s6,Ru32))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(add, IS6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_add_sub_RIR<const IS6: i32>(rs: i32, ru: i32) -> i32 {
+    static_assert_simm_bits!(IS6, 6);
+    hexagon_S4_subaddi(rs, IS6, ru)
+}
+
+/// `Rx32=sub(#u8,asl(Rx32,#U5))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 2)]
+#[cfg_attr(test, assert_instr(sub, IU8 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_asl_IRI<const IU8: u32, const IU5: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S4_subi_asl_ri(IU8 as i32, rx, IU5 as i32)
+}
+
+/// `Rx32=sub(#u8,lsr(Rx32,#U5))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(0, 2)]
+#[cfg_attr(test, assert_instr(sub, IU8 = 0, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_sub_lsr_IRI<const IU8: u32, const IU5: u32>(rx: i32) -> i32 {
+    static_assert_uimm_bits!(IU8, 8);
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S4_subi_lsr_ri(IU8 as i32, rx, IU5 as i32)
+}
+
+/// `Rdd32=vrcrotate(Rss32,Rt32,#u2)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(vrcrotate, IU2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcrotate_PRI<const IU2: u32>(rss: i64, rt: i32) -> i64 {
+    static_assert_uimm_bits!(IU2, 2);
+    hexagon_S4_vrcrotate(rss, rt, IU2 as i32)
+}
+
+/// `Rxx32+=vrcrotate(Rss32,Rt32,#u2)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(test, assert_instr(vrcrotate, IU2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vrcrotateacc_PRI<const IU2: u32>(rxx: i64, rss: i64, rt: i32) -> i64 {
+    static_assert_uimm_bits!(IU2, 2);
+    hexagon_S4_vrcrotate_acc(rxx, rss, rt, IU2 as i32)
+}
+
+/// `Rdd32=vxaddsubh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vxaddsubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vxaddsubh_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_S4_vxaddsubh(rss, rtt)
+}
+
+/// `Rdd32=vxaddsubh(Rss32,Rtt32):rnd:>>1:sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vxaddsubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vxaddsubh_PP_rnd_rs1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_S4_vxaddsubhr(rss, rtt)
+}
+
+/// `Rdd32=vxaddsubw(Rss32,Rtt32):sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vxaddsubw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vxaddsubw_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_S4_vxaddsubw(rss, rtt)
+}
+
+/// `Rdd32=vxsubaddh(Rss32,Rtt32):sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vxsubaddh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vxsubaddh_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_S4_vxsubaddh(rss, rtt)
+}
+
+/// `Rdd32=vxsubaddh(Rss32,Rtt32):rnd:>>1:sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vxsubaddh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vxsubaddh_PP_rnd_rs1_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_S4_vxsubaddhr(rss, rtt)
+}
+
+/// `Rdd32=vxsubaddw(Rss32,Rtt32):sat`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(vxsubaddw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vxsubaddw_PP_sat(rss: i64, rtt: i64) -> i64 {
+    hexagon_S4_vxsubaddw(rss, rtt)
+}
+
+/// `Rd32=vasrhub(Rss32,#u4):rnd:sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vasrhub, IU4 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vasrhub_PI_rnd_sat<const IU4: u32>(rss: i64) -> i32 {
+    static_assert_uimm_bits!(IU4, 4);
+    hexagon_S5_asrhub_rnd_sat_goodsyntax(rss, IU4 as i32)
+}
+
+/// `Rd32=vasrhub(Rss32,#u4):sat`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vasrhub, IU4 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vasrhub_PI_sat<const IU4: u32>(rss: i64) -> i32 {
+    static_assert_uimm_bits!(IU4, 4);
+    hexagon_S5_asrhub_sat(rss, IU4 as i32)
+}
+
+/// `Rd32=popcount(Rss32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+#[inline(always)]
+#[cfg_attr(test, assert_instr(popcount))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_popcount_P(rss: i64) -> i32 {
+    hexagon_S5_popcountp(rss)
+}
+
+/// `Rdd32=vasrh(Rss32,#u4):rnd`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vasrh, IU4 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vasrh_PI_rnd<const IU4: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU4, 4);
+    hexagon_S5_vasrhrnd_goodsyntax(rss, IU4 as i32)
+}
+
+/// `dccleana(Rs32)`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+#[inline(always)]
+#[cfg_attr(test, assert_instr(dccleana))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_dccleana_A(rs: i32) {
+    hexagon_Y2_dccleana(rs)
+}
+
+/// `dccleaninva(Rs32)`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+#[inline(always)]
+#[cfg_attr(test, assert_instr(dccleaninva))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_dccleaninva_A(rs: i32) {
+    hexagon_Y2_dccleaninva(rs)
+}
+
+/// `dcfetch(Rs32)`
+///
+/// Instruction Type: MAPPING
+/// Execution Slots: SLOT0123
+#[inline(always)]
+#[cfg_attr(test, assert_instr(dcfetch))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_dcfetch_A(rs: i32) {
+    hexagon_Y2_dcfetch(rs)
+}
+
+/// `dcinva(Rs32)`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+#[inline(always)]
+#[cfg_attr(test, assert_instr(dcinva))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_dcinva_A(rs: i32) {
+    hexagon_Y2_dcinva(rs)
+}
+
+/// `dczeroa(Rs32)`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+#[inline(always)]
+#[cfg_attr(test, assert_instr(dczeroa))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_dczeroa_A(rs: i32) {
+    hexagon_Y2_dczeroa(rs)
+}
+
+/// `l2fetch(Rs32,Rt32)`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+#[inline(always)]
+#[cfg_attr(test, assert_instr(l2fetch))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_l2fetch_AR(rs: i32, rt: i32) {
+    hexagon_Y4_l2fetch(rs, rt)
+}
+
+/// `l2fetch(Rs32,Rtt32)`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+#[inline(always)]
+#[cfg_attr(test, assert_instr(l2fetch))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_l2fetch_AP(rs: i32, rtt: i64) {
+    hexagon_Y5_l2fetch(rs, rtt)
+}
+
+/// `Rdd32=rol(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(rol, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_rol_PI<const IU6: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S6_rol_i_p(rss, IU6 as i32)
+}
+
+/// `Rxx32+=rol(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_rolacc_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S6_rol_i_p_acc(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32&=rol(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_roland_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S6_rol_i_p_and(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32-=rol(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_rolnac_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S6_rol_i_p_nac(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32|=rol(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_rolor_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S6_rol_i_p_or(rxx, rss, IU6 as i32)
+}
+
+/// `Rxx32^=rol(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_rolxacc_PI<const IU6: u32>(rxx: i64, rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_S6_rol_i_p_xacc(rxx, rss, IU6 as i32)
+}
+
+/// `Rd32=rol(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(rol, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_rol_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S6_rol_i_r(rs, IU5 as i32)
+}
+
+/// `Rx32+=rol(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_rolacc_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S6_rol_i_r_acc(rx, rs, IU5 as i32)
+}
+
+/// `Rx32&=rol(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_roland_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S6_rol_i_r_and(rx, rs, IU5 as i32)
+}
+
+/// `Rx32-=rol(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_rolnac_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S6_rol_i_r_nac(rx, rs, IU5 as i32)
+}
+
+/// `Rx32|=rol(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_rolor_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S6_rol_i_r_or(rx, rs, IU5 as i32)
+}
+
+/// `Rx32^=rol(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V60
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(test, assert_instr(rol, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_rolxacc_RI<const IU5: u32>(rx: i32, rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_S6_rol_i_r_xacc(rx, rs, IU5 as i32)
+}
+
+/// `Rdd32=vabsdiffb(Rtt32,Rss32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V62
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))]
+#[cfg_attr(test, assert_instr(vabsdiffb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vabsdiffb_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_M6_vabsdiffb(rtt, rss)
+}
+
+/// `Rdd32=vabsdiffub(Rtt32,Rss32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V62
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))]
+#[cfg_attr(test, assert_instr(vabsdiffub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vabsdiffub_PP(rtt: i64, rss: i64) -> i64 {
+    hexagon_M6_vabsdiffub(rtt, rss)
+}
+
+/// `Rdd32=vsplatb(Rs32)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V62
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))]
+#[cfg_attr(test, assert_instr(vsplatb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vsplatb_R(rs: i32) -> i64 {
+    hexagon_S6_vsplatrbp(rs)
+}
+
+/// `Rdd32=vtrunehb(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+/// Requires: V62
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))]
+#[cfg_attr(test, assert_instr(vtrunehb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vtrunehb_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_S6_vtrunehb_ppp(rss, rtt)
+}
+
+/// `Rdd32=vtrunohb(Rss32,Rtt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+/// Requires: V62
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))]
+#[cfg_attr(test, assert_instr(vtrunohb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vtrunohb_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_S6_vtrunohb_ppp(rss, rtt)
+}
+
+/// `Pd4=!any8(vcmpb.eq(Rss32,Rtt32))`
+///
+/// Instruction Type: ALU64
+/// Execution Slots: SLOT23
+/// Requires: V65
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_p_not_any8_vcmpb_eq_PP(rss: i64, rtt: i64) -> i32 {
+    hexagon_A6_vcmpbeq_notany(rss, rtt)
+}
+
+/// `Rdd32=dfadd(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V66
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v66"))]
+#[cfg_attr(test, assert_instr(dfadd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfadd_PP(rss: f64, rtt: f64) -> f64 {
+    hexagon_F2_dfadd(rss, rtt)
+}
+
+/// `Rdd32=dfsub(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V66
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v66"))]
+#[cfg_attr(test, assert_instr(dfsub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfsub_PP(rss: f64, rtt: f64) -> f64 {
+    hexagon_F2_dfsub(rss, rtt)
+}
+
+/// `Rx32-=mpyi(Rs32,Rt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V66
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v66"))]
+#[cfg_attr(test, assert_instr(mpyi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mpyinac_RR(rx: i32, rs: i32, rt: i32) -> i32 {
+    hexagon_M2_mnaci(rx, rs, rt)
+}
+
+/// `Rd32=mask(#u5,#U5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V66
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v66"))]
+#[rustc_legacy_const_generics(0, 1)]
+#[cfg_attr(test, assert_instr(mask, IU5 = 0, IU5_2 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_mask_II<const IU5: u32, const IU5_2: u32>() -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    static_assert_uimm_bits!(IU5_2, 5);
+    hexagon_S2_mask(IU5 as i32, IU5_2 as i32)
+}
+
+/// `Rd32=clip(Rs32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(clip, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_clip_RI<const IU5: u32>(rs: i32) -> i32 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_A7_clip(rs, IU5 as i32)
+}
+
+/// `Rdd32=cround(Rss32,#u6)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(cround, IU6 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cround_PI<const IU6: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU6, 6);
+    hexagon_A7_croundd_ri(rss, IU6 as i32)
+}
+
+/// `Rdd32=cround(Rss32,Rt32)`
+///
+/// Instruction Type: S_3op
+/// Execution Slots: SLOT23
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cround))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cround_PR(rss: i64, rt: i32) -> i64 {
+    hexagon_A7_croundd_rr(rss, rt)
+}
+
+/// `Rdd32=vclip(Rss32,#u5)`
+///
+/// Instruction Type: S_2op
+/// Execution Slots: SLOT23
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(vclip, IU5 = 0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vclip_PI<const IU5: u32>(rss: i64) -> i64 {
+    static_assert_uimm_bits!(IU5, 5);
+    hexagon_A7_vclip(rss, IU5 as i32)
+}
+
+/// `Rdd32=dfmax(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V67
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))]
+#[cfg_attr(test, assert_instr(dfmax))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfmax_PP(rss: f64, rtt: f64) -> f64 {
+    hexagon_F2_dfmax(rss, rtt)
+}
+
+/// `Rdd32=dfmin(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V67
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))]
+#[cfg_attr(test, assert_instr(dfmin))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfmin_PP(rss: f64, rtt: f64) -> f64 {
+    hexagon_F2_dfmin(rss, rtt)
+}
+
+/// `Rdd32=dfmpyfix(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V67
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))]
+#[cfg_attr(test, assert_instr(dfmpyfix))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfmpyfix_PP(rss: f64, rtt: f64) -> f64 {
+    hexagon_F2_dfmpyfix(rss, rtt)
+}
+
+/// `Rxx32+=dfmpyhh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V67
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))]
+#[cfg_attr(test, assert_instr(dfmpyhh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfmpyhhacc_PP(rxx: f64, rss: f64, rtt: f64) -> f64 {
+    hexagon_F2_dfmpyhh(rxx, rss, rtt)
+}
+
+/// `Rxx32+=dfmpylh(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V67
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))]
+#[cfg_attr(test, assert_instr(dfmpylh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfmpylhacc_PP(rxx: f64, rss: f64, rtt: f64) -> f64 {
+    hexagon_F2_dfmpylh(rxx, rss, rtt)
+}
+
+/// `Rdd32=dfmpyll(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT23
+/// Requires: V67
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))]
+#[cfg_attr(test, assert_instr(dfmpyll))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_dfmpyll_PP(rss: f64, rtt: f64) -> f64 {
+    hexagon_F2_dfmpyll(rss, rtt)
+}
+
+/// `Rdd32=cmpyiw(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyiw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyiw_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_dcmpyiw(rss, rtt)
+}
+
+/// `Rxx32+=cmpyiw(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyiw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyiwacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_dcmpyiw_acc(rxx, rss, rtt)
+}
+
+/// `Rdd32=cmpyiw(Rss32,Rtt32*)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyiw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyiw_PP_conj(rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_dcmpyiwc(rss, rtt)
+}
+
+/// `Rxx32+=cmpyiw(Rss32,Rtt32*)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyiw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyiwacc_PP_conj(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_dcmpyiwc_acc(rxx, rss, rtt)
+}
+
+/// `Rdd32=cmpyrw(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyrw_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_dcmpyrw(rss, rtt)
+}
+
+/// `Rxx32+=cmpyrw(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyrwacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_dcmpyrw_acc(rxx, rss, rtt)
+}
+
+/// `Rdd32=cmpyrw(Rss32,Rtt32*)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyrw_PP_conj(rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_dcmpyrwc(rss, rtt)
+}
+
+/// `Rxx32+=cmpyrw(Rss32,Rtt32*)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_cmpyrwacc_PP_conj(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_dcmpyrwc_acc(rxx, rss, rtt)
+}
+
+/// `Rdd32=vdmpyw(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(vdmpyw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vdmpyw_PP(rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_vdmpy(rss, rtt)
+}
+
+/// `Rxx32+=vdmpyw(Rss32,Rtt32)`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(vdmpyw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_P_vdmpywacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 {
+    hexagon_M7_vdmpy_acc(rxx, rss, rtt)
+}
+
+/// `Rd32=cmpyiw(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyiw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyiw_PP_s1_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M7_wcmpyiw(rss, rtt)
+}
+
+/// `Rd32=cmpyiw(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyiw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyiw_PP_s1_rnd_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M7_wcmpyiw_rnd(rss, rtt)
+}
+
+/// `Rd32=cmpyiw(Rss32,Rtt32*):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyiw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyiw_PP_conj_s1_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M7_wcmpyiwc(rss, rtt)
+}
+
+/// `Rd32=cmpyiw(Rss32,Rtt32*):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyiw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyiw_PP_conj_s1_rnd_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M7_wcmpyiwc_rnd(rss, rtt)
+}
+
+/// `Rd32=cmpyrw(Rss32,Rtt32):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyrw_PP_s1_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M7_wcmpyrw(rss, rtt)
+}
+
+/// `Rd32=cmpyrw(Rss32,Rtt32):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyrw_PP_s1_rnd_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M7_wcmpyrw_rnd(rss, rtt)
+}
+
+/// `Rd32=cmpyrw(Rss32,Rtt32*):<<1:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyrw_PP_conj_s1_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M7_wcmpyrwc(rss, rtt)
+}
+
+/// `Rd32=cmpyrw(Rss32,Rtt32*):<<1:rnd:sat`
+///
+/// Instruction Type: M
+/// Execution Slots: SLOT3
+/// Requires: V67, Audio
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))]
+#[cfg_attr(test, assert_instr(cmpyrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_cmpyrw_PP_conj_s1_rnd_sat(rss: i64, rtt: i64) -> i32 {
+    hexagon_M7_wcmpyrwc_rnd(rss, rtt)
+}
+
+/// `dmlink(Rs32,Rt32)`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+/// Requires: V68
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))]
+#[cfg_attr(test, assert_instr(dmlink))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_dmlink_AA(rs: i32, rt: i32) {
+    hexagon_Y6_dmlink(rs, rt)
+}
+
+/// `Rd32=dmpause`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+/// Requires: V68
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))]
+#[cfg_attr(test, assert_instr(dmpause))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_dmpause() -> i32 {
+    hexagon_Y6_dmpause()
+}
+
+/// `Rd32=dmpoll`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+/// Requires: V68
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))]
+#[cfg_attr(test, assert_instr(dmpoll))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_dmpoll() -> i32 {
+    hexagon_Y6_dmpoll()
+}
+
+/// `dmresume(Rs32)`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+/// Requires: V68
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))]
+#[cfg_attr(test, assert_instr(dmresume))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_dmresume_A(rs: i32) {
+    hexagon_Y6_dmresume(rs)
+}
+
+/// `dmstart(Rs32)`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+/// Requires: V68
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))]
+#[cfg_attr(test, assert_instr(dmstart))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_dmstart_A(rs: i32) {
+    hexagon_Y6_dmstart(rs)
+}
+
+/// `Rd32=dmwait`
+///
+/// Instruction Type: ST
+/// Execution Slots: SLOT0
+/// Requires: V68
+#[inline(always)]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))]
+#[cfg_attr(test, assert_instr(dmwait))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_dmwait() -> i32 {
+    hexagon_Y6_dmwait()
+}
diff --git a/crates/core_arch/src/hexagon/v128.rs b/crates/core_arch/src/hexagon/v128.rs
new file mode 100644
index 0000000000..1f0566af78
--- /dev/null
+++ b/crates/core_arch/src/hexagon/v128.rs
@@ -0,0 +1,7502 @@
+//! Hexagon HVX 128-byte vector mode intrinsics
+//!
+//! This module provides intrinsics for the Hexagon Vector Extensions (HVX)
+//! in 128-byte vector mode (1024-bit vectors).
+//!
+//! HVX is a wide vector extension designed for high-performance signal processing.
+//! [Hexagon HVX Programmer's Reference Manual](https://docs.qualcomm.com/doc/80-N2040-61)
+//!
+//! ## Vector Types
+//!
+//! In 128-byte mode:
+//! - `HvxVector` is 1024 bits (128 bytes) containing 32 x 32-bit values
+//! - `HvxVectorPair` is 2048 bits (256 bytes)
+//! - `HvxVectorPred` is 1024 bits (128 bytes) for predicate operations
+//!
+//! To use this module, compile with `-C target-feature=+hvx-length128b`.
+//!
+//! ## Naming Convention
+//!
+//! Function names preserve the original Q6 naming case because the convention
+//! uses case to distinguish register types:
+//! - `W` (uppercase) = vector pair (`HvxVectorPair`)
+//! - `V` (uppercase) = vector (`HvxVector`)
+//! - `Q` (uppercase) = predicate (`HvxVectorPred`)
+//! - `R` = scalar register (`i32`)
+//!
+//! For example, `Q6_W_vcombine_VV` operates on a vector pair while
+//! `Q6_V_hi_W` extracts a vector from a pair.
+//!
+//! ## Architecture Versions
+//!
+//! Different intrinsics require different HVX architecture versions. Use the
+//! appropriate target feature to enable the required version:
+//! - HVX v60: `-C target-feature=+hvxv60` (basic HVX operations)
+//! - HVX v62: `-C target-feature=+hvxv62`
+//! - HVX v65: `-C target-feature=+hvxv65` (includes floating-point support)
+//! - HVX v66: `-C target-feature=+hvxv66`
+//! - HVX v68: `-C target-feature=+hvxv68`
+//! - HVX v69: `-C target-feature=+hvxv69`
+//! - HVX v73: `-C target-feature=+hvxv73`
+//! - HVX v79: `-C target-feature=+hvxv79`
+//!
+//! Each version includes all features from previous versions.
+
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+use crate::intrinsics::simd::{simd_add, simd_and, simd_or, simd_sub, simd_xor};
+
+// HVX type definitions for 128-byte vector mode
+types! {
+    #![unstable(feature = "stdarch_hexagon", issue = "151523")]
+
+    /// HVX vector type (1024 bits / 128 bytes)
+    ///
+    /// This type represents a single HVX vector register containing 32 x 32-bit values.
+    pub struct HvxVector(32 x i32);
+
+    /// HVX vector pair type (2048 bits / 256 bytes)
+    ///
+    /// This type represents a pair of HVX vector registers, often used for
+    /// operations that produce double-width results.
+    pub struct HvxVectorPair(64 x i32);
+
+    /// HVX vector predicate type (1024 bits / 128 bytes)
+    ///
+    /// This type represents a predicate vector used for conditional operations.
+    /// Each bit corresponds to a lane in the vector.
+    pub struct HvxVectorPred(32 x i32);
+}
+
+// LLVM intrinsic declarations for 128-byte vector mode
+#[allow(improper_ctypes)]
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.hexagon.V6.extractw.128B"]
+    fn extractw(_: HvxVector, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.V6.get.qfext.128B"]
+    fn get_qfext(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.hi.128B"]
+    fn hi(_: HvxVectorPair) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.lo.128B"]
+    fn lo(_: HvxVectorPair) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.lvsplatb.128B"]
+    fn lvsplatb(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.lvsplath.128B"]
+    fn lvsplath(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.lvsplatw.128B"]
+    fn lvsplatw(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.and.128B"]
+    fn pred_and(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.and.n.128B"]
+    fn pred_and_n(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.not.128B"]
+    fn pred_not(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.or.128B"]
+    fn pred_or(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.or.n.128B"]
+    fn pred_or_n(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.scalar2.128B"]
+    fn pred_scalar2(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.scalar2v2.128B"]
+    fn pred_scalar2v2(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.xor.128B"]
+    fn pred_xor(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.set.qfext.128B"]
+    fn set_qfext(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.shuffeqh.128B"]
+    fn shuffeqh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.shuffeqw.128B"]
+    fn shuffeqw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.v6mpyhubs10.128B"]
+    fn v6mpyhubs10(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.v6mpyhubs10.vxx.128B"]
+    fn v6mpyhubs10_vxx(
+        _: HvxVectorPair,
+        _: HvxVectorPair,
+        _: HvxVectorPair,
+        _: i32,
+    ) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.v6mpyvubs10.128B"]
+    fn v6mpyvubs10(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.v6mpyvubs10.vxx.128B"]
+    fn v6mpyvubs10_vxx(
+        _: HvxVectorPair,
+        _: HvxVectorPair,
+        _: HvxVectorPair,
+        _: i32,
+    ) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vS32b.nqpred.ai.128B"]
+    fn vS32b_nqpred_ai(_: HvxVector, _: *mut HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B"]
+    fn vS32b_nt_nqpred_ai(_: HvxVector, _: *mut HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vS32b.nt.qpred.ai.128B"]
+    fn vS32b_nt_qpred_ai(_: HvxVector, _: *mut HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vS32b.qpred.ai.128B"]
+    fn vS32b_qpred_ai(_: HvxVector, _: *mut HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vabs.f8.128B"]
+    fn vabs_f8(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabs.hf.128B"]
+    fn vabs_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabs.sf.128B"]
+    fn vabs_sf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsb.128B"]
+    fn vabsb(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsb.sat.128B"]
+    fn vabsb_sat(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsdiffh.128B"]
+    fn vabsdiffh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsdiffub.128B"]
+    fn vabsdiffub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsdiffuh.128B"]
+    fn vabsdiffuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsdiffw.128B"]
+    fn vabsdiffw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsh.128B"]
+    fn vabsh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsh.sat.128B"]
+    fn vabsh_sat(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsw.128B"]
+    fn vabsw(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsw.sat.128B"]
+    fn vabsw_sat(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.hf.128B"]
+    fn vadd_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.hf.hf.128B"]
+    fn vadd_hf_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.qf16.128B"]
+    fn vadd_qf16(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.qf16.mix.128B"]
+    fn vadd_qf16_mix(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.qf32.128B"]
+    fn vadd_qf32(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.qf32.mix.128B"]
+    fn vadd_qf32_mix(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.sf.128B"]
+    fn vadd_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.sf.hf.128B"]
+    fn vadd_sf_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vadd.sf.sf.128B"]
+    fn vadd_sf_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddb.128B"]
+    fn vaddb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddb.dv.128B"]
+    fn vaddb_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddbnq.128B"]
+    fn vaddbnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddbq.128B"]
+    fn vaddbq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddbsat.128B"]
+    fn vaddbsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddbsat.dv.128B"]
+    fn vaddbsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddcarrysat.128B"]
+    fn vaddcarrysat(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddclbh.128B"]
+    fn vaddclbh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddclbw.128B"]
+    fn vaddclbw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddh.128B"]
+    fn vaddh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddh.dv.128B"]
+    fn vaddh_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddhnq.128B"]
+    fn vaddhnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddhq.128B"]
+    fn vaddhq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddhsat.128B"]
+    fn vaddhsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddhsat.dv.128B"]
+    fn vaddhsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddhw.128B"]
+    fn vaddhw(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddhw.acc.128B"]
+    fn vaddhw_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddubh.128B"]
+    fn vaddubh(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddubh.acc.128B"]
+    fn vaddubh_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddubsat.128B"]
+    fn vaddubsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddubsat.dv.128B"]
+    fn vaddubsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddububb.sat.128B"]
+    fn vaddububb_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadduhsat.128B"]
+    fn vadduhsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadduhsat.dv.128B"]
+    fn vadduhsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vadduhw.128B"]
+    fn vadduhw(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vadduhw.acc.128B"]
+    fn vadduhw_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vadduwsat.128B"]
+    fn vadduwsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadduwsat.dv.128B"]
+    fn vadduwsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddw.128B"]
+    fn vaddw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddw.dv.128B"]
+    fn vaddw_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddwnq.128B"]
+    fn vaddwnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddwq.128B"]
+    fn vaddwq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddwsat.128B"]
+    fn vaddwsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddwsat.dv.128B"]
+    fn vaddwsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.valignb.128B"]
+    fn valignb(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.valignbi.128B"]
+    fn valignbi(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vand.128B"]
+    fn vand(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandnqrt.128B"]
+    fn vandnqrt(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandnqrt.acc.128B"]
+    fn vandnqrt_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandqrt.128B"]
+    fn vandqrt(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandqrt.acc.128B"]
+    fn vandqrt_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandvnqv.128B"]
+    fn vandvnqv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandvqv.128B"]
+    fn vandvqv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandvrt.128B"]
+    fn vandvrt(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandvrt.acc.128B"]
+    fn vandvrt_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslh.128B"]
+    fn vaslh(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslh.acc.128B"]
+    fn vaslh_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslhv.128B"]
+    fn vaslhv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslw.128B"]
+    fn vaslw(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslw.acc.128B"]
+    fn vaslw_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslwv.128B"]
+    fn vaslwv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasr.into.128B"]
+    fn vasr_into(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vasrh.128B"]
+    fn vasrh(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrh.acc.128B"]
+    fn vasrh_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhbrndsat.128B"]
+    fn vasrhbrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhbsat.128B"]
+    fn vasrhbsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhubrndsat.128B"]
+    fn vasrhubrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhubsat.128B"]
+    fn vasrhubsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhv.128B"]
+    fn vasrhv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasruhubrndsat.128B"]
+    fn vasruhubrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasruhubsat.128B"]
+    fn vasruhubsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasruwuhrndsat.128B"]
+    fn vasruwuhrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasruwuhsat.128B"]
+    fn vasruwuhsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrvuhubrndsat.128B"]
+    fn vasrvuhubrndsat(_: HvxVectorPair, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrvuhubsat.128B"]
+    fn vasrvuhubsat(_: HvxVectorPair, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrvwuhrndsat.128B"]
+    fn vasrvwuhrndsat(_: HvxVectorPair, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrvwuhsat.128B"]
+    fn vasrvwuhsat(_: HvxVectorPair, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrw.128B"]
+    fn vasrw(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrw.acc.128B"]
+    fn vasrw_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwh.128B"]
+    fn vasrwh(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwhrndsat.128B"]
+    fn vasrwhrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwhsat.128B"]
+    fn vasrwhsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwuhrndsat.128B"]
+    fn vasrwuhrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwuhsat.128B"]
+    fn vasrwuhsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwv.128B"]
+    fn vasrwv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vassign.128B"]
+    fn vassign(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vassign.fp.128B"]
+    fn vassign_fp(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vassignp.128B"]
+    fn vassignp(_: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vavgb.128B"]
+    fn vavgb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgbrnd.128B"]
+    fn vavgbrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgh.128B"]
+    fn vavgh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavghrnd.128B"]
+    fn vavghrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgub.128B"]
+    fn vavgub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgubrnd.128B"]
+    fn vavgubrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavguh.128B"]
+    fn vavguh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavguhrnd.128B"]
+    fn vavguhrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavguw.128B"]
+    fn vavguw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavguwrnd.128B"]
+    fn vavguwrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgw.128B"]
+    fn vavgw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgwrnd.128B"]
+    fn vavgwrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcl0h.128B"]
+    fn vcl0h(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcl0w.128B"]
+    fn vcl0w(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcombine.128B"]
+    fn vcombine(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vconv.h.hf.128B"]
+    fn vconv_h_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.hf.h.128B"]
+    fn vconv_hf_h(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.hf.qf16.128B"]
+    fn vconv_hf_qf16(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.hf.qf32.128B"]
+    fn vconv_hf_qf32(_: HvxVectorPair) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.sf.qf32.128B"]
+    fn vconv_sf_qf32(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.sf.w.128B"]
+    fn vconv_sf_w(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.w.sf.128B"]
+    fn vconv_w_sf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt2.hf.b.128B"]
+    fn vcvt2_hf_b(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt2.hf.ub.128B"]
+    fn vcvt2_hf_ub(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.b.hf.128B"]
+    fn vcvt_b_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.h.hf.128B"]
+    fn vcvt_h_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.b.128B"]
+    fn vcvt_hf_b(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.f8.128B"]
+    fn vcvt_hf_f8(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.h.128B"]
+    fn vcvt_hf_h(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.sf.128B"]
+    fn vcvt_hf_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.ub.128B"]
+    fn vcvt_hf_ub(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.uh.128B"]
+    fn vcvt_hf_uh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.sf.hf.128B"]
+    fn vcvt_sf_hf(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.ub.hf.128B"]
+    fn vcvt_ub_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.uh.hf.128B"]
+    fn vcvt_uh_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vd0.128B"]
+    fn vd0() -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdd0.128B"]
+    fn vdd0() -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdealb.128B"]
+    fn vdealb(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdealb4w.128B"]
+    fn vdealb4w(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdealh.128B"]
+    fn vdealh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdealvdd.128B"]
+    fn vdealvdd(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdelta.128B"]
+    fn vdelta(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpy.sf.hf.128B"]
+    fn vdmpy_sf_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpy.sf.hf.acc.128B"]
+    fn vdmpy_sf_hf_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpybus.128B"]
+    fn vdmpybus(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpybus.acc.128B"]
+    fn vdmpybus_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpybus.dv.128B"]
+    fn vdmpybus_dv(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdmpybus.dv.acc.128B"]
+    fn vdmpybus_dv_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdmpyhb.128B"]
+    fn vdmpyhb(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhb.acc.128B"]
+    fn vdmpyhb_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhb.dv.128B"]
+    fn vdmpyhb_dv(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdmpyhb.dv.acc.128B"]
+    fn vdmpyhb_dv_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdmpyhisat.128B"]
+    fn vdmpyhisat(_: HvxVectorPair, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhisat.acc.128B"]
+    fn vdmpyhisat_acc(_: HvxVector, _: HvxVectorPair, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsat.128B"]
+    fn vdmpyhsat(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsat.acc.128B"]
+    fn vdmpyhsat_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsuisat.128B"]
+    fn vdmpyhsuisat(_: HvxVectorPair, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsuisat.acc.128B"]
+    fn vdmpyhsuisat_acc(_: HvxVector, _: HvxVectorPair, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsusat.128B"]
+    fn vdmpyhsusat(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsusat.acc.128B"]
+    fn vdmpyhsusat_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhvsat.128B"]
+    fn vdmpyhvsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhvsat.acc.128B"]
+    fn vdmpyhvsat_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdsaduh.128B"]
+    fn vdsaduh(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdsaduh.acc.128B"]
+    fn vdsaduh_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.veqb.128B"]
+    fn veqb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqb.and.128B"]
+    fn veqb_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqb.or.128B"]
+    fn veqb_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqb.xor.128B"]
+    fn veqb_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqh.128B"]
+    fn veqh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqh.and.128B"]
+    fn veqh_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqh.or.128B"]
+    fn veqh_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqh.xor.128B"]
+    fn veqh_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqw.128B"]
+    fn veqw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqw.and.128B"]
+    fn veqw_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqw.or.128B"]
+    fn veqw_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqw.xor.128B"]
+    fn veqw_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmax.f8.128B"]
+    fn vfmax_f8(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmax.hf.128B"]
+    fn vfmax_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmax.sf.128B"]
+    fn vfmax_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmin.f8.128B"]
+    fn vfmin_f8(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmin.hf.128B"]
+    fn vfmin_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmin.sf.128B"]
+    fn vfmin_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfneg.f8.128B"]
+    fn vfneg_f8(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfneg.hf.128B"]
+    fn vfneg_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfneg.sf.128B"]
+    fn vfneg_sf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgathermh.128B"]
+    fn vgathermh(_: *mut HvxVector, _: i32, _: i32, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermhq.128B"]
+    fn vgathermhq(_: *mut HvxVector, _: HvxVector, _: i32, _: i32, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermhw.128B"]
+    fn vgathermhw(_: *mut HvxVector, _: i32, _: i32, _: HvxVectorPair) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermhwq.128B"]
+    fn vgathermhwq(_: *mut HvxVector, _: HvxVector, _: i32, _: i32, _: HvxVectorPair) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermw.128B"]
+    fn vgathermw(_: *mut HvxVector, _: i32, _: i32, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermwq.128B"]
+    fn vgathermwq(_: *mut HvxVector, _: HvxVector, _: i32, _: i32, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vgtb.128B"]
+    fn vgtb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtb.and.128B"]
+    fn vgtb_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtb.or.128B"]
+    fn vgtb_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtb.xor.128B"]
+    fn vgtb_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgth.128B"]
+    fn vgth(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgth.and.128B"]
+    fn vgth_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgth.or.128B"]
+    fn vgth_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgth.xor.128B"]
+    fn vgth_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgthf.128B"]
+    fn vgthf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgthf.and.128B"]
+    fn vgthf_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgthf.or.128B"]
+    fn vgthf_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgthf.xor.128B"]
+    fn vgthf_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtsf.128B"]
+    fn vgtsf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtsf.and.128B"]
+    fn vgtsf_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtsf.or.128B"]
+    fn vgtsf_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtsf.xor.128B"]
+    fn vgtsf_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtub.128B"]
+    fn vgtub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtub.and.128B"]
+    fn vgtub_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtub.or.128B"]
+    fn vgtub_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtub.xor.128B"]
+    fn vgtub_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuh.128B"]
+    fn vgtuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuh.and.128B"]
+    fn vgtuh_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuh.or.128B"]
+    fn vgtuh_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuh.xor.128B"]
+    fn vgtuh_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuw.128B"]
+    fn vgtuw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuw.and.128B"]
+    fn vgtuw_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuw.or.128B"]
+    fn vgtuw_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuw.xor.128B"]
+    fn vgtuw_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtw.128B"]
+    fn vgtw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtw.and.128B"]
+    fn vgtw_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtw.or.128B"]
+    fn vgtw_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtw.xor.128B"]
+    fn vgtw_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vinsertwr.128B"]
+    fn vinsertwr(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlalignb.128B"]
+    fn vlalignb(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlalignbi.128B"]
+    fn vlalignbi(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrb.128B"]
+    fn vlsrb(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrh.128B"]
+    fn vlsrh(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrhv.128B"]
+    fn vlsrhv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrw.128B"]
+    fn vlsrw(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrwv.128B"]
+    fn vlsrwv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvb.128B"]
+    fn vlutvvb(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvb.nm.128B"]
+    fn vlutvvb_nm(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvb.oracc.128B"]
+    fn vlutvvb_oracc(_: HvxVector, _: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvb.oracci.128B"]
+    fn vlutvvb_oracci(_: HvxVector, _: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvbi.128B"]
+    fn vlutvvbi(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvwh.128B"]
+    fn vlutvwh(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vlutvwh.nm.128B"]
+    fn vlutvwh_nm(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vlutvwh.oracc.128B"]
+    fn vlutvwh_oracc(_: HvxVectorPair, _: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vlutvwh.oracci.128B"]
+    fn vlutvwh_oracci(_: HvxVectorPair, _: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vlutvwhi.128B"]
+    fn vlutvwhi(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmax.hf.128B"]
+    fn vmax_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmax.sf.128B"]
+    fn vmax_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxb.128B"]
+    fn vmaxb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxh.128B"]
+    fn vmaxh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxub.128B"]
+    fn vmaxub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxuh.128B"]
+    fn vmaxuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxw.128B"]
+    fn vmaxw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmin.hf.128B"]
+    fn vmin_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmin.sf.128B"]
+    fn vmin_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminb.128B"]
+    fn vminb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminh.128B"]
+    fn vminh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminub.128B"]
+    fn vminub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminuh.128B"]
+    fn vminuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminw.128B"]
+    fn vminw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpabus.128B"]
+    fn vmpabus(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabus.acc.128B"]
+    fn vmpabus_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabusv.128B"]
+    fn vmpabusv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabuu.128B"]
+    fn vmpabuu(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabuu.acc.128B"]
+    fn vmpabuu_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabuuv.128B"]
+    fn vmpabuuv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpahb.128B"]
+    fn vmpahb(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpahb.acc.128B"]
+    fn vmpahb_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpauhb.128B"]
+    fn vmpauhb(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpauhb.acc.128B"]
+    fn vmpauhb_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.hf.hf.128B"]
+    fn vmpy_hf_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.hf.hf.acc.128B"]
+    fn vmpy_hf_hf_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf16.128B"]
+    fn vmpy_qf16(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf16.hf.128B"]
+    fn vmpy_qf16_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf16.mix.hf.128B"]
+    fn vmpy_qf16_mix_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32.128B"]
+    fn vmpy_qf32(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32.hf.128B"]
+    fn vmpy_qf32_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32.mix.hf.128B"]
+    fn vmpy_qf32_mix_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32.qf16.128B"]
+    fn vmpy_qf32_qf16(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32.sf.128B"]
+    fn vmpy_qf32_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.sf.hf.128B"]
+    fn vmpy_sf_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.sf.hf.acc.128B"]
+    fn vmpy_sf_hf_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.sf.sf.128B"]
+    fn vmpy_sf_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpybus.128B"]
+    fn vmpybus(_: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybus.acc.128B"]
+    fn vmpybus_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybusv.128B"]
+    fn vmpybusv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybusv.acc.128B"]
+    fn vmpybusv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybv.128B"]
+    fn vmpybv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybv.acc.128B"]
+    fn vmpybv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyewuh.128B"]
+    fn vmpyewuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyewuh.64.128B"]
+    fn vmpyewuh_64(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyh.128B"]
+    fn vmpyh(_: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyh.acc.128B"]
+    fn vmpyh_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhsat.acc.128B"]
+    fn vmpyhsat_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhsrs.128B"]
+    fn vmpyhsrs(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyhss.128B"]
+    fn vmpyhss(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyhus.128B"]
+    fn vmpyhus(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhus.acc.128B"]
+    fn vmpyhus_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhv.128B"]
+    fn vmpyhv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhv.acc.128B"]
+    fn vmpyhv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhvsrs.128B"]
+    fn vmpyhvsrs(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyieoh.128B"]
+    fn vmpyieoh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiewh.acc.128B"]
+    fn vmpyiewh_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiewuh.128B"]
+    fn vmpyiewuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiewuh.acc.128B"]
+    fn vmpyiewuh_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyih.128B"]
+    fn vmpyih(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyih.acc.128B"]
+    fn vmpyih_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyihb.128B"]
+    fn vmpyihb(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyihb.acc.128B"]
+    fn vmpyihb_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiowh.128B"]
+    fn vmpyiowh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwb.128B"]
+    fn vmpyiwb(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwb.acc.128B"]
+    fn vmpyiwb_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwh.128B"]
+    fn vmpyiwh(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwh.acc.128B"]
+    fn vmpyiwh_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwub.128B"]
+    fn vmpyiwub(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwub.acc.128B"]
+    fn vmpyiwub_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyowh.128B"]
+    fn vmpyowh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyowh.64.acc.128B"]
+    fn vmpyowh_64_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyowh.rnd.128B"]
+    fn vmpyowh_rnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyowh.rnd.sacc.128B"]
+    fn vmpyowh_rnd_sacc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyowh.sacc.128B"]
+    fn vmpyowh_sacc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyub.128B"]
+    fn vmpyub(_: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyub.acc.128B"]
+    fn vmpyub_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyubv.128B"]
+    fn vmpyubv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyubv.acc.128B"]
+    fn vmpyubv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuh.128B"]
+    fn vmpyuh(_: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuh.acc.128B"]
+    fn vmpyuh_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuhe.128B"]
+    fn vmpyuhe(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyuhe.acc.128B"]
+    fn vmpyuhe_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyuhv.128B"]
+    fn vmpyuhv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuhv.acc.128B"]
+    fn vmpyuhv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuhvs.128B"]
+    fn vmpyuhvs(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmux.128B"]
+    fn vmux(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnavgb.128B"]
+    fn vnavgb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnavgh.128B"]
+    fn vnavgh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnavgub.128B"]
+    fn vnavgub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnavgw.128B"]
+    fn vnavgw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnormamth.128B"]
+    fn vnormamth(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnormamtw.128B"]
+    fn vnormamtw(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnot.128B"]
+    fn vnot(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vor.128B"]
+    fn vor(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackeb.128B"]
+    fn vpackeb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackeh.128B"]
+    fn vpackeh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackhb.sat.128B"]
+    fn vpackhb_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackhub.sat.128B"]
+    fn vpackhub_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackob.128B"]
+    fn vpackob(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackoh.128B"]
+    fn vpackoh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackwh.sat.128B"]
+    fn vpackwh_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackwuh.sat.128B"]
+    fn vpackwuh_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpopcounth.128B"]
+    fn vpopcounth(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vprefixqb.128B"]
+    fn vprefixqb(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vprefixqh.128B"]
+    fn vprefixqh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vprefixqw.128B"]
+    fn vprefixqw(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrdelta.128B"]
+    fn vrdelta(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybus.128B"]
+    fn vrmpybus(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybus.acc.128B"]
+    fn vrmpybus_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybusi.128B"]
+    fn vrmpybusi(_: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrmpybusi.acc.128B"]
+    fn vrmpybusi_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrmpybusv.128B"]
+    fn vrmpybusv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybusv.acc.128B"]
+    fn vrmpybusv_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybv.128B"]
+    fn vrmpybv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybv.acc.128B"]
+    fn vrmpybv_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpyub.128B"]
+    fn vrmpyub(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpyub.acc.128B"]
+    fn vrmpyub_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpyubi.128B"]
+    fn vrmpyubi(_: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrmpyubi.acc.128B"]
+    fn vrmpyubi_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrmpyubv.128B"]
+    fn vrmpyubv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpyubv.acc.128B"]
+    fn vrmpyubv_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vror.128B"]
+    fn vror(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrotr.128B"]
+    fn vrotr(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vroundhb.128B"]
+    fn vroundhb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vroundhub.128B"]
+    fn vroundhub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrounduhub.128B"]
+    fn vrounduhub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrounduwuh.128B"]
+    fn vrounduwuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vroundwh.128B"]
+    fn vroundwh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vroundwuh.128B"]
+    fn vroundwuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrsadubi.128B"]
+    fn vrsadubi(_: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrsadubi.acc.128B"]
+    fn vrsadubi_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsatdw.128B"]
+    fn vsatdw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsathub.128B"]
+    fn vsathub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsatuwuh.128B"]
+    fn vsatuwuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsatwh.128B"]
+    fn vsatwh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsb.128B"]
+    fn vsb(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vscattermh.128B"]
+    fn vscattermh(_: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermh.add.128B"]
+    fn vscattermh_add(_: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermhq.128B"]
+    fn vscattermhq(_: HvxVector, _: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermhw.128B"]
+    fn vscattermhw(_: i32, _: i32, _: HvxVectorPair, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermhw.add.128B"]
+    fn vscattermhw_add(_: i32, _: i32, _: HvxVectorPair, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermhwq.128B"]
+    fn vscattermhwq(_: HvxVector, _: i32, _: i32, _: HvxVectorPair, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermw.128B"]
+    fn vscattermw(_: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermw.add.128B"]
+    fn vscattermw_add(_: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermwq.128B"]
+    fn vscattermwq(_: HvxVector, _: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vsh.128B"]
+    fn vsh(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vshufeh.128B"]
+    fn vshufeh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffb.128B"]
+    fn vshuffb(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffeb.128B"]
+    fn vshuffeb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffh.128B"]
+    fn vshuffh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffob.128B"]
+    fn vshuffob(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffvdd.128B"]
+    fn vshuffvdd(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vshufoeb.128B"]
+    fn vshufoeb(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vshufoeh.128B"]
+    fn vshufoeh(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vshufoh.128B"]
+    fn vshufoh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.hf.128B"]
+    fn vsub_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.hf.hf.128B"]
+    fn vsub_hf_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.qf16.128B"]
+    fn vsub_qf16(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.qf16.mix.128B"]
+    fn vsub_qf16_mix(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.qf32.128B"]
+    fn vsub_qf32(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.qf32.mix.128B"]
+    fn vsub_qf32_mix(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.sf.128B"]
+    fn vsub_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.sf.hf.128B"]
+    fn vsub_sf_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsub.sf.sf.128B"]
+    fn vsub_sf_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubb.128B"]
+    fn vsubb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubb.dv.128B"]
+    fn vsubb_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubbnq.128B"]
+    fn vsubbnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubbq.128B"]
+    fn vsubbq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubbsat.128B"]
+    fn vsubbsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubbsat.dv.128B"]
+    fn vsubbsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubh.128B"]
+    fn vsubh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubh.dv.128B"]
+    fn vsubh_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubhnq.128B"]
+    fn vsubhnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubhq.128B"]
+    fn vsubhq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubhsat.128B"]
+    fn vsubhsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubhsat.dv.128B"]
+    fn vsubhsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubhw.128B"]
+    fn vsubhw(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsububh.128B"]
+    fn vsububh(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsububsat.128B"]
+    fn vsububsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsububsat.dv.128B"]
+    fn vsububsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubububb.sat.128B"]
+    fn vsubububb_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubuhsat.128B"]
+    fn vsubuhsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubuhsat.dv.128B"]
+    fn vsubuhsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubuhw.128B"]
+    fn vsubuhw(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubuwsat.128B"]
+    fn vsubuwsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubuwsat.dv.128B"]
+    fn vsubuwsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubw.128B"]
+    fn vsubw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubw.dv.128B"]
+    fn vsubw_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubwnq.128B"]
+    fn vsubwnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubwq.128B"]
+    fn vsubwq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubwsat.128B"]
+    fn vsubwsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubwsat.dv.128B"]
+    fn vsubwsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vswap.128B"]
+    fn vswap(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpyb.128B"]
+    fn vtmpyb(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpyb.acc.128B"]
+    fn vtmpyb_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpybus.128B"]
+    fn vtmpybus(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpybus.acc.128B"]
+    fn vtmpybus_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpyhb.128B"]
+    fn vtmpyhb(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpyhb.acc.128B"]
+    fn vtmpyhb_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackb.128B"]
+    fn vunpackb(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackh.128B"]
+    fn vunpackh(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackob.128B"]
+    fn vunpackob(_: HvxVectorPair, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackoh.128B"]
+    fn vunpackoh(_: HvxVectorPair, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackub.128B"]
+    fn vunpackub(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackuh.128B"]
+    fn vunpackuh(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vxor.128B"]
+    fn vxor(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vzb.128B"]
+    fn vzb(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vzh.128B"]
+    fn vzh(_: HvxVector) -> HvxVectorPair;
+}
+
+/// `Rd32=vextract(Vu32,Rs32)`
+///
+/// Instruction Type: LD
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(extractw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vextract_VR(vu: HvxVector, rs: i32) -> i32 {
+    extractw(vu, rs)
+}
+
+/// `Vd32=hi(Vss32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(hi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_hi_W(vss: HvxVectorPair) -> HvxVector {
+    hi(vss)
+}
+
+/// `Vd32=lo(Vss32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(lo))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_lo_W(vss: HvxVectorPair) -> HvxVector {
+    lo(vss)
+}
+
+/// `Vd32=vsplat(Rt32)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(lvsplatw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vsplat_R(rt: i32) -> HvxVector {
+    lvsplatw(rt)
+}
+
+/// `Vd32.uh=vabsdiff(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsdiffh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vabsdiff_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vabsdiffh(vu, vv)
+}
+
+/// `Vd32.ub=vabsdiff(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsdiffub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vabsdiff_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vabsdiffub(vu, vv)
+}
+
+/// `Vd32.uh=vabsdiff(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsdiffuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vabsdiff_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vabsdiffuh(vu, vv)
+}
+
+/// `Vd32.uw=vabsdiff(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsdiffw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vabsdiff_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vabsdiffw(vu, vv)
+}
+
+/// `Vd32.h=vabs(Vu32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vabs_Vh(vu: HvxVector) -> HvxVector {
+    vabsh(vu)
+}
+
+/// `Vd32.h=vabs(Vu32.h):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsh_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vabs_Vh_sat(vu: HvxVector) -> HvxVector {
+    vabsh_sat(vu)
+}
+
+/// `Vd32.w=vabs(Vu32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vabs_Vw(vu: HvxVector) -> HvxVector {
+    vabsw(vu)
+}
+
+/// `Vd32.w=vabs(Vu32.w):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsw_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vabs_Vw_sat(vu: HvxVector) -> HvxVector {
+    vabsw_sat(vu)
+}
+
+/// `Vd32.b=vadd(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vadd_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddb(vu, vv)
+}
+
+/// `Vdd32.b=vadd(Vuu32.b,Vvv32.b)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddb_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vadd_WbWb(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddb_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vadd(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vadd_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddh(vu, vv)
+}
+
+/// `Vdd32.h=vadd(Vuu32.h,Vvv32.h)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddh_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vadd_WhWh(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddh_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vadd(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vadd_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddhsat(vu, vv)
+}
+
+/// `Vdd32.h=vadd(Vuu32.h,Vvv32.h):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddhsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vadd_WhWh_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddhsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.w=vadd(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vadd_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vaddhw(vu, vv)
+}
+
+/// `Vdd32.h=vadd(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vadd_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vaddubh(vu, vv)
+}
+
+/// `Vd32.ub=vadd(Vu32.ub,Vv32.ub):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddubsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vadd_VubVub_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddubsat(vu, vv)
+}
+
+/// `Vdd32.ub=vadd(Vuu32.ub,Vvv32.ub):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddubsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wub_vadd_WubWub_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddubsat_dv(vuu, vvv)
+}
+
+/// `Vd32.uh=vadd(Vu32.uh,Vv32.uh):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vadduhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vadd_VuhVuh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadduhsat(vu, vv)
+}
+
+/// `Vdd32.uh=vadd(Vuu32.uh,Vvv32.uh):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vadduhsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vadd_WuhWuh_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vadduhsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.w=vadd(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vadduhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vadd_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vadduhw(vu, vv)
+}
+
+/// `Vd32.w=vadd(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vadd_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_add(vu, vv)
+}
+
+/// `Vdd32.w=vadd(Vuu32.w,Vvv32.w)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddw_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vadd_WwWw(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddw_dv(vuu, vvv)
+}
+
+/// `Vd32.w=vadd(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddwsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vadd_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddwsat(vu, vv)
+}
+
+/// `Vdd32.w=vadd(Vuu32.w,Vvv32.w):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddwsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vadd_WwWw_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddwsat_dv(vuu, vvv)
+}
+
+/// `Vd32=valign(Vu32,Vv32,Rt8)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(valignb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_valign_VVR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    valignb(vu, vv, rt)
+}
+
+/// `Vd32=valign(Vu32,Vv32,#u3)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(valignbi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_valign_VVI(vu: HvxVector, vv: HvxVector, iu3: i32) -> HvxVector {
+    valignbi(vu, vv, iu3)
+}
+
+/// `Vd32=vand(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vand))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_and(vu, vv)
+}
+
+/// `Vd32.h=vasl(Vu32.h,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasl_VhR(vu: HvxVector, rt: i32) -> HvxVector {
+    vaslh(vu, rt)
+}
+
+/// `Vd32.h=vasl(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasl_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaslhv(vu, vv)
+}
+
+/// `Vd32.w=vasl(Vu32.w,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasl_VwR(vu: HvxVector, rt: i32) -> HvxVector {
+    vaslw(vu, rt)
+}
+
+/// `Vx32.w+=vasl(Vu32.w,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslw_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vaslacc_VwVwR(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vaslw_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vasl(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslwv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasl_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaslwv(vu, vv)
+}
+
+/// `Vd32.h=vasr(Vu32.h,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VhR(vu: HvxVector, rt: i32) -> HvxVector {
+    vasrh(vu, rt)
+}
+
+/// `Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrhbrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vasr_VhVhR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrhbrndsat(vu, vv, rt)
+}
+
+/// `Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrhubrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_VhVhR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrhubrndsat(vu, vv, rt)
+}
+
+/// `Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrhubsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_VhVhR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrhubsat(vu, vv, rt)
+}
+
+/// `Vd32.h=vasr(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vasrhv(vu, vv)
+}
+
+/// `Vd32.w=vasr(Vu32.w,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasr_VwR(vu: HvxVector, rt: i32) -> HvxVector {
+    vasrw(vu, rt)
+}
+
+/// `Vx32.w+=vasr(Vu32.w,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrw_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasracc_VwVwR(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vasrw_acc(vx, vu, rt)
+}
+
+/// `Vd32.h=vasr(Vu32.w,Vv32.w,Rt8)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VwVwR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwh(vu, vv, rt)
+}
+
+/// `Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwhrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VwVwR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwhrndsat(vu, vv, rt)
+}
+
+/// `Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VwVwR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwhsat(vu, vv, rt)
+}
+
+/// `Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwuhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_VwVwR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwuhsat(vu, vv, rt)
+}
+
+/// `Vd32.w=vasr(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasr_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vasrwv(vu, vv)
+}
+
+/// `Vd32=Vu32`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vassign))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_equals_V(vu: HvxVector) -> HvxVector {
+    vassign(vu)
+}
+
+/// `Vdd32=Vuu32`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vassignp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_equals_W(vuu: HvxVectorPair) -> HvxVectorPair {
+    vassignp(vuu)
+}
+
+/// `Vd32.h=vavg(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vavg_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgh(vu, vv)
+}
+
+/// `Vd32.h=vavg(Vu32.h,Vv32.h):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavghrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vavg_VhVh_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavghrnd(vu, vv)
+}
+
+/// `Vd32.ub=vavg(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vavg_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgub(vu, vv)
+}
+
+/// `Vd32.ub=vavg(Vu32.ub,Vv32.ub):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgubrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vavg_VubVub_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgubrnd(vu, vv)
+}
+
+/// `Vd32.uh=vavg(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavguh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vavg_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavguh(vu, vv)
+}
+
+/// `Vd32.uh=vavg(Vu32.uh,Vv32.uh):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavguhrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vavg_VuhVuh_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavguhrnd(vu, vv)
+}
+
+/// `Vd32.w=vavg(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vavg_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgw(vu, vv)
+}
+
+/// `Vd32.w=vavg(Vu32.w,Vv32.w):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgwrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vavg_VwVw_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgwrnd(vu, vv)
+}
+
+/// `Vd32.uh=vcl0(Vu32.uh)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vcl0h))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vcl0_Vuh(vu: HvxVector) -> HvxVector {
+    vcl0h(vu)
+}
+
+/// `Vd32.uw=vcl0(Vu32.uw)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vcl0w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vcl0_Vuw(vu: HvxVector) -> HvxVector {
+    vcl0w(vu)
+}
+
+/// `Vdd32=vcombine(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vcombine))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vcombine_VV(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vcombine(vu, vv)
+}
+
+/// `Vd32=#0`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vd0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vzero() -> HvxVector {
+    vd0()
+}
+
+/// `Vd32.b=vdeal(Vu32.b)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdealb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vdeal_Vb(vu: HvxVector) -> HvxVector {
+    vdealb(vu)
+}
+
+/// `Vd32.b=vdeale(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdealb4w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vdeale_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdealb4w(vu, vv)
+}
+
+/// `Vd32.h=vdeal(Vu32.h)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdealh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vdeal_Vh(vu: HvxVector) -> HvxVector {
+    vdealh(vu)
+}
+
+/// `Vdd32=vdeal(Vu32,Vv32,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdealvdd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vdeal_VVR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVectorPair {
+    vdealvdd(vu, vv, rt)
+}
+
+/// `Vd32=vdelta(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdelta))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vdelta_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdelta(vu, vv)
+}
+
+/// `Vd32.h=vdmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpybus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vdmpy_VubRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpybus(vu, rt)
+}
+
+/// `Vx32.h+=vdmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpybus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vdmpyacc_VhVubRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpybus_acc(vx, vu, rt)
+}
+
+/// `Vdd32.h=vdmpy(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpybus_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vdmpy_WubRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vdmpybus_dv(vuu, rt)
+}
+
+/// `Vxx32.h+=vdmpy(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpybus_dv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vdmpyacc_WhWubRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vdmpybus_dv_acc(vxx, vuu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_VhRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhb(vu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwVhRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhb_acc(vx, vu, rt)
+}
+
+/// `Vdd32.w=vdmpy(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhb_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vdmpy_WhRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vdmpyhb_dv(vuu, rt)
+}
+
+/// `Vxx32.w+=vdmpy(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhb_dv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vdmpyacc_WwWhRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vdmpyhb_dv_acc(vxx, vuu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vuu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhisat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_WhRh_sat(vuu: HvxVectorPair, rt: i32) -> HvxVector {
+    vdmpyhisat(vuu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vuu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhisat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwWhRh_sat(vx: HvxVector, vuu: HvxVectorPair, rt: i32) -> HvxVector {
+    vdmpyhisat_acc(vx, vuu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_VhRh_sat(vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhsat(vu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwVhRh_sat(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhsat_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vuu32.h,Rt32.uh,#1):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsuisat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_WhRuh_sat(vuu: HvxVectorPair, rt: i32) -> HvxVector {
+    vdmpyhsuisat(vuu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vuu32.h,Rt32.uh,#1):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsuisat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwWhRuh_sat(vx: HvxVector, vuu: HvxVectorPair, rt: i32) -> HvxVector {
+    vdmpyhsuisat_acc(vx, vuu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsusat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_VhRuh_sat(vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhsusat(vu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsusat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwVhRuh_sat(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhsusat_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhvsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdmpyhvsat(vu, vv)
+}
+
+/// `Vx32.w+=vdmpy(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhvsat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwVhVh_sat(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdmpyhvsat_acc(vx, vu, vv)
+}
+
+/// `Vdd32.uw=vdsad(Vuu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdsaduh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vdsad_WuhRuh(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vdsaduh(vuu, rt)
+}
+
+/// `Vxx32.uw+=vdsad(Vuu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdsaduh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vdsadacc_WuwWuhRuh(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vdsaduh_acc(vxx, vuu, rt)
+}
+
+/// `Vx32.w=vinsert(Rt32)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vinsertwr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vinsert_VwR(vx: HvxVector, rt: i32) -> HvxVector {
+    vinsertwr(vx, rt)
+}
+
+/// `Vd32=vlalign(Vu32,Vv32,Rt8)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlalignb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vlalign_VVR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vlalignb(vu, vv, rt)
+}
+
+/// `Vd32=vlalign(Vu32,Vv32,#u3)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlalignbi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vlalign_VVI(vu: HvxVector, vv: HvxVector, iu3: i32) -> HvxVector {
+    vlalignbi(vu, vv, iu3)
+}
+
+/// `Vd32.uh=vlsr(Vu32.uh,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlsrh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vlsr_VuhR(vu: HvxVector, rt: i32) -> HvxVector {
+    vlsrh(vu, rt)
+}
+
+/// `Vd32.h=vlsr(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlsrhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vlsr_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vlsrhv(vu, vv)
+}
+
+/// `Vd32.uw=vlsr(Vu32.uw,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlsrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vlsr_VuwR(vu: HvxVector, rt: i32) -> HvxVector {
+    vlsrw(vu, rt)
+}
+
+/// `Vd32.w=vlsr(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlsrwv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vlsr_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vlsrwv(vu, vv)
+}
+
+/// `Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlutvvb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32_VbVbR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vlutvvb(vu, vv, rt)
+}
+
+/// `Vx32.b|=vlut32(Vu32.b,Vv32.b,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlutvvb_oracc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32or_VbVbVbR(
+    vx: HvxVector,
+    vu: HvxVector,
+    vv: HvxVector,
+    rt: i32,
+) -> HvxVector {
+    vlutvvb_oracc(vx, vu, vv, rt)
+}
+
+/// `Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlutvwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16_VbVhR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVectorPair {
+    vlutvwh(vu, vv, rt)
+}
+
+/// `Vxx32.h|=vlut16(Vu32.b,Vv32.h,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlutvwh_oracc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16or_WhVbVhR(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+    rt: i32,
+) -> HvxVectorPair {
+    vlutvwh_oracc(vxx, vu, vv, rt)
+}
+
+/// `Vd32.h=vmax(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmaxh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmax_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxh(vu, vv)
+}
+
+/// `Vd32.ub=vmax(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmaxub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vmax_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxub(vu, vv)
+}
+
+/// `Vd32.uh=vmax(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmaxuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vmax_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxuh(vu, vv)
+}
+
+/// `Vd32.w=vmax(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmaxw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmax_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxw(vu, vv)
+}
+
+/// `Vd32.h=vmin(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vminh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmin_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminh(vu, vv)
+}
+
+/// `Vd32.ub=vmin(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vminub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vmin_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminub(vu, vv)
+}
+
+/// `Vd32.uh=vmin(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vminuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vmin_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminuh(vu, vv)
+}
+
+/// `Vd32.w=vmin(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vminw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmin_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminw(vu, vv)
+}
+
+/// `Vdd32.h=vmpa(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpabus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpa_WubRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vmpabus(vuu, rt)
+}
+
+/// `Vxx32.h+=vmpa(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpabus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpaacc_WhWubRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpabus_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.h=vmpa(Vuu32.ub,Vvv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpabusv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpa_WubWb(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vmpabusv(vuu, vvv)
+}
+
+/// `Vdd32.h=vmpa(Vuu32.ub,Vvv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpabuuv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpa_WubWub(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vmpabuuv(vuu, vvv)
+}
+
+/// `Vdd32.w=vmpa(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpahb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpa_WhRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vmpahb(vuu, rt)
+}
+
+/// `Vxx32.w+=vmpa(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpahb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpaacc_WwWhRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpahb_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.h=vmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpy_VubRb(vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpybus(vu, rt)
+}
+
+/// `Vxx32.h+=vmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpyacc_WhVubRb(vxx: HvxVectorPair, vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpybus_acc(vxx, vu, rt)
+}
+
+/// `Vdd32.h=vmpy(Vu32.ub,Vv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybusv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpy_VubVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpybusv(vu, vv)
+}
+
+/// `Vxx32.h+=vmpy(Vu32.ub,Vv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybusv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpyacc_WhVubVb(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpybusv_acc(vxx, vu, vv)
+}
+
+/// `Vdd32.h=vmpy(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpy_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpybv(vu, vv)
+}
+
+/// `Vxx32.h+=vmpy(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpyacc_WhVbVb(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpybv_acc(vxx, vu, vv)
+}
+
+/// `Vd32.w=vmpye(Vu32.w,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyewuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpye_VwVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyewuh(vu, vv)
+}
+
+/// `Vdd32.w=vmpy(Vu32.h,Rt32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpy_VhRh(vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpyh(vu, rt)
+}
+
+/// `Vxx32.w+=vmpy(Vu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhsat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpyacc_WwVhRh_sat(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpyhsat_acc(vxx, vu, rt)
+}
+
+/// `Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhsrs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpy_VhRh_s1_rnd_sat(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyhsrs(vu, rt)
+}
+
+/// `Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhss))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpy_VhRh_s1_sat(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyhss(vu, rt)
+}
+
+/// `Vdd32.w=vmpy(Vu32.h,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpy_VhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyhus(vu, vv)
+}
+
+/// `Vxx32.w+=vmpy(Vu32.h,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpyacc_WwVhVuh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyhus_acc(vxx, vu, vv)
+}
+
+/// `Vdd32.w=vmpy(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpy_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyhv(vu, vv)
+}
+
+/// `Vxx32.w+=vmpy(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpyacc_WwVhVh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyhv_acc(vxx, vu, vv)
+}
+
+/// `Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhvsrs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpy_VhVh_s1_rnd_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyhvsrs(vu, vv)
+}
+
+/// `Vd32.w=vmpyieo(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyieoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyieo_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyieoh(vu, vv)
+}
+
+/// `Vx32.w+=vmpyie(Vu32.w,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiewh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyieacc_VwVwVh(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyiewh_acc(vx, vu, vv)
+}
+
+/// `Vd32.w=vmpyie(Vu32.w,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiewuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyie_VwVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyiewuh(vu, vv)
+}
+
+/// `Vx32.w+=vmpyie(Vu32.w,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiewuh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyieacc_VwVwVuh(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyiewuh_acc(vx, vu, vv)
+}
+
+/// `Vd32.h=vmpyi(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyih))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpyi_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyih(vu, vv)
+}
+
+/// `Vx32.h+=vmpyi(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyih_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpyiacc_VhVhVh(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyih_acc(vx, vu, vv)
+}
+
+/// `Vd32.h=vmpyi(Vu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyihb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpyi_VhRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyihb(vu, rt)
+}
+
+/// `Vx32.h+=vmpyi(Vu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyihb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpyiacc_VhVhRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyihb_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vmpyio(Vu32.w,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiowh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyio_VwVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyiowh(vu, vv)
+}
+
+/// `Vd32.w=vmpyi(Vu32.w,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiwb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyi_VwRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwb(vu, rt)
+}
+
+/// `Vx32.w+=vmpyi(Vu32.w,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiwb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyiacc_VwVwRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwb_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vmpyi(Vu32.w,Rt32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyi_VwRh(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwh(vu, rt)
+}
+
+/// `Vx32.w+=vmpyi(Vu32.w,Rt32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiwh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyiacc_VwVwRh(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwh_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyowh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyo_VwVh_s1_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyowh(vu, vv)
+}
+
+/// `Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyowh_rnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyo_VwVh_s1_rnd_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyowh_rnd(vu, vv)
+}
+
+/// `Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat:shift`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyowh_rnd_sacc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(
+    vx: HvxVector,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVector {
+    vmpyowh_rnd_sacc(vx, vu, vv)
+}
+
+/// `Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:sat:shift`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyowh_sacc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(
+    vx: HvxVector,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVector {
+    vmpyowh_sacc(vx, vu, vv)
+}
+
+/// `Vdd32.uh=vmpy(Vu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vmpy_VubRub(vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpyub(vu, rt)
+}
+
+/// `Vxx32.uh+=vmpy(Vu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyub_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vmpyacc_WuhVubRub(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpyub_acc(vxx, vu, rt)
+}
+
+/// `Vdd32.uh=vmpy(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyubv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vmpy_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyubv(vu, vv)
+}
+
+/// `Vxx32.uh+=vmpy(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyubv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vmpyacc_WuhVubVub(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyubv_acc(vxx, vu, vv)
+}
+
+/// `Vdd32.uw=vmpy(Vu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vmpy_VuhRuh(vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpyuh(vu, rt)
+}
+
+/// `Vxx32.uw+=vmpy(Vu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyuh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vmpyacc_WuwVuhRuh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpyuh_acc(vxx, vu, rt)
+}
+
+/// `Vdd32.uw=vmpy(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyuhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vmpy_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyuhv(vu, vv)
+}
+
+/// `Vxx32.uw+=vmpy(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyuhv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vmpyacc_WuwVuhVuh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyuhv_acc(vxx, vu, vv)
+}
+
+/// `Vd32.h=vnavg(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vnavg_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vnavgh(vu, vv)
+}
+
+/// `Vd32.b=vnavg(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnavgub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vnavg_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vnavgub(vu, vv)
+}
+
+/// `Vd32.w=vnavg(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vnavg_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vnavgw(vu, vv)
+}
+
+/// `Vd32.h=vnormamt(Vu32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnormamth))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vnormamt_Vh(vu: HvxVector) -> HvxVector {
+    vnormamth(vu)
+}
+
+/// `Vd32.w=vnormamt(Vu32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnormamtw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vnormamt_Vw(vu: HvxVector) -> HvxVector {
+    vnormamtw(vu)
+}
+
+/// `Vd32=vnot(Vu32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnot))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vnot_V(vu: HvxVector) -> HvxVector {
+    vnot(vu)
+}
+
+/// `Vd32=vor(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vor_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_or(vu, vv)
+}
+
+/// `Vd32.b=vpacke(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackeb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vpacke_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackeb(vu, vv)
+}
+
+/// `Vd32.h=vpacke(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vpacke_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackeh(vu, vv)
+}
+
+/// `Vd32.b=vpack(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackhb_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vpack_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackhb_sat(vu, vv)
+}
+
+/// `Vd32.ub=vpack(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackhub_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vpack_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackhub_sat(vu, vv)
+}
+
+/// `Vd32.b=vpacko(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackob))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vpacko_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackob(vu, vv)
+}
+
+/// `Vd32.h=vpacko(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vpacko_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackoh(vu, vv)
+}
+
+/// `Vd32.h=vpack(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackwh_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vpack_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackwh_sat(vu, vv)
+}
+
+/// `Vd32.uh=vpack(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackwuh_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vpack_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackwuh_sat(vu, vv)
+}
+
+/// `Vd32.h=vpopcount(Vu32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpopcounth))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vpopcount_Vh(vu: HvxVector) -> HvxVector {
+    vpopcounth(vu)
+}
+
+/// `Vd32=vrdelta(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrdelta))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vrdelta_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrdelta(vu, vv)
+}
+
+/// `Vd32.w=vrmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpy_VubRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vrmpybus(vu, rt)
+}
+
+/// `Vx32.w+=vrmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpyacc_VwVubRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vrmpybus_acc(vx, vu, rt)
+}
+
+/// `Vdd32.w=vrmpy(Vuu32.ub,Rt32.b,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybusi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vrmpy_WubRbI(vuu: HvxVectorPair, rt: i32, iu1: i32) -> HvxVectorPair {
+    vrmpybusi(vuu, rt, iu1)
+}
+
+/// `Vxx32.w+=vrmpy(Vuu32.ub,Rt32.b,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybusi_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vrmpyacc_WwWubRbI(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+    iu1: i32,
+) -> HvxVectorPair {
+    vrmpybusi_acc(vxx, vuu, rt, iu1)
+}
+
+/// `Vd32.w=vrmpy(Vu32.ub,Vv32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybusv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpy_VubVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpybusv(vu, vv)
+}
+
+/// `Vx32.w+=vrmpy(Vu32.ub,Vv32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybusv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpyacc_VwVubVb(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpybusv_acc(vx, vu, vv)
+}
+
+/// `Vd32.w=vrmpy(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpy_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpybv(vu, vv)
+}
+
+/// `Vx32.w+=vrmpy(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpyacc_VwVbVb(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpybv_acc(vx, vu, vv)
+}
+
+/// `Vd32.uw=vrmpy(Vu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrmpy_VubRub(vu: HvxVector, rt: i32) -> HvxVector {
+    vrmpyub(vu, rt)
+}
+
+/// `Vx32.uw+=vrmpy(Vu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyub_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrmpyacc_VuwVubRub(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vrmpyub_acc(vx, vu, rt)
+}
+
+/// `Vdd32.uw=vrmpy(Vuu32.ub,Rt32.ub,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyubi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vrmpy_WubRubI(vuu: HvxVectorPair, rt: i32, iu1: i32) -> HvxVectorPair {
+    vrmpyubi(vuu, rt, iu1)
+}
+
+/// `Vxx32.uw+=vrmpy(Vuu32.ub,Rt32.ub,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyubi_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vrmpyacc_WuwWubRubI(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+    iu1: i32,
+) -> HvxVectorPair {
+    vrmpyubi_acc(vxx, vuu, rt, iu1)
+}
+
+/// `Vd32.uw=vrmpy(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyubv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrmpy_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpyubv(vu, vv)
+}
+
+/// `Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyubv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrmpyacc_VuwVubVub(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpyubv_acc(vx, vu, vv)
+}
+
+/// `Vd32=vror(Vu32,Rt32)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vror))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vror_VR(vu: HvxVector, rt: i32) -> HvxVector {
+    vror(vu, rt)
+}
+
+/// `Vd32.b=vround(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vroundhb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vround_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vroundhb(vu, vv)
+}
+
+/// `Vd32.ub=vround(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vroundhub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vround_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vroundhub(vu, vv)
+}
+
+/// `Vd32.h=vround(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vroundwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vround_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vroundwh(vu, vv)
+}
+
+/// `Vd32.uh=vround(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vroundwuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vround_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vroundwuh(vu, vv)
+}
+
+/// `Vdd32.uw=vrsad(Vuu32.ub,Rt32.ub,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrsadubi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vrsad_WubRubI(vuu: HvxVectorPair, rt: i32, iu1: i32) -> HvxVectorPair {
+    vrsadubi(vuu, rt, iu1)
+}
+
+/// `Vxx32.uw+=vrsad(Vuu32.ub,Rt32.ub,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrsadubi_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vrsadacc_WuwWubRubI(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+    iu1: i32,
+) -> HvxVectorPair {
+    vrsadubi_acc(vxx, vuu, rt, iu1)
+}
+
+/// `Vd32.ub=vsat(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsathub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vsat_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsathub(vu, vv)
+}
+
+/// `Vd32.h=vsat(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsatwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vsat_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsatwh(vu, vv)
+}
+
+/// `Vdd32.h=vsxt(Vu32.b)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vsxt_Vb(vu: HvxVector) -> HvxVectorPair {
+    vsb(vu)
+}
+
+/// `Vdd32.w=vsxt(Vu32.h)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsxt_Vh(vu: HvxVector) -> HvxVectorPair {
+    vsh(vu)
+}
+
+/// `Vd32.h=vshuffe(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshufeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vshuffe_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vshufeh(vu, vv)
+}
+
+/// `Vd32.b=vshuff(Vu32.b)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vshuff_Vb(vu: HvxVector) -> HvxVector {
+    vshuffb(vu)
+}
+
+/// `Vd32.b=vshuffe(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffeb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vshuffe_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vshuffeb(vu, vv)
+}
+
+/// `Vd32.h=vshuff(Vu32.h)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vshuff_Vh(vu: HvxVector) -> HvxVector {
+    vshuffh(vu)
+}
+
+/// `Vd32.b=vshuffo(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffob))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vshuffo_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vshuffob(vu, vv)
+}
+
+/// `Vdd32=vshuff(Vu32,Vv32,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffvdd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vshuff_VVR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVectorPair {
+    vshuffvdd(vu, vv, rt)
+}
+
+/// `Vdd32.b=vshuffoe(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshufoeb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vshuffoe_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vshufoeb(vu, vv)
+}
+
+/// `Vdd32.h=vshuffoe(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshufoeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vshuffoe_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vshufoeh(vu, vv)
+}
+
+/// `Vd32.h=vshuffo(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshufoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vshuffo_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vshufoh(vu, vv)
+}
+
+/// `Vd32.b=vsub(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vsub_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubb(vu, vv)
+}
+
+/// `Vdd32.b=vsub(Vuu32.b,Vvv32.b)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubb_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vsub_WbWb(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubb_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vsub(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vsub_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubh(vu, vv)
+}
+
+/// `Vdd32.h=vsub(Vuu32.h,Vvv32.h)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubh_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vsub_WhWh(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubh_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vsub(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vsub_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubhsat(vu, vv)
+}
+
+/// `Vdd32.h=vsub(Vuu32.h,Vvv32.h):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubhsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vsub_WhWh_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubhsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.w=vsub(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsub_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vsubhw(vu, vv)
+}
+
+/// `Vdd32.h=vsub(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsububh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vsub_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vsububh(vu, vv)
+}
+
+/// `Vd32.ub=vsub(Vu32.ub,Vv32.ub):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsububsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vsub_VubVub_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsububsat(vu, vv)
+}
+
+/// `Vdd32.ub=vsub(Vuu32.ub,Vvv32.ub):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsububsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wub_vsub_WubWub_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsububsat_dv(vuu, vvv)
+}
+
+/// `Vd32.uh=vsub(Vu32.uh,Vv32.uh):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubuhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vsub_VuhVuh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubuhsat(vu, vv)
+}
+
+/// `Vdd32.uh=vsub(Vuu32.uh,Vvv32.uh):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubuhsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vsub_WuhWuh_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubuhsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.w=vsub(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubuhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsub_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vsubuhw(vu, vv)
+}
+
+/// `Vd32.w=vsub(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vsub_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_sub(vu, vv)
+}
+
+/// `Vdd32.w=vsub(Vuu32.w,Vvv32.w)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubw_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsub_WwWw(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubw_dv(vuu, vvv)
+}
+
+/// `Vd32.w=vsub(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubwsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vsub_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubwsat(vu, vv)
+}
+
+/// `Vdd32.w=vsub(Vuu32.w,Vvv32.w):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubwsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsub_WwWw_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubwsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.h=vtmpy(Vuu32.b,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpyb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vtmpy_WbRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vtmpyb(vuu, rt)
+}
+
+/// `Vxx32.h+=vtmpy(Vuu32.b,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpyb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vtmpyacc_WhWbRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vtmpyb_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.h=vtmpy(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpybus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vtmpy_WubRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vtmpybus(vuu, rt)
+}
+
+/// `Vxx32.h+=vtmpy(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpybus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vtmpyacc_WhWubRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vtmpybus_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.w=vtmpy(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpyhb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vtmpy_WhRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vtmpyhb(vuu, rt)
+}
+
+/// `Vxx32.w+=vtmpy(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpyhb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vtmpyacc_WwWhRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vtmpyhb_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.h=vunpack(Vu32.b)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vunpack_Vb(vu: HvxVector) -> HvxVectorPair {
+    vunpackb(vu)
+}
+
+/// `Vdd32.w=vunpack(Vu32.h)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vunpack_Vh(vu: HvxVector) -> HvxVectorPair {
+    vunpackh(vu)
+}
+
+/// `Vxx32.h|=vunpacko(Vu32.b)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackob))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vunpackoor_WhVb(vxx: HvxVectorPair, vu: HvxVector) -> HvxVectorPair {
+    vunpackob(vxx, vu)
+}
+
+/// `Vxx32.w|=vunpacko(Vu32.h)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vunpackoor_WwVh(vxx: HvxVectorPair, vu: HvxVector) -> HvxVectorPair {
+    vunpackoh(vxx, vu)
+}
+
+/// `Vdd32.uh=vunpack(Vu32.ub)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vunpack_Vub(vu: HvxVector) -> HvxVectorPair {
+    vunpackub(vu)
+}
+
+/// `Vdd32.uw=vunpack(Vu32.uh)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vunpack_Vuh(vu: HvxVector) -> HvxVectorPair {
+    vunpackuh(vu)
+}
+
+/// `Vd32=vxor(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vxor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vxor_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_xor(vu, vv)
+}
+
+/// `Vdd32.uh=vzxt(Vu32.ub)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vzb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vzxt_Vub(vu: HvxVector) -> HvxVectorPair {
+    vzb(vu)
+}
+
+/// `Vdd32.uw=vzxt(Vu32.uh)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vzh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vzxt_Vuh(vu: HvxVector) -> HvxVectorPair {
+    vzh(vu)
+}
+
+/// `Vd32.b=vsplat(Rt32)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(lvsplatb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vsplat_R(rt: i32) -> HvxVector {
+    lvsplatb(rt)
+}
+
+/// `Vd32.h=vsplat(Rt32)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(lvsplath))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vsplat_R(rt: i32) -> HvxVector {
+    lvsplath(rt)
+}
+
+/// `Vd32.b=vadd(Vu32.b,Vv32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddbsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vadd_VbVb_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddbsat(vu, vv)
+}
+
+/// `Vdd32.b=vadd(Vuu32.b,Vvv32.b):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddbsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vadd_WbWb_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddbsat_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vadd(vclb(Vu32.h),Vv32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddclbh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vadd_vclb_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddclbh(vu, vv)
+}
+
+/// `Vd32.w=vadd(vclb(Vu32.w),Vv32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddclbw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vadd_vclb_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddclbw(vu, vv)
+}
+
+/// `Vxx32.w+=vadd(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddhw_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vaddacc_WwVhVh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vaddhw_acc(vxx, vu, vv)
+}
+
+/// `Vxx32.h+=vadd(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddubh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vaddacc_WhVubVub(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vaddubh_acc(vxx, vu, vv)
+}
+
+/// `Vd32.ub=vadd(Vu32.ub,Vv32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddububb_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vadd_VubVb_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddububb_sat(vu, vv)
+}
+
+/// `Vxx32.w+=vadd(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vadduhw_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vaddacc_WwVuhVuh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vadduhw_acc(vxx, vu, vv)
+}
+
+/// `Vd32.uw=vadd(Vu32.uw,Vv32.uw):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vadduwsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vadd_VuwVuw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadduwsat(vu, vv)
+}
+
+/// `Vdd32.uw=vadd(Vuu32.uw,Vvv32.uw):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vadduwsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vadd_WuwWuw_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vadduwsat_dv(vuu, vvv)
+}
+
+/// `Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vasrhbsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vasr_VhVhR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrhbsat(vu, vv, rt)
+}
+
+/// `Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vasruwuhrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_VuwVuwR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasruwuhrndsat(vu, vv, rt)
+}
+
+/// `Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vasrwuhrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_VwVwR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwuhrndsat(vu, vv, rt)
+}
+
+/// `Vd32.ub=vlsr(Vu32.ub,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlsrb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vlsr_VubR(vu: HvxVector, rt: i32) -> HvxVector {
+    vlsrb(vu, rt)
+}
+
+/// `Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8):nomatch`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvvb_nm))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32_VbVbR_nomatch(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vlutvvb_nm(vu, vv, rt)
+}
+
+/// `Vx32.b|=vlut32(Vu32.b,Vv32.b,#u3)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvvb_oracci))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32or_VbVbVbI(
+    vx: HvxVector,
+    vu: HvxVector,
+    vv: HvxVector,
+    iu3: i32,
+) -> HvxVector {
+    vlutvvb_oracci(vx, vu, vv, iu3)
+}
+
+/// `Vd32.b=vlut32(Vu32.b,Vv32.b,#u3)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvvbi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32_VbVbI(vu: HvxVector, vv: HvxVector, iu3: i32) -> HvxVector {
+    vlutvvbi(vu, vv, iu3)
+}
+
+/// `Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8):nomatch`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvwh_nm))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16_VbVhR_nomatch(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVectorPair {
+    vlutvwh_nm(vu, vv, rt)
+}
+
+/// `Vxx32.h|=vlut16(Vu32.b,Vv32.h,#u3)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvwh_oracci))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16or_WhVbVhI(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+    iu3: i32,
+) -> HvxVectorPair {
+    vlutvwh_oracci(vxx, vu, vv, iu3)
+}
+
+/// `Vdd32.h=vlut16(Vu32.b,Vv32.h,#u3)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvwhi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16_VbVhI(vu: HvxVector, vv: HvxVector, iu3: i32) -> HvxVectorPair {
+    vlutvwhi(vu, vv, iu3)
+}
+
+/// `Vd32.b=vmax(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmaxb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vmax_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxb(vu, vv)
+}
+
+/// `Vd32.b=vmin(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vminb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vmin_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminb(vu, vv)
+}
+
+/// `Vdd32.w=vmpa(Vuu32.uh,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpauhb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpa_WuhRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vmpauhb(vuu, rt)
+}
+
+/// `Vxx32.w+=vmpa(Vuu32.uh,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpauhb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpaacc_WwWuhRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpauhb_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32=vmpye(Vu32.w,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpyewuh_64))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vmpye_VwVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyewuh_64(vu, vv)
+}
+
+/// `Vd32.w=vmpyi(Vu32.w,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpyiwub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyi_VwRub(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwub(vu, rt)
+}
+
+/// `Vx32.w+=vmpyi(Vu32.w,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpyiwub_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyiacc_VwVwRub(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwub_acc(vx, vu, rt)
+}
+
+/// `Vxx32+=vmpyo(Vu32.w,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpyowh_64_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vmpyoacc_WVwVh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyowh_64_acc(vxx, vu, vv)
+}
+
+/// `Vd32.ub=vround(Vu32.uh,Vv32.uh):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vrounduhub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vround_VuhVuh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrounduhub(vu, vv)
+}
+
+/// `Vd32.uh=vround(Vu32.uw,Vv32.uw):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vrounduwuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vround_VuwVuw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrounduwuh(vu, vv)
+}
+
+/// `Vd32.uh=vsat(Vu32.uw,Vv32.uw)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsatuwuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vsat_VuwVuw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsatuwuh(vu, vv)
+}
+
+/// `Vd32.b=vsub(Vu32.b,Vv32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubbsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vsub_VbVb_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubbsat(vu, vv)
+}
+
+/// `Vdd32.b=vsub(Vuu32.b,Vvv32.b):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubbsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vsub_WbWb_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubbsat_dv(vuu, vvv)
+}
+
+/// `Vd32.ub=vsub(Vu32.ub,Vv32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubububb_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vsub_VubVb_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubububb_sat(vu, vv)
+}
+
+/// `Vd32.uw=vsub(Vu32.uw,Vv32.uw):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubuwsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vsub_VuwVuw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubuwsat(vu, vv)
+}
+
+/// `Vdd32.uw=vsub(Vuu32.uw,Vvv32.uw):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubuwsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vsub_WuwWuw_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubuwsat_dv(vuu, vvv)
+}
+
+/// `Vd32.b=vabs(Vu32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vabsb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vabs_Vb(vu: HvxVector) -> HvxVector {
+    vabsb(vu)
+}
+
+/// `Vd32.b=vabs(Vu32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vabsb_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vabs_Vb_sat(vu: HvxVector) -> HvxVector {
+    vabsb_sat(vu)
+}
+
+/// `Vx32.h+=vasl(Vu32.h,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vaslh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vaslacc_VhVhR(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vaslh_acc(vx, vu, rt)
+}
+
+/// `Vx32.h+=vasr(Vu32.h,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vasrh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasracc_VhVhR(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vasrh_acc(vx, vu, rt)
+}
+
+/// `Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vasruhubrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_VuhVuhR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasruhubrndsat(vu, vv, rt)
+}
+
+/// `Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vasruhubsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_VuhVuhR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasruhubsat(vu, vv, rt)
+}
+
+/// `Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vasruwuhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_VuwVuwR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasruwuhsat(vu, vv, rt)
+}
+
+/// `Vd32.b=vavg(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vavgb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vavg_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgb(vu, vv)
+}
+
+/// `Vd32.b=vavg(Vu32.b,Vv32.b):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vavgbrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vavg_VbVb_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgbrnd(vu, vv)
+}
+
+/// `Vd32.uw=vavg(Vu32.uw,Vv32.uw)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vavguw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vavg_VuwVuw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavguw(vu, vv)
+}
+
+/// `Vd32.uw=vavg(Vu32.uw,Vv32.uw):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vavguwrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vavg_VuwVuw_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavguwrnd(vu, vv)
+}
+
+/// `Vdd32=#0`
+///
+/// Instruction Type: MAPPING
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vdd0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vzero() -> HvxVectorPair {
+    vdd0()
+}
+
+/// `vtmp.h=vgather(Rt32,Mu2,Vv32.h).h`
+///
+/// Instruction Type: CVI_GATHER
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vgathermh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_ARMVh(rs: *mut HvxVector, rt: i32, mu: i32, vv: HvxVector) {
+    vgathermh(rs, rt, mu, vv)
+}
+
+/// `vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h`
+///
+/// Instruction Type: CVI_GATHER_DV
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vgathermhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_ARMWw(rs: *mut HvxVector, rt: i32, mu: i32, vvv: HvxVectorPair) {
+    vgathermhw(rs, rt, mu, vvv)
+}
+
+/// `vtmp.w=vgather(Rt32,Mu2,Vv32.w).w`
+///
+/// Instruction Type: CVI_GATHER
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vgathermw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_ARMVw(rs: *mut HvxVector, rt: i32, mu: i32, vv: HvxVector) {
+    vgathermw(rs, rt, mu, vv)
+}
+
+/// `Vdd32.h=vmpa(Vuu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpabuu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpa_WubRub(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vmpabuu(vuu, rt)
+}
+
+/// `Vxx32.h+=vmpa(Vuu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpabuu_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpaacc_WhWubRub(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpabuu_acc(vxx, vuu, rt)
+}
+
+/// `Vxx32.w+=vmpy(Vu32.h,Rt32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpyh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpyacc_WwVhRh(vxx: HvxVectorPair, vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpyh_acc(vxx, vu, rt)
+}
+
+/// `Vd32.uw=vmpye(Vu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpyuhe))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vmpye_VuhRuh(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyuhe(vu, rt)
+}
+
+/// `Vx32.uw+=vmpye(Vu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpyuhe_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vmpyeacc_VuwVuhRuh(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyuhe_acc(vx, vu, rt)
+}
+
+/// `Vd32.b=vnavg(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vnavgb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vnavg_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vnavgb(vu, vv)
+}
+
+/// `vscatter(Rt32,Mu2,Vv32.h).h=Vw32`
+///
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_RMVhV(rt: i32, mu: i32, vv: HvxVector, vw: HvxVector) {
+    vscattermh(rt, mu, vv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vv32.h).h+=Vw32`
+///
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermh_add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatteracc_RMVhV(rt: i32, mu: i32, vv: HvxVector, vw: HvxVector) {
+    vscattermh_add(rt, mu, vv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vvv32.w).h=Vw32`
+///
+/// Instruction Type: CVI_SCATTER_DV
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_RMWwV(rt: i32, mu: i32, vvv: HvxVectorPair, vw: HvxVector) {
+    vscattermhw(rt, mu, vvv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vvv32.w).h+=Vw32`
+///
+/// Instruction Type: CVI_SCATTER_DV
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermhw_add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatteracc_RMWwV(rt: i32, mu: i32, vvv: HvxVectorPair, vw: HvxVector) {
+    vscattermhw_add(rt, mu, vvv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vv32.w).w=Vw32`
+///
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_RMVwV(rt: i32, mu: i32, vv: HvxVector, vw: HvxVector) {
+    vscattermw(rt, mu, vv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vv32.w).w+=Vw32`
+///
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermw_add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatteracc_RMVwV(rt: i32, mu: i32, vv: HvxVector, vw: HvxVector) {
+    vscattermw_add(rt, mu, vv, vw)
+}
+
+/// `Vxx32.w=vasrinto(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv66"))]
+#[cfg_attr(test, assert_instr(vasr_into))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vasrinto_WwVwVw(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vasr_into(vxx, vu, vv)
+}
+
+/// `Vd32.uw=vrotr(Vu32.uw,Vv32.uw)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv66"))]
+#[cfg_attr(test, assert_instr(vrotr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrotr_VuwVuw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrotr(vu, vv)
+}
+
+/// `Vd32.w=vsatdw(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv66"))]
+#[cfg_attr(test, assert_instr(vsatdw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vsatdw_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsatdw(vu, vv)
+}
+
+/// `Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(v6mpyhubs10))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_v6mpy_WubWbI_h(
+    vuu: HvxVectorPair,
+    vvv: HvxVectorPair,
+    iu2: i32,
+) -> HvxVectorPair {
+    v6mpyhubs10(vuu, vvv, iu2)
+}
+
+/// `Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(v6mpyhubs10_vxx))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_v6mpyacc_WwWubWbI_h(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    vvv: HvxVectorPair,
+    iu2: i32,
+) -> HvxVectorPair {
+    v6mpyhubs10_vxx(vxx, vuu, vvv, iu2)
+}
+
+/// `Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(v6mpyvubs10))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_v6mpy_WubWbI_v(
+    vuu: HvxVectorPair,
+    vvv: HvxVectorPair,
+    iu2: i32,
+) -> HvxVectorPair {
+    v6mpyvubs10(vuu, vvv, iu2)
+}
+
+/// `Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(v6mpyvubs10_vxx))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_v6mpyacc_WwWubWbI_v(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    vvv: HvxVectorPair,
+    iu2: i32,
+) -> HvxVectorPair {
+    v6mpyvubs10_vxx(vxx, vuu, vvv, iu2)
+}
+
+/// `Vd32.hf=vabs(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vabs_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vabs_Vhf(vu: HvxVector) -> HvxVector {
+    vabs_hf(vu)
+}
+
+/// `Vd32.sf=vabs(Vu32.sf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vabs_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vabs_Vsf(vu: HvxVector) -> HvxVector {
+    vabs_sf(vu)
+}
+
+/// `Vd32.qf16=vadd(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vadd_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_hf(vu, vv)
+}
+
+/// `Vd32.hf=vadd(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_hf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vadd_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_hf_hf(vu, vv)
+}
+
+/// `Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vadd_Vqf16Vqf16(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_qf16(vu, vv)
+}
+
+/// `Vd32.qf16=vadd(Vu32.qf16,Vv32.hf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_qf16_mix))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vadd_Vqf16Vhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_qf16_mix(vu, vv)
+}
+
+/// `Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vadd_Vqf32Vqf32(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_qf32(vu, vv)
+}
+
+/// `Vd32.qf32=vadd(Vu32.qf32,Vv32.sf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_qf32_mix))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vadd_Vqf32Vsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_qf32_mix(vu, vv)
+}
+
+/// `Vd32.qf32=vadd(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vadd_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_sf(vu, vv)
+}
+
+/// `Vdd32.sf=vadd(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vadd_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vadd_sf_hf(vu, vv)
+}
+
+/// `Vd32.sf=vadd(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_sf_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vadd_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_sf_sf(vu, vv)
+}
+
+/// `Vd32.w=vfmv(Vu32.w)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vassign_fp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vfmv_Vw(vu: HvxVector) -> HvxVector {
+    vassign_fp(vu)
+}
+
+/// `Vd32.hf=Vu32.qf16`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vconv_hf_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_equals_Vqf16(vu: HvxVector) -> HvxVector {
+    vconv_hf_qf16(vu)
+}
+
+/// `Vd32.hf=Vuu32.qf32`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vconv_hf_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_equals_Wqf32(vuu: HvxVectorPair) -> HvxVector {
+    vconv_hf_qf32(vuu)
+}
+
+/// `Vd32.sf=Vu32.qf32`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vconv_sf_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_equals_Vqf32(vu: HvxVector) -> HvxVector {
+    vconv_sf_qf32(vu)
+}
+
+/// `Vd32.b=vcvt(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_b_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vcvt_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vcvt_b_hf(vu, vv)
+}
+
+/// `Vd32.h=vcvt(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_h_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vcvt_Vhf(vu: HvxVector) -> HvxVector {
+    vcvt_h_hf(vu)
+}
+
+/// `Vdd32.hf=vcvt(Vu32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_b))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt_Vb(vu: HvxVector) -> HvxVectorPair {
+    vcvt_hf_b(vu)
+}
+
+/// `Vd32.hf=vcvt(Vu32.h)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_h))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vcvt_Vh(vu: HvxVector) -> HvxVector {
+    vcvt_hf_h(vu)
+}
+
+/// `Vd32.hf=vcvt(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vcvt_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vcvt_hf_sf(vu, vv)
+}
+
+/// `Vdd32.hf=vcvt(Vu32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_ub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt_Vub(vu: HvxVector) -> HvxVectorPair {
+    vcvt_hf_ub(vu)
+}
+
+/// `Vd32.hf=vcvt(Vu32.uh)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_uh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vcvt_Vuh(vu: HvxVector) -> HvxVector {
+    vcvt_hf_uh(vu)
+}
+
+/// `Vdd32.sf=vcvt(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vcvt_Vhf(vu: HvxVector) -> HvxVectorPair {
+    vcvt_sf_hf(vu)
+}
+
+/// `Vd32.ub=vcvt(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_ub_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vcvt_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vcvt_ub_hf(vu, vv)
+}
+
+/// `Vd32.uh=vcvt(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_uh_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vcvt_Vhf(vu: HvxVector) -> HvxVector {
+    vcvt_uh_hf(vu)
+}
+
+/// `Vd32.sf=vdmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vdmpy_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vdmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdmpy_sf_hf(vu, vv)
+}
+
+/// `Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vdmpy_sf_hf_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vdmpyacc_VsfVhfVhf(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdmpy_sf_hf_acc(vx, vu, vv)
+}
+
+/// `Vd32.hf=vfmax(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfmax_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vfmax_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmax_hf(vu, vv)
+}
+
+/// `Vd32.sf=vfmax(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfmax_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vfmax_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmax_sf(vu, vv)
+}
+
+/// `Vd32.hf=vfmin(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfmin_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vfmin_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmin_hf(vu, vv)
+}
+
+/// `Vd32.sf=vfmin(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfmin_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vfmin_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmin_sf(vu, vv)
+}
+
+/// `Vd32.hf=vfneg(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfneg_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vfneg_Vhf(vu: HvxVector) -> HvxVector {
+    vfneg_hf(vu)
+}
+
+/// `Vd32.sf=vfneg(Vu32.sf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfneg_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vfneg_Vsf(vu: HvxVector) -> HvxVector {
+    vfneg_sf(vu)
+}
+
+/// `Vd32.hf=vmax(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmax_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vmax_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmax_hf(vu, vv)
+}
+
+/// `Vd32.sf=vmax(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmax_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vmax_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmax_sf(vu, vv)
+}
+
+/// `Vd32.hf=vmin(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmin_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vmin_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmin_hf(vu, vv)
+}
+
+/// `Vd32.sf=vmin(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmin_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vmin_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmin_sf(vu, vv)
+}
+
+/// `Vd32.hf=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_hf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_hf_hf(vu, vv)
+}
+
+/// `Vx32.hf+=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_hf_hf_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vmpyacc_VhfVhfVhf(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_hf_hf_acc(vx, vu, vv)
+}
+
+/// `Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vmpy_Vqf16Vqf16(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf16(vu, vv)
+}
+
+/// `Vd32.qf16=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf16_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf16_hf(vu, vv)
+}
+
+/// `Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf16_mix_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vmpy_Vqf16Vhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf16_mix_hf(vu, vv)
+}
+
+/// `Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vmpy_Vqf32Vqf32(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf32(vu, vv)
+}
+
+/// `Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wqf32_vmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpy_qf32_hf(vu, vv)
+}
+
+/// `Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32_mix_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wqf32_vmpy_Vqf16Vhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpy_qf32_mix_hf(vu, vv)
+}
+
+/// `Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wqf32_vmpy_Vqf16Vqf16(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpy_qf32_qf16(vu, vv)
+}
+
+/// `Vd32.qf32=vmpy(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vmpy_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf32_sf(vu, vv)
+}
+
+/// `Vdd32.sf=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpy_sf_hf(vu, vv)
+}
+
+/// `Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_sf_hf_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vmpyacc_WsfVhfVhf(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpy_sf_hf_acc(vxx, vu, vv)
+}
+
+/// `Vd32.sf=vmpy(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_sf_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vmpy_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_sf_sf(vu, vv)
+}
+
+/// `Vd32.qf16=vsub(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vsub_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_hf(vu, vv)
+}
+
+/// `Vd32.hf=vsub(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_hf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vsub_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_hf_hf(vu, vv)
+}
+
+/// `Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vsub_Vqf16Vqf16(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_qf16(vu, vv)
+}
+
+/// `Vd32.qf16=vsub(Vu32.qf16,Vv32.hf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_qf16_mix))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vsub_Vqf16Vhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_qf16_mix(vu, vv)
+}
+
+/// `Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vsub_Vqf32Vqf32(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_qf32(vu, vv)
+}
+
+/// `Vd32.qf32=vsub(Vu32.qf32,Vv32.sf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_qf32_mix))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vsub_Vqf32Vsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_qf32_mix(vu, vv)
+}
+
+/// `Vd32.qf32=vsub(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vsub_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_sf(vu, vv)
+}
+
+/// `Vdd32.sf=vsub(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vsub_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vsub_sf_hf(vu, vv)
+}
+
+/// `Vd32.sf=vsub(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_sf_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vsub_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_sf_sf(vu, vv)
+}
+
+/// `Vd32.ub=vasr(Vuu32.uh,Vv32.ub):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vasrvuhubrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_WuhVub_rnd_sat(vuu: HvxVectorPair, vv: HvxVector) -> HvxVector {
+    vasrvuhubrndsat(vuu, vv)
+}
+
+/// `Vd32.ub=vasr(Vuu32.uh,Vv32.ub):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vasrvuhubsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_WuhVub_sat(vuu: HvxVectorPair, vv: HvxVector) -> HvxVector {
+    vasrvuhubsat(vuu, vv)
+}
+
+/// `Vd32.uh=vasr(Vuu32.w,Vv32.uh):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vasrvwuhrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_WwVuh_rnd_sat(vuu: HvxVectorPair, vv: HvxVector) -> HvxVector {
+    vasrvwuhrndsat(vuu, vv)
+}
+
+/// `Vd32.uh=vasr(Vuu32.w,Vv32.uh):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vasrvwuhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_WwVuh_sat(vuu: HvxVectorPair, vv: HvxVector) -> HvxVector {
+    vasrvwuhsat(vuu, vv)
+}
+
+/// `Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vmpyuhvs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vmpy_VuhVuh_rs16(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyuhvs(vu, vv)
+}
+
+/// `Vd32.h=Vu32.hf`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv73"))]
+#[cfg_attr(test, assert_instr(vconv_h_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_equals_Vhf(vu: HvxVector) -> HvxVector {
+    vconv_h_hf(vu)
+}
+
+/// `Vd32.hf=Vu32.h`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv73"))]
+#[cfg_attr(test, assert_instr(vconv_hf_h))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_equals_Vh(vu: HvxVector) -> HvxVector {
+    vconv_hf_h(vu)
+}
+
+/// `Vd32.sf=Vu32.w`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv73"))]
+#[cfg_attr(test, assert_instr(vconv_sf_w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_equals_Vw(vu: HvxVector) -> HvxVector {
+    vconv_sf_w(vu)
+}
+
+/// `Vd32.w=Vu32.sf`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv73"))]
+#[cfg_attr(test, assert_instr(vconv_w_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_equals_Vsf(vu: HvxVector) -> HvxVector {
+    vconv_w_sf(vu)
+}
+
+/// `Vd32=vgetqfext(Vu32.x,Rt32)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(get_qfext))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vgetqfext_VR(vu: HvxVector, rt: i32) -> HvxVector {
+    get_qfext(vu, rt)
+}
+
+/// `Vd32.x=vsetqfext(Vu32,Rt32)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(set_qfext))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vsetqfext_VR(vu: HvxVector, rt: i32) -> HvxVector {
+    set_qfext(vu, rt)
+}
+
+/// `Vd32.f8=vabs(Vu32.f8)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vabs_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vabs_V(vu: HvxVector) -> HvxVector {
+    vabs_f8(vu)
+}
+
+/// `Vdd32.hf=vcvt2(Vu32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vcvt2_hf_b))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt2_Vb(vu: HvxVector) -> HvxVectorPair {
+    vcvt2_hf_b(vu)
+}
+
+/// `Vdd32.hf=vcvt2(Vu32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vcvt2_hf_ub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt2_Vub(vu: HvxVector) -> HvxVectorPair {
+    vcvt2_hf_ub(vu)
+}
+
+/// `Vdd32.hf=vcvt(Vu32.f8)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt_V(vu: HvxVector) -> HvxVectorPair {
+    vcvt_hf_f8(vu)
+}
+
+/// `Vd32.f8=vfmax(Vu32.f8,Vv32.f8)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vfmax_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vfmax_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmax_f8(vu, vv)
+}
+
+/// `Vd32.f8=vfmin(Vu32.f8,Vv32.f8)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vfmin_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vfmin_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmin_f8(vu, vv)
+}
+
+/// `Vd32.f8=vfneg(Vu32.f8)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vfneg_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vfneg_V(vu: HvxVector) -> HvxVector {
+    vfneg_f8(vu)
+}
+
+/// `Qd4=and(Qs4,Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_and_QQ(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=and(Qs4,!Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_and_QQn(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_and_n(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=not(Qs4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_not_Q(qs: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_not(vandvrt(
+            core::mem::transmute::<HvxVectorPred, HvxVector>(qs),
+            -1,
+        )),
+        -1,
+    ))
+}
+
+/// `Qd4=or(Qs4,Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_or_QQ(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=or(Qs4,!Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_or_QQn(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_or_n(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vsetq(Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vsetq_R(rt: i32) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(pred_scalar2(rt), -1))
+}
+
+/// `Qd4=xor(Qs4,Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_xor_QQ(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `if (!Qv4) vmem(Rt32+#s4)=Vs32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VM_ST
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vmem_QnRIV(qv: HvxVectorPred, rt: *mut HvxVector, vs: HvxVector) {
+    vS32b_nqpred_ai(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        rt,
+        vs,
+    )
+}
+
+/// `if (!Qv4) vmem(Rt32+#s4):nt=Vs32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VM_ST
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vmem_QnRIV_nt(qv: HvxVectorPred, rt: *mut HvxVector, vs: HvxVector) {
+    vS32b_nt_nqpred_ai(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        rt,
+        vs,
+    )
+}
+
+/// `if (Qv4) vmem(Rt32+#s4):nt=Vs32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VM_ST
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vmem_QRIV_nt(qv: HvxVectorPred, rt: *mut HvxVector, vs: HvxVector) {
+    vS32b_nt_qpred_ai(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        rt,
+        vs,
+    )
+}
+
+/// `if (Qv4) vmem(Rt32+#s4)=Vs32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VM_ST
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vmem_QRIV(qv: HvxVectorPred, rt: *mut HvxVector, vs: HvxVector) {
+    vS32b_qpred_ai(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        rt,
+        vs,
+    )
+}
+
+/// `if (!Qv4) Vx32.b+=Vu32.b`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_condacc_QnVbVb(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddbnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.b+=Vu32.b`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_condacc_QVbVb(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddbq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (!Qv4) Vx32.h+=Vu32.h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_condacc_QnVhVh(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddhnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.h+=Vu32.h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_condacc_QVhVh(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddhq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (!Qv4) Vx32.w+=Vu32.w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_condacc_QnVwVw(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddwnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.w+=Vu32.w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_condacc_QVwVw(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddwq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `Vd32=vand(Qu4,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_QR(qu: HvxVectorPred, rt: i32) -> HvxVector {
+    vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qu), rt)
+}
+
+/// `Vx32|=vand(Qu4,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vandor_VQR(vx: HvxVector, qu: HvxVectorPred, rt: i32) -> HvxVector {
+    vandvrt_acc(vx, core::mem::transmute::<HvxVectorPred, HvxVector>(qu), rt)
+}
+
+/// `Qd4=vand(Vu32,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vand_VR(vu: HvxVector, rt: i32) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vu, rt))
+}
+
+/// `Qx4|=vand(Vu32,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vandor_QVR(qx: HvxVectorPred, vu: HvxVector, rt: i32) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt_acc(
+        core::mem::transmute::<HvxVectorPred, HvxVector>(qx),
+        vu,
+        rt,
+    ))
+}
+
+/// `Qd4=vcmp.eq(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eq_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(veqb(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.eq(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqand_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqb_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.eq(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqor_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqb_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.eq(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqxacc_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqb_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.eq(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eq_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(veqh(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.eq(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqand_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqh_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.eq(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqor_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqh_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.eq(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqxacc_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqh_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.eq(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eq_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(veqw(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.eq(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqand_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqw_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.eq(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqor_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqw_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.eq(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqxacc_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqw_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtb(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtb_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtb_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtb_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgth(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgth_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgth_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgth_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.ub,Vv32.ub)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtub(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.ub,Vv32.ub)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVubVub(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtub_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.ub,Vv32.ub)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVubVub(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtub_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.ub,Vv32.ub)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVubVub(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtub_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.uh,Vv32.uh)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtuh(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.uh,Vv32.uh)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVuhVuh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuh_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.uh,Vv32.uh)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVuhVuh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuh_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.uh,Vv32.uh)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVuhVuh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuh_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.uw,Vv32.uw)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VuwVuw(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtuw(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.uw,Vv32.uw)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVuwVuw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuw_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.uw,Vv32.uw)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVuwVuw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuw_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.uw,Vv32.uw)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVuwVuw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuw_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtw(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtw_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtw_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtw_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Vd32=vmux(Qt4,Vu32,Vv32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vmux_QVV(qt: HvxVectorPred, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmux(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        vu,
+        vv,
+    )
+}
+
+/// `if (!Qv4) Vx32.b-=Vu32.b`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_condnac_QnVbVb(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubbnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.b-=Vu32.b`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_condnac_QVbVb(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubbq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (!Qv4) Vx32.h-=Vu32.h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_condnac_QnVhVh(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubhnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.h-=Vu32.h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_condnac_QVhVh(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubhq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (!Qv4) Vx32.w-=Vu32.w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_condnac_QnVwVw(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubwnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.w-=Vu32.w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_condnac_QVwVw(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubwq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `Vdd32=vswap(Qt4,Vu32,Vv32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vswap_QVV(qt: HvxVectorPred, vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vswap(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        vu,
+        vv,
+    )
+}
+
+/// `Qd4=vsetq2(Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vsetq2_R(rt: i32) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(pred_scalar2v2(rt), -1))
+}
+
+/// `Qd4.b=vshuffe(Qs4.h,Qt4.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Qb_vshuffe_QhQh(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        shuffeqh(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4.h=vshuffe(Qs4.w,Qt4.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Qh_vshuffe_QwQw(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        shuffeqw(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Vd32=vand(!Qu4,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_QnR(qu: HvxVectorPred, rt: i32) -> HvxVector {
+    vandnqrt(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qu), -1),
+        rt,
+    )
+}
+
+/// `Vx32|=vand(!Qu4,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vandor_VQnR(vx: HvxVector, qu: HvxVectorPred, rt: i32) -> HvxVector {
+    vandnqrt_acc(
+        vx,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qu), -1),
+        rt,
+    )
+}
+
+/// `Vd32=vand(!Qv4,Vu32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_QnV(qv: HvxVectorPred, vu: HvxVector) -> HvxVector {
+    vandvnqv(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vu,
+    )
+}
+
+/// `Vd32=vand(Qv4,Vu32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_QV(qv: HvxVectorPred, vu: HvxVector) -> HvxVector {
+    vandvqv(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vu,
+    )
+}
+
+/// `if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vv32.h).h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_GATHER
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_AQRMVh(
+    rs: *mut HvxVector,
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vv: HvxVector,
+) {
+    vgathermhq(
+        rs,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vv,
+    )
+}
+
+/// `if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_GATHER_DV
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_AQRMWw(
+    rs: *mut HvxVector,
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vvv: HvxVectorPair,
+) {
+    vgathermhwq(
+        rs,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vvv,
+    )
+}
+
+/// `if (Qs4) vtmp.w=vgather(Rt32,Mu2,Vv32.w).w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_GATHER
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_AQRMVw(
+    rs: *mut HvxVector,
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vv: HvxVector,
+) {
+    vgathermwq(
+        rs,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vv,
+    )
+}
+
+/// `Vd32.b=prefixsum(Qv4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_prefixsum_Q(qv: HvxVectorPred) -> HvxVector {
+    vprefixqb(vandvrt(
+        core::mem::transmute::<HvxVectorPred, HvxVector>(qv),
+        -1,
+    ))
+}
+
+/// `Vd32.h=prefixsum(Qv4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_prefixsum_Q(qv: HvxVectorPred) -> HvxVector {
+    vprefixqh(vandvrt(
+        core::mem::transmute::<HvxVectorPred, HvxVector>(qv),
+        -1,
+    ))
+}
+
+/// `Vd32.w=prefixsum(Qv4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_prefixsum_Q(qv: HvxVectorPred) -> HvxVector {
+    vprefixqw(vandvrt(
+        core::mem::transmute::<HvxVectorPred, HvxVector>(qv),
+        -1,
+    ))
+}
+
+/// `if (Qs4) vscatter(Rt32,Mu2,Vv32.h).h=Vw32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_QRMVhV(
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vv: HvxVector,
+    vw: HvxVector,
+) {
+    vscattermhq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vv,
+        vw,
+    )
+}
+
+/// `if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_SCATTER_DV
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_QRMWwV(
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vvv: HvxVectorPair,
+    vw: HvxVector,
+) {
+    vscattermhwq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vvv,
+        vw,
+    )
+}
+
+/// `if (Qs4) vscatter(Rt32,Mu2,Vv32.w).w=Vw32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_QRMVwV(
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vv: HvxVector,
+    vw: HvxVector,
+) {
+    vscattermwq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vv,
+        vw,
+    )
+}
+
+/// `Vd32.w=vadd(Vu32.w,Vv32.w,Qs4):carry:sat`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv66"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vadd_VwVwQ_carry_sat(
+    vu: HvxVector,
+    vv: HvxVector,
+    qs: HvxVectorPred,
+) -> HvxVector {
+    vaddcarrysat(
+        vu,
+        vv,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+    )
+}
+
+/// `Qd4=vcmp.gt(Vu32.hf,Vv32.hf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgthf(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.hf,Vv32.hf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVhfVhf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgthf_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.hf,Vv32.hf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVhfVhf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgthf_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.hf,Vv32.hf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVhfVhf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgthf_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.sf,Vv32.sf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtsf(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.sf,Vv32.sf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVsfVsf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtsf_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.sf,Vv32.sf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVsfVsf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtsf_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.sf,Vv32.sf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVsfVsf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtsf_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
diff --git a/crates/core_arch/src/hexagon/v64.rs b/crates/core_arch/src/hexagon/v64.rs
new file mode 100644
index 0000000000..e9b18b2fd8
--- /dev/null
+++ b/crates/core_arch/src/hexagon/v64.rs
@@ -0,0 +1,7502 @@
+//! Hexagon HVX 64-byte vector mode intrinsics
+//!
+//! This module provides intrinsics for the Hexagon Vector Extensions (HVX)
+//! in 64-byte vector mode (512-bit vectors).
+//!
+//! HVX is a wide vector extension designed for high-performance signal processing.
+//! [Hexagon HVX Programmer's Reference Manual](https://docs.qualcomm.com/doc/80-N2040-61)
+//!
+//! ## Vector Types
+//!
+//! In 64-byte mode:
+//! - `HvxVector` is 512 bits (64 bytes) containing 16 x 32-bit values
+//! - `HvxVectorPair` is 1024 bits (128 bytes)
+//! - `HvxVectorPred` is 512 bits (64 bytes) for predicate operations
+//!
+//! To use this module, compile with `-C target-feature=+hvx-length64b`.
+//!
+//! ## Naming Convention
+//!
+//! Function names preserve the original Q6 naming case because the convention
+//! uses case to distinguish register types:
+//! - `W` (uppercase) = vector pair (`HvxVectorPair`)
+//! - `V` (uppercase) = vector (`HvxVector`)
+//! - `Q` (uppercase) = predicate (`HvxVectorPred`)
+//! - `R` = scalar register (`i32`)
+//!
+//! For example, `Q6_W_vcombine_VV` operates on a vector pair while
+//! `Q6_V_hi_W` extracts a vector from a pair.
+//!
+//! ## Architecture Versions
+//!
+//! Different intrinsics require different HVX architecture versions. Use the
+//! appropriate target feature to enable the required version:
+//! - HVX v60: `-C target-feature=+hvxv60` (basic HVX operations)
+//! - HVX v62: `-C target-feature=+hvxv62`
+//! - HVX v65: `-C target-feature=+hvxv65` (includes floating-point support)
+//! - HVX v66: `-C target-feature=+hvxv66`
+//! - HVX v68: `-C target-feature=+hvxv68`
+//! - HVX v69: `-C target-feature=+hvxv69`
+//! - HVX v73: `-C target-feature=+hvxv73`
+//! - HVX v79: `-C target-feature=+hvxv79`
+//!
+//! Each version includes all features from previous versions.
+
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+use crate::intrinsics::simd::{simd_add, simd_and, simd_or, simd_sub, simd_xor};
+
+// HVX type definitions for 64-byte vector mode
+types! {
+    #![unstable(feature = "stdarch_hexagon", issue = "151523")]
+
+    /// HVX vector type (512 bits / 64 bytes)
+    ///
+    /// This type represents a single HVX vector register containing 16 x 32-bit values.
+    pub struct HvxVector(16 x i32);
+
+    /// HVX vector pair type (1024 bits / 128 bytes)
+    ///
+    /// This type represents a pair of HVX vector registers, often used for
+    /// operations that produce double-width results.
+    pub struct HvxVectorPair(32 x i32);
+
+    /// HVX vector predicate type (512 bits / 64 bytes)
+    ///
+    /// This type represents a predicate vector used for conditional operations.
+    /// Each bit corresponds to a lane in the vector.
+    pub struct HvxVectorPred(16 x i32);
+}
+
+// LLVM intrinsic declarations for 64-byte vector mode
+#[allow(improper_ctypes)]
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.hexagon.V6.extractw"]
+    fn extractw(_: HvxVector, _: i32) -> i32;
+    #[link_name = "llvm.hexagon.V6.get.qfext"]
+    fn get_qfext(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.hi"]
+    fn hi(_: HvxVectorPair) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.lo"]
+    fn lo(_: HvxVectorPair) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.lvsplatb"]
+    fn lvsplatb(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.lvsplath"]
+    fn lvsplath(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.lvsplatw"]
+    fn lvsplatw(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.and"]
+    fn pred_and(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.and.n"]
+    fn pred_and_n(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.not"]
+    fn pred_not(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.or"]
+    fn pred_or(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.or.n"]
+    fn pred_or_n(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.scalar2"]
+    fn pred_scalar2(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.scalar2v2"]
+    fn pred_scalar2v2(_: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.pred.xor"]
+    fn pred_xor(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.set.qfext"]
+    fn set_qfext(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.shuffeqh"]
+    fn shuffeqh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.shuffeqw"]
+    fn shuffeqw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.v6mpyhubs10"]
+    fn v6mpyhubs10(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.v6mpyhubs10.vxx"]
+    fn v6mpyhubs10_vxx(
+        _: HvxVectorPair,
+        _: HvxVectorPair,
+        _: HvxVectorPair,
+        _: i32,
+    ) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.v6mpyvubs10"]
+    fn v6mpyvubs10(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.v6mpyvubs10.vxx"]
+    fn v6mpyvubs10_vxx(
+        _: HvxVectorPair,
+        _: HvxVectorPair,
+        _: HvxVectorPair,
+        _: i32,
+    ) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vS32b.nqpred.ai"]
+    fn vS32b_nqpred_ai(_: HvxVector, _: *mut HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vS32b.nt.nqpred.ai"]
+    fn vS32b_nt_nqpred_ai(_: HvxVector, _: *mut HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vS32b.nt.qpred.ai"]
+    fn vS32b_nt_qpred_ai(_: HvxVector, _: *mut HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vS32b.qpred.ai"]
+    fn vS32b_qpred_ai(_: HvxVector, _: *mut HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vabs.f8"]
+    fn vabs_f8(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabs.hf"]
+    fn vabs_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabs.sf"]
+    fn vabs_sf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsb"]
+    fn vabsb(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsb.sat"]
+    fn vabsb_sat(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsdiffh"]
+    fn vabsdiffh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsdiffub"]
+    fn vabsdiffub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsdiffuh"]
+    fn vabsdiffuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsdiffw"]
+    fn vabsdiffw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsh"]
+    fn vabsh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsh.sat"]
+    fn vabsh_sat(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsw"]
+    fn vabsw(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vabsw.sat"]
+    fn vabsw_sat(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.hf"]
+    fn vadd_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.hf.hf"]
+    fn vadd_hf_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.qf16"]
+    fn vadd_qf16(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.qf16.mix"]
+    fn vadd_qf16_mix(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.qf32"]
+    fn vadd_qf32(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.qf32.mix"]
+    fn vadd_qf32_mix(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.sf"]
+    fn vadd_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadd.sf.hf"]
+    fn vadd_sf_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vadd.sf.sf"]
+    fn vadd_sf_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddb"]
+    fn vaddb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddb.dv"]
+    fn vaddb_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddbnq"]
+    fn vaddbnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddbq"]
+    fn vaddbq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddbsat"]
+    fn vaddbsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddbsat.dv"]
+    fn vaddbsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddcarrysat"]
+    fn vaddcarrysat(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddclbh"]
+    fn vaddclbh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddclbw"]
+    fn vaddclbw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddh"]
+    fn vaddh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddh.dv"]
+    fn vaddh_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddhnq"]
+    fn vaddhnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddhq"]
+    fn vaddhq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddhsat"]
+    fn vaddhsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddhsat.dv"]
+    fn vaddhsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddhw"]
+    fn vaddhw(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddhw.acc"]
+    fn vaddhw_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddubh"]
+    fn vaddubh(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddubh.acc"]
+    fn vaddubh_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddubsat"]
+    fn vaddubsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddubsat.dv"]
+    fn vaddubsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddububb.sat"]
+    fn vaddububb_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadduhsat"]
+    fn vadduhsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadduhsat.dv"]
+    fn vadduhsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vadduhw"]
+    fn vadduhw(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vadduhw.acc"]
+    fn vadduhw_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vadduwsat"]
+    fn vadduwsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vadduwsat.dv"]
+    fn vadduwsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddw"]
+    fn vaddw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddw.dv"]
+    fn vaddw_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vaddwnq"]
+    fn vaddwnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddwq"]
+    fn vaddwq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddwsat"]
+    fn vaddwsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaddwsat.dv"]
+    fn vaddwsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.valignb"]
+    fn valignb(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.valignbi"]
+    fn valignbi(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vand"]
+    fn vand(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandnqrt"]
+    fn vandnqrt(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandnqrt.acc"]
+    fn vandnqrt_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandqrt"]
+    fn vandqrt(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandqrt.acc"]
+    fn vandqrt_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandvnqv"]
+    fn vandvnqv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandvqv"]
+    fn vandvqv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandvrt"]
+    fn vandvrt(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vandvrt.acc"]
+    fn vandvrt_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslh"]
+    fn vaslh(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslh.acc"]
+    fn vaslh_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslhv"]
+    fn vaslhv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslw"]
+    fn vaslw(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslw.acc"]
+    fn vaslw_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vaslwv"]
+    fn vaslwv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasr.into"]
+    fn vasr_into(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vasrh"]
+    fn vasrh(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrh.acc"]
+    fn vasrh_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhbrndsat"]
+    fn vasrhbrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhbsat"]
+    fn vasrhbsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhubrndsat"]
+    fn vasrhubrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhubsat"]
+    fn vasrhubsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrhv"]
+    fn vasrhv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasruhubrndsat"]
+    fn vasruhubrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasruhubsat"]
+    fn vasruhubsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasruwuhrndsat"]
+    fn vasruwuhrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasruwuhsat"]
+    fn vasruwuhsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrvuhubrndsat"]
+    fn vasrvuhubrndsat(_: HvxVectorPair, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrvuhubsat"]
+    fn vasrvuhubsat(_: HvxVectorPair, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrvwuhrndsat"]
+    fn vasrvwuhrndsat(_: HvxVectorPair, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrvwuhsat"]
+    fn vasrvwuhsat(_: HvxVectorPair, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrw"]
+    fn vasrw(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrw.acc"]
+    fn vasrw_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwh"]
+    fn vasrwh(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwhrndsat"]
+    fn vasrwhrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwhsat"]
+    fn vasrwhsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwuhrndsat"]
+    fn vasrwuhrndsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwuhsat"]
+    fn vasrwuhsat(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vasrwv"]
+    fn vasrwv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vassign"]
+    fn vassign(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vassign.fp"]
+    fn vassign_fp(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vassignp"]
+    fn vassignp(_: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vavgb"]
+    fn vavgb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgbrnd"]
+    fn vavgbrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgh"]
+    fn vavgh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavghrnd"]
+    fn vavghrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgub"]
+    fn vavgub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgubrnd"]
+    fn vavgubrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavguh"]
+    fn vavguh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavguhrnd"]
+    fn vavguhrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavguw"]
+    fn vavguw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavguwrnd"]
+    fn vavguwrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgw"]
+    fn vavgw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vavgwrnd"]
+    fn vavgwrnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcl0h"]
+    fn vcl0h(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcl0w"]
+    fn vcl0w(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcombine"]
+    fn vcombine(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vconv.h.hf"]
+    fn vconv_h_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.hf.h"]
+    fn vconv_hf_h(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.hf.qf16"]
+    fn vconv_hf_qf16(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.hf.qf32"]
+    fn vconv_hf_qf32(_: HvxVectorPair) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.sf.qf32"]
+    fn vconv_sf_qf32(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.sf.w"]
+    fn vconv_sf_w(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vconv.w.sf"]
+    fn vconv_w_sf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt2.hf.b"]
+    fn vcvt2_hf_b(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt2.hf.ub"]
+    fn vcvt2_hf_ub(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.b.hf"]
+    fn vcvt_b_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.h.hf"]
+    fn vcvt_h_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.b"]
+    fn vcvt_hf_b(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.f8"]
+    fn vcvt_hf_f8(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.h"]
+    fn vcvt_hf_h(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.sf"]
+    fn vcvt_hf_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.ub"]
+    fn vcvt_hf_ub(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.hf.uh"]
+    fn vcvt_hf_uh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.sf.hf"]
+    fn vcvt_sf_hf(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vcvt.ub.hf"]
+    fn vcvt_ub_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vcvt.uh.hf"]
+    fn vcvt_uh_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vd0"]
+    fn vd0() -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdd0"]
+    fn vdd0() -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdealb"]
+    fn vdealb(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdealb4w"]
+    fn vdealb4w(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdealh"]
+    fn vdealh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdealvdd"]
+    fn vdealvdd(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdelta"]
+    fn vdelta(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpy.sf.hf"]
+    fn vdmpy_sf_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpy.sf.hf.acc"]
+    fn vdmpy_sf_hf_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpybus"]
+    fn vdmpybus(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpybus.acc"]
+    fn vdmpybus_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpybus.dv"]
+    fn vdmpybus_dv(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdmpybus.dv.acc"]
+    fn vdmpybus_dv_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdmpyhb"]
+    fn vdmpyhb(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhb.acc"]
+    fn vdmpyhb_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhb.dv"]
+    fn vdmpyhb_dv(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdmpyhb.dv.acc"]
+    fn vdmpyhb_dv_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdmpyhisat"]
+    fn vdmpyhisat(_: HvxVectorPair, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhisat.acc"]
+    fn vdmpyhisat_acc(_: HvxVector, _: HvxVectorPair, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsat"]
+    fn vdmpyhsat(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsat.acc"]
+    fn vdmpyhsat_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsuisat"]
+    fn vdmpyhsuisat(_: HvxVectorPair, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsuisat.acc"]
+    fn vdmpyhsuisat_acc(_: HvxVector, _: HvxVectorPair, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsusat"]
+    fn vdmpyhsusat(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhsusat.acc"]
+    fn vdmpyhsusat_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhvsat"]
+    fn vdmpyhvsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdmpyhvsat.acc"]
+    fn vdmpyhvsat_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vdsaduh"]
+    fn vdsaduh(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vdsaduh.acc"]
+    fn vdsaduh_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.veqb"]
+    fn veqb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqb.and"]
+    fn veqb_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqb.or"]
+    fn veqb_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqb.xor"]
+    fn veqb_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqh"]
+    fn veqh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqh.and"]
+    fn veqh_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqh.or"]
+    fn veqh_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqh.xor"]
+    fn veqh_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqw"]
+    fn veqw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqw.and"]
+    fn veqw_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqw.or"]
+    fn veqw_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.veqw.xor"]
+    fn veqw_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmax.f8"]
+    fn vfmax_f8(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmax.hf"]
+    fn vfmax_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmax.sf"]
+    fn vfmax_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmin.f8"]
+    fn vfmin_f8(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmin.hf"]
+    fn vfmin_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfmin.sf"]
+    fn vfmin_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfneg.f8"]
+    fn vfneg_f8(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfneg.hf"]
+    fn vfneg_hf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vfneg.sf"]
+    fn vfneg_sf(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgathermh"]
+    fn vgathermh(_: *mut HvxVector, _: i32, _: i32, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermhq"]
+    fn vgathermhq(_: *mut HvxVector, _: HvxVector, _: i32, _: i32, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermhw"]
+    fn vgathermhw(_: *mut HvxVector, _: i32, _: i32, _: HvxVectorPair) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermhwq"]
+    fn vgathermhwq(_: *mut HvxVector, _: HvxVector, _: i32, _: i32, _: HvxVectorPair) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermw"]
+    fn vgathermw(_: *mut HvxVector, _: i32, _: i32, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vgathermwq"]
+    fn vgathermwq(_: *mut HvxVector, _: HvxVector, _: i32, _: i32, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vgtb"]
+    fn vgtb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtb.and"]
+    fn vgtb_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtb.or"]
+    fn vgtb_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtb.xor"]
+    fn vgtb_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgth"]
+    fn vgth(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgth.and"]
+    fn vgth_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgth.or"]
+    fn vgth_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgth.xor"]
+    fn vgth_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgthf"]
+    fn vgthf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgthf.and"]
+    fn vgthf_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgthf.or"]
+    fn vgthf_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgthf.xor"]
+    fn vgthf_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtsf"]
+    fn vgtsf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtsf.and"]
+    fn vgtsf_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtsf.or"]
+    fn vgtsf_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtsf.xor"]
+    fn vgtsf_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtub"]
+    fn vgtub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtub.and"]
+    fn vgtub_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtub.or"]
+    fn vgtub_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtub.xor"]
+    fn vgtub_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuh"]
+    fn vgtuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuh.and"]
+    fn vgtuh_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuh.or"]
+    fn vgtuh_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuh.xor"]
+    fn vgtuh_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuw"]
+    fn vgtuw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuw.and"]
+    fn vgtuw_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuw.or"]
+    fn vgtuw_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtuw.xor"]
+    fn vgtuw_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtw"]
+    fn vgtw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtw.and"]
+    fn vgtw_and(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtw.or"]
+    fn vgtw_or(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vgtw.xor"]
+    fn vgtw_xor(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vinsertwr"]
+    fn vinsertwr(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlalignb"]
+    fn vlalignb(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlalignbi"]
+    fn vlalignbi(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrb"]
+    fn vlsrb(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrh"]
+    fn vlsrh(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrhv"]
+    fn vlsrhv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrw"]
+    fn vlsrw(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlsrwv"]
+    fn vlsrwv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvb"]
+    fn vlutvvb(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvb.nm"]
+    fn vlutvvb_nm(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvb.oracc"]
+    fn vlutvvb_oracc(_: HvxVector, _: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvb.oracci"]
+    fn vlutvvb_oracci(_: HvxVector, _: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvvbi"]
+    fn vlutvvbi(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vlutvwh"]
+    fn vlutvwh(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vlutvwh.nm"]
+    fn vlutvwh_nm(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vlutvwh.oracc"]
+    fn vlutvwh_oracc(_: HvxVectorPair, _: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vlutvwh.oracci"]
+    fn vlutvwh_oracci(_: HvxVectorPair, _: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vlutvwhi"]
+    fn vlutvwhi(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmax.hf"]
+    fn vmax_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmax.sf"]
+    fn vmax_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxb"]
+    fn vmaxb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxh"]
+    fn vmaxh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxub"]
+    fn vmaxub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxuh"]
+    fn vmaxuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmaxw"]
+    fn vmaxw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmin.hf"]
+    fn vmin_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmin.sf"]
+    fn vmin_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminb"]
+    fn vminb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminh"]
+    fn vminh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminub"]
+    fn vminub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminuh"]
+    fn vminuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vminw"]
+    fn vminw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpabus"]
+    fn vmpabus(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabus.acc"]
+    fn vmpabus_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabusv"]
+    fn vmpabusv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabuu"]
+    fn vmpabuu(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabuu.acc"]
+    fn vmpabuu_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpabuuv"]
+    fn vmpabuuv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpahb"]
+    fn vmpahb(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpahb.acc"]
+    fn vmpahb_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpauhb"]
+    fn vmpauhb(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpauhb.acc"]
+    fn vmpauhb_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.hf.hf"]
+    fn vmpy_hf_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.hf.hf.acc"]
+    fn vmpy_hf_hf_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf16"]
+    fn vmpy_qf16(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf16.hf"]
+    fn vmpy_qf16_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf16.mix.hf"]
+    fn vmpy_qf16_mix_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32"]
+    fn vmpy_qf32(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32.hf"]
+    fn vmpy_qf32_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32.mix.hf"]
+    fn vmpy_qf32_mix_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32.qf16"]
+    fn vmpy_qf32_qf16(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.qf32.sf"]
+    fn vmpy_qf32_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpy.sf.hf"]
+    fn vmpy_sf_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.sf.hf.acc"]
+    fn vmpy_sf_hf_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpy.sf.sf"]
+    fn vmpy_sf_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpybus"]
+    fn vmpybus(_: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybus.acc"]
+    fn vmpybus_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybusv"]
+    fn vmpybusv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybusv.acc"]
+    fn vmpybusv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybv"]
+    fn vmpybv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpybv.acc"]
+    fn vmpybv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyewuh"]
+    fn vmpyewuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyewuh.64"]
+    fn vmpyewuh_64(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyh"]
+    fn vmpyh(_: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyh.acc"]
+    fn vmpyh_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhsat.acc"]
+    fn vmpyhsat_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhsrs"]
+    fn vmpyhsrs(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyhss"]
+    fn vmpyhss(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyhus"]
+    fn vmpyhus(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhus.acc"]
+    fn vmpyhus_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhv"]
+    fn vmpyhv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhv.acc"]
+    fn vmpyhv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyhvsrs"]
+    fn vmpyhvsrs(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyieoh"]
+    fn vmpyieoh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiewh.acc"]
+    fn vmpyiewh_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiewuh"]
+    fn vmpyiewuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiewuh.acc"]
+    fn vmpyiewuh_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyih"]
+    fn vmpyih(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyih.acc"]
+    fn vmpyih_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyihb"]
+    fn vmpyihb(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyihb.acc"]
+    fn vmpyihb_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiowh"]
+    fn vmpyiowh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwb"]
+    fn vmpyiwb(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwb.acc"]
+    fn vmpyiwb_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwh"]
+    fn vmpyiwh(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwh.acc"]
+    fn vmpyiwh_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwub"]
+    fn vmpyiwub(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyiwub.acc"]
+    fn vmpyiwub_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyowh"]
+    fn vmpyowh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyowh.64.acc"]
+    fn vmpyowh_64_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyowh.rnd"]
+    fn vmpyowh_rnd(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyowh.rnd.sacc"]
+    fn vmpyowh_rnd_sacc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyowh.sacc"]
+    fn vmpyowh_sacc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyub"]
+    fn vmpyub(_: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyub.acc"]
+    fn vmpyub_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyubv"]
+    fn vmpyubv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyubv.acc"]
+    fn vmpyubv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuh"]
+    fn vmpyuh(_: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuh.acc"]
+    fn vmpyuh_acc(_: HvxVectorPair, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuhe"]
+    fn vmpyuhe(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyuhe.acc"]
+    fn vmpyuhe_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmpyuhv"]
+    fn vmpyuhv(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuhv.acc"]
+    fn vmpyuhv_acc(_: HvxVectorPair, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vmpyuhvs"]
+    fn vmpyuhvs(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vmux"]
+    fn vmux(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnavgb"]
+    fn vnavgb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnavgh"]
+    fn vnavgh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnavgub"]
+    fn vnavgub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnavgw"]
+    fn vnavgw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnormamth"]
+    fn vnormamth(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnormamtw"]
+    fn vnormamtw(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vnot"]
+    fn vnot(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vor"]
+    fn vor(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackeb"]
+    fn vpackeb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackeh"]
+    fn vpackeh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackhb.sat"]
+    fn vpackhb_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackhub.sat"]
+    fn vpackhub_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackob"]
+    fn vpackob(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackoh"]
+    fn vpackoh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackwh.sat"]
+    fn vpackwh_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpackwuh.sat"]
+    fn vpackwuh_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vpopcounth"]
+    fn vpopcounth(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vprefixqb"]
+    fn vprefixqb(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vprefixqh"]
+    fn vprefixqh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vprefixqw"]
+    fn vprefixqw(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrdelta"]
+    fn vrdelta(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybus"]
+    fn vrmpybus(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybus.acc"]
+    fn vrmpybus_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybusi"]
+    fn vrmpybusi(_: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrmpybusi.acc"]
+    fn vrmpybusi_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrmpybusv"]
+    fn vrmpybusv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybusv.acc"]
+    fn vrmpybusv_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybv"]
+    fn vrmpybv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpybv.acc"]
+    fn vrmpybv_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpyub"]
+    fn vrmpyub(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpyub.acc"]
+    fn vrmpyub_acc(_: HvxVector, _: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpyubi"]
+    fn vrmpyubi(_: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrmpyubi.acc"]
+    fn vrmpyubi_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrmpyubv"]
+    fn vrmpyubv(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrmpyubv.acc"]
+    fn vrmpyubv_acc(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vror"]
+    fn vror(_: HvxVector, _: i32) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrotr"]
+    fn vrotr(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vroundhb"]
+    fn vroundhb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vroundhub"]
+    fn vroundhub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrounduhub"]
+    fn vrounduhub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrounduwuh"]
+    fn vrounduwuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vroundwh"]
+    fn vroundwh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vroundwuh"]
+    fn vroundwuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vrsadubi"]
+    fn vrsadubi(_: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vrsadubi.acc"]
+    fn vrsadubi_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsatdw"]
+    fn vsatdw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsathub"]
+    fn vsathub(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsatuwuh"]
+    fn vsatuwuh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsatwh"]
+    fn vsatwh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsb"]
+    fn vsb(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vscattermh"]
+    fn vscattermh(_: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermh.add"]
+    fn vscattermh_add(_: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermhq"]
+    fn vscattermhq(_: HvxVector, _: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermhw"]
+    fn vscattermhw(_: i32, _: i32, _: HvxVectorPair, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermhw.add"]
+    fn vscattermhw_add(_: i32, _: i32, _: HvxVectorPair, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermhwq"]
+    fn vscattermhwq(_: HvxVector, _: i32, _: i32, _: HvxVectorPair, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermw"]
+    fn vscattermw(_: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermw.add"]
+    fn vscattermw_add(_: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vscattermwq"]
+    fn vscattermwq(_: HvxVector, _: i32, _: i32, _: HvxVector, _: HvxVector) -> ();
+    #[link_name = "llvm.hexagon.V6.vsh"]
+    fn vsh(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vshufeh"]
+    fn vshufeh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffb"]
+    fn vshuffb(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffeb"]
+    fn vshuffeb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffh"]
+    fn vshuffh(_: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffob"]
+    fn vshuffob(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vshuffvdd"]
+    fn vshuffvdd(_: HvxVector, _: HvxVector, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vshufoeb"]
+    fn vshufoeb(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vshufoeh"]
+    fn vshufoeh(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vshufoh"]
+    fn vshufoh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.hf"]
+    fn vsub_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.hf.hf"]
+    fn vsub_hf_hf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.qf16"]
+    fn vsub_qf16(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.qf16.mix"]
+    fn vsub_qf16_mix(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.qf32"]
+    fn vsub_qf32(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.qf32.mix"]
+    fn vsub_qf32_mix(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.sf"]
+    fn vsub_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsub.sf.hf"]
+    fn vsub_sf_hf(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsub.sf.sf"]
+    fn vsub_sf_sf(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubb"]
+    fn vsubb(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubb.dv"]
+    fn vsubb_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubbnq"]
+    fn vsubbnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubbq"]
+    fn vsubbq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubbsat"]
+    fn vsubbsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubbsat.dv"]
+    fn vsubbsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubh"]
+    fn vsubh(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubh.dv"]
+    fn vsubh_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubhnq"]
+    fn vsubhnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubhq"]
+    fn vsubhq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubhsat"]
+    fn vsubhsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubhsat.dv"]
+    fn vsubhsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubhw"]
+    fn vsubhw(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsububh"]
+    fn vsububh(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsububsat"]
+    fn vsububsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsububsat.dv"]
+    fn vsububsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubububb.sat"]
+    fn vsubububb_sat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubuhsat"]
+    fn vsubuhsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubuhsat.dv"]
+    fn vsubuhsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubuhw"]
+    fn vsubuhw(_: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubuwsat"]
+    fn vsubuwsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubuwsat.dv"]
+    fn vsubuwsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubw"]
+    fn vsubw(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubw.dv"]
+    fn vsubw_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vsubwnq"]
+    fn vsubwnq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubwq"]
+    fn vsubwq(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubwsat"]
+    fn vsubwsat(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vsubwsat.dv"]
+    fn vsubwsat_dv(_: HvxVectorPair, _: HvxVectorPair) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vswap"]
+    fn vswap(_: HvxVector, _: HvxVector, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpyb"]
+    fn vtmpyb(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpyb.acc"]
+    fn vtmpyb_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpybus"]
+    fn vtmpybus(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpybus.acc"]
+    fn vtmpybus_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpyhb"]
+    fn vtmpyhb(_: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vtmpyhb.acc"]
+    fn vtmpyhb_acc(_: HvxVectorPair, _: HvxVectorPair, _: i32) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackb"]
+    fn vunpackb(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackh"]
+    fn vunpackh(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackob"]
+    fn vunpackob(_: HvxVectorPair, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackoh"]
+    fn vunpackoh(_: HvxVectorPair, _: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackub"]
+    fn vunpackub(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vunpackuh"]
+    fn vunpackuh(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vxor"]
+    fn vxor(_: HvxVector, _: HvxVector) -> HvxVector;
+    #[link_name = "llvm.hexagon.V6.vzb"]
+    fn vzb(_: HvxVector) -> HvxVectorPair;
+    #[link_name = "llvm.hexagon.V6.vzh"]
+    fn vzh(_: HvxVector) -> HvxVectorPair;
+}
+
+/// `Rd32=vextract(Vu32,Rs32)`
+///
+/// Instruction Type: LD
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(extractw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_R_vextract_VR(vu: HvxVector, rs: i32) -> i32 {
+    extractw(vu, rs)
+}
+
+/// `Vd32=hi(Vss32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(hi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_hi_W(vss: HvxVectorPair) -> HvxVector {
+    hi(vss)
+}
+
+/// `Vd32=lo(Vss32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(lo))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_lo_W(vss: HvxVectorPair) -> HvxVector {
+    lo(vss)
+}
+
+/// `Vd32=vsplat(Rt32)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(lvsplatw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vsplat_R(rt: i32) -> HvxVector {
+    lvsplatw(rt)
+}
+
+/// `Vd32.uh=vabsdiff(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsdiffh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vabsdiff_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vabsdiffh(vu, vv)
+}
+
+/// `Vd32.ub=vabsdiff(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsdiffub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vabsdiff_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vabsdiffub(vu, vv)
+}
+
+/// `Vd32.uh=vabsdiff(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsdiffuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vabsdiff_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vabsdiffuh(vu, vv)
+}
+
+/// `Vd32.uw=vabsdiff(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsdiffw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vabsdiff_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vabsdiffw(vu, vv)
+}
+
+/// `Vd32.h=vabs(Vu32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vabs_Vh(vu: HvxVector) -> HvxVector {
+    vabsh(vu)
+}
+
+/// `Vd32.h=vabs(Vu32.h):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsh_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vabs_Vh_sat(vu: HvxVector) -> HvxVector {
+    vabsh_sat(vu)
+}
+
+/// `Vd32.w=vabs(Vu32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vabs_Vw(vu: HvxVector) -> HvxVector {
+    vabsw(vu)
+}
+
+/// `Vd32.w=vabs(Vu32.w):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vabsw_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vabs_Vw_sat(vu: HvxVector) -> HvxVector {
+    vabsw_sat(vu)
+}
+
+/// `Vd32.b=vadd(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vadd_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddb(vu, vv)
+}
+
+/// `Vdd32.b=vadd(Vuu32.b,Vvv32.b)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddb_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vadd_WbWb(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddb_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vadd(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vadd_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddh(vu, vv)
+}
+
+/// `Vdd32.h=vadd(Vuu32.h,Vvv32.h)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddh_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vadd_WhWh(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddh_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vadd(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vadd_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddhsat(vu, vv)
+}
+
+/// `Vdd32.h=vadd(Vuu32.h,Vvv32.h):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddhsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vadd_WhWh_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddhsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.w=vadd(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vadd_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vaddhw(vu, vv)
+}
+
+/// `Vdd32.h=vadd(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vadd_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vaddubh(vu, vv)
+}
+
+/// `Vd32.ub=vadd(Vu32.ub,Vv32.ub):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddubsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vadd_VubVub_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddubsat(vu, vv)
+}
+
+/// `Vdd32.ub=vadd(Vuu32.ub,Vvv32.ub):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddubsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wub_vadd_WubWub_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddubsat_dv(vuu, vvv)
+}
+
+/// `Vd32.uh=vadd(Vu32.uh,Vv32.uh):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vadduhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vadd_VuhVuh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadduhsat(vu, vv)
+}
+
+/// `Vdd32.uh=vadd(Vuu32.uh,Vvv32.uh):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vadduhsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vadd_WuhWuh_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vadduhsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.w=vadd(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vadduhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vadd_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vadduhw(vu, vv)
+}
+
+/// `Vd32.w=vadd(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vadd_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_add(vu, vv)
+}
+
+/// `Vdd32.w=vadd(Vuu32.w,Vvv32.w)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddw_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vadd_WwWw(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddw_dv(vuu, vvv)
+}
+
+/// `Vd32.w=vadd(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddwsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vadd_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddwsat(vu, vv)
+}
+
+/// `Vdd32.w=vadd(Vuu32.w,Vvv32.w):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaddwsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vadd_WwWw_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddwsat_dv(vuu, vvv)
+}
+
+/// `Vd32=valign(Vu32,Vv32,Rt8)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(valignb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_valign_VVR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    valignb(vu, vv, rt)
+}
+
+/// `Vd32=valign(Vu32,Vv32,#u3)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(valignbi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_valign_VVI(vu: HvxVector, vv: HvxVector, iu3: i32) -> HvxVector {
+    valignbi(vu, vv, iu3)
+}
+
+/// `Vd32=vand(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vand))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_and(vu, vv)
+}
+
+/// `Vd32.h=vasl(Vu32.h,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasl_VhR(vu: HvxVector, rt: i32) -> HvxVector {
+    vaslh(vu, rt)
+}
+
+/// `Vd32.h=vasl(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasl_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaslhv(vu, vv)
+}
+
+/// `Vd32.w=vasl(Vu32.w,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasl_VwR(vu: HvxVector, rt: i32) -> HvxVector {
+    vaslw(vu, rt)
+}
+
+/// `Vx32.w+=vasl(Vu32.w,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslw_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vaslacc_VwVwR(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vaslw_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vasl(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vaslwv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasl_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaslwv(vu, vv)
+}
+
+/// `Vd32.h=vasr(Vu32.h,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VhR(vu: HvxVector, rt: i32) -> HvxVector {
+    vasrh(vu, rt)
+}
+
+/// `Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrhbrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vasr_VhVhR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrhbrndsat(vu, vv, rt)
+}
+
+/// `Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrhubrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_VhVhR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrhubrndsat(vu, vv, rt)
+}
+
+/// `Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrhubsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_VhVhR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrhubsat(vu, vv, rt)
+}
+
+/// `Vd32.h=vasr(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vasrhv(vu, vv)
+}
+
+/// `Vd32.w=vasr(Vu32.w,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasr_VwR(vu: HvxVector, rt: i32) -> HvxVector {
+    vasrw(vu, rt)
+}
+
+/// `Vx32.w+=vasr(Vu32.w,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrw_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasracc_VwVwR(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vasrw_acc(vx, vu, rt)
+}
+
+/// `Vd32.h=vasr(Vu32.w,Vv32.w,Rt8)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VwVwR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwh(vu, vv, rt)
+}
+
+/// `Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwhrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VwVwR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwhrndsat(vu, vv, rt)
+}
+
+/// `Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasr_VwVwR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwhsat(vu, vv, rt)
+}
+
+/// `Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwuhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_VwVwR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwuhsat(vu, vv, rt)
+}
+
+/// `Vd32.w=vasr(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vasrwv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vasr_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vasrwv(vu, vv)
+}
+
+/// `Vd32=Vu32`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vassign))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_equals_V(vu: HvxVector) -> HvxVector {
+    vassign(vu)
+}
+
+/// `Vdd32=Vuu32`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vassignp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_equals_W(vuu: HvxVectorPair) -> HvxVectorPair {
+    vassignp(vuu)
+}
+
+/// `Vd32.h=vavg(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vavg_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgh(vu, vv)
+}
+
+/// `Vd32.h=vavg(Vu32.h,Vv32.h):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavghrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vavg_VhVh_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavghrnd(vu, vv)
+}
+
+/// `Vd32.ub=vavg(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vavg_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgub(vu, vv)
+}
+
+/// `Vd32.ub=vavg(Vu32.ub,Vv32.ub):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgubrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vavg_VubVub_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgubrnd(vu, vv)
+}
+
+/// `Vd32.uh=vavg(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavguh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vavg_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavguh(vu, vv)
+}
+
+/// `Vd32.uh=vavg(Vu32.uh,Vv32.uh):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavguhrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vavg_VuhVuh_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavguhrnd(vu, vv)
+}
+
+/// `Vd32.w=vavg(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vavg_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgw(vu, vv)
+}
+
+/// `Vd32.w=vavg(Vu32.w,Vv32.w):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vavgwrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vavg_VwVw_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgwrnd(vu, vv)
+}
+
+/// `Vd32.uh=vcl0(Vu32.uh)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vcl0h))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vcl0_Vuh(vu: HvxVector) -> HvxVector {
+    vcl0h(vu)
+}
+
+/// `Vd32.uw=vcl0(Vu32.uw)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vcl0w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vcl0_Vuw(vu: HvxVector) -> HvxVector {
+    vcl0w(vu)
+}
+
+/// `Vdd32=vcombine(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vcombine))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vcombine_VV(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vcombine(vu, vv)
+}
+
+/// `Vd32=#0`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vd0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vzero() -> HvxVector {
+    vd0()
+}
+
+/// `Vd32.b=vdeal(Vu32.b)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdealb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vdeal_Vb(vu: HvxVector) -> HvxVector {
+    vdealb(vu)
+}
+
+/// `Vd32.b=vdeale(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdealb4w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vdeale_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdealb4w(vu, vv)
+}
+
+/// `Vd32.h=vdeal(Vu32.h)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdealh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vdeal_Vh(vu: HvxVector) -> HvxVector {
+    vdealh(vu)
+}
+
+/// `Vdd32=vdeal(Vu32,Vv32,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdealvdd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vdeal_VVR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVectorPair {
+    vdealvdd(vu, vv, rt)
+}
+
+/// `Vd32=vdelta(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdelta))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vdelta_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdelta(vu, vv)
+}
+
+/// `Vd32.h=vdmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpybus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vdmpy_VubRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpybus(vu, rt)
+}
+
+/// `Vx32.h+=vdmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpybus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vdmpyacc_VhVubRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpybus_acc(vx, vu, rt)
+}
+
+/// `Vdd32.h=vdmpy(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpybus_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vdmpy_WubRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vdmpybus_dv(vuu, rt)
+}
+
+/// `Vxx32.h+=vdmpy(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpybus_dv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vdmpyacc_WhWubRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vdmpybus_dv_acc(vxx, vuu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_VhRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhb(vu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwVhRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhb_acc(vx, vu, rt)
+}
+
+/// `Vdd32.w=vdmpy(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhb_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vdmpy_WhRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vdmpyhb_dv(vuu, rt)
+}
+
+/// `Vxx32.w+=vdmpy(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhb_dv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vdmpyacc_WwWhRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vdmpyhb_dv_acc(vxx, vuu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vuu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhisat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_WhRh_sat(vuu: HvxVectorPair, rt: i32) -> HvxVector {
+    vdmpyhisat(vuu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vuu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhisat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwWhRh_sat(vx: HvxVector, vuu: HvxVectorPair, rt: i32) -> HvxVector {
+    vdmpyhisat_acc(vx, vuu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_VhRh_sat(vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhsat(vu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwVhRh_sat(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhsat_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vuu32.h,Rt32.uh,#1):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsuisat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_WhRuh_sat(vuu: HvxVectorPair, rt: i32) -> HvxVector {
+    vdmpyhsuisat(vuu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vuu32.h,Rt32.uh,#1):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsuisat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwWhRuh_sat(vx: HvxVector, vuu: HvxVectorPair, rt: i32) -> HvxVector {
+    vdmpyhsuisat_acc(vx, vuu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsusat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_VhRuh_sat(vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhsusat(vu, rt)
+}
+
+/// `Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhsusat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwVhRuh_sat(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vdmpyhsusat_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vdmpy(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhvsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpy_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdmpyhvsat(vu, vv)
+}
+
+/// `Vx32.w+=vdmpy(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdmpyhvsat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vdmpyacc_VwVhVh_sat(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdmpyhvsat_acc(vx, vu, vv)
+}
+
+/// `Vdd32.uw=vdsad(Vuu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdsaduh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vdsad_WuhRuh(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vdsaduh(vuu, rt)
+}
+
+/// `Vxx32.uw+=vdsad(Vuu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vdsaduh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vdsadacc_WuwWuhRuh(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vdsaduh_acc(vxx, vuu, rt)
+}
+
+/// `Vx32.w=vinsert(Rt32)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vinsertwr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vinsert_VwR(vx: HvxVector, rt: i32) -> HvxVector {
+    vinsertwr(vx, rt)
+}
+
+/// `Vd32=vlalign(Vu32,Vv32,Rt8)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlalignb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vlalign_VVR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vlalignb(vu, vv, rt)
+}
+
+/// `Vd32=vlalign(Vu32,Vv32,#u3)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlalignbi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vlalign_VVI(vu: HvxVector, vv: HvxVector, iu3: i32) -> HvxVector {
+    vlalignbi(vu, vv, iu3)
+}
+
+/// `Vd32.uh=vlsr(Vu32.uh,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlsrh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vlsr_VuhR(vu: HvxVector, rt: i32) -> HvxVector {
+    vlsrh(vu, rt)
+}
+
+/// `Vd32.h=vlsr(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlsrhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vlsr_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vlsrhv(vu, vv)
+}
+
+/// `Vd32.uw=vlsr(Vu32.uw,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlsrw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vlsr_VuwR(vu: HvxVector, rt: i32) -> HvxVector {
+    vlsrw(vu, rt)
+}
+
+/// `Vd32.w=vlsr(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlsrwv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vlsr_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vlsrwv(vu, vv)
+}
+
+/// `Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlutvvb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32_VbVbR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vlutvvb(vu, vv, rt)
+}
+
+/// `Vx32.b|=vlut32(Vu32.b,Vv32.b,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlutvvb_oracc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32or_VbVbVbR(
+    vx: HvxVector,
+    vu: HvxVector,
+    vv: HvxVector,
+    rt: i32,
+) -> HvxVector {
+    vlutvvb_oracc(vx, vu, vv, rt)
+}
+
+/// `Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlutvwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16_VbVhR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVectorPair {
+    vlutvwh(vu, vv, rt)
+}
+
+/// `Vxx32.h|=vlut16(Vu32.b,Vv32.h,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vlutvwh_oracc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16or_WhVbVhR(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+    rt: i32,
+) -> HvxVectorPair {
+    vlutvwh_oracc(vxx, vu, vv, rt)
+}
+
+/// `Vd32.h=vmax(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmaxh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmax_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxh(vu, vv)
+}
+
+/// `Vd32.ub=vmax(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmaxub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vmax_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxub(vu, vv)
+}
+
+/// `Vd32.uh=vmax(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmaxuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vmax_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxuh(vu, vv)
+}
+
+/// `Vd32.w=vmax(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmaxw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmax_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxw(vu, vv)
+}
+
+/// `Vd32.h=vmin(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vminh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmin_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminh(vu, vv)
+}
+
+/// `Vd32.ub=vmin(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vminub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vmin_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminub(vu, vv)
+}
+
+/// `Vd32.uh=vmin(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vminuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vmin_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminuh(vu, vv)
+}
+
+/// `Vd32.w=vmin(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vminw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmin_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminw(vu, vv)
+}
+
+/// `Vdd32.h=vmpa(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpabus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpa_WubRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vmpabus(vuu, rt)
+}
+
+/// `Vxx32.h+=vmpa(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpabus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpaacc_WhWubRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpabus_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.h=vmpa(Vuu32.ub,Vvv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpabusv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpa_WubWb(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vmpabusv(vuu, vvv)
+}
+
+/// `Vdd32.h=vmpa(Vuu32.ub,Vvv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpabuuv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpa_WubWub(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vmpabuuv(vuu, vvv)
+}
+
+/// `Vdd32.w=vmpa(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpahb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpa_WhRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vmpahb(vuu, rt)
+}
+
+/// `Vxx32.w+=vmpa(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpahb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpaacc_WwWhRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpahb_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.h=vmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpy_VubRb(vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpybus(vu, rt)
+}
+
+/// `Vxx32.h+=vmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpyacc_WhVubRb(vxx: HvxVectorPair, vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpybus_acc(vxx, vu, rt)
+}
+
+/// `Vdd32.h=vmpy(Vu32.ub,Vv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybusv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpy_VubVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpybusv(vu, vv)
+}
+
+/// `Vxx32.h+=vmpy(Vu32.ub,Vv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybusv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpyacc_WhVubVb(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpybusv_acc(vxx, vu, vv)
+}
+
+/// `Vdd32.h=vmpy(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpy_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpybv(vu, vv)
+}
+
+/// `Vxx32.h+=vmpy(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpybv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpyacc_WhVbVb(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpybv_acc(vxx, vu, vv)
+}
+
+/// `Vd32.w=vmpye(Vu32.w,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyewuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpye_VwVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyewuh(vu, vv)
+}
+
+/// `Vdd32.w=vmpy(Vu32.h,Rt32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpy_VhRh(vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpyh(vu, rt)
+}
+
+/// `Vxx32.w+=vmpy(Vu32.h,Rt32.h):sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhsat_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpyacc_WwVhRh_sat(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpyhsat_acc(vxx, vu, rt)
+}
+
+/// `Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhsrs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpy_VhRh_s1_rnd_sat(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyhsrs(vu, rt)
+}
+
+/// `Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhss))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpy_VhRh_s1_sat(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyhss(vu, rt)
+}
+
+/// `Vdd32.w=vmpy(Vu32.h,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpy_VhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyhus(vu, vv)
+}
+
+/// `Vxx32.w+=vmpy(Vu32.h,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpyacc_WwVhVuh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyhus_acc(vxx, vu, vv)
+}
+
+/// `Vdd32.w=vmpy(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpy_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyhv(vu, vv)
+}
+
+/// `Vxx32.w+=vmpy(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpyacc_WwVhVh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyhv_acc(vxx, vu, vv)
+}
+
+/// `Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyhvsrs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpy_VhVh_s1_rnd_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyhvsrs(vu, vv)
+}
+
+/// `Vd32.w=vmpyieo(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyieoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyieo_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyieoh(vu, vv)
+}
+
+/// `Vx32.w+=vmpyie(Vu32.w,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiewh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyieacc_VwVwVh(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyiewh_acc(vx, vu, vv)
+}
+
+/// `Vd32.w=vmpyie(Vu32.w,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiewuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyie_VwVuh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyiewuh(vu, vv)
+}
+
+/// `Vx32.w+=vmpyie(Vu32.w,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiewuh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyieacc_VwVwVuh(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyiewuh_acc(vx, vu, vv)
+}
+
+/// `Vd32.h=vmpyi(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyih))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpyi_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyih(vu, vv)
+}
+
+/// `Vx32.h+=vmpyi(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyih_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpyiacc_VhVhVh(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyih_acc(vx, vu, vv)
+}
+
+/// `Vd32.h=vmpyi(Vu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyihb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpyi_VhRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyihb(vu, rt)
+}
+
+/// `Vx32.h+=vmpyi(Vu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyihb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vmpyiacc_VhVhRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyihb_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vmpyio(Vu32.w,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiowh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyio_VwVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyiowh(vu, vv)
+}
+
+/// `Vd32.w=vmpyi(Vu32.w,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiwb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyi_VwRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwb(vu, rt)
+}
+
+/// `Vx32.w+=vmpyi(Vu32.w,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiwb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyiacc_VwVwRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwb_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vmpyi(Vu32.w,Rt32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyi_VwRh(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwh(vu, rt)
+}
+
+/// `Vx32.w+=vmpyi(Vu32.w,Rt32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyiwh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyiacc_VwVwRh(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwh_acc(vx, vu, rt)
+}
+
+/// `Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyowh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyo_VwVh_s1_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyowh(vu, vv)
+}
+
+/// `Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyowh_rnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyo_VwVh_s1_rnd_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyowh_rnd(vu, vv)
+}
+
+/// `Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat:shift`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyowh_rnd_sacc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(
+    vx: HvxVector,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVector {
+    vmpyowh_rnd_sacc(vx, vu, vv)
+}
+
+/// `Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:sat:shift`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyowh_sacc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(
+    vx: HvxVector,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVector {
+    vmpyowh_sacc(vx, vu, vv)
+}
+
+/// `Vdd32.uh=vmpy(Vu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vmpy_VubRub(vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpyub(vu, rt)
+}
+
+/// `Vxx32.uh+=vmpy(Vu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyub_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vmpyacc_WuhVubRub(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpyub_acc(vxx, vu, rt)
+}
+
+/// `Vdd32.uh=vmpy(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyubv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vmpy_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyubv(vu, vv)
+}
+
+/// `Vxx32.uh+=vmpy(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyubv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vmpyacc_WuhVubVub(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyubv_acc(vxx, vu, vv)
+}
+
+/// `Vdd32.uw=vmpy(Vu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vmpy_VuhRuh(vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpyuh(vu, rt)
+}
+
+/// `Vxx32.uw+=vmpy(Vu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyuh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vmpyacc_WuwVuhRuh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpyuh_acc(vxx, vu, rt)
+}
+
+/// `Vdd32.uw=vmpy(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyuhv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vmpy_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyuhv(vu, vv)
+}
+
+/// `Vxx32.uw+=vmpy(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vmpyuhv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vmpyacc_WuwVuhVuh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyuhv_acc(vxx, vu, vv)
+}
+
+/// `Vd32.h=vnavg(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnavgh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vnavg_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vnavgh(vu, vv)
+}
+
+/// `Vd32.b=vnavg(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnavgub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vnavg_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vnavgub(vu, vv)
+}
+
+/// `Vd32.w=vnavg(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnavgw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vnavg_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vnavgw(vu, vv)
+}
+
+/// `Vd32.h=vnormamt(Vu32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnormamth))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vnormamt_Vh(vu: HvxVector) -> HvxVector {
+    vnormamth(vu)
+}
+
+/// `Vd32.w=vnormamt(Vu32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnormamtw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vnormamt_Vw(vu: HvxVector) -> HvxVector {
+    vnormamtw(vu)
+}
+
+/// `Vd32=vnot(Vu32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vnot))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vnot_V(vu: HvxVector) -> HvxVector {
+    vnot(vu)
+}
+
+/// `Vd32=vor(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vor_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_or(vu, vv)
+}
+
+/// `Vd32.b=vpacke(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackeb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vpacke_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackeb(vu, vv)
+}
+
+/// `Vd32.h=vpacke(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vpacke_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackeh(vu, vv)
+}
+
+/// `Vd32.b=vpack(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackhb_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vpack_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackhb_sat(vu, vv)
+}
+
+/// `Vd32.ub=vpack(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackhub_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vpack_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackhub_sat(vu, vv)
+}
+
+/// `Vd32.b=vpacko(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackob))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vpacko_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackob(vu, vv)
+}
+
+/// `Vd32.h=vpacko(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vpacko_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackoh(vu, vv)
+}
+
+/// `Vd32.h=vpack(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackwh_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vpack_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackwh_sat(vu, vv)
+}
+
+/// `Vd32.uh=vpack(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpackwuh_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vpack_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vpackwuh_sat(vu, vv)
+}
+
+/// `Vd32.h=vpopcount(Vu32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vpopcounth))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vpopcount_Vh(vu: HvxVector) -> HvxVector {
+    vpopcounth(vu)
+}
+
+/// `Vd32=vrdelta(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrdelta))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vrdelta_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrdelta(vu, vv)
+}
+
+/// `Vd32.w=vrmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpy_VubRb(vu: HvxVector, rt: i32) -> HvxVector {
+    vrmpybus(vu, rt)
+}
+
+/// `Vx32.w+=vrmpy(Vu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpyacc_VwVubRb(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vrmpybus_acc(vx, vu, rt)
+}
+
+/// `Vdd32.w=vrmpy(Vuu32.ub,Rt32.b,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybusi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vrmpy_WubRbI(vuu: HvxVectorPair, rt: i32, iu1: i32) -> HvxVectorPair {
+    vrmpybusi(vuu, rt, iu1)
+}
+
+/// `Vxx32.w+=vrmpy(Vuu32.ub,Rt32.b,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybusi_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vrmpyacc_WwWubRbI(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+    iu1: i32,
+) -> HvxVectorPair {
+    vrmpybusi_acc(vxx, vuu, rt, iu1)
+}
+
+/// `Vd32.w=vrmpy(Vu32.ub,Vv32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybusv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpy_VubVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpybusv(vu, vv)
+}
+
+/// `Vx32.w+=vrmpy(Vu32.ub,Vv32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybusv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpyacc_VwVubVb(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpybusv_acc(vx, vu, vv)
+}
+
+/// `Vd32.w=vrmpy(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpy_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpybv(vu, vv)
+}
+
+/// `Vx32.w+=vrmpy(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpybv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vrmpyacc_VwVbVb(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpybv_acc(vx, vu, vv)
+}
+
+/// `Vd32.uw=vrmpy(Vu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrmpy_VubRub(vu: HvxVector, rt: i32) -> HvxVector {
+    vrmpyub(vu, rt)
+}
+
+/// `Vx32.uw+=vrmpy(Vu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyub_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrmpyacc_VuwVubRub(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vrmpyub_acc(vx, vu, rt)
+}
+
+/// `Vdd32.uw=vrmpy(Vuu32.ub,Rt32.ub,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyubi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vrmpy_WubRubI(vuu: HvxVectorPair, rt: i32, iu1: i32) -> HvxVectorPair {
+    vrmpyubi(vuu, rt, iu1)
+}
+
+/// `Vxx32.uw+=vrmpy(Vuu32.ub,Rt32.ub,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyubi_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vrmpyacc_WuwWubRubI(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+    iu1: i32,
+) -> HvxVectorPair {
+    vrmpyubi_acc(vxx, vuu, rt, iu1)
+}
+
+/// `Vd32.uw=vrmpy(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyubv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrmpy_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpyubv(vu, vv)
+}
+
+/// `Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrmpyubv_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrmpyacc_VuwVubVub(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrmpyubv_acc(vx, vu, vv)
+}
+
+/// `Vd32=vror(Vu32,Rt32)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vror))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vror_VR(vu: HvxVector, rt: i32) -> HvxVector {
+    vror(vu, rt)
+}
+
+/// `Vd32.b=vround(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vroundhb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vround_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vroundhb(vu, vv)
+}
+
+/// `Vd32.ub=vround(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vroundhub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vround_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vroundhub(vu, vv)
+}
+
+/// `Vd32.h=vround(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vroundwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vround_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vroundwh(vu, vv)
+}
+
+/// `Vd32.uh=vround(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vroundwuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vround_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vroundwuh(vu, vv)
+}
+
+/// `Vdd32.uw=vrsad(Vuu32.ub,Rt32.ub,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrsadubi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vrsad_WubRubI(vuu: HvxVectorPair, rt: i32, iu1: i32) -> HvxVectorPair {
+    vrsadubi(vuu, rt, iu1)
+}
+
+/// `Vxx32.uw+=vrsad(Vuu32.ub,Rt32.ub,#u1)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vrsadubi_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vrsadacc_WuwWubRubI(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+    iu1: i32,
+) -> HvxVectorPair {
+    vrsadubi_acc(vxx, vuu, rt, iu1)
+}
+
+/// `Vd32.ub=vsat(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsathub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vsat_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsathub(vu, vv)
+}
+
+/// `Vd32.h=vsat(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsatwh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vsat_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsatwh(vu, vv)
+}
+
+/// `Vdd32.h=vsxt(Vu32.b)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vsxt_Vb(vu: HvxVector) -> HvxVectorPair {
+    vsb(vu)
+}
+
+/// `Vdd32.w=vsxt(Vu32.h)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsxt_Vh(vu: HvxVector) -> HvxVectorPair {
+    vsh(vu)
+}
+
+/// `Vd32.h=vshuffe(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshufeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vshuffe_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vshufeh(vu, vv)
+}
+
+/// `Vd32.b=vshuff(Vu32.b)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vshuff_Vb(vu: HvxVector) -> HvxVector {
+    vshuffb(vu)
+}
+
+/// `Vd32.b=vshuffe(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffeb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vshuffe_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vshuffeb(vu, vv)
+}
+
+/// `Vd32.h=vshuff(Vu32.h)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vshuff_Vh(vu: HvxVector) -> HvxVector {
+    vshuffh(vu)
+}
+
+/// `Vd32.b=vshuffo(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffob))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vshuffo_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vshuffob(vu, vv)
+}
+
+/// `Vdd32=vshuff(Vu32,Vv32,Rt8)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshuffvdd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vshuff_VVR(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVectorPair {
+    vshuffvdd(vu, vv, rt)
+}
+
+/// `Vdd32.b=vshuffoe(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshufoeb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vshuffoe_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vshufoeb(vu, vv)
+}
+
+/// `Vdd32.h=vshuffoe(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshufoeh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vshuffoe_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vshufoeh(vu, vv)
+}
+
+/// `Vd32.h=vshuffo(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vshufoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vshuffo_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vshufoh(vu, vv)
+}
+
+/// `Vd32.b=vsub(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vsub_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubb(vu, vv)
+}
+
+/// `Vdd32.b=vsub(Vuu32.b,Vvv32.b)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubb_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vsub_WbWb(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubb_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vsub(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vsub_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubh(vu, vv)
+}
+
+/// `Vdd32.h=vsub(Vuu32.h,Vvv32.h)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubh_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vsub_WhWh(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubh_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vsub(Vu32.h,Vv32.h):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vsub_VhVh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubhsat(vu, vv)
+}
+
+/// `Vdd32.h=vsub(Vuu32.h,Vvv32.h):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubhsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vsub_WhWh_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubhsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.w=vsub(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsub_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vsubhw(vu, vv)
+}
+
+/// `Vdd32.h=vsub(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsububh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vsub_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vsububh(vu, vv)
+}
+
+/// `Vd32.ub=vsub(Vu32.ub,Vv32.ub):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsububsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vsub_VubVub_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsububsat(vu, vv)
+}
+
+/// `Vdd32.ub=vsub(Vuu32.ub,Vvv32.ub):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsububsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wub_vsub_WubWub_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsububsat_dv(vuu, vvv)
+}
+
+/// `Vd32.uh=vsub(Vu32.uh,Vv32.uh):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubuhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vsub_VuhVuh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubuhsat(vu, vv)
+}
+
+/// `Vdd32.uh=vsub(Vuu32.uh,Vvv32.uh):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubuhsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vsub_WuhWuh_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubuhsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.w=vsub(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubuhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsub_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vsubuhw(vu, vv)
+}
+
+/// `Vd32.w=vsub(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vsub_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_sub(vu, vv)
+}
+
+/// `Vdd32.w=vsub(Vuu32.w,Vvv32.w)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubw_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsub_WwWw(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubw_dv(vuu, vvv)
+}
+
+/// `Vd32.w=vsub(Vu32.w,Vv32.w):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubwsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vsub_VwVw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubwsat(vu, vv)
+}
+
+/// `Vdd32.w=vsub(Vuu32.w,Vvv32.w):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vsubwsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vsub_WwWw_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubwsat_dv(vuu, vvv)
+}
+
+/// `Vdd32.h=vtmpy(Vuu32.b,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpyb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vtmpy_WbRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vtmpyb(vuu, rt)
+}
+
+/// `Vxx32.h+=vtmpy(Vuu32.b,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpyb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vtmpyacc_WhWbRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vtmpyb_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.h=vtmpy(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpybus))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vtmpy_WubRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vtmpybus(vuu, rt)
+}
+
+/// `Vxx32.h+=vtmpy(Vuu32.ub,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpybus_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vtmpyacc_WhWubRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vtmpybus_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.w=vtmpy(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpyhb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vtmpy_WhRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vtmpyhb(vuu, rt)
+}
+
+/// `Vxx32.w+=vtmpy(Vuu32.h,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vtmpyhb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vtmpyacc_WwWhRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vtmpyhb_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32.h=vunpack(Vu32.b)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vunpack_Vb(vu: HvxVector) -> HvxVectorPair {
+    vunpackb(vu)
+}
+
+/// `Vdd32.w=vunpack(Vu32.h)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vunpack_Vh(vu: HvxVector) -> HvxVectorPair {
+    vunpackh(vu)
+}
+
+/// `Vxx32.h|=vunpacko(Vu32.b)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackob))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vunpackoor_WhVb(vxx: HvxVectorPair, vu: HvxVector) -> HvxVectorPair {
+    vunpackob(vxx, vu)
+}
+
+/// `Vxx32.w|=vunpacko(Vu32.h)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackoh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vunpackoor_WwVh(vxx: HvxVectorPair, vu: HvxVector) -> HvxVectorPair {
+    vunpackoh(vxx, vu)
+}
+
+/// `Vdd32.uh=vunpack(Vu32.ub)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vunpack_Vub(vu: HvxVector) -> HvxVectorPair {
+    vunpackub(vu)
+}
+
+/// `Vdd32.uw=vunpack(Vu32.uh)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vunpackuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vunpack_Vuh(vu: HvxVector) -> HvxVectorPair {
+    vunpackuh(vu)
+}
+
+/// `Vd32=vxor(Vu32,Vv32)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vxor))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vxor_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    simd_xor(vu, vv)
+}
+
+/// `Vdd32.uh=vzxt(Vu32.ub)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vzb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuh_vzxt_Vub(vu: HvxVector) -> HvxVectorPair {
+    vzb(vu)
+}
+
+/// `Vdd32.uw=vzxt(Vu32.uh)`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[cfg_attr(test, assert_instr(vzh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vzxt_Vuh(vu: HvxVector) -> HvxVectorPair {
+    vzh(vu)
+}
+
+/// `Vd32.b=vsplat(Rt32)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(lvsplatb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vsplat_R(rt: i32) -> HvxVector {
+    lvsplatb(rt)
+}
+
+/// `Vd32.h=vsplat(Rt32)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(lvsplath))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vsplat_R(rt: i32) -> HvxVector {
+    lvsplath(rt)
+}
+
+/// `Vd32.b=vadd(Vu32.b,Vv32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddbsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vadd_VbVb_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddbsat(vu, vv)
+}
+
+/// `Vdd32.b=vadd(Vuu32.b,Vvv32.b):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddbsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vadd_WbWb_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vaddbsat_dv(vuu, vvv)
+}
+
+/// `Vd32.h=vadd(vclb(Vu32.h),Vv32.h)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddclbh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vadd_vclb_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddclbh(vu, vv)
+}
+
+/// `Vd32.w=vadd(vclb(Vu32.w),Vv32.w)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddclbw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vadd_vclb_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddclbw(vu, vv)
+}
+
+/// `Vxx32.w+=vadd(Vu32.h,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddhw_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vaddacc_WwVhVh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vaddhw_acc(vxx, vu, vv)
+}
+
+/// `Vxx32.h+=vadd(Vu32.ub,Vv32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddubh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vaddacc_WhVubVub(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vaddubh_acc(vxx, vu, vv)
+}
+
+/// `Vd32.ub=vadd(Vu32.ub,Vv32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vaddububb_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vadd_VubVb_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vaddububb_sat(vu, vv)
+}
+
+/// `Vxx32.w+=vadd(Vu32.uh,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vadduhw_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vaddacc_WwVuhVuh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vadduhw_acc(vxx, vu, vv)
+}
+
+/// `Vd32.uw=vadd(Vu32.uw,Vv32.uw):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vadduwsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vadd_VuwVuw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadduwsat(vu, vv)
+}
+
+/// `Vdd32.uw=vadd(Vuu32.uw,Vvv32.uw):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vadduwsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vadd_WuwWuw_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vadduwsat_dv(vuu, vvv)
+}
+
+/// `Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vasrhbsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vasr_VhVhR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrhbsat(vu, vv, rt)
+}
+
+/// `Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vasruwuhrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_VuwVuwR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasruwuhrndsat(vu, vv, rt)
+}
+
+/// `Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vasrwuhrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_VwVwR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasrwuhrndsat(vu, vv, rt)
+}
+
+/// `Vd32.ub=vlsr(Vu32.ub,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlsrb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vlsr_VubR(vu: HvxVector, rt: i32) -> HvxVector {
+    vlsrb(vu, rt)
+}
+
+/// `Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8):nomatch`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvvb_nm))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32_VbVbR_nomatch(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vlutvvb_nm(vu, vv, rt)
+}
+
+/// `Vx32.b|=vlut32(Vu32.b,Vv32.b,#u3)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvvb_oracci))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32or_VbVbVbI(
+    vx: HvxVector,
+    vu: HvxVector,
+    vv: HvxVector,
+    iu3: i32,
+) -> HvxVector {
+    vlutvvb_oracci(vx, vu, vv, iu3)
+}
+
+/// `Vd32.b=vlut32(Vu32.b,Vv32.b,#u3)`
+///
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvvbi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vlut32_VbVbI(vu: HvxVector, vv: HvxVector, iu3: i32) -> HvxVector {
+    vlutvvbi(vu, vv, iu3)
+}
+
+/// `Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8):nomatch`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvwh_nm))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16_VbVhR_nomatch(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVectorPair {
+    vlutvwh_nm(vu, vv, rt)
+}
+
+/// `Vxx32.h|=vlut16(Vu32.b,Vv32.h,#u3)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvwh_oracci))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16or_WhVbVhI(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+    iu3: i32,
+) -> HvxVectorPair {
+    vlutvwh_oracci(vxx, vu, vv, iu3)
+}
+
+/// `Vdd32.h=vlut16(Vu32.b,Vv32.h,#u3)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vlutvwhi))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vlut16_VbVhI(vu: HvxVector, vv: HvxVector, iu3: i32) -> HvxVectorPair {
+    vlutvwhi(vu, vv, iu3)
+}
+
+/// `Vd32.b=vmax(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmaxb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vmax_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmaxb(vu, vv)
+}
+
+/// `Vd32.b=vmin(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vminb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vmin_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vminb(vu, vv)
+}
+
+/// `Vdd32.w=vmpa(Vuu32.uh,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpauhb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpa_WuhRb(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vmpauhb(vuu, rt)
+}
+
+/// `Vxx32.w+=vmpa(Vuu32.uh,Rt32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpauhb_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpaacc_WwWuhRb(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpauhb_acc(vxx, vuu, rt)
+}
+
+/// `Vdd32=vmpye(Vu32.w,Vv32.uh)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpyewuh_64))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vmpye_VwVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpyewuh_64(vu, vv)
+}
+
+/// `Vd32.w=vmpyi(Vu32.w,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpyiwub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyi_VwRub(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwub(vu, rt)
+}
+
+/// `Vx32.w+=vmpyi(Vu32.w,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpyiwub_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vmpyiacc_VwVwRub(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyiwub_acc(vx, vu, rt)
+}
+
+/// `Vxx32+=vmpyo(Vu32.w,Vv32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vmpyowh_64_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vmpyoacc_WVwVh(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpyowh_64_acc(vxx, vu, vv)
+}
+
+/// `Vd32.ub=vround(Vu32.uh,Vv32.uh):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vrounduhub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vround_VuhVuh_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrounduhub(vu, vv)
+}
+
+/// `Vd32.uh=vround(Vu32.uw,Vv32.uw):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vrounduwuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vround_VuwVuw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrounduwuh(vu, vv)
+}
+
+/// `Vd32.uh=vsat(Vu32.uw,Vv32.uw)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsatuwuh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vsat_VuwVuw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsatuwuh(vu, vv)
+}
+
+/// `Vd32.b=vsub(Vu32.b,Vv32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubbsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vsub_VbVb_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubbsat(vu, vv)
+}
+
+/// `Vdd32.b=vsub(Vuu32.b,Vvv32.b):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubbsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wb_vsub_WbWb_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubbsat_dv(vuu, vvv)
+}
+
+/// `Vd32.ub=vsub(Vu32.ub,Vv32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubububb_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vsub_VubVb_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubububb_sat(vu, vv)
+}
+
+/// `Vd32.uw=vsub(Vu32.uw,Vv32.uw):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubuwsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vsub_VuwVuw_sat(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsubuwsat(vu, vv)
+}
+
+/// `Vdd32.uw=vsub(Vuu32.uw,Vvv32.uw):sat`
+///
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[cfg_attr(test, assert_instr(vsubuwsat_dv))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wuw_vsub_WuwWuw_sat(vuu: HvxVectorPair, vvv: HvxVectorPair) -> HvxVectorPair {
+    vsubuwsat_dv(vuu, vvv)
+}
+
+/// `Vd32.b=vabs(Vu32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vabsb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vabs_Vb(vu: HvxVector) -> HvxVector {
+    vabsb(vu)
+}
+
+/// `Vd32.b=vabs(Vu32.b):sat`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vabsb_sat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vabs_Vb_sat(vu: HvxVector) -> HvxVector {
+    vabsb_sat(vu)
+}
+
+/// `Vx32.h+=vasl(Vu32.h,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vaslh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vaslacc_VhVhR(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vaslh_acc(vx, vu, rt)
+}
+
+/// `Vx32.h+=vasr(Vu32.h,Rt32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vasrh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vasracc_VhVhR(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vasrh_acc(vx, vu, rt)
+}
+
+/// `Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vasruhubrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_VuhVuhR_rnd_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasruhubrndsat(vu, vv, rt)
+}
+
+/// `Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vasruhubsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_VuhVuhR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasruhubsat(vu, vv, rt)
+}
+
+/// `Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vasruwuhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_VuwVuwR_sat(vu: HvxVector, vv: HvxVector, rt: i32) -> HvxVector {
+    vasruwuhsat(vu, vv, rt)
+}
+
+/// `Vd32.b=vavg(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vavgb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vavg_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgb(vu, vv)
+}
+
+/// `Vd32.b=vavg(Vu32.b,Vv32.b):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vavgbrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vavg_VbVb_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavgbrnd(vu, vv)
+}
+
+/// `Vd32.uw=vavg(Vu32.uw,Vv32.uw)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vavguw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vavg_VuwVuw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavguw(vu, vv)
+}
+
+/// `Vd32.uw=vavg(Vu32.uw,Vv32.uw):rnd`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vavguwrnd))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vavg_VuwVuw_rnd(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vavguwrnd(vu, vv)
+}
+
+/// `Vdd32=#0`
+///
+/// Instruction Type: MAPPING
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vdd0))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vzero() -> HvxVectorPair {
+    vdd0()
+}
+
+/// `vtmp.h=vgather(Rt32,Mu2,Vv32.h).h`
+///
+/// Instruction Type: CVI_GATHER
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vgathermh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_ARMVh(rs: *mut HvxVector, rt: i32, mu: i32, vv: HvxVector) {
+    vgathermh(rs, rt, mu, vv)
+}
+
+/// `vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h`
+///
+/// Instruction Type: CVI_GATHER_DV
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vgathermhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_ARMWw(rs: *mut HvxVector, rt: i32, mu: i32, vvv: HvxVectorPair) {
+    vgathermhw(rs, rt, mu, vvv)
+}
+
+/// `vtmp.w=vgather(Rt32,Mu2,Vv32.w).w`
+///
+/// Instruction Type: CVI_GATHER
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vgathermw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_ARMVw(rs: *mut HvxVector, rt: i32, mu: i32, vv: HvxVector) {
+    vgathermw(rs, rt, mu, vv)
+}
+
+/// `Vdd32.h=vmpa(Vuu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpabuu))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpa_WubRub(vuu: HvxVectorPair, rt: i32) -> HvxVectorPair {
+    vmpabuu(vuu, rt)
+}
+
+/// `Vxx32.h+=vmpa(Vuu32.ub,Rt32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpabuu_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wh_vmpaacc_WhWubRub(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    rt: i32,
+) -> HvxVectorPair {
+    vmpabuu_acc(vxx, vuu, rt)
+}
+
+/// `Vxx32.w+=vmpy(Vu32.h,Rt32.h)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpyh_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vmpyacc_WwVhRh(vxx: HvxVectorPair, vu: HvxVector, rt: i32) -> HvxVectorPair {
+    vmpyh_acc(vxx, vu, rt)
+}
+
+/// `Vd32.uw=vmpye(Vu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpyuhe))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vmpye_VuhRuh(vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyuhe(vu, rt)
+}
+
+/// `Vx32.uw+=vmpye(Vu32.uh,Rt32.uh)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vmpyuhe_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vmpyeacc_VuwVuhRuh(vx: HvxVector, vu: HvxVector, rt: i32) -> HvxVector {
+    vmpyuhe_acc(vx, vu, rt)
+}
+
+/// `Vd32.b=vnavg(Vu32.b,Vv32.b)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vnavgb))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vnavg_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vnavgb(vu, vv)
+}
+
+/// `vscatter(Rt32,Mu2,Vv32.h).h=Vw32`
+///
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_RMVhV(rt: i32, mu: i32, vv: HvxVector, vw: HvxVector) {
+    vscattermh(rt, mu, vv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vv32.h).h+=Vw32`
+///
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermh_add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatteracc_RMVhV(rt: i32, mu: i32, vv: HvxVector, vw: HvxVector) {
+    vscattermh_add(rt, mu, vv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vvv32.w).h=Vw32`
+///
+/// Instruction Type: CVI_SCATTER_DV
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermhw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_RMWwV(rt: i32, mu: i32, vvv: HvxVectorPair, vw: HvxVector) {
+    vscattermhw(rt, mu, vvv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vvv32.w).h+=Vw32`
+///
+/// Instruction Type: CVI_SCATTER_DV
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermhw_add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatteracc_RMWwV(rt: i32, mu: i32, vvv: HvxVectorPair, vw: HvxVector) {
+    vscattermhw_add(rt, mu, vvv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vv32.w).w=Vw32`
+///
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_RMVwV(rt: i32, mu: i32, vv: HvxVector, vw: HvxVector) {
+    vscattermw(rt, mu, vv, vw)
+}
+
+/// `vscatter(Rt32,Mu2,Vv32.w).w+=Vw32`
+///
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[cfg_attr(test, assert_instr(vscattermw_add))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatteracc_RMVwV(rt: i32, mu: i32, vv: HvxVector, vw: HvxVector) {
+    vscattermw_add(rt, mu, vv, vw)
+}
+
+/// `Vxx32.w=vasrinto(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VP_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv66"))]
+#[cfg_attr(test, assert_instr(vasr_into))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_vasrinto_WwVwVw(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vasr_into(vxx, vu, vv)
+}
+
+/// `Vd32.uw=vrotr(Vu32.uw,Vv32.uw)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv66"))]
+#[cfg_attr(test, assert_instr(vrotr))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuw_vrotr_VuwVuw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vrotr(vu, vv)
+}
+
+/// `Vd32.w=vsatdw(Vu32.w,Vv32.w)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv66"))]
+#[cfg_attr(test, assert_instr(vsatdw))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vsatdw_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsatdw(vu, vv)
+}
+
+/// `Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(v6mpyhubs10))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_v6mpy_WubWbI_h(
+    vuu: HvxVectorPair,
+    vvv: HvxVectorPair,
+    iu2: i32,
+) -> HvxVectorPair {
+    v6mpyhubs10(vuu, vvv, iu2)
+}
+
+/// `Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(v6mpyhubs10_vxx))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_v6mpyacc_WwWubWbI_h(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    vvv: HvxVectorPair,
+    iu2: i32,
+) -> HvxVectorPair {
+    v6mpyhubs10_vxx(vxx, vuu, vvv, iu2)
+}
+
+/// `Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(v6mpyvubs10))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_v6mpy_WubWbI_v(
+    vuu: HvxVectorPair,
+    vvv: HvxVectorPair,
+    iu2: i32,
+) -> HvxVectorPair {
+    v6mpyvubs10(vuu, vvv, iu2)
+}
+
+/// `Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(v6mpyvubs10_vxx))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Ww_v6mpyacc_WwWubWbI_v(
+    vxx: HvxVectorPair,
+    vuu: HvxVectorPair,
+    vvv: HvxVectorPair,
+    iu2: i32,
+) -> HvxVectorPair {
+    v6mpyvubs10_vxx(vxx, vuu, vvv, iu2)
+}
+
+/// `Vd32.hf=vabs(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vabs_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vabs_Vhf(vu: HvxVector) -> HvxVector {
+    vabs_hf(vu)
+}
+
+/// `Vd32.sf=vabs(Vu32.sf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vabs_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vabs_Vsf(vu: HvxVector) -> HvxVector {
+    vabs_sf(vu)
+}
+
+/// `Vd32.qf16=vadd(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vadd_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_hf(vu, vv)
+}
+
+/// `Vd32.hf=vadd(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_hf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vadd_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_hf_hf(vu, vv)
+}
+
+/// `Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vadd_Vqf16Vqf16(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_qf16(vu, vv)
+}
+
+/// `Vd32.qf16=vadd(Vu32.qf16,Vv32.hf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_qf16_mix))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vadd_Vqf16Vhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_qf16_mix(vu, vv)
+}
+
+/// `Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vadd_Vqf32Vqf32(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_qf32(vu, vv)
+}
+
+/// `Vd32.qf32=vadd(Vu32.qf32,Vv32.sf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_qf32_mix))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vadd_Vqf32Vsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_qf32_mix(vu, vv)
+}
+
+/// `Vd32.qf32=vadd(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vadd_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_sf(vu, vv)
+}
+
+/// `Vdd32.sf=vadd(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vadd_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vadd_sf_hf(vu, vv)
+}
+
+/// `Vd32.sf=vadd(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vadd_sf_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vadd_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vadd_sf_sf(vu, vv)
+}
+
+/// `Vd32.w=vfmv(Vu32.w)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vassign_fp))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vfmv_Vw(vu: HvxVector) -> HvxVector {
+    vassign_fp(vu)
+}
+
+/// `Vd32.hf=Vu32.qf16`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vconv_hf_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_equals_Vqf16(vu: HvxVector) -> HvxVector {
+    vconv_hf_qf16(vu)
+}
+
+/// `Vd32.hf=Vuu32.qf32`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vconv_hf_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_equals_Wqf32(vuu: HvxVectorPair) -> HvxVector {
+    vconv_hf_qf32(vuu)
+}
+
+/// `Vd32.sf=Vu32.qf32`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vconv_sf_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_equals_Vqf32(vu: HvxVector) -> HvxVector {
+    vconv_sf_qf32(vu)
+}
+
+/// `Vd32.b=vcvt(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_b_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_vcvt_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vcvt_b_hf(vu, vv)
+}
+
+/// `Vd32.h=vcvt(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_h_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_vcvt_Vhf(vu: HvxVector) -> HvxVector {
+    vcvt_h_hf(vu)
+}
+
+/// `Vdd32.hf=vcvt(Vu32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_b))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt_Vb(vu: HvxVector) -> HvxVectorPair {
+    vcvt_hf_b(vu)
+}
+
+/// `Vd32.hf=vcvt(Vu32.h)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_h))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vcvt_Vh(vu: HvxVector) -> HvxVector {
+    vcvt_hf_h(vu)
+}
+
+/// `Vd32.hf=vcvt(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vcvt_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vcvt_hf_sf(vu, vv)
+}
+
+/// `Vdd32.hf=vcvt(Vu32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_ub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt_Vub(vu: HvxVector) -> HvxVectorPair {
+    vcvt_hf_ub(vu)
+}
+
+/// `Vd32.hf=vcvt(Vu32.uh)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_uh))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vcvt_Vuh(vu: HvxVector) -> HvxVector {
+    vcvt_hf_uh(vu)
+}
+
+/// `Vdd32.sf=vcvt(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vcvt_Vhf(vu: HvxVector) -> HvxVectorPair {
+    vcvt_sf_hf(vu)
+}
+
+/// `Vd32.ub=vcvt(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_ub_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vcvt_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vcvt_ub_hf(vu, vv)
+}
+
+/// `Vd32.uh=vcvt(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vcvt_uh_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vcvt_Vhf(vu: HvxVector) -> HvxVector {
+    vcvt_uh_hf(vu)
+}
+
+/// `Vd32.sf=vdmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vdmpy_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vdmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdmpy_sf_hf(vu, vv)
+}
+
+/// `Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vdmpy_sf_hf_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vdmpyacc_VsfVhfVhf(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vdmpy_sf_hf_acc(vx, vu, vv)
+}
+
+/// `Vd32.hf=vfmax(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfmax_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vfmax_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmax_hf(vu, vv)
+}
+
+/// `Vd32.sf=vfmax(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfmax_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vfmax_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmax_sf(vu, vv)
+}
+
+/// `Vd32.hf=vfmin(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfmin_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vfmin_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmin_hf(vu, vv)
+}
+
+/// `Vd32.sf=vfmin(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfmin_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vfmin_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmin_sf(vu, vv)
+}
+
+/// `Vd32.hf=vfneg(Vu32.hf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfneg_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vfneg_Vhf(vu: HvxVector) -> HvxVector {
+    vfneg_hf(vu)
+}
+
+/// `Vd32.sf=vfneg(Vu32.sf)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vfneg_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vfneg_Vsf(vu: HvxVector) -> HvxVector {
+    vfneg_sf(vu)
+}
+
+/// `Vd32.hf=vmax(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmax_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vmax_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmax_hf(vu, vv)
+}
+
+/// `Vd32.sf=vmax(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmax_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vmax_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmax_sf(vu, vv)
+}
+
+/// `Vd32.hf=vmin(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmin_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vmin_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmin_hf(vu, vv)
+}
+
+/// `Vd32.sf=vmin(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmin_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vmin_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmin_sf(vu, vv)
+}
+
+/// `Vd32.hf=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_hf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_hf_hf(vu, vv)
+}
+
+/// `Vx32.hf+=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_hf_hf_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vmpyacc_VhfVhfVhf(vx: HvxVector, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_hf_hf_acc(vx, vu, vv)
+}
+
+/// `Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vmpy_Vqf16Vqf16(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf16(vu, vv)
+}
+
+/// `Vd32.qf16=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf16_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf16_hf(vu, vv)
+}
+
+/// `Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf16_mix_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vmpy_Vqf16Vhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf16_mix_hf(vu, vv)
+}
+
+/// `Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vmpy_Vqf32Vqf32(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf32(vu, vv)
+}
+
+/// `Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wqf32_vmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpy_qf32_hf(vu, vv)
+}
+
+/// `Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32_mix_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wqf32_vmpy_Vqf16Vhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpy_qf32_mix_hf(vu, vv)
+}
+
+/// `Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wqf32_vmpy_Vqf16Vqf16(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpy_qf32_qf16(vu, vv)
+}
+
+/// `Vd32.qf32=vmpy(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_qf32_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vmpy_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_qf32_sf(vu, vv)
+}
+
+/// `Vdd32.sf=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vmpy_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vmpy_sf_hf(vu, vv)
+}
+
+/// `Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_sf_hf_acc))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vmpyacc_WsfVhfVhf(
+    vxx: HvxVectorPair,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPair {
+    vmpy_sf_hf_acc(vxx, vu, vv)
+}
+
+/// `Vd32.sf=vmpy(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vmpy_sf_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vmpy_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpy_sf_sf(vu, vv)
+}
+
+/// `Vd32.qf16=vsub(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vsub_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_hf(vu, vv)
+}
+
+/// `Vd32.hf=vsub(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_hf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_vsub_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_hf_hf(vu, vv)
+}
+
+/// `Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_qf16))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vsub_Vqf16Vqf16(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_qf16(vu, vv)
+}
+
+/// `Vd32.qf16=vsub(Vu32.qf16,Vv32.hf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_qf16_mix))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf16_vsub_Vqf16Vhf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_qf16_mix(vu, vv)
+}
+
+/// `Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_qf32))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vsub_Vqf32Vqf32(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_qf32(vu, vv)
+}
+
+/// `Vd32.qf32=vsub(Vu32.qf32,Vv32.sf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_qf32_mix))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vsub_Vqf32Vsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_qf32_mix(vu, vv)
+}
+
+/// `Vd32.qf32=vsub(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vqf32_vsub_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_sf(vu, vv)
+}
+
+/// `Vdd32.sf=vsub(Vu32.hf,Vv32.hf)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_sf_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Wsf_vsub_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vsub_sf_hf(vu, vv)
+}
+
+/// `Vd32.sf=vsub(Vu32.sf,Vv32.sf)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[cfg_attr(test, assert_instr(vsub_sf_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_vsub_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vsub_sf_sf(vu, vv)
+}
+
+/// `Vd32.ub=vasr(Vuu32.uh,Vv32.ub):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vasrvuhubrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_WuhVub_rnd_sat(vuu: HvxVectorPair, vv: HvxVector) -> HvxVector {
+    vasrvuhubrndsat(vuu, vv)
+}
+
+/// `Vd32.ub=vasr(Vuu32.uh,Vv32.ub):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vasrvuhubsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vub_vasr_WuhVub_sat(vuu: HvxVectorPair, vv: HvxVector) -> HvxVector {
+    vasrvuhubsat(vuu, vv)
+}
+
+/// `Vd32.uh=vasr(Vuu32.w,Vv32.uh):rnd:sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vasrvwuhrndsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_WwVuh_rnd_sat(vuu: HvxVectorPair, vv: HvxVector) -> HvxVector {
+    vasrvwuhrndsat(vuu, vv)
+}
+
+/// `Vd32.uh=vasr(Vuu32.w,Vv32.uh):sat`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vasrvwuhsat))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vasr_WwVuh_sat(vuu: HvxVectorPair, vv: HvxVector) -> HvxVector {
+    vasrvwuhsat(vuu, vv)
+}
+
+/// `Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv69"))]
+#[cfg_attr(test, assert_instr(vmpyuhvs))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vuh_vmpy_VuhVuh_rs16(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmpyuhvs(vu, vv)
+}
+
+/// `Vd32.h=Vu32.hf`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv73"))]
+#[cfg_attr(test, assert_instr(vconv_h_hf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_equals_Vhf(vu: HvxVector) -> HvxVector {
+    vconv_h_hf(vu)
+}
+
+/// `Vd32.hf=Vu32.h`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv73"))]
+#[cfg_attr(test, assert_instr(vconv_hf_h))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vhf_equals_Vh(vu: HvxVector) -> HvxVector {
+    vconv_hf_h(vu)
+}
+
+/// `Vd32.sf=Vu32.w`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv73"))]
+#[cfg_attr(test, assert_instr(vconv_sf_w))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vsf_equals_Vw(vu: HvxVector) -> HvxVector {
+    vconv_sf_w(vu)
+}
+
+/// `Vd32.w=Vu32.sf`
+///
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv73"))]
+#[cfg_attr(test, assert_instr(vconv_w_sf))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_equals_Vsf(vu: HvxVector) -> HvxVector {
+    vconv_w_sf(vu)
+}
+
+/// `Vd32=vgetqfext(Vu32.x,Rt32)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(get_qfext))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vgetqfext_VR(vu: HvxVector, rt: i32) -> HvxVector {
+    get_qfext(vu, rt)
+}
+
+/// `Vd32.x=vsetqfext(Vu32,Rt32)`
+///
+/// Instruction Type: CVI_VX
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(set_qfext))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vsetqfext_VR(vu: HvxVector, rt: i32) -> HvxVector {
+    set_qfext(vu, rt)
+}
+
+/// `Vd32.f8=vabs(Vu32.f8)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vabs_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vabs_V(vu: HvxVector) -> HvxVector {
+    vabs_f8(vu)
+}
+
+/// `Vdd32.hf=vcvt2(Vu32.b)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vcvt2_hf_b))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt2_Vb(vu: HvxVector) -> HvxVectorPair {
+    vcvt2_hf_b(vu)
+}
+
+/// `Vdd32.hf=vcvt2(Vu32.ub)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vcvt2_hf_ub))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt2_Vub(vu: HvxVector) -> HvxVectorPair {
+    vcvt2_hf_ub(vu)
+}
+
+/// `Vdd32.hf=vcvt(Vu32.f8)`
+///
+/// Instruction Type: CVI_VX_DV
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vcvt_hf_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Whf_vcvt_V(vu: HvxVector) -> HvxVectorPair {
+    vcvt_hf_f8(vu)
+}
+
+/// `Vd32.f8=vfmax(Vu32.f8,Vv32.f8)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vfmax_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vfmax_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmax_f8(vu, vv)
+}
+
+/// `Vd32.f8=vfmin(Vu32.f8,Vv32.f8)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vfmin_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vfmin_VV(vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vfmin_f8(vu, vv)
+}
+
+/// `Vd32.f8=vfneg(Vu32.f8)`
+///
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv79"))]
+#[cfg_attr(test, assert_instr(vfneg_f8))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vfneg_V(vu: HvxVector) -> HvxVector {
+    vfneg_f8(vu)
+}
+
+/// `Qd4=and(Qs4,Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_and_QQ(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=and(Qs4,!Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_and_QQn(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_and_n(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=not(Qs4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_not_Q(qs: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_not(vandvrt(
+            core::mem::transmute::<HvxVectorPred, HvxVector>(qs),
+            -1,
+        )),
+        -1,
+    ))
+}
+
+/// `Qd4=or(Qs4,Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_or_QQ(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=or(Qs4,!Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_or_QQn(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_or_n(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vsetq(Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vsetq_R(rt: i32) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(pred_scalar2(rt), -1))
+}
+
+/// `Qd4=xor(Qs4,Qt4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_xor_QQ(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        pred_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `if (!Qv4) vmem(Rt32+#s4)=Vs32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VM_ST
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vmem_QnRIV(qv: HvxVectorPred, rt: *mut HvxVector, vs: HvxVector) {
+    vS32b_nqpred_ai(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        rt,
+        vs,
+    )
+}
+
+/// `if (!Qv4) vmem(Rt32+#s4):nt=Vs32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VM_ST
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vmem_QnRIV_nt(qv: HvxVectorPred, rt: *mut HvxVector, vs: HvxVector) {
+    vS32b_nt_nqpred_ai(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        rt,
+        vs,
+    )
+}
+
+/// `if (Qv4) vmem(Rt32+#s4):nt=Vs32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VM_ST
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vmem_QRIV_nt(qv: HvxVectorPred, rt: *mut HvxVector, vs: HvxVector) {
+    vS32b_nt_qpred_ai(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        rt,
+        vs,
+    )
+}
+
+/// `if (Qv4) vmem(Rt32+#s4)=Vs32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VM_ST
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vmem_QRIV(qv: HvxVectorPred, rt: *mut HvxVector, vs: HvxVector) {
+    vS32b_qpred_ai(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        rt,
+        vs,
+    )
+}
+
+/// `if (!Qv4) Vx32.b+=Vu32.b`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_condacc_QnVbVb(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddbnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.b+=Vu32.b`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_condacc_QVbVb(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddbq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (!Qv4) Vx32.h+=Vu32.h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_condacc_QnVhVh(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddhnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.h+=Vu32.h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_condacc_QVhVh(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddhq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (!Qv4) Vx32.w+=Vu32.w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_condacc_QnVwVw(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddwnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.w+=Vu32.w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_condacc_QVwVw(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vaddwq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `Vd32=vand(Qu4,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_QR(qu: HvxVectorPred, rt: i32) -> HvxVector {
+    vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qu), rt)
+}
+
+/// `Vx32|=vand(Qu4,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vandor_VQR(vx: HvxVector, qu: HvxVectorPred, rt: i32) -> HvxVector {
+    vandvrt_acc(vx, core::mem::transmute::<HvxVectorPred, HvxVector>(qu), rt)
+}
+
+/// `Qd4=vand(Vu32,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vand_VR(vu: HvxVector, rt: i32) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vu, rt))
+}
+
+/// `Qx4|=vand(Vu32,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vandor_QVR(qx: HvxVectorPred, vu: HvxVector, rt: i32) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt_acc(
+        core::mem::transmute::<HvxVectorPred, HvxVector>(qx),
+        vu,
+        rt,
+    ))
+}
+
+/// `Qd4=vcmp.eq(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eq_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(veqb(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.eq(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqand_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqb_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.eq(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqor_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqb_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.eq(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqxacc_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqb_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.eq(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eq_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(veqh(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.eq(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqand_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqh_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.eq(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqor_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqh_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.eq(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqxacc_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqh_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.eq(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eq_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(veqw(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.eq(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqand_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqw_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.eq(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqor_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqw_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.eq(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_eqxacc_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        veqw_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VbVb(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtb(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtb_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtb_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.b,Vv32.b)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVbVb(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtb_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VhVh(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgth(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgth_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgth_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.h,Vv32.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVhVh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgth_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.ub,Vv32.ub)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VubVub(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtub(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.ub,Vv32.ub)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVubVub(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtub_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.ub,Vv32.ub)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVubVub(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtub_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.ub,Vv32.ub)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVubVub(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtub_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.uh,Vv32.uh)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VuhVuh(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtuh(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.uh,Vv32.uh)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVuhVuh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuh_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.uh,Vv32.uh)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVuhVuh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuh_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.uh,Vv32.uh)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVuhVuh(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuh_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.uw,Vv32.uw)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VuwVuw(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtuw(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.uw,Vv32.uw)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVuwVuw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuw_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.uw,Vv32.uw)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVuwVuw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuw_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.uw,Vv32.uw)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVuwVuw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtuw_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VwVw(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtw(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtw_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtw_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.w,Vv32.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVwVw(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtw_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Vd32=vmux(Qt4,Vu32,Vv32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vmux_QVV(qt: HvxVectorPred, vu: HvxVector, vv: HvxVector) -> HvxVector {
+    vmux(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        vu,
+        vv,
+    )
+}
+
+/// `if (!Qv4) Vx32.b-=Vu32.b`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_condnac_QnVbVb(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubbnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.b-=Vu32.b`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_condnac_QVbVb(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubbq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (!Qv4) Vx32.h-=Vu32.h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_condnac_QnVhVh(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubhnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.h-=Vu32.h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_condnac_QVhVh(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubhq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (!Qv4) Vx32.w-=Vu32.w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_condnac_QnVwVw(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubwnq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `if (Qv4) Vx32.w-=Vu32.w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_condnac_QVwVw(qv: HvxVectorPred, vx: HvxVector, vu: HvxVector) -> HvxVector {
+    vsubwq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vx,
+        vu,
+    )
+}
+
+/// `Vdd32=vswap(Qt4,Vu32,Vv32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv60"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_W_vswap_QVV(qt: HvxVectorPred, vu: HvxVector, vv: HvxVector) -> HvxVectorPair {
+    vswap(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        vu,
+        vv,
+    )
+}
+
+/// `Qd4=vsetq2(Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VP
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vsetq2_R(rt: i32) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(pred_scalar2v2(rt), -1))
+}
+
+/// `Qd4.b=vshuffe(Qs4.h,Qt4.h)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Qb_vshuffe_QhQh(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        shuffeqh(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4.h=vshuffe(Qs4.w,Qt4.w)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA_DV
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Qh_vshuffe_QwQw(qs: HvxVectorPred, qt: HvxVectorPred) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        shuffeqw(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qt), -1),
+        ),
+        -1,
+    ))
+}
+
+/// `Vd32=vand(!Qu4,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_QnR(qu: HvxVectorPred, rt: i32) -> HvxVector {
+    vandnqrt(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qu), -1),
+        rt,
+    )
+}
+
+/// `Vx32|=vand(!Qu4,Rt32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VX_LATE
+/// Execution Slots: SLOT23
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vandor_VQnR(vx: HvxVector, qu: HvxVectorPred, rt: i32) -> HvxVector {
+    vandnqrt_acc(
+        vx,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qu), -1),
+        rt,
+    )
+}
+
+/// `Vd32=vand(!Qv4,Vu32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_QnV(qv: HvxVectorPred, vu: HvxVector) -> HvxVector {
+    vandvnqv(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vu,
+    )
+}
+
+/// `Vd32=vand(Qv4,Vu32)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv62"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_V_vand_QV(qv: HvxVectorPred, vu: HvxVector) -> HvxVector {
+    vandvqv(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qv), -1),
+        vu,
+    )
+}
+
+/// `if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vv32.h).h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_GATHER
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_AQRMVh(
+    rs: *mut HvxVector,
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vv: HvxVector,
+) {
+    vgathermhq(
+        rs,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vv,
+    )
+}
+
+/// `if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_GATHER_DV
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_AQRMWw(
+    rs: *mut HvxVector,
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vvv: HvxVectorPair,
+) {
+    vgathermhwq(
+        rs,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vvv,
+    )
+}
+
+/// `if (Qs4) vtmp.w=vgather(Rt32,Mu2,Vv32.w).w`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_GATHER
+/// Execution Slots: SLOT01
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vgather_AQRMVw(
+    rs: *mut HvxVector,
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vv: HvxVector,
+) {
+    vgathermwq(
+        rs,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vv,
+    )
+}
+
+/// `Vd32.b=prefixsum(Qv4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vb_prefixsum_Q(qv: HvxVectorPred) -> HvxVector {
+    vprefixqb(vandvrt(
+        core::mem::transmute::<HvxVectorPred, HvxVector>(qv),
+        -1,
+    ))
+}
+
+/// `Vd32.h=prefixsum(Qv4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vh_prefixsum_Q(qv: HvxVectorPred) -> HvxVector {
+    vprefixqh(vandvrt(
+        core::mem::transmute::<HvxVectorPred, HvxVector>(qv),
+        -1,
+    ))
+}
+
+/// `Vd32.w=prefixsum(Qv4)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VS
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_prefixsum_Q(qv: HvxVectorPred) -> HvxVector {
+    vprefixqw(vandvrt(
+        core::mem::transmute::<HvxVectorPred, HvxVector>(qv),
+        -1,
+    ))
+}
+
+/// `if (Qs4) vscatter(Rt32,Mu2,Vv32.h).h=Vw32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_QRMVhV(
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vv: HvxVector,
+    vw: HvxVector,
+) {
+    vscattermhq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vv,
+        vw,
+    )
+}
+
+/// `if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_SCATTER_DV
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_QRMWwV(
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vvv: HvxVectorPair,
+    vw: HvxVector,
+) {
+    vscattermhwq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vvv,
+        vw,
+    )
+}
+
+/// `if (Qs4) vscatter(Rt32,Mu2,Vv32.w).w=Vw32`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_SCATTER
+/// Execution Slots: SLOT0
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv65"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_vscatter_QRMVwV(
+    qs: HvxVectorPred,
+    rt: i32,
+    mu: i32,
+    vv: HvxVector,
+    vw: HvxVector,
+) {
+    vscattermwq(
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+        rt,
+        mu,
+        vv,
+        vw,
+    )
+}
+
+/// `Vd32.w=vadd(Vu32.w,Vv32.w,Qs4):carry:sat`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv66"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Vw_vadd_VwVwQ_carry_sat(
+    vu: HvxVector,
+    vv: HvxVector,
+    qs: HvxVectorPred,
+) -> HvxVector {
+    vaddcarrysat(
+        vu,
+        vv,
+        vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qs), -1),
+    )
+}
+
+/// `Qd4=vcmp.gt(Vu32.hf,Vv32.hf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VhfVhf(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgthf(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.hf,Vv32.hf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVhfVhf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgthf_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.hf,Vv32.hf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVhfVhf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgthf_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.hf,Vv32.hf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVhfVhf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgthf_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qd4=vcmp.gt(Vu32.sf,Vv32.sf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gt_VsfVsf(vu: HvxVector, vv: HvxVector) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vgtsf(vu, vv), -1))
+}
+
+/// `Qx4&=vcmp.gt(Vu32.sf,Vv32.sf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtand_QVsfVsf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtsf_and(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4|=vcmp.gt(Vu32.sf,Vv32.sf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtor_QVsfVsf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtsf_or(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
+
+/// `Qx4^=vcmp.gt(Vu32.sf,Vv32.sf)`
+///
+/// This is a compound operation composed of multiple HVX instructions.
+/// Instruction Type: CVI_VA
+/// Execution Slots: SLOT0123
+#[inline]
+#[cfg_attr(target_arch = "hexagon", target_feature(enable = "hvxv68"))]
+#[unstable(feature = "stdarch_hexagon", issue = "151523")]
+pub unsafe fn Q6_Q_vcmp_gtxacc_QVsfVsf(
+    qx: HvxVectorPred,
+    vu: HvxVector,
+    vv: HvxVector,
+) -> HvxVectorPred {
+    core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(
+        vgtsf_xor(
+            vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), -1),
+            vu,
+            vv,
+        ),
+        -1,
+    ))
+}
diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs
index 26a9cb5899..b9d63b4497 100644
--- a/crates/core_arch/src/lib.rs
+++ b/crates/core_arch/src/lib.rs
@@ -22,8 +22,8 @@
     arm_target_feature,
     mips_target_feature,
     powerpc_target_feature,
-    s390x_target_feature,
     loongarch_target_feature,
+    hexagon_target_feature,
     wasm_target_feature,
     abi_unadjusted,
     rtm_target_feature,
@@ -33,8 +33,14 @@
     x86_amx_intrinsics,
     f16,
     aarch64_unstable_target_feature,
-    bigint_helper_methods,
-    funnel_shifts
+    funnel_shifts,
+    avx10_target_feature,
+    const_trait_impl,
+    const_cmp,
+    const_eval_select,
+    maybe_uninit_as_bytes,
+    movrs_target_feature,
+    min_adt_const_params
 )]
 #![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
 #![deny(clippy::missing_inline_in_public_items)]
@@ -66,8 +72,8 @@
     test,
     feature(
         stdarch_arm_feature_detection,
+        stdarch_mips_feature_detection,
         stdarch_powerpc_feature_detection,
-        stdarch_s390x_feature_detection
     )
 )]
 
@@ -88,4 +94,4 @@ pub mod arch {
 }
 
 #[allow(unused_imports)]
-use core::{array, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};
+use core::{array, cmp, convert, ffi, fmt, hint, intrinsics, marker, mem, ops, ptr, sync};
diff --git a/crates/core_arch/src/loongarch32/mod.rs b/crates/core_arch/src/loongarch32/mod.rs
index 4e3f3d2718..6cc1116113 100644
--- a/crates/core_arch/src/loongarch32/mod.rs
+++ b/crates/core_arch/src/loongarch32/mod.rs
@@ -15,7 +15,7 @@ unsafe extern "unadjusted" {
 }
 
 /// Generates the cache operation instruction
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn cacop<const IMM5: i32, const IMM_S12: i32>(b: i32) {
     static_assert_uimm_bits!(IMM5, 5);
@@ -24,7 +24,7 @@ pub unsafe fn cacop<const IMM5: i32, const IMM_S12: i32>(b: i32) {
 }
 
 /// Reads the CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn csrrd<const IMM14: i32>() -> i32 {
     static_assert_uimm_bits!(IMM14, 14);
@@ -32,7 +32,7 @@ pub unsafe fn csrrd<const IMM14: i32>() -> i32 {
 }
 
 /// Writes the CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn csrwr<const IMM14: i32>(a: i32) -> i32 {
     static_assert_uimm_bits!(IMM14, 14);
@@ -40,7 +40,7 @@ pub unsafe fn csrwr<const IMM14: i32>(a: i32) -> i32 {
 }
 
 /// Exchanges the CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn csrxchg<const IMM14: i32>(a: i32, b: i32) -> i32 {
     static_assert_uimm_bits!(IMM14, 14);
diff --git a/crates/core_arch/src/loongarch64/lasx/generated.rs b/crates/core_arch/src/loongarch64/lasx/generated.rs
index cda0ebec67..f8667887c7 100644
--- a/crates/core_arch/src/loongarch64/lasx/generated.rs
+++ b/crates/core_arch/src/loongarch64/lasx/generated.rs
@@ -7,42 +7,10 @@
 // ```
 
 use crate::mem::transmute;
-use super::types::*;
+use super::super::*;
 
 #[allow(improper_ctypes)]
 unsafe extern "unadjusted" {
-    #[link_name = "llvm.loongarch.lasx.xvsll.b"]
-    fn __lasx_xvsll_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsll.h"]
-    fn __lasx_xvsll_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsll.w"]
-    fn __lasx_xvsll_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsll.d"]
-    fn __lasx_xvsll_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvslli.b"]
-    fn __lasx_xvslli_b(a: __v32i8, b: u32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvslli.h"]
-    fn __lasx_xvslli_h(a: __v16i16, b: u32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvslli.w"]
-    fn __lasx_xvslli_w(a: __v8i32, b: u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvslli.d"]
-    fn __lasx_xvslli_d(a: __v4i64, b: u32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsra.b"]
-    fn __lasx_xvsra_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsra.h"]
-    fn __lasx_xvsra_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsra.w"]
-    fn __lasx_xvsra_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsra.d"]
-    fn __lasx_xvsra_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsrai.b"]
-    fn __lasx_xvsrai_b(a: __v32i8, b: u32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsrai.h"]
-    fn __lasx_xvsrai_h(a: __v16i16, b: u32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsrai.w"]
-    fn __lasx_xvsrai_w(a: __v8i32, b: u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsrai.d"]
-    fn __lasx_xvsrai_d(a: __v4i64, b: u32) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvsrar.b"]
     fn __lasx_xvsrar_b(a: __v32i8, b: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvsrar.h"]
@@ -59,22 +27,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvsrari_w(a: __v8i32, b: u32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvsrari.d"]
     fn __lasx_xvsrari_d(a: __v4i64, b: u32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsrl.b"]
-    fn __lasx_xvsrl_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsrl.h"]
-    fn __lasx_xvsrl_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsrl.w"]
-    fn __lasx_xvsrl_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsrl.d"]
-    fn __lasx_xvsrl_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsrli.b"]
-    fn __lasx_xvsrli_b(a: __v32i8, b: u32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsrli.h"]
-    fn __lasx_xvsrli_h(a: __v16i16, b: u32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsrli.w"]
-    fn __lasx_xvsrli_w(a: __v8i32, b: u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsrli.d"]
-    fn __lasx_xvsrli_d(a: __v4i64, b: u32) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvsrlr.b"]
     fn __lasx_xvsrlr_b(a: __v32i8, b: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvsrlr.h"]
@@ -91,14 +43,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvsrlri_w(a: __v8i32, b: u32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvsrlri.d"]
     fn __lasx_xvsrlri_d(a: __v4i64, b: u32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvbitclr.b"]
-    fn __lasx_xvbitclr_b(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvbitclr.h"]
-    fn __lasx_xvbitclr_h(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvbitclr.w"]
-    fn __lasx_xvbitclr_w(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvbitclr.d"]
-    fn __lasx_xvbitclr_d(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvbitclri.b"]
     fn __lasx_xvbitclri_b(a: __v32u8, b: u32) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvbitclri.h"]
@@ -107,14 +51,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvbitclri_w(a: __v8u32, b: u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvbitclri.d"]
     fn __lasx_xvbitclri_d(a: __v4u64, b: u32) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvbitset.b"]
-    fn __lasx_xvbitset_b(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvbitset.h"]
-    fn __lasx_xvbitset_h(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvbitset.w"]
-    fn __lasx_xvbitset_w(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvbitset.d"]
-    fn __lasx_xvbitset_d(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvbitseti.b"]
     fn __lasx_xvbitseti_b(a: __v32u8, b: u32) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvbitseti.h"]
@@ -123,14 +59,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvbitseti_w(a: __v8u32, b: u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvbitseti.d"]
     fn __lasx_xvbitseti_d(a: __v4u64, b: u32) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvbitrev.b"]
-    fn __lasx_xvbitrev_b(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvbitrev.h"]
-    fn __lasx_xvbitrev_h(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvbitrev.w"]
-    fn __lasx_xvbitrev_w(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvbitrev.d"]
-    fn __lasx_xvbitrev_d(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvbitrevi.b"]
     fn __lasx_xvbitrevi_b(a: __v32u8, b: u32) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvbitrevi.h"]
@@ -139,30 +67,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvbitrevi_w(a: __v8u32, b: u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvbitrevi.d"]
     fn __lasx_xvbitrevi_d(a: __v4u64, b: u32) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvadd.b"]
-    fn __lasx_xvadd_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvadd.h"]
-    fn __lasx_xvadd_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvadd.w"]
-    fn __lasx_xvadd_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvadd.d"]
-    fn __lasx_xvadd_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvaddi.bu"]
-    fn __lasx_xvaddi_bu(a: __v32i8, b: u32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvaddi.hu"]
-    fn __lasx_xvaddi_hu(a: __v16i16, b: u32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvaddi.wu"]
-    fn __lasx_xvaddi_wu(a: __v8i32, b: u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvaddi.du"]
-    fn __lasx_xvaddi_du(a: __v4i64, b: u32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsub.b"]
-    fn __lasx_xvsub_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsub.h"]
-    fn __lasx_xvsub_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsub.w"]
-    fn __lasx_xvsub_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsub.d"]
-    fn __lasx_xvsub_d(a: __v4i64, b: __v4i64) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvsubi.bu"]
     fn __lasx_xvsubi_bu(a: __v32i8, b: u32) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvsubi.hu"]
@@ -171,150 +75,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvsubi_wu(a: __v8i32, b: u32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvsubi.du"]
     fn __lasx_xvsubi_du(a: __v4i64, b: u32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvmax.b"]
-    fn __lasx_xvmax_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvmax.h"]
-    fn __lasx_xvmax_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvmax.w"]
-    fn __lasx_xvmax_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvmax.d"]
-    fn __lasx_xvmax_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvmaxi.b"]
-    fn __lasx_xvmaxi_b(a: __v32i8, b: i32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvmaxi.h"]
-    fn __lasx_xvmaxi_h(a: __v16i16, b: i32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvmaxi.w"]
-    fn __lasx_xvmaxi_w(a: __v8i32, b: i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvmaxi.d"]
-    fn __lasx_xvmaxi_d(a: __v4i64, b: i32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvmax.bu"]
-    fn __lasx_xvmax_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvmax.hu"]
-    fn __lasx_xvmax_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvmax.wu"]
-    fn __lasx_xvmax_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvmax.du"]
-    fn __lasx_xvmax_du(a: __v4u64, b: __v4u64) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvmaxi.bu"]
-    fn __lasx_xvmaxi_bu(a: __v32u8, b: u32) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvmaxi.hu"]
-    fn __lasx_xvmaxi_hu(a: __v16u16, b: u32) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvmaxi.wu"]
-    fn __lasx_xvmaxi_wu(a: __v8u32, b: u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvmaxi.du"]
-    fn __lasx_xvmaxi_du(a: __v4u64, b: u32) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvmin.b"]
-    fn __lasx_xvmin_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvmin.h"]
-    fn __lasx_xvmin_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvmin.w"]
-    fn __lasx_xvmin_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvmin.d"]
-    fn __lasx_xvmin_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvmini.b"]
-    fn __lasx_xvmini_b(a: __v32i8, b: i32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvmini.h"]
-    fn __lasx_xvmini_h(a: __v16i16, b: i32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvmini.w"]
-    fn __lasx_xvmini_w(a: __v8i32, b: i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvmini.d"]
-    fn __lasx_xvmini_d(a: __v4i64, b: i32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvmin.bu"]
-    fn __lasx_xvmin_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvmin.hu"]
-    fn __lasx_xvmin_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvmin.wu"]
-    fn __lasx_xvmin_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvmin.du"]
-    fn __lasx_xvmin_du(a: __v4u64, b: __v4u64) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvmini.bu"]
-    fn __lasx_xvmini_bu(a: __v32u8, b: u32) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvmini.hu"]
-    fn __lasx_xvmini_hu(a: __v16u16, b: u32) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvmini.wu"]
-    fn __lasx_xvmini_wu(a: __v8u32, b: u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvmini.du"]
-    fn __lasx_xvmini_du(a: __v4u64, b: u32) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvseq.b"]
-    fn __lasx_xvseq_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvseq.h"]
-    fn __lasx_xvseq_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvseq.w"]
-    fn __lasx_xvseq_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvseq.d"]
-    fn __lasx_xvseq_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvseqi.b"]
-    fn __lasx_xvseqi_b(a: __v32i8, b: i32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvseqi.h"]
-    fn __lasx_xvseqi_h(a: __v16i16, b: i32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvseqi.w"]
-    fn __lasx_xvseqi_w(a: __v8i32, b: i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvseqi.d"]
-    fn __lasx_xvseqi_d(a: __v4i64, b: i32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvslt.b"]
-    fn __lasx_xvslt_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvslt.h"]
-    fn __lasx_xvslt_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvslt.w"]
-    fn __lasx_xvslt_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvslt.d"]
-    fn __lasx_xvslt_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvslti.b"]
-    fn __lasx_xvslti_b(a: __v32i8, b: i32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvslti.h"]
-    fn __lasx_xvslti_h(a: __v16i16, b: i32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvslti.w"]
-    fn __lasx_xvslti_w(a: __v8i32, b: i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvslti.d"]
-    fn __lasx_xvslti_d(a: __v4i64, b: i32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvslt.bu"]
-    fn __lasx_xvslt_bu(a: __v32u8, b: __v32u8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvslt.hu"]
-    fn __lasx_xvslt_hu(a: __v16u16, b: __v16u16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvslt.wu"]
-    fn __lasx_xvslt_wu(a: __v8u32, b: __v8u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvslt.du"]
-    fn __lasx_xvslt_du(a: __v4u64, b: __v4u64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvslti.bu"]
-    fn __lasx_xvslti_bu(a: __v32u8, b: u32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvslti.hu"]
-    fn __lasx_xvslti_hu(a: __v16u16, b: u32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvslti.wu"]
-    fn __lasx_xvslti_wu(a: __v8u32, b: u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvslti.du"]
-    fn __lasx_xvslti_du(a: __v4u64, b: u32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsle.b"]
-    fn __lasx_xvsle_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsle.h"]
-    fn __lasx_xvsle_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsle.w"]
-    fn __lasx_xvsle_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsle.d"]
-    fn __lasx_xvsle_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvslei.b"]
-    fn __lasx_xvslei_b(a: __v32i8, b: i32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvslei.h"]
-    fn __lasx_xvslei_h(a: __v16i16, b: i32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvslei.w"]
-    fn __lasx_xvslei_w(a: __v8i32, b: i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvslei.d"]
-    fn __lasx_xvslei_d(a: __v4i64, b: i32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsle.bu"]
-    fn __lasx_xvsle_bu(a: __v32u8, b: __v32u8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsle.hu"]
-    fn __lasx_xvsle_hu(a: __v16u16, b: __v16u16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsle.wu"]
-    fn __lasx_xvsle_wu(a: __v8u32, b: __v8u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsle.du"]
-    fn __lasx_xvsle_du(a: __v4u64, b: __v4u64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvslei.bu"]
-    fn __lasx_xvslei_bu(a: __v32u8, b: u32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvslei.hu"]
-    fn __lasx_xvslei_hu(a: __v16u16, b: u32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvslei.wu"]
-    fn __lasx_xvslei_wu(a: __v8u32, b: u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvslei.du"]
-    fn __lasx_xvslei_du(a: __v4u64, b: u32) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvsat.b"]
     fn __lasx_xvsat_b(a: __v32i8, b: u32) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvsat.h"]
@@ -331,30 +91,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvsat_wu(a: __v8u32, b: u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvsat.du"]
     fn __lasx_xvsat_du(a: __v4u64, b: u32) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvadda.b"]
-    fn __lasx_xvadda_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvadda.h"]
-    fn __lasx_xvadda_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvadda.w"]
-    fn __lasx_xvadda_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvadda.d"]
-    fn __lasx_xvadda_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.b"]
-    fn __lasx_xvsadd_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.h"]
-    fn __lasx_xvsadd_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.w"]
-    fn __lasx_xvsadd_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.d"]
-    fn __lasx_xvsadd_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.bu"]
-    fn __lasx_xvsadd_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.hu"]
-    fn __lasx_xvsadd_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.wu"]
-    fn __lasx_xvsadd_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvsadd.du"]
-    fn __lasx_xvsadd_du(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvavg.b"]
     fn __lasx_xvavg_b(a: __v32i8, b: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvavg.h"]
@@ -387,78 +123,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvavgr_wu(a: __v8u32, b: __v8u32) -> __v8u32;
     #[link_name = "llvm.loongarch.lasx.xvavgr.du"]
     fn __lasx_xvavgr_du(a: __v4u64, b: __v4u64) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvssub.b"]
-    fn __lasx_xvssub_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvssub.h"]
-    fn __lasx_xvssub_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvssub.w"]
-    fn __lasx_xvssub_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvssub.d"]
-    fn __lasx_xvssub_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvssub.bu"]
-    fn __lasx_xvssub_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvssub.hu"]
-    fn __lasx_xvssub_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvssub.wu"]
-    fn __lasx_xvssub_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvssub.du"]
-    fn __lasx_xvssub_du(a: __v4u64, b: __v4u64) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.b"]
-    fn __lasx_xvabsd_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.h"]
-    fn __lasx_xvabsd_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.w"]
-    fn __lasx_xvabsd_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.d"]
-    fn __lasx_xvabsd_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.bu"]
-    fn __lasx_xvabsd_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.hu"]
-    fn __lasx_xvabsd_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.wu"]
-    fn __lasx_xvabsd_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvabsd.du"]
-    fn __lasx_xvabsd_du(a: __v4u64, b: __v4u64) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvmul.b"]
-    fn __lasx_xvmul_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvmul.h"]
-    fn __lasx_xvmul_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvmul.w"]
-    fn __lasx_xvmul_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvmul.d"]
-    fn __lasx_xvmul_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvmadd.b"]
-    fn __lasx_xvmadd_b(a: __v32i8, b: __v32i8, c: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvmadd.h"]
-    fn __lasx_xvmadd_h(a: __v16i16, b: __v16i16, c: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvmadd.w"]
-    fn __lasx_xvmadd_w(a: __v8i32, b: __v8i32, c: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvmadd.d"]
-    fn __lasx_xvmadd_d(a: __v4i64, b: __v4i64, c: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvmsub.b"]
-    fn __lasx_xvmsub_b(a: __v32i8, b: __v32i8, c: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvmsub.h"]
-    fn __lasx_xvmsub_h(a: __v16i16, b: __v16i16, c: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvmsub.w"]
-    fn __lasx_xvmsub_w(a: __v8i32, b: __v8i32, c: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvmsub.d"]
-    fn __lasx_xvmsub_d(a: __v4i64, b: __v4i64, c: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvdiv.b"]
-    fn __lasx_xvdiv_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvdiv.h"]
-    fn __lasx_xvdiv_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvdiv.w"]
-    fn __lasx_xvdiv_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvdiv.d"]
-    fn __lasx_xvdiv_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvdiv.bu"]
-    fn __lasx_xvdiv_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvdiv.hu"]
-    fn __lasx_xvdiv_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvdiv.wu"]
-    fn __lasx_xvdiv_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvdiv.du"]
-    fn __lasx_xvdiv_du(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvhaddw.h.b"]
     fn __lasx_xvhaddw_h_b(a: __v32i8, b: __v32i8) -> __v16i16;
     #[link_name = "llvm.loongarch.lasx.xvhaddw.w.h"]
@@ -483,22 +147,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvhsubw_wu_hu(a: __v16u16, b: __v16u16) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvhsubw.du.wu"]
     fn __lasx_xvhsubw_du_wu(a: __v8u32, b: __v8u32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvmod.b"]
-    fn __lasx_xvmod_b(a: __v32i8, b: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvmod.h"]
-    fn __lasx_xvmod_h(a: __v16i16, b: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvmod.w"]
-    fn __lasx_xvmod_w(a: __v8i32, b: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvmod.d"]
-    fn __lasx_xvmod_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvmod.bu"]
-    fn __lasx_xvmod_bu(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvmod.hu"]
-    fn __lasx_xvmod_hu(a: __v16u16, b: __v16u16) -> __v16u16;
-    #[link_name = "llvm.loongarch.lasx.xvmod.wu"]
-    fn __lasx_xvmod_wu(a: __v8u32, b: __v8u32) -> __v8u32;
-    #[link_name = "llvm.loongarch.lasx.xvmod.du"]
-    fn __lasx_xvmod_du(a: __v4u64, b: __v4u64) -> __v4u64;
     #[link_name = "llvm.loongarch.lasx.xvrepl128vei.b"]
     fn __lasx_xvrepl128vei_b(a: __v32i8, b: u32) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvrepl128vei.h"]
@@ -563,20 +211,12 @@ unsafe extern "unadjusted" {
     fn __lasx_xvshuf_w(a: __v8i32, b: __v8i32, c: __v8i32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvshuf.d"]
     fn __lasx_xvshuf_d(a: __v4i64, b: __v4i64, c: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvand.v"]
-    fn __lasx_xvand_v(a: __v32u8, b: __v32u8) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvandi.b"]
     fn __lasx_xvandi_b(a: __v32u8, b: u32) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvor.v"]
-    fn __lasx_xvor_v(a: __v32u8, b: __v32u8) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvori.b"]
     fn __lasx_xvori_b(a: __v32u8, b: u32) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvnor.v"]
-    fn __lasx_xvnor_v(a: __v32u8, b: __v32u8) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvnori.b"]
     fn __lasx_xvnori_b(a: __v32u8, b: u32) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvxor.v"]
-    fn __lasx_xvxor_v(a: __v32u8, b: __v32u8) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvxori.b"]
     fn __lasx_xvxori_b(a: __v32u8, b: u32) -> __v32u8;
     #[link_name = "llvm.loongarch.lasx.xvbitsel.v"]
@@ -589,22 +229,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvshuf4i_h(a: __v16i16, b: u32) -> __v16i16;
     #[link_name = "llvm.loongarch.lasx.xvshuf4i.w"]
     fn __lasx_xvshuf4i_w(a: __v8i32, b: u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvreplgr2vr.b"]
-    fn __lasx_xvreplgr2vr_b(a: i32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvreplgr2vr.h"]
-    fn __lasx_xvreplgr2vr_h(a: i32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvreplgr2vr.w"]
-    fn __lasx_xvreplgr2vr_w(a: i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvreplgr2vr.d"]
-    fn __lasx_xvreplgr2vr_d(a: i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvpcnt.b"]
-    fn __lasx_xvpcnt_b(a: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvpcnt.h"]
-    fn __lasx_xvpcnt_h(a: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvpcnt.w"]
-    fn __lasx_xvpcnt_w(a: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvpcnt.d"]
-    fn __lasx_xvpcnt_d(a: __v4i64) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvclo.b"]
     fn __lasx_xvclo_b(a: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvclo.h"]
@@ -613,30 +237,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvclo_w(a: __v8i32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvclo.d"]
     fn __lasx_xvclo_d(a: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvclz.b"]
-    fn __lasx_xvclz_b(a: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvclz.h"]
-    fn __lasx_xvclz_h(a: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvclz.w"]
-    fn __lasx_xvclz_w(a: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvclz.d"]
-    fn __lasx_xvclz_d(a: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvfadd.s"]
-    fn __lasx_xvfadd_s(a: __v8f32, b: __v8f32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvfadd.d"]
-    fn __lasx_xvfadd_d(a: __v4f64, b: __v4f64) -> __v4f64;
-    #[link_name = "llvm.loongarch.lasx.xvfsub.s"]
-    fn __lasx_xvfsub_s(a: __v8f32, b: __v8f32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvfsub.d"]
-    fn __lasx_xvfsub_d(a: __v4f64, b: __v4f64) -> __v4f64;
-    #[link_name = "llvm.loongarch.lasx.xvfmul.s"]
-    fn __lasx_xvfmul_s(a: __v8f32, b: __v8f32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvfmul.d"]
-    fn __lasx_xvfmul_d(a: __v4f64, b: __v4f64) -> __v4f64;
-    #[link_name = "llvm.loongarch.lasx.xvfdiv.s"]
-    fn __lasx_xvfdiv_s(a: __v8f32, b: __v8f32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvfdiv.d"]
-    fn __lasx_xvfdiv_d(a: __v4f64, b: __v4f64) -> __v4f64;
     #[link_name = "llvm.loongarch.lasx.xvfcvt.h.s"]
     fn __lasx_xvfcvt_h_s(a: __v8f32, b: __v8f32) -> __v16i16;
     #[link_name = "llvm.loongarch.lasx.xvfcvt.s.d"]
@@ -661,10 +261,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvfclass_s(a: __v8f32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvfclass.d"]
     fn __lasx_xvfclass_d(a: __v4f64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvfsqrt.s"]
-    fn __lasx_xvfsqrt_s(a: __v8f32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvfsqrt.d"]
-    fn __lasx_xvfsqrt_d(a: __v4f64) -> __v4f64;
     #[link_name = "llvm.loongarch.lasx.xvfrecip.s"]
     fn __lasx_xvfrecip_s(a: __v8f32) -> __v8f32;
     #[link_name = "llvm.loongarch.lasx.xvfrecip.d"]
@@ -731,16 +327,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvreplve_d(a: __v4i64, b: i32) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvpermi.w"]
     fn __lasx_xvpermi_w(a: __v8i32, b: __v8i32, c: u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvandn.v"]
-    fn __lasx_xvandn_v(a: __v32u8, b: __v32u8) -> __v32u8;
-    #[link_name = "llvm.loongarch.lasx.xvneg.b"]
-    fn __lasx_xvneg_b(a: __v32i8) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvneg.h"]
-    fn __lasx_xvneg_h(a: __v16i16) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvneg.w"]
-    fn __lasx_xvneg_w(a: __v8i32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvneg.d"]
-    fn __lasx_xvneg_d(a: __v4i64) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvmuh.b"]
     fn __lasx_xvmuh_b(a: __v32i8, b: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvmuh.h"]
@@ -867,22 +453,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvsigncov_w(a: __v8i32, b: __v8i32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvsigncov.d"]
     fn __lasx_xvsigncov_d(a: __v4i64, b: __v4i64) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvfmadd.s"]
-    fn __lasx_xvfmadd_s(a: __v8f32, b: __v8f32, c: __v8f32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvfmadd.d"]
-    fn __lasx_xvfmadd_d(a: __v4f64, b: __v4f64, c: __v4f64) -> __v4f64;
-    #[link_name = "llvm.loongarch.lasx.xvfmsub.s"]
-    fn __lasx_xvfmsub_s(a: __v8f32, b: __v8f32, c: __v8f32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvfmsub.d"]
-    fn __lasx_xvfmsub_d(a: __v4f64, b: __v4f64, c: __v4f64) -> __v4f64;
-    #[link_name = "llvm.loongarch.lasx.xvfnmadd.s"]
-    fn __lasx_xvfnmadd_s(a: __v8f32, b: __v8f32, c: __v8f32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvfnmadd.d"]
-    fn __lasx_xvfnmadd_d(a: __v4f64, b: __v4f64, c: __v4f64) -> __v4f64;
-    #[link_name = "llvm.loongarch.lasx.xvfnmsub.s"]
-    fn __lasx_xvfnmsub_s(a: __v8f32, b: __v8f32, c: __v8f32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvfnmsub.d"]
-    fn __lasx_xvfnmsub_d(a: __v4f64, b: __v4f64, c: __v4f64) -> __v4f64;
     #[link_name = "llvm.loongarch.lasx.xvftintrne.w.s"]
     fn __lasx_xvftintrne_w_s(a: __v8f32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvftintrne.l.d"]
@@ -979,8 +549,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvssrln_h_w(a: __v8i32, b: __v8i32) -> __v16i16;
     #[link_name = "llvm.loongarch.lasx.xvssrln.w.d"]
     fn __lasx_xvssrln_w_d(a: __v4i64, b: __v4i64) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvorn.v"]
-    fn __lasx_xvorn_v(a: __v32i8, b: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvldi"]
     fn __lasx_xvldi(a: i32) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvldx"]
@@ -989,10 +557,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvstx(a: __v32i8, b: *mut i8, c: i64);
     #[link_name = "llvm.loongarch.lasx.xvextl.qu.du"]
     fn __lasx_xvextl_qu_du(a: __v4u64) -> __v4u64;
-    #[link_name = "llvm.loongarch.lasx.xvinsgr2vr.w"]
-    fn __lasx_xvinsgr2vr_w(a: __v8i32, b: i32, c: u32) -> __v8i32;
-    #[link_name = "llvm.loongarch.lasx.xvinsgr2vr.d"]
-    fn __lasx_xvinsgr2vr_d(a: __v4i64, b: i64, c: u32) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvreplve0.b"]
     fn __lasx_xvreplve0_b(a: __v32i8) -> __v32i8;
     #[link_name = "llvm.loongarch.lasx.xvreplve0.h"]
@@ -1041,14 +605,6 @@ unsafe extern "unadjusted" {
     fn __lasx_xvldrepl_w(a: *const i8, b: i32) -> __v8i32;
     #[link_name = "llvm.loongarch.lasx.xvldrepl.d"]
     fn __lasx_xvldrepl_d(a: *const i8, b: i32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvpickve2gr.w"]
-    fn __lasx_xvpickve2gr_w(a: __v8i32, b: u32) -> i32;
-    #[link_name = "llvm.loongarch.lasx.xvpickve2gr.wu"]
-    fn __lasx_xvpickve2gr_wu(a: __v8i32, b: u32) -> u32;
-    #[link_name = "llvm.loongarch.lasx.xvpickve2gr.d"]
-    fn __lasx_xvpickve2gr_d(a: __v4i64, b: u32) -> i64;
-    #[link_name = "llvm.loongarch.lasx.xvpickve2gr.du"]
-    fn __lasx_xvpickve2gr_du(a: __v4i64, b: u32) -> u64;
     #[link_name = "llvm.loongarch.lasx.xvaddwev.q.d"]
     fn __lasx_xvaddwev_q_d(a: __v4i64, b: __v4i64) -> __v4i64;
     #[link_name = "llvm.loongarch.lasx.xvaddwev.d.w"]
@@ -1483,142 +1039,42 @@ unsafe extern "unadjusted" {
     fn __lasx_xvpickve_d_f(a: __v4f64, b: u32) -> __v4f64;
     #[link_name = "llvm.loongarch.lasx.xvpickve.w.f"]
     fn __lasx_xvpickve_w_f(a: __v8f32, b: u32) -> __v8f32;
-    #[link_name = "llvm.loongarch.lasx.xvrepli.b"]
-    fn __lasx_xvrepli_b(a: i32) -> __v32i8;
-    #[link_name = "llvm.loongarch.lasx.xvrepli.d"]
-    fn __lasx_xvrepli_d(a: i32) -> __v4i64;
-    #[link_name = "llvm.loongarch.lasx.xvrepli.h"]
-    fn __lasx_xvrepli_h(a: i32) -> __v16i16;
-    #[link_name = "llvm.loongarch.lasx.xvrepli.w"]
-    fn __lasx_xvrepli_w(a: i32) -> __v8i32;
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsll_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsll_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsll_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsll_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsll_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsll_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsll_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsll_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslli_b<const IMM3: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lasx_xvslli_b(transmute(a), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslli_h<const IMM4: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lasx_xvslli_h(transmute(a), IMM4)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslli_w<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvslli_w(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslli_d<const IMM6: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lasx_xvslli_d(transmute(a), IMM6)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsra_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsra_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsra_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsra_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsra_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsra_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsra_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsra_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrai_b<const IMM3: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lasx_xvsrai_b(transmute(a), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrai_h<const IMM4: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lasx_xvsrai_h(transmute(a), IMM4)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrai_w<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvsrai_w(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrai_d<const IMM6: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lasx_xvsrai_d(transmute(a), IMM6)) }
+    #[link_name = "llvm.loongarch.lasx.cast.128.s"]
+    fn __lasx_cast_128_s(a: __v4f32) -> __v8f32;
+    #[link_name = "llvm.loongarch.lasx.cast.128.d"]
+    fn __lasx_cast_128_d(a: __v2f64) -> __v4f64;
+    #[link_name = "llvm.loongarch.lasx.cast.128"]
+    fn __lasx_cast_128(a: __v2i64) -> __v4i64;
+    #[link_name = "llvm.loongarch.lasx.concat.128.s"]
+    fn __lasx_concat_128_s(a: __v4f32, b: __v4f32) -> __v8f32;
+    #[link_name = "llvm.loongarch.lasx.concat.128.d"]
+    fn __lasx_concat_128_d(a: __v2f64, b: __v2f64) -> __v4f64;
+    #[link_name = "llvm.loongarch.lasx.concat.128"]
+    fn __lasx_concat_128(a: __v2i64, b: __v2i64) -> __v4i64;
+    #[link_name = "llvm.loongarch.lasx.extract.128.lo.s"]
+    fn __lasx_extract_128_lo_s(a: __v8f32) -> __v4f32;
+    #[link_name = "llvm.loongarch.lasx.extract.128.hi.s"]
+    fn __lasx_extract_128_hi_s(a: __v8f32) -> __v4f32;
+    #[link_name = "llvm.loongarch.lasx.extract.128.lo.d"]
+    fn __lasx_extract_128_lo_d(a: __v4f64) -> __v2f64;
+    #[link_name = "llvm.loongarch.lasx.extract.128.hi.d"]
+    fn __lasx_extract_128_hi_d(a: __v4f64) -> __v2f64;
+    #[link_name = "llvm.loongarch.lasx.extract.128.lo"]
+    fn __lasx_extract_128_lo(a: __v4i64) -> __v2i64;
+    #[link_name = "llvm.loongarch.lasx.extract.128.hi"]
+    fn __lasx_extract_128_hi(a: __v4i64) -> __v2i64;
+    #[link_name = "llvm.loongarch.lasx.insert.128.lo.s"]
+    fn __lasx_insert_128_lo_s(a: __v8f32, b: __v4f32) -> __v8f32;
+    #[link_name = "llvm.loongarch.lasx.insert.128.hi.s"]
+    fn __lasx_insert_128_hi_s(a: __v8f32, b: __v4f32) -> __v8f32;
+    #[link_name = "llvm.loongarch.lasx.insert.128.lo.d"]
+    fn __lasx_insert_128_lo_d(a: __v4f64, b: __v2f64) -> __v4f64;
+    #[link_name = "llvm.loongarch.lasx.insert.128.hi.d"]
+    fn __lasx_insert_128_hi_d(a: __v4f64, b: __v2f64) -> __v4f64;
+    #[link_name = "llvm.loongarch.lasx.insert.128.lo"]
+    fn __lasx_insert_128_lo(a: __v4i64, b: __v2i64) -> __v4i64;
+    #[link_name = "llvm.loongarch.lasx.insert.128.hi"]
+    fn __lasx_insert_128_hi(a: __v4i64, b: __v2i64) -> __v4i64;
 }
 
 #[inline]
@@ -1685,70 +1141,6 @@ pub fn lasx_xvsrari_d<const IMM6: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvsrari_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrl_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsrl_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrl_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsrl_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrl_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsrl_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrl_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsrl_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrli_b<const IMM3: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lasx_xvsrli_b(transmute(a), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrli_h<const IMM4: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lasx_xvsrli_h(transmute(a), IMM4)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrli_w<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvsrli_w(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsrli_d<const IMM6: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lasx_xvsrli_d(transmute(a), IMM6)) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -1813,34 +1205,6 @@ pub fn lasx_xvsrlri_d<const IMM6: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvsrlri_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitclr_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitclr_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitclr_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitclr_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitclr_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitclr_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitclr_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitclr_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
@@ -1877,34 +1241,6 @@ pub fn lasx_xvbitclri_d<const IMM6: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvbitclri_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitset_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitset_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitset_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitset_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitset_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitset_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitset_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitset_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
@@ -1941,34 +1277,6 @@ pub fn lasx_xvbitseti_d<const IMM6: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvbitseti_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitrev_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitrev_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitrev_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitrev_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitrev_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitrev_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitrev_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitrev_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
@@ -2005,1228 +1313,224 @@ pub fn lasx_xvbitrevi_d<const IMM6: u32>(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvbitrevi_d(transmute(a), IMM6)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadd_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadd_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadd_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadd_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadd_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadd_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadd_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadd_d(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvaddi_bu<const IMM5: u32>(a: m256i) -> m256i {
+pub fn lasx_xvsubi_bu<const IMM5: u32>(a: m256i) -> m256i {
     static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvaddi_bu(transmute(a), IMM5)) }
+    unsafe { transmute(__lasx_xvsubi_bu(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvaddi_hu<const IMM5: u32>(a: m256i) -> m256i {
+pub fn lasx_xvsubi_hu<const IMM5: u32>(a: m256i) -> m256i {
     static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvaddi_hu(transmute(a), IMM5)) }
+    unsafe { transmute(__lasx_xvsubi_hu(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvaddi_wu<const IMM5: u32>(a: m256i) -> m256i {
+pub fn lasx_xvsubi_wu<const IMM5: u32>(a: m256i) -> m256i {
     static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvaddi_wu(transmute(a), IMM5)) }
+    unsafe { transmute(__lasx_xvsubi_wu(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvaddi_du<const IMM5: u32>(a: m256i) -> m256i {
+pub fn lasx_xvsubi_du<const IMM5: u32>(a: m256i) -> m256i {
     static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvaddi_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsub_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsub_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsub_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsub_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsub_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsub_w(transmute(a), transmute(b))) }
+    unsafe { transmute(__lasx_xvsubi_du(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsub_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsub_d(transmute(a), transmute(b))) }
+pub fn lasx_xvsat_b<const IMM3: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM3, 3);
+    unsafe { transmute(__lasx_xvsat_b(transmute(a), IMM3)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsubi_bu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvsubi_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsubi_hu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvsubi_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsubi_wu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvsubi_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsubi_du<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvsubi_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmax_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmax_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmax_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmax_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmax_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmax_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmax_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmax_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmaxi_b<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvmaxi_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmaxi_h<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvmaxi_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmaxi_w<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvmaxi_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmaxi_d<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvmaxi_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmax_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmax_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmax_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmax_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmax_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmax_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmax_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmax_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmaxi_bu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvmaxi_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmaxi_hu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvmaxi_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmaxi_wu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvmaxi_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmaxi_du<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvmaxi_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmin_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmin_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmin_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmin_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmin_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmin_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmin_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmin_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmini_b<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvmini_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmini_h<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvmini_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmini_w<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvmini_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmini_d<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvmini_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmin_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmin_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmin_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmin_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmin_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmin_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmin_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmin_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmini_bu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvmini_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmini_hu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvmini_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmini_wu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvmini_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmini_du<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvmini_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvseq_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvseq_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvseq_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvseq_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvseq_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvseq_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvseq_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvseq_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvseqi_b<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvseqi_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvseqi_h<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvseqi_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvseqi_w<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvseqi_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvseqi_d<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvseqi_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslt_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvslt_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslt_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvslt_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslt_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvslt_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslt_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvslt_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslti_b<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvslti_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslti_h<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvslti_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslti_w<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvslti_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslti_d<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvslti_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslt_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvslt_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslt_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvslt_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslt_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvslt_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslt_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvslt_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslti_bu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvslti_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslti_hu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvslti_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslti_wu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvslti_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslti_du<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvslti_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsle_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsle_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsle_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsle_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsle_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsle_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsle_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsle_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslei_b<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvslei_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslei_h<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvslei_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslei_w<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvslei_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslei_d<const IMM_S5: i32>(a: m256i) -> m256i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lasx_xvslei_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsle_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsle_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsle_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsle_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsle_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsle_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsle_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsle_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslei_bu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvslei_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslei_hu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvslei_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslei_wu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvslei_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvslei_du<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvslei_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsat_b<const IMM3: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lasx_xvsat_b(transmute(a), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsat_h<const IMM4: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lasx_xvsat_h(transmute(a), IMM4)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsat_w<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvsat_w(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsat_d<const IMM6: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lasx_xvsat_d(transmute(a), IMM6)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsat_bu<const IMM3: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lasx_xvsat_bu(transmute(a), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsat_hu<const IMM4: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lasx_xvsat_hu(transmute(a), IMM4)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsat_wu<const IMM5: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lasx_xvsat_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsat_du<const IMM6: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lasx_xvsat_du(transmute(a), IMM6)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadda_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadda_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadda_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadda_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadda_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadda_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvadda_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvadda_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvsadd_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvsadd_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavg_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavg_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavg_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavg_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavg_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavg_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavg_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavg_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavg_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavg_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavg_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavg_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavg_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavg_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavg_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavg_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavgr_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavgr_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavgr_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavgr_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavgr_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavgr_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavgr_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavgr_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavgr_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavgr_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavgr_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavgr_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavgr_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavgr_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvavgr_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvavgr_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvssub_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvssub_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_hu(transmute(a), transmute(b))) }
+pub fn lasx_xvsat_h<const IMM4: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM4, 4);
+    unsafe { transmute(__lasx_xvsat_h(transmute(a), IMM4)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_wu(transmute(a), transmute(b))) }
+pub fn lasx_xvsat_w<const IMM5: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM5, 5);
+    unsafe { transmute(__lasx_xvsat_w(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvabsd_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvabsd_du(transmute(a), transmute(b))) }
+pub fn lasx_xvsat_d<const IMM6: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM6, 6);
+    unsafe { transmute(__lasx_xvsat_d(transmute(a), IMM6)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmul_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmul_b(transmute(a), transmute(b))) }
+pub fn lasx_xvsat_bu<const IMM3: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM3, 3);
+    unsafe { transmute(__lasx_xvsat_bu(transmute(a), IMM3)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmul_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmul_h(transmute(a), transmute(b))) }
+pub fn lasx_xvsat_hu<const IMM4: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM4, 4);
+    unsafe { transmute(__lasx_xvsat_hu(transmute(a), IMM4)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmul_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmul_w(transmute(a), transmute(b))) }
+pub fn lasx_xvsat_wu<const IMM5: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM5, 5);
+    unsafe { transmute(__lasx_xvsat_wu(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmul_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmul_d(transmute(a), transmute(b))) }
+pub fn lasx_xvsat_du<const IMM6: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM6, 6);
+    unsafe { transmute(__lasx_xvsat_du(transmute(a), IMM6)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmadd_b(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmadd_b(transmute(a), transmute(b), transmute(c))) }
+pub fn lasx_xvavg_b(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavg_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmadd_h(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmadd_h(transmute(a), transmute(b), transmute(c))) }
+pub fn lasx_xvavg_h(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavg_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmadd_w(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmadd_w(transmute(a), transmute(b), transmute(c))) }
+pub fn lasx_xvavg_w(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavg_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmadd_d(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmadd_d(transmute(a), transmute(b), transmute(c))) }
+pub fn lasx_xvavg_d(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavg_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmsub_b(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmsub_b(transmute(a), transmute(b), transmute(c))) }
+pub fn lasx_xvavg_bu(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavg_bu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmsub_h(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmsub_h(transmute(a), transmute(b), transmute(c))) }
+pub fn lasx_xvavg_hu(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavg_hu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmsub_w(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmsub_w(transmute(a), transmute(b), transmute(c))) }
+pub fn lasx_xvavg_wu(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavg_wu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmsub_d(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmsub_d(transmute(a), transmute(b), transmute(c))) }
+pub fn lasx_xvavg_du(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavg_du(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvdiv_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvdiv_b(transmute(a), transmute(b))) }
+pub fn lasx_xvavgr_b(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavgr_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvdiv_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvdiv_h(transmute(a), transmute(b))) }
+pub fn lasx_xvavgr_h(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavgr_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvdiv_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvdiv_w(transmute(a), transmute(b))) }
+pub fn lasx_xvavgr_w(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavgr_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvdiv_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvdiv_d(transmute(a), transmute(b))) }
+pub fn lasx_xvavgr_d(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavgr_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvdiv_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvdiv_bu(transmute(a), transmute(b))) }
+pub fn lasx_xvavgr_bu(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavgr_bu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvdiv_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvdiv_hu(transmute(a), transmute(b))) }
+pub fn lasx_xvavgr_hu(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavgr_hu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvdiv_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvdiv_wu(transmute(a), transmute(b))) }
+pub fn lasx_xvavgr_wu(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavgr_wu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvdiv_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvdiv_du(transmute(a), transmute(b))) }
+pub fn lasx_xvavgr_du(a: m256i, b: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvavgr_du(transmute(a), transmute(b))) }
 }
 
 #[inline]
@@ -3313,62 +1617,6 @@ pub fn lasx_xvhsubw_du_wu(a: m256i, b: m256i) -> m256i {
     unsafe { transmute(__lasx_xvhsubw_du_wu(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmod_b(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmod_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmod_h(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmod_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmod_w(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmod_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmod_d(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmod_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmod_bu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmod_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmod_hu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmod_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmod_wu(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmod_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvmod_du(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvmod_du(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(1)]
@@ -3583,297 +1831,129 @@ pub fn lasx_xvshuf_b(a: m256i, b: m256i, c: m256i) -> m256i {
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvshuf_h(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvshuf_h(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvshuf_w(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvshuf_w(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvshuf_d(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvshuf_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvand_v(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvand_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvandi_b<const IMM8: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lasx_xvandi_b(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvor_v(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvor_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvori_b<const IMM8: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lasx_xvori_b(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvnor_v(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvnor_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvnori_b<const IMM8: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lasx_xvnori_b(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvxor_v(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvxor_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvxori_b<const IMM8: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lasx_xvxori_b(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitsel_v(a: m256i, b: m256i, c: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvbitsel_v(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvbitseli_b<const IMM8: u32>(a: m256i, b: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lasx_xvbitseli_b(transmute(a), transmute(b), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvshuf4i_b<const IMM8: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lasx_xvshuf4i_b(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvshuf4i_h<const IMM8: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lasx_xvshuf4i_h(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvshuf4i_w<const IMM8: u32>(a: m256i) -> m256i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lasx_xvshuf4i_w(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvreplgr2vr_b(a: i32) -> m256i {
-    unsafe { transmute(__lasx_xvreplgr2vr_b(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvreplgr2vr_h(a: i32) -> m256i {
-    unsafe { transmute(__lasx_xvreplgr2vr_h(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvreplgr2vr_w(a: i32) -> m256i {
-    unsafe { transmute(__lasx_xvreplgr2vr_w(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvreplgr2vr_d(a: i64) -> m256i {
-    unsafe { transmute(__lasx_xvreplgr2vr_d(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpcnt_b(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpcnt_b(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpcnt_h(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpcnt_h(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpcnt_w(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpcnt_w(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpcnt_d(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvpcnt_d(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvclo_b(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvclo_b(transmute(a))) }
+pub fn lasx_xvshuf_h(a: m256i, b: m256i, c: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvshuf_h(transmute(a), transmute(b), transmute(c))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvclo_h(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvclo_h(transmute(a))) }
+pub fn lasx_xvshuf_w(a: m256i, b: m256i, c: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvshuf_w(transmute(a), transmute(b), transmute(c))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvclo_w(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvclo_w(transmute(a))) }
+pub fn lasx_xvshuf_d(a: m256i, b: m256i, c: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvshuf_d(transmute(a), transmute(b), transmute(c))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvclo_d(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvclo_d(transmute(a))) }
+pub fn lasx_xvandi_b<const IMM8: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lasx_xvandi_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvclz_b(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvclz_b(transmute(a))) }
+pub fn lasx_xvori_b<const IMM8: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lasx_xvori_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvclz_h(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvclz_h(transmute(a))) }
+pub fn lasx_xvnori_b<const IMM8: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lasx_xvnori_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvclz_w(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvclz_w(transmute(a))) }
+pub fn lasx_xvxori_b<const IMM8: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lasx_xvxori_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvclz_d(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvclz_d(transmute(a))) }
+pub fn lasx_xvbitsel_v(a: m256i, b: m256i, c: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvbitsel_v(transmute(a), transmute(b), transmute(c))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfadd_s(a: m256, b: m256) -> m256 {
-    unsafe { transmute(__lasx_xvfadd_s(transmute(a), transmute(b))) }
+pub fn lasx_xvbitseli_b<const IMM8: u32>(a: m256i, b: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lasx_xvbitseli_b(transmute(a), transmute(b), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfadd_d(a: m256d, b: m256d) -> m256d {
-    unsafe { transmute(__lasx_xvfadd_d(transmute(a), transmute(b))) }
+pub fn lasx_xvshuf4i_b<const IMM8: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lasx_xvshuf4i_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfsub_s(a: m256, b: m256) -> m256 {
-    unsafe { transmute(__lasx_xvfsub_s(transmute(a), transmute(b))) }
+pub fn lasx_xvshuf4i_h<const IMM8: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lasx_xvshuf4i_h(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfsub_d(a: m256d, b: m256d) -> m256d {
-    unsafe { transmute(__lasx_xvfsub_d(transmute(a), transmute(b))) }
+pub fn lasx_xvshuf4i_w<const IMM8: u32>(a: m256i) -> m256i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lasx_xvshuf4i_w(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfmul_s(a: m256, b: m256) -> m256 {
-    unsafe { transmute(__lasx_xvfmul_s(transmute(a), transmute(b))) }
+pub fn lasx_xvclo_b(a: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvclo_b(transmute(a))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfmul_d(a: m256d, b: m256d) -> m256d {
-    unsafe { transmute(__lasx_xvfmul_d(transmute(a), transmute(b))) }
+pub fn lasx_xvclo_h(a: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvclo_h(transmute(a))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfdiv_s(a: m256, b: m256) -> m256 {
-    unsafe { transmute(__lasx_xvfdiv_s(transmute(a), transmute(b))) }
+pub fn lasx_xvclo_w(a: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvclo_w(transmute(a))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfdiv_d(a: m256d, b: m256d) -> m256d {
-    unsafe { transmute(__lasx_xvfdiv_d(transmute(a), transmute(b))) }
+pub fn lasx_xvclo_d(a: m256i) -> m256i {
+    unsafe { transmute(__lasx_xvclo_d(transmute(a))) }
 }
 
 #[inline]
@@ -3960,20 +2040,6 @@ pub fn lasx_xvfclass_d(a: m256d) -> m256i {
     unsafe { transmute(__lasx_xvfclass_d(transmute(a))) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfsqrt_s(a: m256) -> m256 {
-    unsafe { transmute(__lasx_xvfsqrt_s(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfsqrt_d(a: m256d) -> m256d {
-    unsafe { transmute(__lasx_xvfsqrt_d(transmute(a))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -4207,41 +2273,6 @@ pub fn lasx_xvpermi_w<const IMM8: u32>(a: m256i, b: m256i) -> m256i {
     unsafe { transmute(__lasx_xvpermi_w(transmute(a), transmute(b), IMM8)) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvandn_v(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvandn_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvneg_b(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvneg_b(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvneg_h(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvneg_h(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvneg_w(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvneg_w(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvneg_d(a: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvneg_d(transmute(a))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -4713,62 +2744,6 @@ pub fn lasx_xvsigncov_d(a: m256i, b: m256i) -> m256i {
     unsafe { transmute(__lasx_xvsigncov_d(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfmadd_s(a: m256, b: m256, c: m256) -> m256 {
-    unsafe { transmute(__lasx_xvfmadd_s(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfmadd_d(a: m256d, b: m256d, c: m256d) -> m256d {
-    unsafe { transmute(__lasx_xvfmadd_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfmsub_s(a: m256, b: m256, c: m256) -> m256 {
-    unsafe { transmute(__lasx_xvfmsub_s(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfmsub_d(a: m256d, b: m256d, c: m256d) -> m256d {
-    unsafe { transmute(__lasx_xvfmsub_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfnmadd_s(a: m256, b: m256, c: m256) -> m256 {
-    unsafe { transmute(__lasx_xvfnmadd_s(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfnmadd_d(a: m256d, b: m256d, c: m256d) -> m256d {
-    unsafe { transmute(__lasx_xvfnmadd_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfnmsub_s(a: m256, b: m256, c: m256) -> m256 {
-    unsafe { transmute(__lasx_xvfnmsub_s(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvfnmsub_d(a: m256d, b: m256d, c: m256d) -> m256d {
-    unsafe { transmute(__lasx_xvfnmsub_d(transmute(a), transmute(b), transmute(c))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -5008,7 +2983,7 @@ pub unsafe fn lasx_xvld<const IMM_S12: i32>(mem_addr: *const i8) -> m256i {
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn lasx_xvst<const IMM_S12: i32>(a: m256i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S12, 12);
-    transmute(__lasx_xvst(transmute(a), mem_addr, IMM_S12))
+    __lasx_xvst(transmute(a), mem_addr, IMM_S12)
 }
 
 #[inline]
@@ -5018,7 +2993,7 @@ pub unsafe fn lasx_xvst<const IMM_S12: i32>(a: m256i, mem_addr: *mut i8) {
 pub unsafe fn lasx_xvstelm_b<const IMM_S8: i32, const IMM4: u32>(a: m256i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S8, 8);
     static_assert_uimm_bits!(IMM4, 4);
-    transmute(__lasx_xvstelm_b(transmute(a), mem_addr, IMM_S8, IMM4))
+    __lasx_xvstelm_b(transmute(a), mem_addr, IMM_S8, IMM4)
 }
 
 #[inline]
@@ -5028,7 +3003,7 @@ pub unsafe fn lasx_xvstelm_b<const IMM_S8: i32, const IMM4: u32>(a: m256i, mem_a
 pub unsafe fn lasx_xvstelm_h<const IMM_S8: i32, const IMM3: u32>(a: m256i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S8, 8);
     static_assert_uimm_bits!(IMM3, 3);
-    transmute(__lasx_xvstelm_h(transmute(a), mem_addr, IMM_S8, IMM3))
+    __lasx_xvstelm_h(transmute(a), mem_addr, IMM_S8, IMM3)
 }
 
 #[inline]
@@ -5038,7 +3013,7 @@ pub unsafe fn lasx_xvstelm_h<const IMM_S8: i32, const IMM3: u32>(a: m256i, mem_a
 pub unsafe fn lasx_xvstelm_w<const IMM_S8: i32, const IMM2: u32>(a: m256i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S8, 8);
     static_assert_uimm_bits!(IMM2, 2);
-    transmute(__lasx_xvstelm_w(transmute(a), mem_addr, IMM_S8, IMM2))
+    __lasx_xvstelm_w(transmute(a), mem_addr, IMM_S8, IMM2)
 }
 
 #[inline]
@@ -5048,7 +3023,7 @@ pub unsafe fn lasx_xvstelm_w<const IMM_S8: i32, const IMM2: u32>(a: m256i, mem_a
 pub unsafe fn lasx_xvstelm_d<const IMM_S8: i32, const IMM1: u32>(a: m256i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S8, 8);
     static_assert_uimm_bits!(IMM1, 1);
-    transmute(__lasx_xvstelm_d(transmute(a), mem_addr, IMM_S8, IMM1))
+    __lasx_xvstelm_d(transmute(a), mem_addr, IMM_S8, IMM1)
 }
 
 #[inline]
@@ -5129,13 +3104,6 @@ pub fn lasx_xvssrln_w_d(a: m256i, b: m256i) -> m256i {
     unsafe { transmute(__lasx_xvssrln_w_d(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvorn_v(a: m256i, b: m256i) -> m256i {
-    unsafe { transmute(__lasx_xvorn_v(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[rustc_legacy_const_generics(0)]
@@ -5156,7 +3124,7 @@ pub unsafe fn lasx_xvldx(mem_addr: *const i8, b: i64) -> m256i {
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn lasx_xvstx(a: m256i, mem_addr: *mut i8, b: i64) {
-    transmute(__lasx_xvstx(transmute(a), mem_addr, transmute(b)))
+    __lasx_xvstx(transmute(a), mem_addr, transmute(b))
 }
 
 #[inline]
@@ -5166,24 +3134,6 @@ pub fn lasx_xvextl_qu_du(a: m256i) -> m256i {
     unsafe { transmute(__lasx_xvextl_qu_du(transmute(a))) }
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvinsgr2vr_w<const IMM3: u32>(a: m256i, b: i32) -> m256i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lasx_xvinsgr2vr_w(transmute(a), transmute(b), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvinsgr2vr_d<const IMM2: u32>(a: m256i, b: i64) -> m256i {
-    static_assert_uimm_bits!(IMM2, 2);
-    unsafe { transmute(__lasx_xvinsgr2vr_d(transmute(a), transmute(b), IMM2)) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -5364,42 +3314,6 @@ pub unsafe fn lasx_xvldrepl_d<const IMM_S9: i32>(mem_addr: *const i8) -> m256i {
     transmute(__lasx_xvldrepl_d(mem_addr, IMM_S9))
 }
 
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickve2gr_w<const IMM3: u32>(a: m256i) -> i32 {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lasx_xvpickve2gr_w(transmute(a), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickve2gr_wu<const IMM3: u32>(a: m256i) -> u32 {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lasx_xvpickve2gr_wu(transmute(a), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickve2gr_d<const IMM2: u32>(a: m256i) -> i64 {
-    static_assert_uimm_bits!(IMM2, 2);
-    unsafe { transmute(__lasx_xvpickve2gr_d(transmute(a), IMM2)) }
-}
-
-#[inline]
-#[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvpickve2gr_du<const IMM2: u32>(a: m256i) -> u64 {
-    static_assert_uimm_bits!(IMM2, 2);
-    unsafe { transmute(__lasx_xvpickve2gr_du(transmute(a), IMM2)) }
-}
-
 #[inline]
 #[target_feature(enable = "lasx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -7029,36 +4943,126 @@ pub fn lasx_xvpickve_w_f<const IMM3: u32>(a: m256) -> m256 {
 
 #[inline]
 #[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvrepli_b<const IMM_S10: i32>() -> m256i {
-    static_assert_simm_bits!(IMM_S10, 10);
-    unsafe { transmute(__lasx_xvrepli_b(IMM_S10)) }
+pub fn lasx_cast_128_s(a: m128) -> m256 {
+    unsafe { transmute(__lasx_cast_128_s(transmute(a))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvrepli_d<const IMM_S10: i32>() -> m256i {
-    static_assert_simm_bits!(IMM_S10, 10);
-    unsafe { transmute(__lasx_xvrepli_d(IMM_S10)) }
+pub fn lasx_cast_128_d(a: m128d) -> m256d {
+    unsafe { transmute(__lasx_cast_128_d(transmute(a))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvrepli_h<const IMM_S10: i32>() -> m256i {
-    static_assert_simm_bits!(IMM_S10, 10);
-    unsafe { transmute(__lasx_xvrepli_h(IMM_S10)) }
+pub fn lasx_cast_128(a: m128i) -> m256i {
+    unsafe { transmute(__lasx_cast_128(transmute(a))) }
 }
 
 #[inline]
 #[target_feature(enable = "lasx")]
-#[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lasx_xvrepli_w<const IMM_S10: i32>() -> m256i {
-    static_assert_simm_bits!(IMM_S10, 10);
-    unsafe { transmute(__lasx_xvrepli_w(IMM_S10)) }
+pub fn lasx_concat_128_s(a: m128, b: m128) -> m256 {
+    unsafe { transmute(__lasx_concat_128_s(transmute(a), transmute(b))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_concat_128_d(a: m128d, b: m128d) -> m256d {
+    unsafe { transmute(__lasx_concat_128_d(transmute(a), transmute(b))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_concat_128(a: m128i, b: m128i) -> m256i {
+    unsafe { transmute(__lasx_concat_128(transmute(a), transmute(b))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_extract_128_lo_s(a: m256) -> m128 {
+    unsafe { transmute(__lasx_extract_128_lo_s(transmute(a))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_extract_128_hi_s(a: m256) -> m128 {
+    unsafe { transmute(__lasx_extract_128_hi_s(transmute(a))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_extract_128_lo_d(a: m256d) -> m128d {
+    unsafe { transmute(__lasx_extract_128_lo_d(transmute(a))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_extract_128_hi_d(a: m256d) -> m128d {
+    unsafe { transmute(__lasx_extract_128_hi_d(transmute(a))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_extract_128_lo(a: m256i) -> m128i {
+    unsafe { transmute(__lasx_extract_128_lo(transmute(a))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_extract_128_hi(a: m256i) -> m128i {
+    unsafe { transmute(__lasx_extract_128_hi(transmute(a))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_insert_128_lo_s(a: m256, b: m128) -> m256 {
+    unsafe { transmute(__lasx_insert_128_lo_s(transmute(a), transmute(b))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_insert_128_hi_s(a: m256, b: m128) -> m256 {
+    unsafe { transmute(__lasx_insert_128_hi_s(transmute(a), transmute(b))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_insert_128_lo_d(a: m256d, b: m128d) -> m256d {
+    unsafe { transmute(__lasx_insert_128_lo_d(transmute(a), transmute(b))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_insert_128_hi_d(a: m256d, b: m128d) -> m256d {
+    unsafe { transmute(__lasx_insert_128_hi_d(transmute(a), transmute(b))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_insert_128_lo(a: m256i, b: m128i) -> m256i {
+    unsafe { transmute(__lasx_insert_128_lo(transmute(a), transmute(b))) }
+}
+
+#[inline]
+#[target_feature(enable = "lasx")]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub fn lasx_insert_128_hi(a: m256i, b: m128i) -> m256i {
+    unsafe { transmute(__lasx_insert_128_hi(transmute(a), transmute(b))) }
 }
diff --git a/crates/core_arch/src/loongarch64/lasx/mod.rs b/crates/core_arch/src/loongarch64/lasx/mod.rs
index c3a244e740..cc449e9492 100644
--- a/crates/core_arch/src/loongarch64/lasx/mod.rs
+++ b/crates/core_arch/src/loongarch64/lasx/mod.rs
@@ -16,6 +16,13 @@ mod generated;
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub use self::generated::*;
 
+#[rustfmt::skip]
+mod portable;
+
+#[rustfmt::skip]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub use self::portable::*;
+
 #[rustfmt::skip]
 #[cfg(test)]
 mod tests;
diff --git a/crates/core_arch/src/loongarch64/lasx/portable.rs b/crates/core_arch/src/loongarch64/lasx/portable.rs
new file mode 100644
index 0000000000..8953558210
--- /dev/null
+++ b/crates/core_arch/src/loongarch64/lasx/portable.rs
@@ -0,0 +1,241 @@
+//! LoongArch64 LASX intrinsics - intrinsics::simd implementation
+
+use super::super::{simd as ls, simd::*, *};
+use crate::core_arch::simd::{self as cs, *};
+use crate::intrinsics::simd as is;
+use crate::mem::transmute;
+
+impl_vv!("lasx", lasx_xvpcnt_b, is::simd_ctpop, m256i, i8x32);
+impl_vv!("lasx", lasx_xvpcnt_h, is::simd_ctpop, m256i, i16x16);
+impl_vv!("lasx", lasx_xvpcnt_w, is::simd_ctpop, m256i, i32x8);
+impl_vv!("lasx", lasx_xvpcnt_d, is::simd_ctpop, m256i, i64x4);
+impl_vv!("lasx", lasx_xvclz_b, is::simd_ctlz, m256i, i8x32);
+impl_vv!("lasx", lasx_xvclz_h, is::simd_ctlz, m256i, i16x16);
+impl_vv!("lasx", lasx_xvclz_w, is::simd_ctlz, m256i, i32x8);
+impl_vv!("lasx", lasx_xvclz_d, is::simd_ctlz, m256i, i64x4);
+impl_vv!("lasx", lasx_xvneg_b, is::simd_neg, m256i, i8x32);
+impl_vv!("lasx", lasx_xvneg_h, is::simd_neg, m256i, i16x16);
+impl_vv!("lasx", lasx_xvneg_w, is::simd_neg, m256i, i32x8);
+impl_vv!("lasx", lasx_xvneg_d, is::simd_neg, m256i, i64x4);
+impl_vv!("lasx", lasx_xvfsqrt_s, is::simd_fsqrt, m256, f32x8);
+impl_vv!("lasx", lasx_xvfsqrt_d, is::simd_fsqrt, m256d, f64x4);
+
+impl_gv!("lasx", lasx_xvreplgr2vr_b, ls::simd_splat, m256i, i8x32, i32);
+impl_gv!("lasx", lasx_xvreplgr2vr_h, ls::simd_splat, m256i, i16x16, i32);
+impl_gv!("lasx", lasx_xvreplgr2vr_w, ls::simd_splat, m256i, i32x8, i32);
+impl_gv!("lasx", lasx_xvreplgr2vr_d, ls::simd_splat, m256i, i64x4, i64);
+
+impl_sv!("lasx", lasx_xvrepli_b, ls::simd_splat, m256i, i8x32, 10);
+impl_sv!("lasx", lasx_xvrepli_h, ls::simd_splat, m256i, i16x16, 10);
+impl_sv!("lasx", lasx_xvrepli_w, ls::simd_splat, m256i, i32x8, 10);
+impl_sv!("lasx", lasx_xvrepli_d, ls::simd_splat, m256i, i64x4, 10);
+
+impl_vvv!("lasx", lasx_xvadd_b, is::simd_add, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvadd_h, is::simd_add, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvadd_w, is::simd_add, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvadd_d, is::simd_add, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvsub_b, is::simd_sub, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvsub_h, is::simd_sub, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvsub_w, is::simd_sub, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvsub_d, is::simd_sub, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvmax_b, cs::simd_imax, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvmax_h, cs::simd_imax, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvmax_w, cs::simd_imax, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvmax_d, cs::simd_imax, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvmax_bu, cs::simd_imax, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvmax_hu, cs::simd_imax, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvmax_wu, cs::simd_imax, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvmax_du, cs::simd_imax, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvmin_b, cs::simd_imin, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvmin_h, cs::simd_imin, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvmin_w, cs::simd_imin, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvmin_d, cs::simd_imin, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvmin_bu, cs::simd_imin, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvmin_hu, cs::simd_imin, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvmin_wu, cs::simd_imin, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvmin_du, cs::simd_imin, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvseq_b, is::simd_eq, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvseq_h, is::simd_eq, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvseq_w, is::simd_eq, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvseq_d, is::simd_eq, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvslt_b, is::simd_lt, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvslt_h, is::simd_lt, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvslt_w, is::simd_lt, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvslt_d, is::simd_lt, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvslt_bu, is::simd_lt, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvslt_hu, is::simd_lt, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvslt_wu, is::simd_lt, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvslt_du, is::simd_lt, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvsle_b, is::simd_le, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvsle_h, is::simd_le, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvsle_w, is::simd_le, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvsle_d, is::simd_le, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvsle_bu, is::simd_le, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvsle_hu, is::simd_le, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvsle_wu, is::simd_le, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvsle_du, is::simd_le, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvmul_b, is::simd_mul, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvmul_h, is::simd_mul, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvmul_w, is::simd_mul, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvmul_d, is::simd_mul, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvdiv_b, is::simd_div, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvdiv_h, is::simd_div, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvdiv_w, is::simd_div, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvdiv_d, is::simd_div, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvdiv_bu, is::simd_div, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvdiv_hu, is::simd_div, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvdiv_wu, is::simd_div, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvdiv_du, is::simd_div, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvmod_b, is::simd_rem, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvmod_h, is::simd_rem, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvmod_w, is::simd_rem, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvmod_d, is::simd_rem, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvmod_bu, is::simd_rem, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvmod_hu, is::simd_rem, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvmod_wu, is::simd_rem, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvmod_du, is::simd_rem, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvand_v, is::simd_and, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvandn_v, ls::simd_andn, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvor_v, is::simd_or, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvorn_v, ls::simd_orn, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvnor_v, ls::simd_nor, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvxor_v, is::simd_xor, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvfadd_s, is::simd_add, m256, f32x8);
+impl_vvv!("lasx", lasx_xvfadd_d, is::simd_add, m256d, f64x4);
+impl_vvv!("lasx", lasx_xvfsub_s, is::simd_sub, m256, f32x8);
+impl_vvv!("lasx", lasx_xvfsub_d, is::simd_sub, m256d, f64x4);
+impl_vvv!("lasx", lasx_xvfmul_s, is::simd_mul, m256, f32x8);
+impl_vvv!("lasx", lasx_xvfmul_d, is::simd_mul, m256d, f64x4);
+impl_vvv!("lasx", lasx_xvfdiv_s, is::simd_div, m256, f32x8);
+impl_vvv!("lasx", lasx_xvfdiv_d, is::simd_div, m256d, f64x4);
+impl_vvv!("lasx", lasx_xvsll_b, ls::simd_shl, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvsll_h, ls::simd_shl, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvsll_w, ls::simd_shl, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvsll_d, ls::simd_shl, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvsra_b, ls::simd_shr, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvsra_h, ls::simd_shr, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvsra_w, ls::simd_shr, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvsra_d, ls::simd_shr, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvsrl_b, ls::simd_shr, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvsrl_h, ls::simd_shr, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvsrl_w, ls::simd_shr, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvsrl_d, ls::simd_shr, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvbitclr_b, ls::simd_bitclr, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvbitclr_h, ls::simd_bitclr, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvbitclr_w, ls::simd_bitclr, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvbitclr_d, ls::simd_bitclr, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvbitset_b, ls::simd_bitset, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvbitset_h, ls::simd_bitset, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvbitset_w, ls::simd_bitset, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvbitset_d, ls::simd_bitset, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvbitrev_b, ls::simd_bitrev, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvbitrev_h, ls::simd_bitrev, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvbitrev_w, ls::simd_bitrev, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvbitrev_d, ls::simd_bitrev, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvsadd_b, is::simd_saturating_add, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvsadd_h, is::simd_saturating_add, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvsadd_w, is::simd_saturating_add, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvsadd_d, is::simd_saturating_add, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvsadd_bu, is::simd_saturating_add, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvsadd_hu, is::simd_saturating_add, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvsadd_wu, is::simd_saturating_add, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvsadd_du, is::simd_saturating_add, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvssub_b, is::simd_saturating_sub, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvssub_h, is::simd_saturating_sub, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvssub_w, is::simd_saturating_sub, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvssub_d, is::simd_saturating_sub, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvssub_bu, is::simd_saturating_sub, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvssub_hu, is::simd_saturating_sub, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvssub_wu, is::simd_saturating_sub, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvssub_du, is::simd_saturating_sub, m256i, u64x4);
+impl_vvv!("lasx", lasx_xvadda_b, ls::simd_adda, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvadda_h, ls::simd_adda, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvadda_w, ls::simd_adda, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvadda_d, ls::simd_adda, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvabsd_b, ls::simd_absd, m256i, i8x32);
+impl_vvv!("lasx", lasx_xvabsd_h, ls::simd_absd, m256i, i16x16);
+impl_vvv!("lasx", lasx_xvabsd_w, ls::simd_absd, m256i, i32x8);
+impl_vvv!("lasx", lasx_xvabsd_d, ls::simd_absd, m256i, i64x4);
+impl_vvv!("lasx", lasx_xvabsd_bu, ls::simd_absd, m256i, u8x32);
+impl_vvv!("lasx", lasx_xvabsd_hu, ls::simd_absd, m256i, u16x16);
+impl_vvv!("lasx", lasx_xvabsd_wu, ls::simd_absd, m256i, u32x8);
+impl_vvv!("lasx", lasx_xvabsd_du, ls::simd_absd, m256i, u64x4);
+
+impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32);
+impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16);
+impl_vuv!("lasx", lasx_xvslli_w, is::simd_shl, m256i, i32x8);
+impl_vuv!("lasx", lasx_xvslli_d, is::simd_shl, m256i, i64x4);
+impl_vuv!("lasx", lasx_xvsrai_b, is::simd_shr, m256i, i8x32);
+impl_vuv!("lasx", lasx_xvsrai_h, is::simd_shr, m256i, i16x16);
+impl_vuv!("lasx", lasx_xvsrai_w, is::simd_shr, m256i, i32x8);
+impl_vuv!("lasx", lasx_xvsrai_d, is::simd_shr, m256i, i64x4);
+impl_vuv!("lasx", lasx_xvsrli_b, is::simd_shr, m256i, u8x32);
+impl_vuv!("lasx", lasx_xvsrli_h, is::simd_shr, m256i, u16x16);
+impl_vuv!("lasx", lasx_xvsrli_w, is::simd_shr, m256i, u32x8);
+impl_vuv!("lasx", lasx_xvsrli_d, is::simd_shr, m256i, u64x4);
+impl_vuv!("lasx", lasx_xvaddi_bu, is::simd_add, m256i, u8x32, 5);
+impl_vuv!("lasx", lasx_xvaddi_hu, is::simd_add, m256i, u16x16, 5);
+impl_vuv!("lasx", lasx_xvaddi_wu, is::simd_add, m256i, u32x8, 5);
+impl_vuv!("lasx", lasx_xvaddi_du, is::simd_add, m256i, u64x4, 5);
+impl_vuv!("lasx", lasx_xvslti_bu, is::simd_lt, m256i, u8x32, 5);
+impl_vuv!("lasx", lasx_xvslti_hu, is::simd_lt, m256i, u16x16, 5);
+impl_vuv!("lasx", lasx_xvslti_wu, is::simd_lt, m256i, u32x8, 5);
+impl_vuv!("lasx", lasx_xvslti_du, is::simd_lt, m256i, u64x4, 5);
+impl_vuv!("lasx", lasx_xvslei_bu, is::simd_le, m256i, u8x32, 5);
+impl_vuv!("lasx", lasx_xvslei_hu, is::simd_le, m256i, u16x16, 5);
+impl_vuv!("lasx", lasx_xvslei_wu, is::simd_le, m256i, u32x8, 5);
+impl_vuv!("lasx", lasx_xvslei_du, is::simd_le, m256i, u64x4, 5);
+impl_vuv!("lasx", lasx_xvmaxi_bu, cs::simd_imax, m256i, u8x32, 5);
+impl_vuv!("lasx", lasx_xvmaxi_hu, cs::simd_imax, m256i, u16x16, 5);
+impl_vuv!("lasx", lasx_xvmaxi_wu, cs::simd_imax, m256i, u32x8, 5);
+impl_vuv!("lasx", lasx_xvmaxi_du, cs::simd_imax, m256i, u64x4, 5);
+impl_vuv!("lasx", lasx_xvmini_bu, cs::simd_imin, m256i, u8x32, 5);
+impl_vuv!("lasx", lasx_xvmini_hu, cs::simd_imin, m256i, u16x16, 5);
+impl_vuv!("lasx", lasx_xvmini_wu, cs::simd_imin, m256i, u32x8, 5);
+impl_vuv!("lasx", lasx_xvmini_du, cs::simd_imin, m256i, u64x4, 5);
+
+impl_vug!("lasx", lasx_xvpickve2gr_w, is::simd_extract, m256i, i32x8, i32, 3);
+impl_vug!("lasx", lasx_xvpickve2gr_d, is::simd_extract, m256i, i64x4, i64, 2);
+impl_vug!("lasx", lasx_xvpickve2gr_wu, is::simd_extract, m256i, u32x8, u32, 3);
+impl_vug!("lasx", lasx_xvpickve2gr_du, is::simd_extract, m256i, u64x4, u64, 2);
+
+impl_vsv!("lasx", lasx_xvseqi_b, is::simd_eq, m256i, i8x32, 5);
+impl_vsv!("lasx", lasx_xvseqi_h, is::simd_eq, m256i, i16x16, 5);
+impl_vsv!("lasx", lasx_xvseqi_w, is::simd_eq, m256i, i32x8, 5);
+impl_vsv!("lasx", lasx_xvseqi_d, is::simd_eq, m256i, i64x4, 5);
+impl_vsv!("lasx", lasx_xvslti_b, is::simd_lt, m256i, i8x32, 5);
+impl_vsv!("lasx", lasx_xvslti_h, is::simd_lt, m256i, i16x16, 5);
+impl_vsv!("lasx", lasx_xvslti_w, is::simd_lt, m256i, i32x8, 5);
+impl_vsv!("lasx", lasx_xvslti_d, is::simd_lt, m256i, i64x4, 5);
+impl_vsv!("lasx", lasx_xvslei_b, is::simd_le, m256i, i8x32, 5);
+impl_vsv!("lasx", lasx_xvslei_h, is::simd_le, m256i, i16x16, 5);
+impl_vsv!("lasx", lasx_xvslei_w, is::simd_le, m256i, i32x8, 5);
+impl_vsv!("lasx", lasx_xvslei_d, is::simd_le, m256i, i64x4, 5);
+impl_vsv!("lasx", lasx_xvmaxi_b, cs::simd_imax, m256i, i8x32, 5);
+impl_vsv!("lasx", lasx_xvmaxi_h, cs::simd_imax, m256i, i16x16, 5);
+impl_vsv!("lasx", lasx_xvmaxi_w, cs::simd_imax, m256i, i32x8, 5);
+impl_vsv!("lasx", lasx_xvmaxi_d, cs::simd_imax, m256i, i64x4, 5);
+impl_vsv!("lasx", lasx_xvmini_b, cs::simd_imin, m256i, i8x32, 5);
+impl_vsv!("lasx", lasx_xvmini_h, cs::simd_imin, m256i, i16x16, 5);
+impl_vsv!("lasx", lasx_xvmini_w, cs::simd_imin, m256i, i32x8, 5);
+impl_vsv!("lasx", lasx_xvmini_d, cs::simd_imin, m256i, i64x4, 5);
+
+impl_vvvv!("lasx", lasx_xvmadd_b, ls::simd_madd, m256i, i8x32);
+impl_vvvv!("lasx", lasx_xvmadd_h, ls::simd_madd, m256i, i16x16);
+impl_vvvv!("lasx", lasx_xvmadd_w, ls::simd_madd, m256i, i32x8);
+impl_vvvv!("lasx", lasx_xvmadd_d, ls::simd_madd, m256i, i64x4);
+impl_vvvv!("lasx", lasx_xvmsub_b, ls::simd_msub, m256i, i8x32);
+impl_vvvv!("lasx", lasx_xvmsub_h, ls::simd_msub, m256i, i16x16);
+impl_vvvv!("lasx", lasx_xvmsub_w, ls::simd_msub, m256i, i32x8);
+impl_vvvv!("lasx", lasx_xvmsub_d, ls::simd_msub, m256i, i64x4);
+impl_vvvv!("lasx", lasx_xvfmadd_s, is::simd_fma, m256, f32x8);
+impl_vvvv!("lasx", lasx_xvfmadd_d, is::simd_fma, m256d, f64x4);
+impl_vvvv!("lasx", lasx_xvfmsub_s, ls::simd_fmsub, m256, f32x8);
+impl_vvvv!("lasx", lasx_xvfmsub_d, ls::simd_fmsub, m256d, f64x4);
+impl_vvvv!("lasx", lasx_xvfnmadd_s, ls::simd_fnmadd, m256, f32x8);
+impl_vvvv!("lasx", lasx_xvfnmadd_d, ls::simd_fnmadd, m256d, f64x4);
+impl_vvvv!("lasx", lasx_xvfnmsub_s, ls::simd_fnmsub, m256, f32x8);
+impl_vvvv!("lasx", lasx_xvfnmsub_d, ls::simd_fnmsub, m256d, f64x4);
+
+impl_vugv!("lasx", lasx_xvinsgr2vr_w, is::simd_insert, m256i, i32x8, i32, 3);
+impl_vugv!("lasx", lasx_xvinsgr2vr_d, is::simd_insert, m256i, i64x4, i64, 2);
diff --git a/crates/core_arch/src/loongarch64/lasx/tests.rs b/crates/core_arch/src/loongarch64/lasx/tests.rs
index 54771d7b51..bd22d25771 100644
--- a/crates/core_arch/src/loongarch64/lasx/tests.rs
+++ b/crates/core_arch/src/loongarch64/lasx/tests.rs
@@ -5,6 +5,7 @@ use crate::{
     core_arch::{loongarch64::*, simd::*},
     mem::transmute,
 };
+use std::hint::black_box;
 use stdarch_test::simd_test;
 
 #[simd_test(enable = "lasx")]
@@ -24,7 +25,13 @@ unsafe fn test_lasx_xvsll_b() {
         2882304449461665880,
     );
 
-    assert_eq!(r, transmute(lasx_xvsll_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsll_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -44,7 +51,13 @@ unsafe fn test_lasx_xvsll_h() {
         7061899947028838480,
     );
 
-    assert_eq!(r, transmute(lasx_xvsll_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsll_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -76,7 +89,13 @@ unsafe fn test_lasx_xvsll_w() {
         3598939055443673088,
     );
 
-    assert_eq!(r, transmute(lasx_xvsll_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsll_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -100,7 +119,13 @@ unsafe fn test_lasx_xvsll_d() {
         -289787284616642560,
     );
 
-    assert_eq!(r, transmute(lasx_xvsll_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsll_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -116,7 +141,7 @@ unsafe fn test_lasx_xvslli_b() {
         5775955139904200724,
     );
 
-    assert_eq!(r, transmute(lasx_xvslli_b::<2>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslli_b::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -132,7 +157,7 @@ unsafe fn test_lasx_xvslli_h() {
         -9223160928474759168,
     );
 
-    assert_eq!(r, transmute(lasx_xvslli_h::<14>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslli_h::<14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -154,7 +179,7 @@ unsafe fn test_lasx_xvslli_w() {
         -1585267064908546048,
     );
 
-    assert_eq!(r, transmute(lasx_xvslli_w::<24>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslli_w::<24>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -172,7 +197,7 @@ unsafe fn test_lasx_xvslli_d() {
         -2305843009213693952,
     );
 
-    assert_eq!(r, transmute(lasx_xvslli_d::<61>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslli_d::<61>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -192,7 +217,13 @@ unsafe fn test_lasx_xvsra_b() {
         -505532365968836077,
     );
 
-    assert_eq!(r, transmute(lasx_xvsra_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsra_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -212,7 +243,13 @@ unsafe fn test_lasx_xvsra_h() {
         8725659825471543,
     );
 
-    assert_eq!(r, transmute(lasx_xvsra_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsra_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -244,7 +281,13 @@ unsafe fn test_lasx_xvsra_w() {
         -36696200575105,
     );
 
-    assert_eq!(r, transmute(lasx_xvsra_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsra_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -263,7 +306,13 @@ unsafe fn test_lasx_xvsra_d() {
     );
     let r = i64x4::new(1, -129761412875, -1, 8464978396185);
 
-    assert_eq!(r, transmute(lasx_xvsra_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsra_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -279,7 +328,7 @@ unsafe fn test_lasx_xvsrai_b() {
         -218421283493247239,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrai_b::<4>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrai_b::<4>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -290,7 +339,7 @@ unsafe fn test_lasx_xvsrai_h() {
     );
     let r = i64x4::new(-281474976710658, 8589803520, -4295098367, 562941363552256);
 
-    assert_eq!(r, transmute(lasx_xvsrai_h::<14>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrai_h::<14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -307,7 +356,7 @@ unsafe fn test_lasx_xvsrai_w() {
     );
     let r = i64x4::new(68719476730, -16, 17179869169, -25769803773);
 
-    assert_eq!(r, transmute(lasx_xvsrai_w::<27>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrai_w::<27>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -320,7 +369,7 @@ unsafe fn test_lasx_xvsrai_d() {
     );
     let r = i64x4::new(-2, 2, -6, -8);
 
-    assert_eq!(r, transmute(lasx_xvsrai_d::<60>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrai_d::<60>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -340,7 +389,13 @@ unsafe fn test_lasx_xvsrar_b() {
         302862676776648704,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrar_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrar_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -360,7 +415,13 @@ unsafe fn test_lasx_xvsrar_h() {
         -2251658079567874,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrar_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrar_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -392,7 +453,13 @@ unsafe fn test_lasx_xvsrar_w() {
         -1668156707832192,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrar_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrar_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -411,7 +478,13 @@ unsafe fn test_lasx_xvsrar_d() {
     );
     let r = i64x4::new(19951225, 505, -1907248091287715676, 362);
 
-    assert_eq!(r, transmute(lasx_xvsrar_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrar_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -427,7 +500,7 @@ unsafe fn test_lasx_xvsrari_b() {
         790117907428411639,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrari_b::<3>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrari_b::<3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -443,7 +516,7 @@ unsafe fn test_lasx_xvsrari_h() {
         -24488623625338826,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrari_h::<8>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrari_h::<8>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -460,7 +533,7 @@ unsafe fn test_lasx_xvsrari_w() {
     );
     let r = i64x4::new(-1, 4294967294, -2, -1);
 
-    assert_eq!(r, transmute(lasx_xvsrari_w::<29>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrari_w::<29>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -473,7 +546,7 @@ unsafe fn test_lasx_xvsrari_d() {
     );
     let r = i64x4::new(-3228, 4782, -4328, -2120);
 
-    assert_eq!(r, transmute(lasx_xvsrari_d::<50>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrari_d::<50>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -493,7 +566,13 @@ unsafe fn test_lasx_xvsrl_b() {
         3996105849293766692,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrl_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrl_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -513,7 +592,13 @@ unsafe fn test_lasx_xvsrl_h() {
         12385032119328029,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrl_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrl_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -540,7 +625,13 @@ unsafe fn test_lasx_xvsrl_w() {
     );
     let r = i64x4::new(3152506611213, 910538585043, 150899, 25769803779);
 
-    assert_eq!(r, transmute(lasx_xvsrl_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrl_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -559,7 +650,13 @@ unsafe fn test_lasx_xvsrl_d() {
     );
     let r = i64x4::new(22, 8215, 774027732, 338970735904462);
 
-    assert_eq!(r, transmute(lasx_xvsrl_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrl_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -575,7 +672,7 @@ unsafe fn test_lasx_xvsrli_b() {
         3694315145030590091,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrli_b::<0>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrli_b::<0>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -586,7 +683,7 @@ unsafe fn test_lasx_xvsrli_h() {
     );
     let r = i64x4::new(7036883009470493, 73014771737, 38655688722, 3096241924866048);
 
-    assert_eq!(r, transmute(lasx_xvsrli_h::<11>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrli_h::<11>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -608,7 +705,7 @@ unsafe fn test_lasx_xvsrli_w() {
         11669426172998,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrli_w::<17>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrli_w::<17>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -621,7 +718,7 @@ unsafe fn test_lasx_xvsrli_d() {
     );
     let r = i64x4::new(16617962184, 1898365962, 5054169972, 27969530398);
 
-    assert_eq!(r, transmute(lasx_xvsrli_d::<29>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrli_d::<29>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -641,7 +738,13 @@ unsafe fn test_lasx_xvsrlr_b() {
         150872911094481483,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrlr_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrlr_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -661,7 +764,13 @@ unsafe fn test_lasx_xvsrlr_h() {
         565118914199555,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrlr_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrlr_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -693,7 +802,13 @@ unsafe fn test_lasx_xvsrlr_w() {
         7085854838990307330,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrlr_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrlr_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -712,7 +827,13 @@ unsafe fn test_lasx_xvsrlr_d() {
     );
     let r = i64x4::new(1801, 481878, 1923591164085, 6280495597);
 
-    assert_eq!(r, transmute(lasx_xvsrlr_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrlr_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -728,7 +849,7 @@ unsafe fn test_lasx_xvsrlri_b() {
         2893318883870770962,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrlri_b::<2>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrlri_b::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -744,7 +865,7 @@ unsafe fn test_lasx_xvsrlri_h() {
         32932658182619167,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrlri_h::<9>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrlri_h::<9>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -761,7 +882,7 @@ unsafe fn test_lasx_xvsrlri_w() {
     );
     let r = i64x4::new(8589934592, 8589934594, 4294967296, 8589934593);
 
-    assert_eq!(r, transmute(lasx_xvsrlri_w::<31>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrlri_w::<31>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -779,7 +900,7 @@ unsafe fn test_lasx_xvsrlri_d() {
         197693428197319479,
     );
 
-    assert_eq!(r, transmute(lasx_xvsrlri_d::<6>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsrlri_d::<6>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -799,7 +920,13 @@ unsafe fn test_lasx_xvbitclr_b() {
         2031321085346416701,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitclr_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitclr_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -819,7 +946,13 @@ unsafe fn test_lasx_xvbitclr_h() {
         -8417099780160452424,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitclr_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitclr_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -839,7 +972,13 @@ unsafe fn test_lasx_xvbitclr_w() {
         436221668492520778,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitclr_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitclr_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -863,7 +1002,13 @@ unsafe fn test_lasx_xvbitclr_d() {
         3668272799860684125,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitclr_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitclr_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -879,7 +1024,7 @@ unsafe fn test_lasx_xvbitclri_b() {
         3065582154070828979,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitclri_b::<6>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvbitclri_b::<6>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -895,7 +1040,7 @@ unsafe fn test_lasx_xvbitclri_h() {
         7727381349517352021,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitclri_h::<1>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvbitclri_h::<1>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -911,7 +1056,10 @@ unsafe fn test_lasx_xvbitclri_w() {
         -5611395396043530126,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitclri_w::<30>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitclri_w::<30>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -929,7 +1077,10 @@ unsafe fn test_lasx_xvbitclri_d() {
         -63139220754952887,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitclri_d::<46>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitclri_d::<46>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -949,7 +1100,13 @@ unsafe fn test_lasx_xvbitset_b() {
         -7702318388235109826,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitset_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitset_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -969,7 +1126,13 @@ unsafe fn test_lasx_xvbitset_h() {
         1674099372676878223,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitset_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitset_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -989,7 +1152,13 @@ unsafe fn test_lasx_xvbitset_w() {
         -4953617511697867204,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitset_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitset_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1013,7 +1182,13 @@ unsafe fn test_lasx_xvbitset_d() {
         8641001130845153939,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitset_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitset_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1029,7 +1204,7 @@ unsafe fn test_lasx_xvbitseti_b() {
         -3539275497407339017,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitseti_b::<7>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvbitseti_b::<7>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1045,7 +1220,10 @@ unsafe fn test_lasx_xvbitseti_h() {
         -1050847327214912781,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitseti_h::<13>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitseti_h::<13>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1061,7 +1239,10 @@ unsafe fn test_lasx_xvbitseti_w() {
         -1933536090599238411,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitseti_w::<29>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitseti_w::<29>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1079,7 +1260,10 @@ unsafe fn test_lasx_xvbitseti_d() {
         7640056937583456779,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitseti_d::<17>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitseti_d::<17>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1099,7 +1283,13 @@ unsafe fn test_lasx_xvbitrev_b() {
         8353346322052154032,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitrev_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitrev_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1119,7 +1309,13 @@ unsafe fn test_lasx_xvbitrev_h() {
         1161012008856358603,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitrev_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitrev_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1139,7 +1335,13 @@ unsafe fn test_lasx_xvbitrev_w() {
         2239715596821320928,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitrev_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitrev_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1163,7 +1365,13 @@ unsafe fn test_lasx_xvbitrev_d() {
         -7824300689033275105,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitrev_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitrev_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1179,7 +1387,7 @@ unsafe fn test_lasx_xvbitrevi_b() {
         -468434338938596352,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitrevi_b::<5>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvbitrevi_b::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1195,7 +1403,10 @@ unsafe fn test_lasx_xvbitrevi_h() {
         4180481285432101679,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitrevi_h::<11>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitrevi_h::<11>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1211,7 +1422,10 @@ unsafe fn test_lasx_xvbitrevi_w() {
         -7201777846932221130,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitrevi_w::<30>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitrevi_w::<30>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1229,7 +1443,10 @@ unsafe fn test_lasx_xvbitrevi_d() {
         -1340750007927221124,
     );
 
-    assert_eq!(r, transmute(lasx_xvbitrevi_d::<25>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvbitrevi_d::<25>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1249,7 +1466,13 @@ unsafe fn test_lasx_xvadd_b() {
         39834845715162790,
     );
 
-    assert_eq!(r, transmute(lasx_xvadd_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvadd_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1269,7 +1492,13 @@ unsafe fn test_lasx_xvadd_h() {
         3485514723534807729,
     );
 
-    assert_eq!(r, transmute(lasx_xvadd_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvadd_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1301,7 +1530,13 @@ unsafe fn test_lasx_xvadd_w() {
         449408456544649458,
     );
 
-    assert_eq!(r, transmute(lasx_xvadd_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvadd_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1325,7 +1560,13 @@ unsafe fn test_lasx_xvadd_d() {
         -3333036084724254699,
     );
 
-    assert_eq!(r, transmute(lasx_xvadd_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1341,7 +1582,7 @@ unsafe fn test_lasx_xvaddi_bu() {
         1765491911008659808,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddi_bu::<3>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvaddi_bu::<3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1357,7 +1598,7 @@ unsafe fn test_lasx_xvaddi_hu() {
         4257614802810591100,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddi_hu::<1>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvaddi_hu::<1>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1379,7 +1620,7 @@ unsafe fn test_lasx_xvaddi_wu() {
         8831113348648816385,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddi_wu::<18>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvaddi_wu::<18>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1397,7 +1638,7 @@ unsafe fn test_lasx_xvaddi_du() {
         -4546559236496052074,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddi_du::<24>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvaddi_du::<24>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1417,7 +1658,13 @@ unsafe fn test_lasx_xvsub_b() {
         -7947080804470620196,
     );
 
-    assert_eq!(r, transmute(lasx_xvsub_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsub_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1437,7 +1684,13 @@ unsafe fn test_lasx_xvsub_h() {
         -2694318201466204009,
     );
 
-    assert_eq!(r, transmute(lasx_xvsub_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsub_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1469,7 +1722,13 @@ unsafe fn test_lasx_xvsub_w() {
         -4928352995773315889,
     );
 
-    assert_eq!(r, transmute(lasx_xvsub_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsub_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1493,7 +1752,13 @@ unsafe fn test_lasx_xvsub_d() {
         -1297126209654251318,
     );
 
-    assert_eq!(r, transmute(lasx_xvsub_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1509,7 +1774,7 @@ unsafe fn test_lasx_xvsubi_bu() {
         6185872108420092159,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubi_bu::<13>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsubi_bu::<13>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1525,7 +1790,7 @@ unsafe fn test_lasx_xvsubi_hu() {
         1522443898558080492,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubi_hu::<7>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsubi_hu::<7>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1547,7 +1812,7 @@ unsafe fn test_lasx_xvsubi_wu() {
         1285045436848317605,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubi_wu::<26>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsubi_wu::<26>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1565,7 +1830,7 @@ unsafe fn test_lasx_xvsubi_du() {
         4145748346670499010,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubi_du::<12>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsubi_du::<12>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1585,7 +1850,13 @@ unsafe fn test_lasx_xvmax_b() {
         8535488153625188193,
     );
 
-    assert_eq!(r, transmute(lasx_xvmax_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmax_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1605,7 +1876,13 @@ unsafe fn test_lasx_xvmax_h() {
         -4332902052436023459,
     );
 
-    assert_eq!(r, transmute(lasx_xvmax_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmax_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1637,7 +1914,13 @@ unsafe fn test_lasx_xvmax_w() {
         6702174376295843649,
     );
 
-    assert_eq!(r, transmute(lasx_xvmax_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmax_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1661,7 +1944,13 @@ unsafe fn test_lasx_xvmax_d() {
         -880822478913123851,
     );
 
-    assert_eq!(r, transmute(lasx_xvmax_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmax_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1677,7 +1966,7 @@ unsafe fn test_lasx_xvmaxi_b() {
         5914634738497113077,
     );
 
-    assert_eq!(r, transmute(lasx_xvmaxi_b::<-11>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmaxi_b::<-11>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1693,7 +1982,7 @@ unsafe fn test_lasx_xvmaxi_h() {
         4406209242478280693,
     );
 
-    assert_eq!(r, transmute(lasx_xvmaxi_h::<-11>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmaxi_h::<-11>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1715,7 +2004,7 @@ unsafe fn test_lasx_xvmaxi_w() {
         22981864337,
     );
 
-    assert_eq!(r, transmute(lasx_xvmaxi_w::<5>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmaxi_w::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1733,7 +2022,7 @@ unsafe fn test_lasx_xvmaxi_d() {
         2429249725865673045,
     );
 
-    assert_eq!(r, transmute(lasx_xvmaxi_d::<-3>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmaxi_d::<-3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1753,7 +2042,13 @@ unsafe fn test_lasx_xvmax_bu() {
         4233495576175936231,
     );
 
-    assert_eq!(r, transmute(lasx_xvmax_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmax_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1773,7 +2068,13 @@ unsafe fn test_lasx_xvmax_hu() {
         -1573457187787184228,
     );
 
-    assert_eq!(r, transmute(lasx_xvmax_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmax_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1793,7 +2094,13 @@ unsafe fn test_lasx_xvmax_wu() {
         -7315994376096540525,
     );
 
-    assert_eq!(r, transmute(lasx_xvmax_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmax_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1817,7 +2124,13 @@ unsafe fn test_lasx_xvmax_du() {
         5141420152487342561,
     );
 
-    assert_eq!(r, transmute(lasx_xvmax_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmax_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1833,7 +2146,7 @@ unsafe fn test_lasx_xvmaxi_bu() {
         -8478920119441971628,
     );
 
-    assert_eq!(r, transmute(lasx_xvmaxi_bu::<10>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmaxi_bu::<10>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1849,7 +2162,7 @@ unsafe fn test_lasx_xvmaxi_hu() {
         2580949584734723198,
     );
 
-    assert_eq!(r, transmute(lasx_xvmaxi_hu::<15>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmaxi_hu::<15>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1865,7 +2178,7 @@ unsafe fn test_lasx_xvmaxi_wu() {
         6328395255824707620,
     );
 
-    assert_eq!(r, transmute(lasx_xvmaxi_wu::<12>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmaxi_wu::<12>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1883,7 +2196,7 @@ unsafe fn test_lasx_xvmaxi_du() {
         3280369825537805033,
     );
 
-    assert_eq!(r, transmute(lasx_xvmaxi_du::<18>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmaxi_du::<18>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -1903,7 +2216,13 @@ unsafe fn test_lasx_xvmin_b() {
         -433018640497265418,
     );
 
-    assert_eq!(r, transmute(lasx_xvmin_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmin_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1923,7 +2242,13 @@ unsafe fn test_lasx_xvmin_h() {
         -1753422264687927210,
     );
 
-    assert_eq!(r, transmute(lasx_xvmin_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmin_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1949,7 +2274,13 @@ unsafe fn test_lasx_xvmin_w() {
         -710046880263550629,
     );
 
-    assert_eq!(r, transmute(lasx_xvmin_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmin_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1973,7 +2304,13 @@ unsafe fn test_lasx_xvmin_d() {
         -3792381296290037631,
     );
 
-    assert_eq!(r, transmute(lasx_xvmin_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmin_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -1989,7 +2326,7 @@ unsafe fn test_lasx_xvmini_b() {
         -1088282380739546975,
     );
 
-    assert_eq!(r, transmute(lasx_xvmini_b::<-16>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmini_b::<-16>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2005,7 +2342,7 @@ unsafe fn test_lasx_xvmini_h() {
         2439077560844296,
     );
 
-    assert_eq!(r, transmute(lasx_xvmini_h::<8>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmini_h::<8>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2027,7 +2364,7 @@ unsafe fn test_lasx_xvmini_w() {
         -3162971646443594334,
     );
 
-    assert_eq!(r, transmute(lasx_xvmini_w::<-16>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmini_w::<-16>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2040,7 +2377,7 @@ unsafe fn test_lasx_xvmini_d() {
     );
     let r = i64x4::new(-8, -8, -8, -8);
 
-    assert_eq!(r, transmute(lasx_xvmini_d::<-8>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmini_d::<-8>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2060,7 +2397,13 @@ unsafe fn test_lasx_xvmin_bu() {
         481055128827070653,
     );
 
-    assert_eq!(r, transmute(lasx_xvmin_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmin_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2080,7 +2423,13 @@ unsafe fn test_lasx_xvmin_hu() {
         4690886800975071114,
     );
 
-    assert_eq!(r, transmute(lasx_xvmin_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmin_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2100,7 +2449,13 @@ unsafe fn test_lasx_xvmin_wu() {
         841320412252129092,
     );
 
-    assert_eq!(r, transmute(lasx_xvmin_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmin_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2124,7 +2479,13 @@ unsafe fn test_lasx_xvmin_du() {
         168959420679376173,
     );
 
-    assert_eq!(r, transmute(lasx_xvmin_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmin_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2140,7 +2501,7 @@ unsafe fn test_lasx_xvmini_bu() {
         1803156197610166553,
     );
 
-    assert_eq!(r, transmute(lasx_xvmini_bu::<25>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmini_bu::<25>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2156,7 +2517,7 @@ unsafe fn test_lasx_xvmini_hu() {
         7881419608817692,
     );
 
-    assert_eq!(r, transmute(lasx_xvmini_hu::<28>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmini_hu::<28>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2167,7 +2528,7 @@ unsafe fn test_lasx_xvmini_wu() {
     );
     let r = i64x4::new(94489280534, 94489280534, 94489280534, 94489280534);
 
-    assert_eq!(r, transmute(lasx_xvmini_wu::<22>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmini_wu::<22>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2180,7 +2541,7 @@ unsafe fn test_lasx_xvmini_du() {
     );
     let r = i64x4::new(18, 18, 18, 18);
 
-    assert_eq!(r, transmute(lasx_xvmini_du::<18>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmini_du::<18>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2195,7 +2556,13 @@ unsafe fn test_lasx_xvseq_b() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvseq_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvseq_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2210,7 +2577,13 @@ unsafe fn test_lasx_xvseq_h() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvseq_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvseq_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2237,7 +2610,13 @@ unsafe fn test_lasx_xvseq_w() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvseq_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvseq_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2256,7 +2635,13 @@ unsafe fn test_lasx_xvseq_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvseq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvseq_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2267,7 +2652,7 @@ unsafe fn test_lasx_xvseqi_b() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvseqi_b::<-14>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvseqi_b::<-14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2278,7 +2663,7 @@ unsafe fn test_lasx_xvseqi_h() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvseqi_h::<-8>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvseqi_h::<-8>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2295,7 +2680,7 @@ unsafe fn test_lasx_xvseqi_w() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvseqi_w::<-11>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvseqi_w::<-11>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2308,7 +2693,7 @@ unsafe fn test_lasx_xvseqi_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvseqi_d::<-2>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvseqi_d::<-2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2328,7 +2713,13 @@ unsafe fn test_lasx_xvslt_b() {
         71776119077994495,
     );
 
-    assert_eq!(r, transmute(lasx_xvslt_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvslt_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2348,7 +2739,13 @@ unsafe fn test_lasx_xvslt_h() {
         -281470681743361,
     );
 
-    assert_eq!(r, transmute(lasx_xvslt_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvslt_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2375,7 +2772,13 @@ unsafe fn test_lasx_xvslt_w() {
     );
     let r = i64x4::new(4294967295, 0, -1, 0);
 
-    assert_eq!(r, transmute(lasx_xvslt_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvslt_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2394,7 +2797,13 @@ unsafe fn test_lasx_xvslt_d() {
     );
     let r = i64x4::new(0, 0, -1, 0);
 
-    assert_eq!(r, transmute(lasx_xvslt_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvslt_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2410,7 +2819,7 @@ unsafe fn test_lasx_xvslti_b() {
         71777218556067840,
     );
 
-    assert_eq!(r, transmute(lasx_xvslti_b::<-16>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslti_b::<-16>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2421,7 +2830,7 @@ unsafe fn test_lasx_xvslti_h() {
     );
     let r = i64x4::new(4294967295, -1, -281470681743361, 65535);
 
-    assert_eq!(r, transmute(lasx_xvslti_h::<-4>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslti_h::<-4>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2438,7 +2847,7 @@ unsafe fn test_lasx_xvslti_w() {
     );
     let r = i64x4::new(-1, 0, -4294967296, -1);
 
-    assert_eq!(r, transmute(lasx_xvslti_w::<-4>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslti_w::<-4>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2451,7 +2860,7 @@ unsafe fn test_lasx_xvslti_d() {
     );
     let r = i64x4::new(-1, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvslti_d::<1>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslti_d::<1>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2466,7 +2875,13 @@ unsafe fn test_lasx_xvslt_bu() {
     );
     let r = i64x4::new(-1095216660481, 280375465083135, -1099494915841, 16711680);
 
-    assert_eq!(r, transmute(lasx_xvslt_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvslt_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2481,7 +2896,13 @@ unsafe fn test_lasx_xvslt_hu() {
     );
     let r = i64x4::new(-281470681808896, 4294901760, -65536, 281470681808895);
 
-    assert_eq!(r, transmute(lasx_xvslt_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvslt_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2496,7 +2917,13 @@ unsafe fn test_lasx_xvslt_wu() {
     );
     let r = i64x4::new(-1, -1, -4294967296, -1);
 
-    assert_eq!(r, transmute(lasx_xvslt_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvslt_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2515,7 +2942,13 @@ unsafe fn test_lasx_xvslt_du() {
     );
     let r = i64x4::new(-1, -1, 0, -1);
 
-    assert_eq!(r, transmute(lasx_xvslt_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvslt_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2526,7 +2959,7 @@ unsafe fn test_lasx_xvslti_bu() {
     );
     let r = i64x4::new(16711680, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvslti_bu::<7>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslti_bu::<7>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2537,7 +2970,7 @@ unsafe fn test_lasx_xvslti_hu() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvslti_hu::<13>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslti_hu::<13>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2548,7 +2981,7 @@ unsafe fn test_lasx_xvslti_wu() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvslti_wu::<8>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslti_wu::<8>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2561,7 +2994,7 @@ unsafe fn test_lasx_xvslti_du() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvslti_du::<2>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslti_du::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2581,7 +3014,13 @@ unsafe fn test_lasx_xvsle_b() {
         1095216726015,
     );
 
-    assert_eq!(r, transmute(lasx_xvsle_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsle_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2596,7 +3035,13 @@ unsafe fn test_lasx_xvsle_h() {
     );
     let r = i64x4::new(-1, 4294901760, 4294901760, -281470681743361);
 
-    assert_eq!(r, transmute(lasx_xvsle_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsle_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2623,7 +3068,13 @@ unsafe fn test_lasx_xvsle_w() {
     );
     let r = i64x4::new(-4294967296, 0, -1, -4294967296);
 
-    assert_eq!(r, transmute(lasx_xvsle_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsle_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2642,7 +3093,13 @@ unsafe fn test_lasx_xvsle_d() {
     );
     let r = i64x4::new(-1, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvsle_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsle_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2658,7 +3115,7 @@ unsafe fn test_lasx_xvslei_b() {
         280375465148415,
     );
 
-    assert_eq!(r, transmute(lasx_xvslei_b::<-14>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslei_b::<-14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2669,7 +3126,7 @@ unsafe fn test_lasx_xvslei_h() {
     );
     let r = i64x4::new(-65536, -4294901761, 281474976710655, -65536);
 
-    assert_eq!(r, transmute(lasx_xvslei_h::<-15>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslei_h::<-15>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2680,7 +3137,7 @@ unsafe fn test_lasx_xvslei_w() {
     );
     let r = i64x4::new(-4294967296, 0, -1, 0);
 
-    assert_eq!(r, transmute(lasx_xvslei_w::<-3>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslei_w::<-3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2693,7 +3150,7 @@ unsafe fn test_lasx_xvslei_d() {
     );
     let r = i64x4::new(-1, 0, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvslei_d::<6>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslei_d::<6>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2713,7 +3170,13 @@ unsafe fn test_lasx_xvsle_bu() {
         281474976710655,
     );
 
-    assert_eq!(r, transmute(lasx_xvsle_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsle_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2728,7 +3191,13 @@ unsafe fn test_lasx_xvsle_hu() {
     );
     let r = i64x4::new(281474976645120, -4294967296, 281470681808895, 0);
 
-    assert_eq!(r, transmute(lasx_xvsle_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsle_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2743,7 +3212,13 @@ unsafe fn test_lasx_xvsle_wu() {
     );
     let r = i64x4::new(-4294967296, -1, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvsle_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsle_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2762,7 +3237,13 @@ unsafe fn test_lasx_xvsle_du() {
     );
     let r = i64x4::new(0, -1, 0, -1);
 
-    assert_eq!(r, transmute(lasx_xvsle_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsle_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2773,7 +3254,7 @@ unsafe fn test_lasx_xvslei_bu() {
     );
     let r = i64x4::new(72056494526365440, 280375465082880, 71776119077928960, 0);
 
-    assert_eq!(r, transmute(lasx_xvslei_bu::<29>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslei_bu::<29>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2784,7 +3265,7 @@ unsafe fn test_lasx_xvslei_hu() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvslei_hu::<30>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslei_hu::<30>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2795,7 +3276,7 @@ unsafe fn test_lasx_xvslei_wu() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvslei_wu::<31>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslei_wu::<31>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2808,7 +3289,7 @@ unsafe fn test_lasx_xvslei_du() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvslei_du::<5>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvslei_du::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2824,7 +3305,7 @@ unsafe fn test_lasx_xvsat_b() {
         1985954429852520914,
     );
 
-    assert_eq!(r, transmute(lasx_xvsat_b::<7>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsat_b::<7>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2840,7 +3321,7 @@ unsafe fn test_lasx_xvsat_h() {
         1152903912689234618,
     );
 
-    assert_eq!(r, transmute(lasx_xvsat_h::<12>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsat_h::<12>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2857,7 +3338,7 @@ unsafe fn test_lasx_xvsat_w() {
     );
     let r = i64x4::new(-34359738361, 34359738360, -30064771080, -34359738361);
 
-    assert_eq!(r, transmute(lasx_xvsat_w::<3>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsat_w::<3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2875,7 +3356,7 @@ unsafe fn test_lasx_xvsat_d() {
         6102033771404793023,
     );
 
-    assert_eq!(r, transmute(lasx_xvsat_d::<63>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsat_d::<63>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2891,7 +3372,7 @@ unsafe fn test_lasx_xvsat_bu() {
         2539795165049929535,
     );
 
-    assert_eq!(r, transmute(lasx_xvsat_bu::<5>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsat_bu::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2907,7 +3388,7 @@ unsafe fn test_lasx_xvsat_hu() {
         1970354902204423,
     );
 
-    assert_eq!(r, transmute(lasx_xvsat_hu::<2>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsat_hu::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2918,7 +3399,7 @@ unsafe fn test_lasx_xvsat_wu() {
     );
     let r = i64x4::new(270582939711, 270582939711, 270582939711, 270582939711);
 
-    assert_eq!(r, transmute(lasx_xvsat_wu::<5>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsat_wu::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2931,7 +3412,7 @@ unsafe fn test_lasx_xvsat_du() {
     );
     let r = i64x4::new(8796093022207, 8796093022207, 8796093022207, 8796093022207);
 
-    assert_eq!(r, transmute(lasx_xvsat_du::<42>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvsat_du::<42>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -2951,7 +3432,13 @@ unsafe fn test_lasx_xvadda_b() {
         -6512388827583513148,
     );
 
-    assert_eq!(r, transmute(lasx_xvadda_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvadda_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2971,7 +3458,13 @@ unsafe fn test_lasx_xvadda_h() {
         4288196905584441792,
     );
 
-    assert_eq!(r, transmute(lasx_xvadda_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvadda_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -2997,7 +3490,13 @@ unsafe fn test_lasx_xvadda_w() {
         7114837115730115925,
     );
 
-    assert_eq!(r, transmute(lasx_xvadda_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvadda_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3021,7 +3520,13 @@ unsafe fn test_lasx_xvadda_d() {
         -3532969990801796507,
     );
 
-    assert_eq!(r, transmute(lasx_xvadda_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvadda_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3041,7 +3546,13 @@ unsafe fn test_lasx_xvsadd_b() {
         3530119333939728429,
     );
 
-    assert_eq!(r, transmute(lasx_xvsadd_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsadd_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3061,7 +3572,13 @@ unsafe fn test_lasx_xvsadd_h() {
         -5137195089227040637,
     );
 
-    assert_eq!(r, transmute(lasx_xvsadd_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsadd_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3093,7 +3610,13 @@ unsafe fn test_lasx_xvsadd_w() {
         6493388403303310332,
     );
 
-    assert_eq!(r, transmute(lasx_xvsadd_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsadd_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3117,7 +3640,13 @@ unsafe fn test_lasx_xvsadd_d() {
         -1670245304326307655,
     );
 
-    assert_eq!(r, transmute(lasx_xvsadd_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3137,7 +3666,13 @@ unsafe fn test_lasx_xvsadd_bu() {
         -380207497217,
     );
 
-    assert_eq!(r, transmute(lasx_xvsadd_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsadd_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3157,7 +3692,13 @@ unsafe fn test_lasx_xvsadd_hu() {
         -2766274561,
     );
 
-    assert_eq!(r, transmute(lasx_xvsadd_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsadd_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3177,7 +3718,13 @@ unsafe fn test_lasx_xvsadd_wu() {
         9110967605937569791,
     );
 
-    assert_eq!(r, transmute(lasx_xvsadd_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsadd_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3196,7 +3743,13 @@ unsafe fn test_lasx_xvsadd_du() {
     );
     let r = i64x4::new(-1, -7683287700352967836, -3264735658191843562, -1);
 
-    assert_eq!(r, transmute(lasx_xvsadd_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsadd_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3216,7 +3769,13 @@ unsafe fn test_lasx_xvavg_b() {
         -2451086284962613015,
     );
 
-    assert_eq!(r, transmute(lasx_xvavg_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavg_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3236,7 +3795,13 @@ unsafe fn test_lasx_xvavg_h() {
         -6082277202109387491,
     );
 
-    assert_eq!(r, transmute(lasx_xvavg_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavg_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3268,7 +3833,13 @@ unsafe fn test_lasx_xvavg_w() {
         -97541447405991454,
     );
 
-    assert_eq!(r, transmute(lasx_xvavg_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavg_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3292,7 +3863,13 @@ unsafe fn test_lasx_xvavg_d() {
         743619511763122382,
     );
 
-    assert_eq!(r, transmute(lasx_xvavg_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavg_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3312,7 +3889,13 @@ unsafe fn test_lasx_xvavg_bu() {
         5794025379951354001,
     );
 
-    assert_eq!(r, transmute(lasx_xvavg_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavg_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3332,7 +3915,13 @@ unsafe fn test_lasx_xvavg_hu() {
         -3939723307751543404,
     );
 
-    assert_eq!(r, transmute(lasx_xvavg_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavg_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3351,7 +3940,13 @@ unsafe fn test_lasx_xvavg_wu() {
         6180173283312674740,
     );
 
-    assert_eq!(r, transmute(lasx_xvavg_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavg_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3375,7 +3970,13 @@ unsafe fn test_lasx_xvavg_du() {
         -9048945872629561085,
     );
 
-    assert_eq!(r, transmute(lasx_xvavg_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavg_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3395,7 +3996,13 @@ unsafe fn test_lasx_xvavgr_b() {
         -1577916506278329386,
     );
 
-    assert_eq!(r, transmute(lasx_xvavgr_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavgr_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3415,7 +4022,13 @@ unsafe fn test_lasx_xvavgr_h() {
         1044782302812228671,
     );
 
-    assert_eq!(r, transmute(lasx_xvavgr_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavgr_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3447,7 +4060,13 @@ unsafe fn test_lasx_xvavgr_w() {
         4983380877656540978,
     );
 
-    assert_eq!(r, transmute(lasx_xvavgr_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavgr_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3471,7 +4090,13 @@ unsafe fn test_lasx_xvavgr_d() {
         229317404291257478,
     );
 
-    assert_eq!(r, transmute(lasx_xvavgr_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavgr_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3491,7 +4116,13 @@ unsafe fn test_lasx_xvavgr_bu() {
         8511681618342279077,
     );
 
-    assert_eq!(r, transmute(lasx_xvavgr_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavgr_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3511,7 +4142,13 @@ unsafe fn test_lasx_xvavgr_hu() {
         -4835281559523879916,
     );
 
-    assert_eq!(r, transmute(lasx_xvavgr_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavgr_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3531,7 +4168,13 @@ unsafe fn test_lasx_xvavgr_wu() {
         2489338192049926342,
     );
 
-    assert_eq!(r, transmute(lasx_xvavgr_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavgr_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3555,7 +4198,13 @@ unsafe fn test_lasx_xvavgr_du() {
         6414723233875186966,
     );
 
-    assert_eq!(r, transmute(lasx_xvavgr_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvavgr_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3575,7 +4224,13 @@ unsafe fn test_lasx_xvssub_b() {
         -4561472970538678093,
     );
 
-    assert_eq!(r, transmute(lasx_xvssub_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssub_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3595,7 +4250,13 @@ unsafe fn test_lasx_xvssub_h() {
         8048307602867637285,
     );
 
-    assert_eq!(r, transmute(lasx_xvssub_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssub_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3627,7 +4288,13 @@ unsafe fn test_lasx_xvssub_w() {
         4655436811119524629,
     );
 
-    assert_eq!(r, transmute(lasx_xvssub_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssub_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3651,7 +4318,13 @@ unsafe fn test_lasx_xvssub_d() {
         -9223372036854775808,
     );
 
-    assert_eq!(r, transmute(lasx_xvssub_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3671,7 +4344,13 @@ unsafe fn test_lasx_xvssub_bu() {
         864691185841012929,
     );
 
-    assert_eq!(r, transmute(lasx_xvssub_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssub_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3691,7 +4370,13 @@ unsafe fn test_lasx_xvssub_hu() {
         188750927758467,
     );
 
-    assert_eq!(r, transmute(lasx_xvssub_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssub_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3711,7 +4396,13 @@ unsafe fn test_lasx_xvssub_wu() {
         3974517532346153551,
     );
 
-    assert_eq!(r, transmute(lasx_xvssub_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssub_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3730,7 +4421,13 @@ unsafe fn test_lasx_xvssub_du() {
     );
     let r = i64x4::new(1075384133325788465, 0, 8236940487074099359, 0);
 
-    assert_eq!(r, transmute(lasx_xvssub_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssub_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3750,7 +4447,13 @@ unsafe fn test_lasx_xvabsd_b() {
         4109603046844106624,
     );
 
-    assert_eq!(r, transmute(lasx_xvabsd_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvabsd_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3770,7 +4473,13 @@ unsafe fn test_lasx_xvabsd_h() {
         5513891007581016946,
     );
 
-    assert_eq!(r, transmute(lasx_xvabsd_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvabsd_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3802,7 +4511,13 @@ unsafe fn test_lasx_xvabsd_w() {
         -7014776540975538355,
     );
 
-    assert_eq!(r, transmute(lasx_xvabsd_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvabsd_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3826,7 +4541,13 @@ unsafe fn test_lasx_xvabsd_d() {
         4722306005291245989,
     );
 
-    assert_eq!(r, transmute(lasx_xvabsd_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvabsd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3846,7 +4567,13 @@ unsafe fn test_lasx_xvabsd_bu() {
         1887319547440621943,
     );
 
-    assert_eq!(r, transmute(lasx_xvabsd_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvabsd_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3866,7 +4593,13 @@ unsafe fn test_lasx_xvabsd_hu() {
         1864011964690965056,
     );
 
-    assert_eq!(r, transmute(lasx_xvabsd_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvabsd_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3886,7 +4619,13 @@ unsafe fn test_lasx_xvabsd_wu() {
         1525979489064328670,
     );
 
-    assert_eq!(r, transmute(lasx_xvabsd_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvabsd_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3910,7 +4649,13 @@ unsafe fn test_lasx_xvabsd_du() {
         2127486190004927946,
     );
 
-    assert_eq!(r, transmute(lasx_xvabsd_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvabsd_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3930,7 +4675,13 @@ unsafe fn test_lasx_xvmul_b() {
         -9159357540886189840,
     );
 
-    assert_eq!(r, transmute(lasx_xvmul_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmul_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3950,7 +4701,13 @@ unsafe fn test_lasx_xvmul_h() {
         -7534790044979024262,
     );
 
-    assert_eq!(r, transmute(lasx_xvmul_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmul_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -3982,7 +4739,13 @@ unsafe fn test_lasx_xvmul_w() {
         1142495638330554240,
     );
 
-    assert_eq!(r, transmute(lasx_xvmul_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmul_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4006,7 +4769,13 @@ unsafe fn test_lasx_xvmul_d() {
         -3668010491661410128,
     );
 
-    assert_eq!(r, transmute(lasx_xvmul_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmul_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4032,7 +4801,11 @@ unsafe fn test_lasx_xvmadd_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmadd_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmadd_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4059,7 +4832,11 @@ unsafe fn test_lasx_xvmadd_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmadd_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmadd_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4104,7 +4881,11 @@ unsafe fn test_lasx_xvmadd_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmadd_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmadd_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4137,7 +4918,11 @@ unsafe fn test_lasx_xvmadd_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmadd_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4164,7 +4949,11 @@ unsafe fn test_lasx_xvmsub_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmsub_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmsub_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4191,7 +4980,11 @@ unsafe fn test_lasx_xvmsub_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmsub_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmsub_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4236,7 +5029,11 @@ unsafe fn test_lasx_xvmsub_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmsub_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmsub_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4269,7 +5066,11 @@ unsafe fn test_lasx_xvmsub_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmsub_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmsub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4285,7 +5086,13 @@ unsafe fn test_lasx_xvdiv_b() {
     );
     let r = i64x4::new(67174400, 843334041468931, 16515072, 1090921824000);
 
-    assert_eq!(r, transmute(lasx_xvdiv_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvdiv_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4305,7 +5112,13 @@ unsafe fn test_lasx_xvdiv_h() {
         -281470681939967,
     );
 
-    assert_eq!(r, transmute(lasx_xvdiv_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvdiv_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4332,7 +5145,13 @@ unsafe fn test_lasx_xvdiv_w() {
     );
     let r = i64x4::new(-25769803778, 4294967295, 34359738365, 1);
 
-    assert_eq!(r, transmute(lasx_xvdiv_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvdiv_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4351,7 +5170,13 @@ unsafe fn test_lasx_xvdiv_d() {
     );
     let r = i64x4::new(-3, 0, -3, 0);
 
-    assert_eq!(r, transmute(lasx_xvdiv_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvdiv_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4371,7 +5196,13 @@ unsafe fn test_lasx_xvdiv_bu() {
         144118486677848127,
     );
 
-    assert_eq!(r, transmute(lasx_xvdiv_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvdiv_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4386,7 +5217,13 @@ unsafe fn test_lasx_xvdiv_hu() {
     );
     let r = i64x4::new(4295098372, 38654705665, 281474976776212, 283467841601537);
 
-    assert_eq!(r, transmute(lasx_xvdiv_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvdiv_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4401,7 +5238,13 @@ unsafe fn test_lasx_xvdiv_wu() {
     );
     let r = i64x4::new(0, 1, 46, 4294967299);
 
-    assert_eq!(r, transmute(lasx_xvdiv_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvdiv_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4420,7 +5263,13 @@ unsafe fn test_lasx_xvdiv_du() {
     );
     let r = i64x4::new(0, 0, 1, 6);
 
-    assert_eq!(r, transmute(lasx_xvdiv_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvdiv_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4440,7 +5289,13 @@ unsafe fn test_lasx_xvhaddw_h_b() {
         -18859072538017839,
     );
 
-    assert_eq!(r, transmute(lasx_xvhaddw_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhaddw_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4460,7 +5315,13 @@ unsafe fn test_lasx_xvhaddw_w_h() {
         -36597416302335,
     );
 
-    assert_eq!(r, transmute(lasx_xvhaddw_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhaddw_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4487,7 +5348,13 @@ unsafe fn test_lasx_xvhaddw_d_w() {
     );
     let r = i64x4::new(1043954543, 64421064, -1003667433, -119821715);
 
-    assert_eq!(r, transmute(lasx_xvhaddw_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhaddw_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4507,7 +5374,13 @@ unsafe fn test_lasx_xvhaddw_hu_bu() {
         56014362196705476,
     );
 
-    assert_eq!(r, transmute(lasx_xvhaddw_hu_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhaddw_hu_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4527,7 +5400,13 @@ unsafe fn test_lasx_xvhaddw_wu_hu() {
         392255068231306,
     );
 
-    assert_eq!(r, transmute(lasx_xvhaddw_wu_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhaddw_wu_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4542,7 +5421,13 @@ unsafe fn test_lasx_xvhaddw_du_wu() {
     );
     let r = i64x4::new(2983569336, 4514288382, 2479696956, 1680431840);
 
-    assert_eq!(r, transmute(lasx_xvhaddw_du_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhaddw_du_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4562,7 +5447,13 @@ unsafe fn test_lasx_xvhsubw_h_b() {
         -21955597927907350,
     );
 
-    assert_eq!(r, transmute(lasx_xvhsubw_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhsubw_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4582,7 +5473,13 @@ unsafe fn test_lasx_xvhsubw_w_h() {
         -108800111503156,
     );
 
-    assert_eq!(r, transmute(lasx_xvhsubw_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhsubw_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4603,7 +5500,13 @@ unsafe fn test_lasx_xvhsubw_d_w() {
     );
     let r = i64x4::new(2748898148, -45146293, 958916832, 1285325893);
 
-    assert_eq!(r, transmute(lasx_xvhsubw_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhsubw_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4623,7 +5526,13 @@ unsafe fn test_lasx_xvhsubw_hu_bu() {
         9289103727198239,
     );
 
-    assert_eq!(r, transmute(lasx_xvhsubw_hu_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhsubw_hu_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4643,7 +5552,13 @@ unsafe fn test_lasx_xvhsubw_wu_hu() {
         32018981198856,
     );
 
-    assert_eq!(r, transmute(lasx_xvhsubw_wu_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhsubw_wu_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4658,7 +5573,13 @@ unsafe fn test_lasx_xvhsubw_du_wu() {
     );
     let r = i64x4::new(-1056733131, -2613149992, 384615677, -1588276541);
 
-    assert_eq!(r, transmute(lasx_xvhsubw_du_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhsubw_du_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4678,7 +5599,13 @@ unsafe fn test_lasx_xvmod_b() {
         -48385121157714142,
     );
 
-    assert_eq!(r, transmute(lasx_xvmod_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmod_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4698,7 +5625,13 @@ unsafe fn test_lasx_xvmod_h() {
         -194216204870745003,
     );
 
-    assert_eq!(r, transmute(lasx_xvmod_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmod_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4724,7 +5657,13 @@ unsafe fn test_lasx_xvmod_w() {
         807808928635455307,
     );
 
-    assert_eq!(r, transmute(lasx_xvmod_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmod_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4748,7 +5687,13 @@ unsafe fn test_lasx_xvmod_d() {
         -3048989907394276239,
     );
 
-    assert_eq!(r, transmute(lasx_xvmod_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmod_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4768,7 +5713,13 @@ unsafe fn test_lasx_xvmod_bu() {
         5417620637589803790,
     );
 
-    assert_eq!(r, transmute(lasx_xvmod_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmod_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4788,7 +5739,13 @@ unsafe fn test_lasx_xvmod_hu() {
         129490854556368167,
     );
 
-    assert_eq!(r, transmute(lasx_xvmod_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmod_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4808,7 +5765,13 @@ unsafe fn test_lasx_xvmod_wu() {
         480682694340619302,
     );
 
-    assert_eq!(r, transmute(lasx_xvmod_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmod_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4832,7 +5795,13 @@ unsafe fn test_lasx_xvmod_du() {
         150087784552479859,
     );
 
-    assert_eq!(r, transmute(lasx_xvmod_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmod_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4848,7 +5817,10 @@ unsafe fn test_lasx_xvrepl128vei_b() {
         8970181431921507452,
     );
 
-    assert_eq!(r, transmute(lasx_xvrepl128vei_b::<8>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvrepl128vei_b::<8>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4864,7 +5836,10 @@ unsafe fn test_lasx_xvrepl128vei_h() {
         -3904680457625679409,
     );
 
-    assert_eq!(r, transmute(lasx_xvrepl128vei_h::<3>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvrepl128vei_h::<3>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4886,7 +5861,10 @@ unsafe fn test_lasx_xvrepl128vei_w() {
         -1327396365108239351,
     );
 
-    assert_eq!(r, transmute(lasx_xvrepl128vei_w::<1>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvrepl128vei_w::<1>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4904,7 +5882,10 @@ unsafe fn test_lasx_xvrepl128vei_d() {
         4427502889722976813,
     );
 
-    assert_eq!(r, transmute(lasx_xvrepl128vei_d::<0>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvrepl128vei_d::<0>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4924,7 +5905,13 @@ unsafe fn test_lasx_xvpickev_b() {
         4502896606534087725,
     );
 
-    assert_eq!(r, transmute(lasx_xvpickev_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickev_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4944,7 +5931,13 @@ unsafe fn test_lasx_xvpickev_h() {
         -2117051360895385090,
     );
 
-    assert_eq!(r, transmute(lasx_xvpickev_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickev_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -4976,7 +5969,13 @@ unsafe fn test_lasx_xvpickev_w() {
         -4454806063744691677,
     );
 
-    assert_eq!(r, transmute(lasx_xvpickev_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickev_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5000,7 +5999,13 @@ unsafe fn test_lasx_xvpickev_d() {
         1952973857169882715,
     );
 
-    assert_eq!(r, transmute(lasx_xvpickev_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickev_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5020,7 +6025,13 @@ unsafe fn test_lasx_xvpickod_b() {
         4092165317489988560,
     );
 
-    assert_eq!(r, transmute(lasx_xvpickod_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickod_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5040,7 +6051,13 @@ unsafe fn test_lasx_xvpickod_h() {
         5912677724127371711,
     );
 
-    assert_eq!(r, transmute(lasx_xvpickod_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickod_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5072,7 +6089,13 @@ unsafe fn test_lasx_xvpickod_w() {
         14200989743342145,
     );
 
-    assert_eq!(r, transmute(lasx_xvpickod_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickod_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5096,7 +6119,13 @@ unsafe fn test_lasx_xvpickod_d() {
         3923084493864153244,
     );
 
-    assert_eq!(r, transmute(lasx_xvpickod_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickod_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5116,7 +6145,13 @@ unsafe fn test_lasx_xvilvh_b() {
         6070396101995813657,
     );
 
-    assert_eq!(r, transmute(lasx_xvilvh_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvilvh_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5136,7 +6171,13 @@ unsafe fn test_lasx_xvilvh_h() {
         6944594579025051980,
     );
 
-    assert_eq!(r, transmute(lasx_xvilvh_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvilvh_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5168,7 +6209,13 @@ unsafe fn test_lasx_xvilvh_w() {
         2557948893958412086,
     );
 
-    assert_eq!(r, transmute(lasx_xvilvh_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvilvh_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5192,7 +6239,13 @@ unsafe fn test_lasx_xvilvh_d() {
         -1576924492614617443,
     );
 
-    assert_eq!(r, transmute(lasx_xvilvh_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvilvh_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5212,7 +6265,13 @@ unsafe fn test_lasx_xvilvl_b() {
         -1661662459983806644,
     );
 
-    assert_eq!(r, transmute(lasx_xvilvl_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvilvl_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5232,7 +6291,13 @@ unsafe fn test_lasx_xvilvl_h() {
         -894657396213105965,
     );
 
-    assert_eq!(r, transmute(lasx_xvilvl_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvilvl_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5264,7 +6329,13 @@ unsafe fn test_lasx_xvilvl_w() {
         6940426927105417163,
     );
 
-    assert_eq!(r, transmute(lasx_xvilvl_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvilvl_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5288,7 +6359,13 @@ unsafe fn test_lasx_xvilvl_d() {
         -2688716944239585727,
     );
 
-    assert_eq!(r, transmute(lasx_xvilvl_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvilvl_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5308,7 +6385,13 @@ unsafe fn test_lasx_xvpackev_b() {
         -9004682544879989266,
     );
 
-    assert_eq!(r, transmute(lasx_xvpackev_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpackev_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5328,7 +6411,13 @@ unsafe fn test_lasx_xvpackev_h() {
         -5280992525495869891,
     );
 
-    assert_eq!(r, transmute(lasx_xvpackev_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpackev_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5360,7 +6449,13 @@ unsafe fn test_lasx_xvpackev_w() {
         338692385926626324,
     );
 
-    assert_eq!(r, transmute(lasx_xvpackev_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpackev_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5384,7 +6479,13 @@ unsafe fn test_lasx_xvpackev_d() {
         -3601691172781761847,
     );
 
-    assert_eq!(r, transmute(lasx_xvpackev_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpackev_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5404,7 +6505,13 @@ unsafe fn test_lasx_xvpackod_b() {
         3700670962761760653,
     );
 
-    assert_eq!(r, transmute(lasx_xvpackod_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpackod_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5424,7 +6531,13 @@ unsafe fn test_lasx_xvpackod_h() {
         -5523279134117035742,
     );
 
-    assert_eq!(r, transmute(lasx_xvpackod_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpackod_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5456,7 +6569,13 @@ unsafe fn test_lasx_xvpackod_w() {
         -7292079267755798519,
     );
 
-    assert_eq!(r, transmute(lasx_xvpackod_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpackod_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5480,7 +6599,13 @@ unsafe fn test_lasx_xvpackod_d() {
         -8628096693516187272,
     );
 
-    assert_eq!(r, transmute(lasx_xvpackod_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpackod_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5506,7 +6631,11 @@ unsafe fn test_lasx_xvshuf_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvshuf_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvshuf_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5530,7 +6659,11 @@ unsafe fn test_lasx_xvshuf_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvshuf_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvshuf_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5566,7 +6699,11 @@ unsafe fn test_lasx_xvshuf_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvshuf_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvshuf_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5594,7 +6731,11 @@ unsafe fn test_lasx_xvshuf_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvshuf_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvshuf_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5615,7 +6756,13 @@ unsafe fn test_lasx_xvand_v() {
         -7998109804568426495,
     );
 
-    assert_eq!(r, transmute(lasx_xvand_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvand_v(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5631,7 +6778,7 @@ unsafe fn test_lasx_xvandi_b() {
         793492300495455493,
     );
 
-    assert_eq!(r, transmute(lasx_xvandi_b::<47>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvandi_b::<47>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5651,7 +6798,13 @@ unsafe fn test_lasx_xvor_v() {
         -198266276987019378,
     );
 
-    assert_eq!(r, transmute(lasx_xvor_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvor_v(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5667,7 +6820,7 @@ unsafe fn test_lasx_xvori_b() {
         8466485259632311926,
     );
 
-    assert_eq!(r, transmute(lasx_xvori_b::<116>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvori_b::<116>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5687,7 +6840,13 @@ unsafe fn test_lasx_xvnor_v() {
         -8601510250130767824,
     );
 
-    assert_eq!(r, transmute(lasx_xvnor_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvnor_v(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5703,7 +6862,7 @@ unsafe fn test_lasx_xvnori_b() {
         6053994920729270286,
     );
 
-    assert_eq!(r, transmute(lasx_xvnori_b::<161>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvnori_b::<161>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5723,7 +6882,13 @@ unsafe fn test_lasx_xvxor_v() {
         4786489823605581252,
     );
 
-    assert_eq!(r, transmute(lasx_xvxor_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvxor_v(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5739,7 +6904,7 @@ unsafe fn test_lasx_xvxori_b() {
         1979210996964535887,
     );
 
-    assert_eq!(r, transmute(lasx_xvxori_b::<179>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvxori_b::<179>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5765,7 +6930,11 @@ unsafe fn test_lasx_xvbitsel_v() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvbitsel_v(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvbitsel_v(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5788,7 +6957,10 @@ unsafe fn test_lasx_xvbitseli_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvbitseli_b::<156>(transmute(a), transmute(b)))
+        transmute(lasx_xvbitseli_b::<156>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5805,7 +6977,10 @@ unsafe fn test_lasx_xvshuf4i_b() {
         1357573681433480718,
     );
 
-    assert_eq!(r, transmute(lasx_xvshuf4i_b::<117>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvshuf4i_b::<117>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5821,7 +6996,10 @@ unsafe fn test_lasx_xvshuf4i_h() {
         4406041774853078309,
     );
 
-    assert_eq!(r, transmute(lasx_xvshuf4i_h::<125>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvshuf4i_h::<125>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5843,7 +7021,7 @@ unsafe fn test_lasx_xvshuf4i_w() {
         -206225345846487261,
     );
 
-    assert_eq!(r, transmute(lasx_xvshuf4i_w::<10>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvshuf4i_w::<10>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5855,7 +7033,7 @@ unsafe fn test_lasx_xvreplgr2vr_b() {
         8463800222054970741,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplgr2vr_b(-139770763)));
+    assert_eq!(r, transmute(lasx_xvreplgr2vr_b(black_box(-139770763))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5867,7 +7045,7 @@ unsafe fn test_lasx_xvreplgr2vr_h() {
         -1100020993973555013,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplgr2vr_h(-111546181)));
+    assert_eq!(r, transmute(lasx_xvreplgr2vr_h(black_box(-111546181))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5879,7 +7057,7 @@ unsafe fn test_lasx_xvreplgr2vr_w() {
         -8112237653938959659,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplgr2vr_w(-1888777515)));
+    assert_eq!(r, transmute(lasx_xvreplgr2vr_w(black_box(-1888777515))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5891,7 +7069,10 @@ unsafe fn test_lasx_xvreplgr2vr_d() {
         -1472556476011894783,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplgr2vr_d(-1472556476011894783)));
+    assert_eq!(
+        r,
+        transmute(lasx_xvreplgr2vr_d(black_box(-1472556476011894783)))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -5907,7 +7088,7 @@ unsafe fn test_lasx_xvpcnt_b() {
         288795538114413315,
     );
 
-    assert_eq!(r, transmute(lasx_xvpcnt_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvpcnt_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5923,7 +7104,7 @@ unsafe fn test_lasx_xvpcnt_h() {
         2251829878980617,
     );
 
-    assert_eq!(r, transmute(lasx_xvpcnt_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvpcnt_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5940,7 +7121,7 @@ unsafe fn test_lasx_xvpcnt_w() {
     );
     let r = i64x4::new(77309411341, 60129542155, 73014444046, 55834574863);
 
-    assert_eq!(r, transmute(lasx_xvpcnt_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvpcnt_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5953,7 +7134,7 @@ unsafe fn test_lasx_xvpcnt_d() {
     );
     let r = i64x4::new(33, 31, 29, 33);
 
-    assert_eq!(r, transmute(lasx_xvpcnt_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvpcnt_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5964,7 +7145,7 @@ unsafe fn test_lasx_xvclo_b() {
     );
     let r = i64x4::new(2207613190657, 8589934592, 1103806726660, 3298568503554);
 
-    assert_eq!(r, transmute(lasx_xvclo_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvclo_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5980,7 +7161,7 @@ unsafe fn test_lasx_xvclo_h() {
         281479271677953,
     );
 
-    assert_eq!(r, transmute(lasx_xvclo_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvclo_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -5997,7 +7178,7 @@ unsafe fn test_lasx_xvclo_w() {
     );
     let r = i64x4::new(4294967299, 1, 1, 8589934593);
 
-    assert_eq!(r, transmute(lasx_xvclo_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvclo_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6010,7 +7191,7 @@ unsafe fn test_lasx_xvclo_d() {
     );
     let r = i64x4::new(2, 0, 1, 0);
 
-    assert_eq!(r, transmute(lasx_xvclo_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvclo_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6021,7 +7202,7 @@ unsafe fn test_lasx_xvclz_b() {
     );
     let r = i64x4::new(65538, 72621643502977024, 216173885920575744, 3302846693380);
 
-    assert_eq!(r, transmute(lasx_xvclz_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvclz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6037,7 +7218,7 @@ unsafe fn test_lasx_xvclz_h() {
         17179934721,
     );
 
-    assert_eq!(r, transmute(lasx_xvclz_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvclz_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6054,7 +7235,7 @@ unsafe fn test_lasx_xvclz_w() {
     );
     let r = i64x4::new(8589934592, 0, 3, 4294967296);
 
-    assert_eq!(r, transmute(lasx_xvclz_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvclz_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6067,7 +7248,7 @@ unsafe fn test_lasx_xvclz_d() {
     );
     let r = i64x4::new(0, 0, 0, 1);
 
-    assert_eq!(r, transmute(lasx_xvclz_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvclz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6087,7 +7268,13 @@ unsafe fn test_lasx_xvfadd_s() {
         4545553165339792015,
     );
 
-    assert_eq!(r, transmute(lasx_xvfadd_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfadd_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6111,7 +7298,13 @@ unsafe fn test_lasx_xvfadd_d() {
         4607242424158867483,
     );
 
-    assert_eq!(r, transmute(lasx_xvfadd_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6131,7 +7324,13 @@ unsafe fn test_lasx_xvfsub_s() {
         -4716328899074058446,
     );
 
-    assert_eq!(r, transmute(lasx_xvfsub_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfsub_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6155,7 +7354,13 @@ unsafe fn test_lasx_xvfsub_d() {
         4602885236169716939,
     );
 
-    assert_eq!(r, transmute(lasx_xvfsub_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfsub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6175,7 +7380,13 @@ unsafe fn test_lasx_xvfmul_s() {
         4412217640780718091,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmul_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmul_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6199,7 +7410,13 @@ unsafe fn test_lasx_xvfmul_d() {
         4604645288864682176,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmul_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmul_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6219,7 +7436,13 @@ unsafe fn test_lasx_xvfdiv_s() {
         4544549637634302505,
     );
 
-    assert_eq!(r, transmute(lasx_xvfdiv_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfdiv_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6243,7 +7466,13 @@ unsafe fn test_lasx_xvfdiv_d() {
         4608170208670026319,
     );
 
-    assert_eq!(r, transmute(lasx_xvfdiv_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfdiv_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6263,7 +7492,13 @@ unsafe fn test_lasx_xvfcvt_h_s() {
         4182498428240214789,
     );
 
-    assert_eq!(r, transmute(lasx_xvfcvt_h_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcvt_h_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6287,7 +7522,13 @@ unsafe fn test_lasx_xvfcvt_s_d() {
         4509540616169896248,
     );
 
-    assert_eq!(r, transmute(lasx_xvfcvt_s_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcvt_s_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6307,7 +7548,13 @@ unsafe fn test_lasx_xvfmin_s() {
         4470137692837414470,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmin_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmin_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6331,7 +7578,13 @@ unsafe fn test_lasx_xvfmin_d() {
         4596668800324369880,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmin_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmin_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6351,7 +7604,13 @@ unsafe fn test_lasx_xvfmina_s() {
         4561809912873379512,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmina_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmina_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6375,7 +7634,13 @@ unsafe fn test_lasx_xvfmina_d() {
         4597161583916257152,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmina_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmina_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6395,7 +7660,13 @@ unsafe fn test_lasx_xvfmax_s() {
         4574742780979947531,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmax_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmax_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6419,7 +7690,13 @@ unsafe fn test_lasx_xvfmax_d() {
         4602928137069840177,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmax_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmax_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6439,7 +7716,13 @@ unsafe fn test_lasx_xvfmaxa_s() {
         4527767521076114844,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmaxa_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmaxa_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6463,7 +7746,13 @@ unsafe fn test_lasx_xvfmaxa_d() {
         4596362093665607644,
     );
 
-    assert_eq!(r, transmute(lasx_xvfmaxa_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfmaxa_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -6474,7 +7763,7 @@ unsafe fn test_lasx_xvfclass_s() {
     );
     let r = i64x4::new(549755814016, 549755814016, 549755814016, 549755814016);
 
-    assert_eq!(r, transmute(lasx_xvfclass_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfclass_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6487,7 +7776,7 @@ unsafe fn test_lasx_xvfclass_d() {
     );
     let r = i64x4::new(128, 128, 128, 128);
 
-    assert_eq!(r, transmute(lasx_xvfclass_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfclass_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6503,7 +7792,7 @@ unsafe fn test_lasx_xvfsqrt_s() {
         4566109703441416989,
     );
 
-    assert_eq!(r, transmute(lasx_xvfsqrt_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfsqrt_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6521,7 +7810,7 @@ unsafe fn test_lasx_xvfsqrt_d() {
         4601138545884238765,
     );
 
-    assert_eq!(r, transmute(lasx_xvfsqrt_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfsqrt_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6537,7 +7826,7 @@ unsafe fn test_lasx_xvfrecip_s() {
         4585242601638738136,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrecip_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrecip_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6555,7 +7844,7 @@ unsafe fn test_lasx_xvfrecip_d() {
         4611482062367896141,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrecip_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrecip_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx,frecipe")]
@@ -6571,7 +7860,7 @@ unsafe fn test_lasx_xvfrecipe_s() {
         4728509413412007938,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrecipe_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrecipe_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx,frecipe")]
@@ -6589,7 +7878,7 @@ unsafe fn test_lasx_xvfrecipe_d() {
         4611499011256352768,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrecipe_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrecipe_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx,frecipe")]
@@ -6605,7 +7894,7 @@ unsafe fn test_lasx_xvfrsqrte_s() {
         4612427253546066334,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrsqrte_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrsqrte_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx,frecipe")]
@@ -6623,7 +7912,7 @@ unsafe fn test_lasx_xvfrsqrte_d() {
         4612346183891812352,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrsqrte_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrsqrte_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6634,7 +7923,7 @@ unsafe fn test_lasx_xvfrint_s() {
     );
     let r = i64x4::new(0, 4575657222473777152, 1065353216, 4575657222473777152);
 
-    assert_eq!(r, transmute(lasx_xvfrint_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrint_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6652,7 +7941,7 @@ unsafe fn test_lasx_xvfrint_d() {
         0,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrint_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrint_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6668,7 +7957,7 @@ unsafe fn test_lasx_xvfrsqrt_s() {
         4651901116840286347,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrsqrt_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrsqrt_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6686,7 +7975,7 @@ unsafe fn test_lasx_xvfrsqrt_d() {
         4612495411087822923,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrsqrt_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrsqrt_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6702,7 +7991,7 @@ unsafe fn test_lasx_xvflogb_s() {
         -4575657218195587072,
     );
 
-    assert_eq!(r, transmute(lasx_xvflogb_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvflogb_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6720,7 +8009,7 @@ unsafe fn test_lasx_xvflogb_d() {
         -4616189618054758400,
     );
 
-    assert_eq!(r, transmute(lasx_xvflogb_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvflogb_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6736,7 +8025,7 @@ unsafe fn test_lasx_xvfcvth_s_h() {
         4931511963987271680,
     );
 
-    assert_eq!(r, transmute(lasx_xvfcvth_s_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfcvth_s_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6752,7 +8041,7 @@ unsafe fn test_lasx_xvfcvth_d_s() {
         4605684912954015744,
     );
 
-    assert_eq!(r, transmute(lasx_xvfcvth_d_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfcvth_d_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6768,7 +8057,7 @@ unsafe fn test_lasx_xvfcvtl_s_h() {
         4719033540912152576,
     );
 
-    assert_eq!(r, transmute(lasx_xvfcvtl_s_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfcvtl_s_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6784,7 +8073,7 @@ unsafe fn test_lasx_xvfcvtl_d_s() {
         4598772185639682048,
     );
 
-    assert_eq!(r, transmute(lasx_xvfcvtl_d_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfcvtl_d_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6795,7 +8084,7 @@ unsafe fn test_lasx_xvftint_w_s() {
     );
     let r = i64x4::new(0, 0, 1, 0);
 
-    assert_eq!(r, transmute(lasx_xvftint_w_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftint_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6808,7 +8097,7 @@ unsafe fn test_lasx_xvftint_l_d() {
     );
     let r = i64x4::new(0, 0, 1, 1);
 
-    assert_eq!(r, transmute(lasx_xvftint_l_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftint_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6819,7 +8108,7 @@ unsafe fn test_lasx_xvftint_wu_s() {
     );
     let r = i64x4::new(1, 4294967297, 1, 4294967297);
 
-    assert_eq!(r, transmute(lasx_xvftint_wu_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftint_wu_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6832,7 +8121,7 @@ unsafe fn test_lasx_xvftint_lu_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftint_lu_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftint_lu_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6843,7 +8132,7 @@ unsafe fn test_lasx_xvftintrz_w_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrz_w_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrz_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6856,7 +8145,7 @@ unsafe fn test_lasx_xvftintrz_l_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrz_l_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrz_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6867,7 +8156,7 @@ unsafe fn test_lasx_xvftintrz_wu_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrz_wu_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrz_wu_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6880,7 +8169,7 @@ unsafe fn test_lasx_xvftintrz_lu_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrz_lu_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrz_lu_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6902,7 +8191,7 @@ unsafe fn test_lasx_xvffint_s_w() {
         5669248528000103797,
     );
 
-    assert_eq!(r, transmute(lasx_xvffint_s_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvffint_s_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6920,7 +8209,7 @@ unsafe fn test_lasx_xvffint_d_l() {
         -4362160337941248997,
     );
 
-    assert_eq!(r, transmute(lasx_xvffint_d_l(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvffint_d_l(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6936,7 +8225,7 @@ unsafe fn test_lasx_xvffint_s_wu() {
         5723492283472660471,
     );
 
-    assert_eq!(r, transmute(lasx_xvffint_s_wu(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvffint_s_wu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6954,7 +8243,7 @@ unsafe fn test_lasx_xvffint_d_lu() {
         4892265567869239358,
     );
 
-    assert_eq!(r, transmute(lasx_xvffint_d_lu(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvffint_d_lu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6970,7 +8259,7 @@ unsafe fn test_lasx_xvreplve_b() {
         -5280832617179597130,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplve_b(transmute(a), 5)));
+    assert_eq!(r, transmute(lasx_xvreplve_b(black_box(transmute(a)), 5)));
 }
 
 #[simd_test(enable = "lasx")]
@@ -6986,7 +8275,7 @@ unsafe fn test_lasx_xvreplve_h() {
         -8907411554322709406,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplve_h(transmute(a), -5)));
+    assert_eq!(r, transmute(lasx_xvreplve_h(black_box(transmute(a)), -5)));
 }
 
 #[simd_test(enable = "lasx")]
@@ -7008,7 +8297,7 @@ unsafe fn test_lasx_xvreplve_w() {
         -2569718735257041300,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplve_w(transmute(a), 1)));
+    assert_eq!(r, transmute(lasx_xvreplve_w(black_box(transmute(a)), 1)));
 }
 
 #[simd_test(enable = "lasx")]
@@ -7026,7 +8315,7 @@ unsafe fn test_lasx_xvreplve_d() {
         -7945890434069746992,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplve_d(transmute(a), -6)));
+    assert_eq!(r, transmute(lasx_xvreplve_d(black_box(transmute(a)), -6)));
 }
 
 #[simd_test(enable = "lasx")]
@@ -7060,7 +8349,10 @@ unsafe fn test_lasx_xvpermi_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvpermi_w::<217>(transmute(a), transmute(b)))
+        transmute(lasx_xvpermi_w::<217>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -7081,7 +8373,13 @@ unsafe fn test_lasx_xvandn_v() {
         5350223724150917,
     );
 
-    assert_eq!(r, transmute(lasx_xvandn_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvandn_v(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7097,7 +8395,7 @@ unsafe fn test_lasx_xvneg_b() {
         -5388239603749330053,
     );
 
-    assert_eq!(r, transmute(lasx_xvneg_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvneg_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -7113,7 +8411,7 @@ unsafe fn test_lasx_xvneg_h() {
         5510114370614593991,
     );
 
-    assert_eq!(r, transmute(lasx_xvneg_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvneg_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -7135,7 +8433,7 @@ unsafe fn test_lasx_xvneg_w() {
         -6240794077010148150,
     );
 
-    assert_eq!(r, transmute(lasx_xvneg_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvneg_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -7153,7 +8451,7 @@ unsafe fn test_lasx_xvneg_d() {
         -906750919774206543,
     );
 
-    assert_eq!(r, transmute(lasx_xvneg_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvneg_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -7173,7 +8471,13 @@ unsafe fn test_lasx_xvmuh_b() {
         131228860074087168,
     );
 
-    assert_eq!(r, transmute(lasx_xvmuh_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmuh_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7193,7 +8497,13 @@ unsafe fn test_lasx_xvmuh_h() {
         -14890625691814142,
     );
 
-    assert_eq!(r, transmute(lasx_xvmuh_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmuh_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7225,7 +8535,13 @@ unsafe fn test_lasx_xvmuh_w() {
         15710306989437773,
     );
 
-    assert_eq!(r, transmute(lasx_xvmuh_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmuh_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7249,7 +8565,13 @@ unsafe fn test_lasx_xvmuh_d() {
         273863514955286020,
     );
 
-    assert_eq!(r, transmute(lasx_xvmuh_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmuh_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7269,7 +8591,13 @@ unsafe fn test_lasx_xvmuh_bu() {
         442221464076014683,
     );
 
-    assert_eq!(r, transmute(lasx_xvmuh_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmuh_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7289,7 +8617,13 @@ unsafe fn test_lasx_xvmuh_hu() {
         108786773599653576,
     );
 
-    assert_eq!(r, transmute(lasx_xvmuh_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmuh_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7309,7 +8643,13 @@ unsafe fn test_lasx_xvmuh_wu() {
         3278999485098399815,
     );
 
-    assert_eq!(r, transmute(lasx_xvmuh_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmuh_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7333,7 +8673,13 @@ unsafe fn test_lasx_xvmuh_du() {
         1569823798457591419,
     );
 
-    assert_eq!(r, transmute(lasx_xvmuh_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmuh_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7349,7 +8695,10 @@ unsafe fn test_lasx_xvsllwil_h_b() {
         283732621893107440,
     );
 
-    assert_eq!(r, transmute(lasx_xvsllwil_h_b::<4>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsllwil_h_b::<4>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7365,7 +8714,10 @@ unsafe fn test_lasx_xvsllwil_w_h() {
         -19087521822982144,
     );
 
-    assert_eq!(r, transmute(lasx_xvsllwil_w_h::<11>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsllwil_w_h::<11>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7387,7 +8739,10 @@ unsafe fn test_lasx_xvsllwil_d_w() {
         -21769464725504,
     );
 
-    assert_eq!(r, transmute(lasx_xvsllwil_d_w::<14>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsllwil_d_w::<14>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7403,7 +8758,10 @@ unsafe fn test_lasx_xvsllwil_hu_bu() {
         180156217344131904,
     );
 
-    assert_eq!(r, transmute(lasx_xvsllwil_hu_bu::<5>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsllwil_hu_bu::<5>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7419,7 +8777,10 @@ unsafe fn test_lasx_xvsllwil_wu_hu() {
         3493526673607606272,
     );
 
-    assert_eq!(r, transmute(lasx_xvsllwil_wu_hu::<14>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsllwil_wu_hu::<14>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7435,7 +8796,10 @@ unsafe fn test_lasx_xvsllwil_du_wu() {
         147522340803051520,
     );
 
-    assert_eq!(r, transmute(lasx_xvsllwil_du_wu::<28>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsllwil_du_wu::<28>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7450,7 +8814,13 @@ unsafe fn test_lasx_xvsran_b_h() {
     );
     let r = i64x4::new(-5107013816536599300, 0, -576745268203292981, 0);
 
-    assert_eq!(r, transmute(lasx_xvsran_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsran_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7477,7 +8847,13 @@ unsafe fn test_lasx_xvsran_h_w() {
     );
     let r = i64x4::new(-7492863874014043255, 0, -5145548381371170633, 0);
 
-    assert_eq!(r, transmute(lasx_xvsran_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsran_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7496,7 +8872,13 @@ unsafe fn test_lasx_xvsran_w_d() {
     );
     let r = i64x4::new(58054624080, 0, 1863787881113495402, 0);
 
-    assert_eq!(r, transmute(lasx_xvsran_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsran_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7511,7 +8893,13 @@ unsafe fn test_lasx_xvssran_b_h() {
     );
     let r = i64x4::new(179865806513864501, 0, -9222296776751415043, 0);
 
-    assert_eq!(r, transmute(lasx_xvssran_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssran_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7538,7 +8926,13 @@ unsafe fn test_lasx_xvssran_h_w() {
     );
     let r = i64x4::new(281015415144451, 0, 281472829161978, 0);
 
-    assert_eq!(r, transmute(lasx_xvssran_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssran_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7557,7 +8951,13 @@ unsafe fn test_lasx_xvssran_w_d() {
     );
     let r = i64x4::new(-109363692856335914, 0, -713658208354305, 0);
 
-    assert_eq!(r, transmute(lasx_xvssran_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssran_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7572,7 +8972,13 @@ unsafe fn test_lasx_xvssran_bu_h() {
     );
     let r = i64x4::new(144116287595479055, 0, 71776131929997312, 0);
 
-    assert_eq!(r, transmute(lasx_xvssran_bu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssran_bu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7587,7 +8993,13 @@ unsafe fn test_lasx_xvssran_hu_w() {
     );
     let r = i64x4::new(254837589540863, 0, 281470681765343, 0);
 
-    assert_eq!(r, transmute(lasx_xvssran_hu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssran_hu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7606,7 +9018,13 @@ unsafe fn test_lasx_xvssran_wu_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvssran_wu_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssran_wu_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7621,7 +9039,13 @@ unsafe fn test_lasx_xvsrarn_b_h() {
     );
     let r = i64x4::new(-7204067930850651184, 0, -5909457163402939758, 0);
 
-    assert_eq!(r, transmute(lasx_xvsrarn_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrarn_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7648,7 +9072,13 @@ unsafe fn test_lasx_xvsrarn_h_w() {
     );
     let r = i64x4::new(4021320339558432771, 0, -5499970420202995712, 0);
 
-    assert_eq!(r, transmute(lasx_xvsrarn_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrarn_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7667,7 +9097,13 @@ unsafe fn test_lasx_xvsrarn_w_d() {
     );
     let r = i64x4::new(-69752906595470, 0, -7240468610764767136, 0);
 
-    assert_eq!(r, transmute(lasx_xvsrarn_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrarn_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7682,7 +9118,13 @@ unsafe fn test_lasx_xvssrarn_b_h() {
     );
     let r = i64x4::new(142413695971000447, 0, -141179869986524, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrarn_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrarn_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7709,7 +9151,13 @@ unsafe fn test_lasx_xvssrarn_h_w() {
     );
     let r = i64x4::new(-10414028872220672, 0, 9223104806137135104, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrarn_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrarn_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7728,7 +9176,13 @@ unsafe fn test_lasx_xvssrarn_w_d() {
     );
     let r = i64x4::new(2147483648, 0, 326062786704572415, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrarn_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrarn_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7743,7 +9197,13 @@ unsafe fn test_lasx_xvssrarn_bu_h() {
     );
     let r = i64x4::new(4286578689, 0, 8163878114427135, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrarn_bu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrarn_bu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7758,7 +9218,13 @@ unsafe fn test_lasx_xvssrarn_hu_w() {
     );
     let r = i64x4::new(-281474976710656, 0, 2199023255552, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrarn_hu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrarn_hu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7777,7 +9243,13 @@ unsafe fn test_lasx_xvssrarn_wu_d() {
     );
     let r = i64x4::new(-3539373509, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrarn_wu_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrarn_wu_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7792,7 +9264,13 @@ unsafe fn test_lasx_xvsrln_b_h() {
     );
     let r = i64x4::new(776589499955319005, 0, 285495199351976, 0);
 
-    assert_eq!(r, transmute(lasx_xvsrln_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrln_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7819,7 +9297,13 @@ unsafe fn test_lasx_xvsrln_h_w() {
     );
     let r = i64x4::new(-6090306652816735409, 0, -1175228277373752196, 0);
 
-    assert_eq!(r, transmute(lasx_xvsrln_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrln_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7838,7 +9322,13 @@ unsafe fn test_lasx_xvsrln_w_d() {
     );
     let r = i64x4::new(262796920316080678, 0, 1866060245111069, 0);
 
-    assert_eq!(r, transmute(lasx_xvsrln_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrln_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7853,7 +9343,13 @@ unsafe fn test_lasx_xvssrln_bu_h() {
     );
     let r = i64x4::new(-996419305685, 0, -71773920038018305, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrln_bu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrln_bu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7867,7 +9363,13 @@ unsafe fn test_lasx_xvssrln_hu_w() {
     );
     let r = i64x4::new(2319476961249468, 0, 208855326080470286, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrln_hu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrln_hu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7886,7 +9388,13 @@ unsafe fn test_lasx_xvssrln_wu_d() {
     );
     let r = i64x4::new(-1, 0, -1, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrln_wu_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrln_wu_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7901,7 +9409,13 @@ unsafe fn test_lasx_xvsrlrn_b_h() {
     );
     let r = i64x4::new(-6693460433276960310, 0, -6122543899663285619, 0);
 
-    assert_eq!(r, transmute(lasx_xvsrlrn_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrlrn_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7928,7 +9442,13 @@ unsafe fn test_lasx_xvsrlrn_h_w() {
     );
     let r = i64x4::new(390723813551243448, 0, 6015496732136052023, 0);
 
-    assert_eq!(r, transmute(lasx_xvsrlrn_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrlrn_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7947,7 +9467,13 @@ unsafe fn test_lasx_xvsrlrn_w_d() {
     );
     let r = i64x4::new(4295025675, 0, -3281590872273059757, 0);
 
-    assert_eq!(r, transmute(lasx_xvsrlrn_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsrlrn_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7962,7 +9488,13 @@ unsafe fn test_lasx_xvssrlrn_bu_h() {
     );
     let r = i64x4::new(-258385232527491, 0, 4034951496335359804, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrlrn_bu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrlrn_bu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7977,7 +9509,13 @@ unsafe fn test_lasx_xvssrlrn_hu_w() {
     );
     let r = i64x4::new(-3854303052, 0, -4029743103, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrlrn_hu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrlrn_hu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -7996,9 +9534,15 @@ unsafe fn test_lasx_xvssrlrn_wu_d() {
     );
     let r = i64x4::new(-3223981555, 0, 35952127557763071, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrlrn_wu_d(transmute(a), transmute(b))));
-}
-
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrlrn_wu_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
+
 #[simd_test(enable = "lasx")]
 unsafe fn test_lasx_xvfrstpi_b() {
     let a = i8x32::new(
@@ -8018,7 +9562,10 @@ unsafe fn test_lasx_xvfrstpi_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfrstpi_b::<24>(transmute(a), transmute(b)))
+        transmute(lasx_xvfrstpi_b::<24>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -8041,7 +9588,10 @@ unsafe fn test_lasx_xvfrstpi_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfrstpi_h::<10>(transmute(a), transmute(b)))
+        transmute(lasx_xvfrstpi_h::<10>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -8068,7 +9618,11 @@ unsafe fn test_lasx_xvfrstp_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfrstp_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfrstp_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8095,7 +9649,11 @@ unsafe fn test_lasx_xvfrstp_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfrstp_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfrstp_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8122,7 +9680,10 @@ unsafe fn test_lasx_xvshuf4i_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvshuf4i_d::<115>(transmute(a), transmute(b)))
+        transmute(lasx_xvshuf4i_d::<115>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -8139,7 +9700,7 @@ unsafe fn test_lasx_xvbsrl_v() {
         8842437361645499941,
     );
 
-    assert_eq!(r, transmute(lasx_xvbsrl_v::<0>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvbsrl_v::<0>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8155,7 +9716,7 @@ unsafe fn test_lasx_xvbsll_v() {
         5030360181484275352,
     );
 
-    assert_eq!(r, transmute(lasx_xvbsll_v::<0>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvbsll_v::<0>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8177,7 +9738,10 @@ unsafe fn test_lasx_xvextrins_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvextrins_b::<69>(transmute(a), transmute(b)))
+        transmute(lasx_xvextrins_b::<69>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -8200,7 +9764,10 @@ unsafe fn test_lasx_xvextrins_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvextrins_h::<190>(transmute(a), transmute(b)))
+        transmute(lasx_xvextrins_h::<190>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -8235,7 +9802,10 @@ unsafe fn test_lasx_xvextrins_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvextrins_w::<133>(transmute(a), transmute(b)))
+        transmute(lasx_xvextrins_w::<133>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -8262,7 +9832,10 @@ unsafe fn test_lasx_xvextrins_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvextrins_d::<210>(transmute(a), transmute(b)))
+        transmute(lasx_xvextrins_d::<210>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -8274,7 +9847,7 @@ unsafe fn test_lasx_xvmskltz_b() {
     );
     let r = i64x4::new(5684, 0, 36244, 0);
 
-    assert_eq!(r, transmute(lasx_xvmskltz_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmskltz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8285,7 +9858,7 @@ unsafe fn test_lasx_xvmskltz_h() {
     );
     let r = i64x4::new(225, 0, 96, 0);
 
-    assert_eq!(r, transmute(lasx_xvmskltz_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmskltz_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8302,7 +9875,7 @@ unsafe fn test_lasx_xvmskltz_w() {
     );
     let r = i64x4::new(13, 0, 10, 0);
 
-    assert_eq!(r, transmute(lasx_xvmskltz_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmskltz_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8315,7 +9888,7 @@ unsafe fn test_lasx_xvmskltz_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvmskltz_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmskltz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8335,7 +9908,13 @@ unsafe fn test_lasx_xvsigncov_b() {
         -6215157037026399088,
     );
 
-    assert_eq!(r, transmute(lasx_xvsigncov_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsigncov_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -8355,7 +9934,13 @@ unsafe fn test_lasx_xvsigncov_h() {
         2866604565619890601,
     );
 
-    assert_eq!(r, transmute(lasx_xvsigncov_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsigncov_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -8387,7 +9972,13 @@ unsafe fn test_lasx_xvsigncov_w() {
         -180354238538399451,
     );
 
-    assert_eq!(r, transmute(lasx_xvsigncov_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsigncov_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -8411,7 +10002,13 @@ unsafe fn test_lasx_xvsigncov_d() {
         293290471183495768,
     );
 
-    assert_eq!(r, transmute(lasx_xvsigncov_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsigncov_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -8437,7 +10034,11 @@ unsafe fn test_lasx_xvfmadd_s() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfmadd_s(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfmadd_s(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8470,7 +10071,11 @@ unsafe fn test_lasx_xvfmadd_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfmadd_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfmadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8497,7 +10102,11 @@ unsafe fn test_lasx_xvfmsub_s() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfmsub_s(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfmsub_s(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8530,7 +10139,11 @@ unsafe fn test_lasx_xvfmsub_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfmsub_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfmsub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8557,7 +10170,11 @@ unsafe fn test_lasx_xvfnmadd_s() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfnmadd_s(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfnmadd_s(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8590,7 +10207,11 @@ unsafe fn test_lasx_xvfnmadd_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfnmadd_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfnmadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8617,7 +10238,11 @@ unsafe fn test_lasx_xvfnmsub_s() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfnmsub_s(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfnmsub_s(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8650,7 +10275,11 @@ unsafe fn test_lasx_xvfnmsub_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvfnmsub_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvfnmsub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -8662,7 +10291,7 @@ unsafe fn test_lasx_xvftintrne_w_s() {
     );
     let r = i64x4::new(1, 0, 1, 4294967297);
 
-    assert_eq!(r, transmute(lasx_xvftintrne_w_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrne_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8675,7 +10304,7 @@ unsafe fn test_lasx_xvftintrne_l_d() {
     );
     let r = i64x4::new(0, 1, 1, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrne_l_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrne_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8686,7 +10315,7 @@ unsafe fn test_lasx_xvftintrp_w_s() {
     );
     let r = i64x4::new(4294967297, 4294967297, 4294967297, 4294967297);
 
-    assert_eq!(r, transmute(lasx_xvftintrp_w_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrp_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8699,7 +10328,7 @@ unsafe fn test_lasx_xvftintrp_l_d() {
     );
     let r = i64x4::new(1, 1, 1, 1);
 
-    assert_eq!(r, transmute(lasx_xvftintrp_l_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrp_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8710,7 +10339,7 @@ unsafe fn test_lasx_xvftintrm_w_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrm_w_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrm_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8723,7 +10352,7 @@ unsafe fn test_lasx_xvftintrm_l_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrm_l_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrm_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8742,7 +10371,13 @@ unsafe fn test_lasx_xvftint_w_d() {
     );
     let r = i64x4::new(0, 0, 4294967297, 4294967296);
 
-    assert_eq!(r, transmute(lasx_xvftint_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvftint_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -8766,7 +10401,13 @@ unsafe fn test_lasx_xvffint_s_l() {
         -2383622820954443903,
     );
 
-    assert_eq!(r, transmute(lasx_xvffint_s_l(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvffint_s_l(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -8785,7 +10426,13 @@ unsafe fn test_lasx_xvftintrz_w_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrz_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvftintrz_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -8804,7 +10451,13 @@ unsafe fn test_lasx_xvftintrp_w_d() {
     );
     let r = i64x4::new(4294967297, 4294967297, 4294967297, 4294967297);
 
-    assert_eq!(r, transmute(lasx_xvftintrp_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvftintrp_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -8823,7 +10476,13 @@ unsafe fn test_lasx_xvftintrm_w_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrm_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvftintrm_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -8844,7 +10503,10 @@ unsafe fn test_lasx_xvftintrne_w_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvftintrne_w_d(transmute(a), transmute(b)))
+        transmute(lasx_xvftintrne_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -8856,7 +10518,7 @@ unsafe fn test_lasx_xvftinth_l_s() {
     );
     let r = i64x4::new(0, 1, 0, 1);
 
-    assert_eq!(r, transmute(lasx_xvftinth_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftinth_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8867,7 +10529,7 @@ unsafe fn test_lasx_xvftintl_l_s() {
     );
     let r = i64x4::new(0, 0, 0, 1);
 
-    assert_eq!(r, transmute(lasx_xvftintl_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintl_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8889,7 +10551,7 @@ unsafe fn test_lasx_xvffinth_d_w() {
         -4485741486683455488,
     );
 
-    assert_eq!(r, transmute(lasx_xvffinth_d_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvffinth_d_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8911,7 +10573,7 @@ unsafe fn test_lasx_xvffintl_d_w() {
         -4489746915386195968,
     );
 
-    assert_eq!(r, transmute(lasx_xvffintl_d_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvffintl_d_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8922,7 +10584,7 @@ unsafe fn test_lasx_xvftintrzh_l_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrzh_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrzh_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8933,7 +10595,7 @@ unsafe fn test_lasx_xvftintrzl_l_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrzl_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrzl_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8944,7 +10606,7 @@ unsafe fn test_lasx_xvftintrph_l_s() {
     );
     let r = i64x4::new(1, 1, 1, 1);
 
-    assert_eq!(r, transmute(lasx_xvftintrph_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrph_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8955,7 +10617,7 @@ unsafe fn test_lasx_xvftintrpl_l_s() {
     );
     let r = i64x4::new(1, 1, 1, 1);
 
-    assert_eq!(r, transmute(lasx_xvftintrpl_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrpl_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8966,7 +10628,7 @@ unsafe fn test_lasx_xvftintrmh_l_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrmh_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrmh_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8977,7 +10639,7 @@ unsafe fn test_lasx_xvftintrml_l_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrml_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrml_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8988,7 +10650,7 @@ unsafe fn test_lasx_xvftintrneh_l_s() {
     );
     let r = i64x4::new(1, 0, 0, 1);
 
-    assert_eq!(r, transmute(lasx_xvftintrneh_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrneh_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -8999,7 +10661,7 @@ unsafe fn test_lasx_xvftintrnel_l_s() {
     );
     let r = i64x4::new(0, 1, 1, 0);
 
-    assert_eq!(r, transmute(lasx_xvftintrnel_l_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvftintrnel_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9015,7 +10677,7 @@ unsafe fn test_lasx_xvfrintrne_s() {
         1065353216,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrintrne_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrintrne_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9028,7 +10690,7 @@ unsafe fn test_lasx_xvfrintrne_d() {
     );
     let r = i64x4::new(0, 0, 4607182418800017408, 0);
 
-    assert_eq!(r, transmute(lasx_xvfrintrne_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrintrne_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9039,7 +10701,7 @@ unsafe fn test_lasx_xvfrintrz_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfrintrz_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrintrz_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9052,7 +10714,7 @@ unsafe fn test_lasx_xvfrintrz_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfrintrz_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrintrz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9068,7 +10730,7 @@ unsafe fn test_lasx_xvfrintrp_s() {
         4575657222473777152,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrintrp_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrintrp_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9086,7 +10748,7 @@ unsafe fn test_lasx_xvfrintrp_d() {
         4607182418800017408,
     );
 
-    assert_eq!(r, transmute(lasx_xvfrintrp_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrintrp_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9097,7 +10759,7 @@ unsafe fn test_lasx_xvfrintrm_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfrintrm_s(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrintrm_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9110,7 +10772,7 @@ unsafe fn test_lasx_xvfrintrm_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfrintrm_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvfrintrm_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9146,7 +10808,7 @@ unsafe fn test_lasx_xvst() {
         -1239470096778490055,
     );
 
-    lasx_xvst::<0>(transmute(a), o.as_mut_ptr());
+    lasx_xvst::<0>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -9167,7 +10829,7 @@ unsafe fn test_lasx_xvstelm_b() {
         -1243134694581333281,
     );
 
-    lasx_xvstelm_b::<0, 9>(transmute(a), o.as_mut_ptr());
+    lasx_xvstelm_b::<0, 9>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -9188,7 +10850,7 @@ unsafe fn test_lasx_xvstelm_h() {
         4649151313692342074,
     );
 
-    lasx_xvstelm_h::<0, 6>(transmute(a), o.as_mut_ptr());
+    lasx_xvstelm_h::<0, 6>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -9215,7 +10877,7 @@ unsafe fn test_lasx_xvstelm_w() {
         5471549130760739388,
     );
 
-    lasx_xvstelm_w::<0, 3>(transmute(a), o.as_mut_ptr());
+    lasx_xvstelm_w::<0, 3>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -9238,7 +10900,7 @@ unsafe fn test_lasx_xvstelm_d() {
         -4006899083251152793,
     );
 
-    lasx_xvstelm_d::<0, 0>(transmute(a), o.as_mut_ptr());
+    lasx_xvstelm_d::<0, 0>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -9273,7 +10935,10 @@ unsafe fn test_lasx_xvinsve0_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvinsve0_w::<5>(transmute(a), transmute(b)))
+        transmute(lasx_xvinsve0_w::<5>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -9300,7 +10965,10 @@ unsafe fn test_lasx_xvinsve0_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvinsve0_d::<3>(transmute(a), transmute(b)))
+        transmute(lasx_xvinsve0_d::<3>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -9318,7 +10986,7 @@ unsafe fn test_lasx_xvpickve_w() {
     );
     let r = i64x4::new(1138467779, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvpickve_w::<2>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvpickve_w::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9331,7 +10999,7 @@ unsafe fn test_lasx_xvpickve_d() {
     );
     let r = i64x4::new(8402618222187512066, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvpickve_d::<0>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvpickve_d::<0>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9346,7 +11014,13 @@ unsafe fn test_lasx_xvssrlrn_b_h() {
     );
     let r = i64x4::new(3463408299017240959, 0, 35748968851799935, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrlrn_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrlrn_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -9373,7 +11047,13 @@ unsafe fn test_lasx_xvssrlrn_h_w() {
     );
     let r = i64x4::new(422210317549567, 0, 11259106657337343, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrlrn_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrlrn_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -9392,7 +11072,13 @@ unsafe fn test_lasx_xvssrlrn_w_d() {
     );
     let r = i64x4::new(33428474336875, 0, 9223372034707292159, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrlrn_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrlrn_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -9407,7 +11093,13 @@ unsafe fn test_lasx_xvssrln_b_h() {
     );
     let r = i64x4::new(657383790217428863, 0, 941881790371430152, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrln_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrln_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -9434,7 +11126,13 @@ unsafe fn test_lasx_xvssrln_h_w() {
     );
     let r = i64x4::new(9223103287866884105, 0, 1696871892814295669, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrln_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrln_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -9453,7 +11151,13 @@ unsafe fn test_lasx_xvssrln_w_d() {
     );
     let r = i64x4::new(3937140138060021759, 0, 9223372034707292159, 0);
 
-    assert_eq!(r, transmute(lasx_xvssrln_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvssrln_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -9473,7 +11177,13 @@ unsafe fn test_lasx_xvorn_v() {
         -126121887133672977,
     );
 
-    assert_eq!(r, transmute(lasx_xvorn_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvorn_v(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -9521,7 +11231,7 @@ unsafe fn test_lasx_xvstx() {
         -4162173646616256791,
     );
 
-    lasx_xvstx(transmute(a), o.as_mut_ptr(), 0);
+    lasx_xvstx(black_box(transmute(a)), o.as_mut_ptr(), 0);
     assert_eq!(r, transmute(o));
 }
 
@@ -9535,7 +11245,7 @@ unsafe fn test_lasx_xvextl_qu_du() {
     );
     let r = i64x4::new(-5083351180651141737, 0, 4121325568380818738, 0);
 
-    assert_eq!(r, transmute(lasx_xvextl_qu_du(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvextl_qu_du(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9559,7 +11269,7 @@ unsafe fn test_lasx_xvinsgr2vr_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvinsgr2vr_w::<4>(transmute(a), -596457645))
+        transmute(lasx_xvinsgr2vr_w::<4>(black_box(transmute(a)), -596457645))
     );
 }
 
@@ -9580,7 +11290,7 @@ unsafe fn test_lasx_xvinsgr2vr_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvinsgr2vr_d::<3>(transmute(a), -1262509914))
+        transmute(lasx_xvinsgr2vr_d::<3>(black_box(transmute(a)), -1262509914))
     );
 }
 
@@ -9597,7 +11307,7 @@ unsafe fn test_lasx_xvreplve0_b() {
         3472328296227680304,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplve0_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvreplve0_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9613,7 +11323,7 @@ unsafe fn test_lasx_xvreplve0_h() {
         115969459958317468,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplve0_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvreplve0_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9635,7 +11345,7 @@ unsafe fn test_lasx_xvreplve0_w() {
         5341799334363128369,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplve0_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvreplve0_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9653,7 +11363,7 @@ unsafe fn test_lasx_xvreplve0_d() {
         -7669512117913941619,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplve0_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvreplve0_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9669,7 +11379,7 @@ unsafe fn test_lasx_xvreplve0_q() {
         -7451765666000961269,
     );
 
-    assert_eq!(r, transmute(lasx_xvreplve0_q(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvreplve0_q(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9685,7 +11395,7 @@ unsafe fn test_lasx_vext2xv_h_b() {
         24207148650070059,
     );
 
-    assert_eq!(r, transmute(lasx_vext2xv_h_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_h_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9701,7 +11411,7 @@ unsafe fn test_lasx_vext2xv_w_h() {
         -34359738358622,
     );
 
-    assert_eq!(r, transmute(lasx_vext2xv_w_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_w_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9718,7 +11428,7 @@ unsafe fn test_lasx_vext2xv_d_w() {
     );
     let r = i64x4::new(-585251458, -2113345963, -1846838006, -474453663);
 
-    assert_eq!(r, transmute(lasx_vext2xv_d_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_d_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9729,7 +11439,7 @@ unsafe fn test_lasx_vext2xv_w_b() {
     );
     let r = i64x4::new(-240518168540, -528280977282, 30064770965, -489626271740);
 
-    assert_eq!(r, transmute(lasx_vext2xv_w_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_w_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9740,7 +11450,7 @@ unsafe fn test_lasx_vext2xv_d_h() {
     );
     let r = i64x4::new(28568, -25911, 12053, -2728);
 
-    assert_eq!(r, transmute(lasx_vext2xv_d_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_d_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9751,7 +11461,7 @@ unsafe fn test_lasx_vext2xv_d_b() {
     );
     let r = i64x4::new(18, 112, -36, -67);
 
-    assert_eq!(r, transmute(lasx_vext2xv_d_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_d_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9767,7 +11477,7 @@ unsafe fn test_lasx_vext2xv_hu_bu() {
         16888898041348298,
     );
 
-    assert_eq!(r, transmute(lasx_vext2xv_hu_bu(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_hu_bu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9783,7 +11493,7 @@ unsafe fn test_lasx_vext2xv_wu_hu() {
         225172250484459,
     );
 
-    assert_eq!(r, transmute(lasx_vext2xv_wu_hu(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_wu_hu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9800,7 +11510,7 @@ unsafe fn test_lasx_vext2xv_du_wu() {
     );
     let r = i64x4::new(4027501046, 3358638690, 2495633600, 1035808674);
 
-    assert_eq!(r, transmute(lasx_vext2xv_du_wu(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_du_wu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9811,7 +11521,7 @@ unsafe fn test_lasx_vext2xv_wu_bu() {
     );
     let r = i64x4::new(987842478134, 481036337184, 266287972487, 979252543649);
 
-    assert_eq!(r, transmute(lasx_vext2xv_wu_bu(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_wu_bu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9822,7 +11532,7 @@ unsafe fn test_lasx_vext2xv_du_hu() {
     );
     let r = i64x4::new(61301, 41410, 35355, 19598);
 
-    assert_eq!(r, transmute(lasx_vext2xv_du_hu(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_du_hu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9833,7 +11543,7 @@ unsafe fn test_lasx_vext2xv_du_bu() {
     );
     let r = i64x4::new(69, 25, 36, 204);
 
-    assert_eq!(r, transmute(lasx_vext2xv_du_bu(transmute(a))));
+    assert_eq!(r, transmute(lasx_vext2xv_du_bu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9855,7 +11565,10 @@ unsafe fn test_lasx_xvpermi_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvpermi_q::<49>(transmute(a), transmute(b)))
+        transmute(lasx_xvpermi_q::<49>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -9874,7 +11587,7 @@ unsafe fn test_lasx_xvpermi_d() {
         1609032298240495217,
     );
 
-    assert_eq!(r, transmute(lasx_xvpermi_d::<137>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvpermi_d::<137>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -9900,7 +11613,13 @@ unsafe fn test_lasx_xvperm_w() {
         -3042141963630030871,
     );
 
-    assert_eq!(r, transmute(lasx_xvperm_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvperm_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -9981,7 +11700,10 @@ unsafe fn test_lasx_xvpickve2gr_w() {
     );
     let r: i32 = 1367768596;
 
-    assert_eq!(r, transmute(lasx_xvpickve2gr_w::<4>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickve2gr_w::<4>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -9998,7 +11720,10 @@ unsafe fn test_lasx_xvpickve2gr_wu() {
     );
     let r: u32 = 3194994707;
 
-    assert_eq!(r, transmute(lasx_xvpickve2gr_wu::<7>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickve2gr_wu::<7>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10011,7 +11736,10 @@ unsafe fn test_lasx_xvpickve2gr_d() {
     );
     let r: i64 = 6739870851682505277;
 
-    assert_eq!(r, transmute(lasx_xvpickve2gr_d::<2>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickve2gr_d::<2>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10024,7 +11752,10 @@ unsafe fn test_lasx_xvpickve2gr_du() {
     );
     let r: u64 = 9525833175373449635;
 
-    assert_eq!(r, transmute(lasx_xvpickve2gr_du::<3>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickve2gr_du::<3>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10043,7 +11774,13 @@ unsafe fn test_lasx_xvaddwev_q_d() {
     );
     let r = i64x4::new(-7472750192138786681, -1, -7758725841623301722, -1);
 
-    assert_eq!(r, transmute(lasx_xvaddwev_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwev_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10070,7 +11807,13 @@ unsafe fn test_lasx_xvaddwev_d_w() {
     );
     let r = i64x4::new(614980351, -1946929141, -3309402607, -619077207);
 
-    assert_eq!(r, transmute(lasx_xvaddwev_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwev_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10090,7 +11833,13 @@ unsafe fn test_lasx_xvaddwev_w_h() {
         -232787227420502,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddwev_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwev_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10110,7 +11859,13 @@ unsafe fn test_lasx_xvaddwev_h_b() {
         -10414449598922739,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddwev_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwev_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10129,7 +11884,13 @@ unsafe fn test_lasx_xvaddwev_q_du() {
     );
     let r = i64x4::new(4866121314102936184, 1, 898239984703082844, 1);
 
-    assert_eq!(r, transmute(lasx_xvaddwev_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwev_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10144,7 +11905,13 @@ unsafe fn test_lasx_xvaddwev_d_wu() {
     );
     let r = i64x4::new(4001409528, 3398767892, 6021892971, 4349349069);
 
-    assert_eq!(r, transmute(lasx_xvaddwev_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwev_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10164,7 +11931,13 @@ unsafe fn test_lasx_xvaddwev_w_hu() {
         376479653317006,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddwev_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwev_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10184,7 +11957,13 @@ unsafe fn test_lasx_xvaddwev_h_bu() {
         68962872563859917,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddwev_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwev_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10203,7 +11982,13 @@ unsafe fn test_lasx_xvsubwev_q_d() {
     );
     let r = i64x4::new(8183582659207736591, -1, 5496584216395980167, -1);
 
-    assert_eq!(r, transmute(lasx_xvsubwev_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwev_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10230,7 +12015,13 @@ unsafe fn test_lasx_xvsubwev_d_w() {
     );
     let r = i64x4::new(-1945765730, 1700549847, -1218066002, -827282692);
 
-    assert_eq!(r, transmute(lasx_xvsubwev_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwev_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10250,7 +12041,13 @@ unsafe fn test_lasx_xvsubwev_w_h() {
         217514323726817,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubwev_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwev_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10270,7 +12067,13 @@ unsafe fn test_lasx_xvsubwev_h_b() {
         -5910188531122352,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubwev_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwev_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10289,7 +12092,13 @@ unsafe fn test_lasx_xvsubwev_q_du() {
     );
     let r = i64x4::new(-7180841769120666233, -1, -3901807980557405007, -1);
 
-    assert_eq!(r, transmute(lasx_xvsubwev_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwev_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10304,7 +12113,13 @@ unsafe fn test_lasx_xvsubwev_d_wu() {
     );
     let r = i64x4::new(-2531041484, -1085343469, -1900376905, 1600829569);
 
-    assert_eq!(r, transmute(lasx_xvsubwev_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwev_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10324,7 +12139,13 @@ unsafe fn test_lasx_xvsubwev_w_hu() {
         -117029268872947,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubwev_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwev_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10344,7 +12165,13 @@ unsafe fn test_lasx_xvsubwev_h_bu() {
         -7035942402260810,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubwev_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwev_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10368,7 +12195,13 @@ unsafe fn test_lasx_xvmulwev_q_d() {
         -2723954123981949807,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwev_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwev_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10400,7 +12233,13 @@ unsafe fn test_lasx_xvmulwev_d_w() {
         904288373202150940,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwev_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwev_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10420,7 +12259,13 @@ unsafe fn test_lasx_xvmulwev_w_h() {
         -218736636965849761,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwev_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwev_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10440,7 +12285,13 @@ unsafe fn test_lasx_xvmulwev_h_b() {
         -532018857412992924,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwev_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwev_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10464,7 +12315,13 @@ unsafe fn test_lasx_xvmulwev_q_du() {
         1973424773030267173,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwev_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwev_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10484,7 +12341,13 @@ unsafe fn test_lasx_xvmulwev_d_wu() {
         312983850752328844,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwev_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwev_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10504,7 +12367,13 @@ unsafe fn test_lasx_xvmulwev_w_hu() {
         -4803214827614038190,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwev_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwev_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10524,7 +12393,13 @@ unsafe fn test_lasx_xvmulwev_h_bu() {
         4458585836433706972,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwev_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwev_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10543,7 +12418,13 @@ unsafe fn test_lasx_xvaddwod_q_d() {
     );
     let r = i64x4::new(-3813723879058076957, 0, 200103109406722390, 0);
 
-    assert_eq!(r, transmute(lasx_xvaddwod_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwod_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10570,7 +12451,13 @@ unsafe fn test_lasx_xvaddwod_d_w() {
     );
     let r = i64x4::new(3142724184, -2585235328, -785720463, 926940003);
 
-    assert_eq!(r, transmute(lasx_xvaddwod_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwod_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10590,7 +12477,13 @@ unsafe fn test_lasx_xvaddwod_w_h() {
         -148498494282599,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddwod_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwod_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10610,7 +12503,13 @@ unsafe fn test_lasx_xvaddwod_h_b() {
         -9570449863999416,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddwod_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwod_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10629,7 +12528,13 @@ unsafe fn test_lasx_xvaddwod_q_du() {
     );
     let r = i64x4::new(751645223963476143, 1, -1275901335613508018, 0);
 
-    assert_eq!(r, transmute(lasx_xvaddwod_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwod_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10644,7 +12549,13 @@ unsafe fn test_lasx_xvaddwod_d_wu() {
     );
     let r = i64x4::new(4757884041, 1673456593, 2162927615, 5143136401);
 
-    assert_eq!(r, transmute(lasx_xvaddwod_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwod_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10664,7 +12575,13 @@ unsafe fn test_lasx_xvaddwod_w_hu() {
         248416613500221,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddwod_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwod_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10684,7 +12601,13 @@ unsafe fn test_lasx_xvaddwod_h_bu() {
         83880238860075230,
     );
 
-    assert_eq!(r, transmute(lasx_xvaddwod_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvaddwod_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10703,7 +12626,13 @@ unsafe fn test_lasx_xvsubwod_q_d() {
     );
     let r = i64x4::new(1764856097736252489, 0, 7683656878360999333, -1);
 
-    assert_eq!(r, transmute(lasx_xvsubwod_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwod_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10730,7 +12659,13 @@ unsafe fn test_lasx_xvsubwod_d_w() {
     );
     let r = i64x4::new(-959924898, 7572903, 2106559810, 3976421257);
 
-    assert_eq!(r, transmute(lasx_xvsubwod_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwod_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10750,7 +12685,13 @@ unsafe fn test_lasx_xvsubwod_w_h() {
         -17665200524651,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubwod_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwod_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10770,7 +12711,13 @@ unsafe fn test_lasx_xvsubwod_h_b() {
         -3939721971105776,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubwod_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwod_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10789,7 +12736,13 @@ unsafe fn test_lasx_xvsubwod_q_du() {
     );
     let r = i64x4::new(-6069526046627127478, -1, -1804068722113556285, -1);
 
-    assert_eq!(r, transmute(lasx_xvsubwod_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwod_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10804,8 +12757,14 @@ unsafe fn test_lasx_xvsubwod_d_wu() {
     );
     let r = i64x4::new(762157671, -772219478, -1655146846, -1402401592);
 
-    assert_eq!(r, transmute(lasx_xvsubwod_d_wu(transmute(a), transmute(b))));
-}
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwod_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
 
 #[simd_test(enable = "lasx")]
 unsafe fn test_lasx_xvsubwod_w_hu() {
@@ -10824,7 +12783,13 @@ unsafe fn test_lasx_xvsubwod_w_hu() {
         164866614644743,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubwod_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwod_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10844,7 +12809,13 @@ unsafe fn test_lasx_xvsubwod_h_bu() {
         -280740536975491,
     );
 
-    assert_eq!(r, transmute(lasx_xvsubwod_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsubwod_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10868,7 +12839,13 @@ unsafe fn test_lasx_xvmulwod_q_d() {
         -113061080830775254,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwod_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwod_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10900,7 +12877,13 @@ unsafe fn test_lasx_xvmulwod_d_w() {
         -1334126209007208500,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwod_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwod_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10920,7 +12903,13 @@ unsafe fn test_lasx_xvmulwod_w_h() {
         337273560374881751,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwod_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwod_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10940,7 +12929,13 @@ unsafe fn test_lasx_xvmulwod_h_b() {
         -797714991416606612,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwod_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwod_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10964,7 +12959,13 @@ unsafe fn test_lasx_xvmulwod_q_du() {
         -6864651532066967840,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwod_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwod_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -10984,7 +12985,13 @@ unsafe fn test_lasx_xvmulwod_d_wu() {
         170736982952013264,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwod_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwod_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -11004,7 +13011,13 @@ unsafe fn test_lasx_xvmulwod_w_hu() {
         648970298882764352,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwod_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwod_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -11024,7 +13037,13 @@ unsafe fn test_lasx_xvmulwod_h_bu() {
         861263883582730760,
     );
 
-    assert_eq!(r, transmute(lasx_xvmulwod_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvmulwod_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -11047,7 +13066,10 @@ unsafe fn test_lasx_xvaddwev_d_wu_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvaddwev_d_wu_w(transmute(a), transmute(b)))
+        transmute(lasx_xvaddwev_d_wu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11070,7 +13092,10 @@ unsafe fn test_lasx_xvaddwev_w_hu_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvaddwev_w_hu_h(transmute(a), transmute(b)))
+        transmute(lasx_xvaddwev_w_hu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11093,7 +13118,10 @@ unsafe fn test_lasx_xvaddwev_h_bu_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvaddwev_h_bu_b(transmute(a), transmute(b)))
+        transmute(lasx_xvaddwev_h_bu_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11122,7 +13150,10 @@ unsafe fn test_lasx_xvmulwev_d_wu_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmulwev_d_wu_w(transmute(a), transmute(b)))
+        transmute(lasx_xvmulwev_d_wu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11145,7 +13176,10 @@ unsafe fn test_lasx_xvmulwev_w_hu_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmulwev_w_hu_h(transmute(a), transmute(b)))
+        transmute(lasx_xvmulwev_w_hu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11168,7 +13202,10 @@ unsafe fn test_lasx_xvmulwev_h_bu_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmulwev_h_bu_b(transmute(a), transmute(b)))
+        transmute(lasx_xvmulwev_h_bu_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11192,7 +13229,10 @@ unsafe fn test_lasx_xvaddwod_d_wu_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvaddwod_d_wu_w(transmute(a), transmute(b)))
+        transmute(lasx_xvaddwod_d_wu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11215,7 +13255,10 @@ unsafe fn test_lasx_xvaddwod_w_hu_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvaddwod_w_hu_h(transmute(a), transmute(b)))
+        transmute(lasx_xvaddwod_w_hu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11238,7 +13281,10 @@ unsafe fn test_lasx_xvaddwod_h_bu_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvaddwod_h_bu_b(transmute(a), transmute(b)))
+        transmute(lasx_xvaddwod_h_bu_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11267,7 +13313,10 @@ unsafe fn test_lasx_xvmulwod_d_wu_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmulwod_d_wu_w(transmute(a), transmute(b)))
+        transmute(lasx_xvmulwod_d_wu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11290,7 +13339,10 @@ unsafe fn test_lasx_xvmulwod_w_hu_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmulwod_w_hu_h(transmute(a), transmute(b)))
+        transmute(lasx_xvmulwod_w_hu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11313,7 +13365,10 @@ unsafe fn test_lasx_xvmulwod_h_bu_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmulwod_h_bu_b(transmute(a), transmute(b)))
+        transmute(lasx_xvmulwod_h_bu_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -11333,7 +13388,13 @@ unsafe fn test_lasx_xvhaddw_q_d() {
     );
     let r = i64x4::new(7070440900316630840, -1, 4582440905924999074, 0);
 
-    assert_eq!(r, transmute(lasx_xvhaddw_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhaddw_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -11352,7 +13413,13 @@ unsafe fn test_lasx_xvhaddw_qu_du() {
     );
     let r = i64x4::new(-6342973196760799579, 0, -6232960347008472572, 1);
 
-    assert_eq!(r, transmute(lasx_xvhaddw_qu_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhaddw_qu_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -11371,7 +13438,13 @@ unsafe fn test_lasx_xvhsubw_q_d() {
     );
     let r = i64x4::new(5317548498597883842, 0, 6155348192460751216, -1);
 
-    assert_eq!(r, transmute(lasx_xvhsubw_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhsubw_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -11390,7 +13463,13 @@ unsafe fn test_lasx_xvhsubw_qu_du() {
     );
     let r = i64x4::new(11053881530518619, 0, -1215853579082277290, -1);
 
-    assert_eq!(r, transmute(lasx_xvhsubw_qu_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvhsubw_qu_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -11422,7 +13501,11 @@ unsafe fn test_lasx_xvmaddwev_q_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmaddwev_q_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmaddwev_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -11463,7 +13546,11 @@ unsafe fn test_lasx_xvmaddwev_d_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmaddwev_d_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmaddwev_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -11496,7 +13583,11 @@ unsafe fn test_lasx_xvmaddwev_w_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmaddwev_w_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmaddwev_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -11523,7 +13614,11 @@ unsafe fn test_lasx_xvmaddwev_h_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmaddwev_h_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmaddwev_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -11557,9 +13652,9 @@ unsafe fn test_lasx_xvmaddwev_q_du() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwev_q_du(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -11590,9 +13685,9 @@ unsafe fn test_lasx_xvmaddwev_d_wu() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwev_d_wu(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -11621,9 +13716,9 @@ unsafe fn test_lasx_xvmaddwev_w_hu() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwev_w_hu(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -11652,9 +13747,9 @@ unsafe fn test_lasx_xvmaddwev_h_bu() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwev_h_bu(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -11688,7 +13783,11 @@ unsafe fn test_lasx_xvmaddwod_q_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmaddwod_q_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmaddwod_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -11729,7 +13828,11 @@ unsafe fn test_lasx_xvmaddwod_d_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmaddwod_d_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmaddwod_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -11762,7 +13865,11 @@ unsafe fn test_lasx_xvmaddwod_w_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmaddwod_w_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmaddwod_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -11789,7 +13896,11 @@ unsafe fn test_lasx_xvmaddwod_h_b() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmaddwod_h_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lasx_xvmaddwod_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -11823,9 +13934,9 @@ unsafe fn test_lasx_xvmaddwod_q_du() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwod_q_du(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -11856,9 +13967,9 @@ unsafe fn test_lasx_xvmaddwod_d_wu() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwod_d_wu(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -11887,9 +13998,9 @@ unsafe fn test_lasx_xvmaddwod_w_hu() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwod_w_hu(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -11918,9 +14029,9 @@ unsafe fn test_lasx_xvmaddwod_h_bu() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwod_h_bu(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -11955,9 +14066,9 @@ unsafe fn test_lasx_xvmaddwev_q_du_d() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwev_q_du_d(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -11994,9 +14105,9 @@ unsafe fn test_lasx_xvmaddwev_d_wu_w() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwev_d_wu_w(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -12031,9 +14142,9 @@ unsafe fn test_lasx_xvmaddwev_w_hu_h() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwev_w_hu_h(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -12062,9 +14173,9 @@ unsafe fn test_lasx_xvmaddwev_h_bu_b() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwev_h_bu_b(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -12099,9 +14210,9 @@ unsafe fn test_lasx_xvmaddwod_q_du_d() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwod_q_du_d(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -12138,9 +14249,9 @@ unsafe fn test_lasx_xvmaddwod_d_wu_w() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwod_d_wu_w(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -12169,9 +14280,9 @@ unsafe fn test_lasx_xvmaddwod_w_hu_h() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwod_w_hu_h(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -12200,9 +14311,9 @@ unsafe fn test_lasx_xvmaddwod_h_bu_b() {
     assert_eq!(
         r,
         transmute(lasx_xvmaddwod_h_bu_b(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -12224,7 +14335,13 @@ unsafe fn test_lasx_xvrotr_b() {
         5842271601646106402,
     );
 
-    assert_eq!(r, transmute(lasx_xvrotr_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvrotr_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -12244,7 +14361,13 @@ unsafe fn test_lasx_xvrotr_h() {
         8109266518466894464,
     );
 
-    assert_eq!(r, transmute(lasx_xvrotr_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvrotr_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -12264,7 +14387,13 @@ unsafe fn test_lasx_xvrotr_w() {
         8567937817891640092,
     );
 
-    assert_eq!(r, transmute(lasx_xvrotr_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvrotr_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -12288,7 +14417,13 @@ unsafe fn test_lasx_xvrotr_d() {
         4254025119287920211,
     );
 
-    assert_eq!(r, transmute(lasx_xvrotr_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvrotr_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -12312,7 +14447,13 @@ unsafe fn test_lasx_xvadd_q() {
         1706530784161666452,
     );
 
-    assert_eq!(r, transmute(lasx_xvadd_q(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvadd_q(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -12336,7 +14477,13 @@ unsafe fn test_lasx_xvsub_q() {
         1242748497994781383,
     );
 
-    assert_eq!(r, transmute(lasx_xvsub_q(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvsub_q(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -12357,7 +14504,10 @@ unsafe fn test_lasx_xvaddwev_q_du_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvaddwev_q_du_d(transmute(a), transmute(b)))
+        transmute(lasx_xvaddwev_q_du_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12379,7 +14529,10 @@ unsafe fn test_lasx_xvaddwod_q_du_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvaddwod_q_du_d(transmute(a), transmute(b)))
+        transmute(lasx_xvaddwod_q_du_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12406,7 +14559,10 @@ unsafe fn test_lasx_xvmulwev_q_du_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmulwev_q_du_d(transmute(a), transmute(b)))
+        transmute(lasx_xvmulwev_q_du_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12433,7 +14589,10 @@ unsafe fn test_lasx_xvmulwod_q_du_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvmulwod_q_du_d(transmute(a), transmute(b)))
+        transmute(lasx_xvmulwod_q_du_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12445,7 +14604,7 @@ unsafe fn test_lasx_xvmskgez_b() {
     );
     let r = i64x4::new(13289, 0, 4927, 0);
 
-    assert_eq!(r, transmute(lasx_xvmskgez_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmskgez_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12456,7 +14615,7 @@ unsafe fn test_lasx_xvmsknz_b() {
     );
     let r = i64x4::new(65535, 0, 65535, 0);
 
-    assert_eq!(r, transmute(lasx_xvmsknz_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvmsknz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12472,7 +14631,7 @@ unsafe fn test_lasx_xvexth_h_b() {
         -1689051729887256,
     );
 
-    assert_eq!(r, transmute(lasx_xvexth_h_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvexth_h_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12488,7 +14647,7 @@ unsafe fn test_lasx_xvexth_w_h() {
         -117171002791439,
     );
 
-    assert_eq!(r, transmute(lasx_xvexth_w_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvexth_w_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12505,7 +14664,7 @@ unsafe fn test_lasx_xvexth_d_w() {
     );
     let r = i64x4::new(78514216, -1063299454, -1487536177, 1875317589);
 
-    assert_eq!(r, transmute(lasx_xvexth_d_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvexth_d_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12518,7 +14677,7 @@ unsafe fn test_lasx_xvexth_q_d() {
     );
     let r = i64x4::new(5196480214883180720, 0, 7776492634988202392, 0);
 
-    assert_eq!(r, transmute(lasx_xvexth_q_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvexth_q_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12534,7 +14693,7 @@ unsafe fn test_lasx_xvexth_hu_bu() {
         11259067788754993,
     );
 
-    assert_eq!(r, transmute(lasx_xvexth_hu_bu(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvexth_hu_bu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12550,7 +14709,7 @@ unsafe fn test_lasx_xvexth_wu_hu() {
         211376815493177,
     );
 
-    assert_eq!(r, transmute(lasx_xvexth_wu_hu(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvexth_wu_hu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12561,7 +14720,7 @@ unsafe fn test_lasx_xvexth_du_wu() {
     );
     let r = i64x4::new(3486710391, 717721410, 1954296323, 1406265475);
 
-    assert_eq!(r, transmute(lasx_xvexth_du_wu(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvexth_du_wu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12574,7 +14733,7 @@ unsafe fn test_lasx_xvexth_qu_du() {
     );
     let r = i64x4::new(6305760528044738869, 0, 3857202168052068182, 0);
 
-    assert_eq!(r, transmute(lasx_xvexth_qu_du(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvexth_qu_du(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12590,7 +14749,7 @@ unsafe fn test_lasx_xvrotri_b() {
         -3500418816657076903,
     );
 
-    assert_eq!(r, transmute(lasx_xvrotri_b::<4>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvrotri_b::<4>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12606,7 +14765,7 @@ unsafe fn test_lasx_xvrotri_h() {
         4779464405959485451,
     );
 
-    assert_eq!(r, transmute(lasx_xvrotri_h::<15>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvrotri_h::<15>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12628,7 +14787,7 @@ unsafe fn test_lasx_xvrotri_w() {
         -1679179889808014898,
     );
 
-    assert_eq!(r, transmute(lasx_xvrotri_w::<11>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvrotri_w::<11>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12646,7 +14805,7 @@ unsafe fn test_lasx_xvrotri_d() {
         -7958311692822812825,
     );
 
-    assert_eq!(r, transmute(lasx_xvrotri_d::<16>(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvrotri_d::<16>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12659,7 +14818,7 @@ unsafe fn test_lasx_xvextl_q_d() {
     );
     let r = i64x4::new(-4167783494125842132, -1, 7476993593286219399, 0);
 
-    assert_eq!(r, transmute(lasx_xvextl_q_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xvextl_q_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -12681,7 +14840,10 @@ unsafe fn test_lasx_xvsrlni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrlni_b_h::<4>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrlni_b_h::<4>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12704,7 +14866,10 @@ unsafe fn test_lasx_xvsrlni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrlni_h_w::<16>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrlni_h_w::<16>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12739,7 +14904,10 @@ unsafe fn test_lasx_xvsrlni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrlni_w_d::<26>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrlni_w_d::<26>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12761,7 +14929,10 @@ unsafe fn test_lasx_xvsrlni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrlni_d_q::<102>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrlni_d_q::<102>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12784,7 +14955,10 @@ unsafe fn test_lasx_xvsrlrni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrlrni_b_h::<8>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrlrni_b_h::<8>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12807,7 +14981,10 @@ unsafe fn test_lasx_xvsrlrni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrlrni_h_w::<5>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrlrni_h_w::<5>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12842,7 +15019,10 @@ unsafe fn test_lasx_xvsrlrni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrlrni_w_d::<43>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrlrni_w_d::<43>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12864,7 +15044,10 @@ unsafe fn test_lasx_xvsrlrni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrlrni_d_q::<126>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrlrni_d_q::<126>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12887,7 +15070,10 @@ unsafe fn test_lasx_xvssrlni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlni_b_h::<4>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlni_b_h::<4>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12905,7 +15091,10 @@ unsafe fn test_lasx_xvssrlni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlni_h_w::<31>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlni_h_w::<31>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12940,7 +15129,10 @@ unsafe fn test_lasx_xvssrlni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlni_w_d::<14>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlni_w_d::<14>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12967,7 +15159,10 @@ unsafe fn test_lasx_xvssrlni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlni_d_q::<35>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlni_d_q::<35>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -12990,7 +15185,10 @@ unsafe fn test_lasx_xvssrlni_bu_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlni_bu_h::<11>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlni_bu_h::<11>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13008,7 +15206,10 @@ unsafe fn test_lasx_xvssrlni_hu_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlni_hu_w::<31>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlni_hu_w::<31>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13032,7 +15233,10 @@ unsafe fn test_lasx_xvssrlni_wu_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlni_wu_d::<24>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlni_wu_d::<24>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13054,7 +15258,10 @@ unsafe fn test_lasx_xvssrlni_du_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlni_du_q::<109>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlni_du_q::<109>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13077,7 +15284,10 @@ unsafe fn test_lasx_xvssrlrni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlrni_b_h::<7>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlrni_b_h::<7>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13100,7 +15310,10 @@ unsafe fn test_lasx_xvssrlrni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlrni_h_w::<11>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlrni_h_w::<11>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13135,7 +15348,10 @@ unsafe fn test_lasx_xvssrlrni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlrni_w_d::<27>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlrni_w_d::<27>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13157,7 +15373,10 @@ unsafe fn test_lasx_xvssrlrni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlrni_d_q::<94>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlrni_d_q::<94>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13175,7 +15394,10 @@ unsafe fn test_lasx_xvssrlrni_bu_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlrni_bu_h::<4>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlrni_bu_h::<4>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13198,7 +15420,10 @@ unsafe fn test_lasx_xvssrlrni_hu_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlrni_hu_w::<16>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlrni_hu_w::<16>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13227,7 +15452,10 @@ unsafe fn test_lasx_xvssrlrni_wu_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlrni_wu_d::<50>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlrni_wu_d::<50>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13249,7 +15477,10 @@ unsafe fn test_lasx_xvssrlrni_du_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrlrni_du_q::<53>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrlrni_du_q::<53>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13272,7 +15503,10 @@ unsafe fn test_lasx_xvsrani_b_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrani_b_h::<8>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrani_b_h::<8>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13295,7 +15529,10 @@ unsafe fn test_lasx_xvsrani_h_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrani_h_w::<0>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrani_h_w::<0>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13330,7 +15567,10 @@ unsafe fn test_lasx_xvsrani_w_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrani_w_d::<28>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrani_w_d::<28>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13357,7 +15597,10 @@ unsafe fn test_lasx_xvsrani_d_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrani_d_q::<66>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrani_d_q::<66>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13380,7 +15623,10 @@ unsafe fn test_lasx_xvsrarni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrarni_b_h::<4>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrarni_b_h::<4>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13403,7 +15649,10 @@ unsafe fn test_lasx_xvsrarni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrarni_h_w::<9>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrarni_h_w::<9>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13427,7 +15676,10 @@ unsafe fn test_lasx_xvsrarni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrarni_w_d::<63>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrarni_w_d::<63>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13449,7 +15701,10 @@ unsafe fn test_lasx_xvsrarni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvsrarni_d_q::<102>(transmute(a), transmute(b)))
+        transmute(lasx_xvsrarni_d_q::<102>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13472,7 +15727,10 @@ unsafe fn test_lasx_xvssrani_b_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrani_b_h::<5>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrani_b_h::<5>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13495,7 +15753,10 @@ unsafe fn test_lasx_xvssrani_h_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrani_h_w::<0>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrani_h_w::<0>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13530,7 +15791,10 @@ unsafe fn test_lasx_xvssrani_w_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrani_w_d::<45>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrani_w_d::<45>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13557,7 +15821,10 @@ unsafe fn test_lasx_xvssrani_d_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrani_d_q::<73>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrani_d_q::<73>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13575,7 +15842,10 @@ unsafe fn test_lasx_xvssrani_bu_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrani_bu_h::<12>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrani_bu_h::<12>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13593,7 +15863,10 @@ unsafe fn test_lasx_xvssrani_hu_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrani_hu_w::<9>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrani_hu_w::<9>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13617,7 +15890,10 @@ unsafe fn test_lasx_xvssrani_wu_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrani_wu_d::<42>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrani_wu_d::<42>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13639,7 +15915,10 @@ unsafe fn test_lasx_xvssrani_du_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrani_du_q::<115>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrani_du_q::<115>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13662,7 +15941,10 @@ unsafe fn test_lasx_xvssrarni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrarni_b_h::<6>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrarni_b_h::<6>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13685,7 +15967,10 @@ unsafe fn test_lasx_xvssrarni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrarni_h_w::<25>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrarni_h_w::<25>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13715,7 +16000,10 @@ unsafe fn test_lasx_xvssrarni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrarni_w_d::<61>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrarni_w_d::<61>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13737,7 +16025,10 @@ unsafe fn test_lasx_xvssrarni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrarni_d_q::<123>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrarni_d_q::<123>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13760,7 +16051,10 @@ unsafe fn test_lasx_xvssrarni_bu_h() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrarni_bu_h::<10>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrarni_bu_h::<10>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13778,7 +16072,10 @@ unsafe fn test_lasx_xvssrarni_hu_w() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrarni_hu_w::<30>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrarni_hu_w::<30>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13802,7 +16099,10 @@ unsafe fn test_lasx_xvssrarni_wu_d() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrarni_wu_d::<61>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrarni_wu_d::<61>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13824,7 +16124,10 @@ unsafe fn test_lasx_xvssrarni_du_q() {
 
     assert_eq!(
         r,
-        transmute(lasx_xvssrarni_du_q::<15>(transmute(a), transmute(b)))
+        transmute(lasx_xvssrarni_du_q::<15>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -13836,7 +16139,7 @@ unsafe fn test_lasx_xbnz_b() {
     );
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lasx_xbnz_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbnz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13849,7 +16152,7 @@ unsafe fn test_lasx_xbnz_d() {
     );
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lasx_xbnz_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbnz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13860,7 +16163,7 @@ unsafe fn test_lasx_xbnz_h() {
     );
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lasx_xbnz_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbnz_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13871,7 +16174,7 @@ unsafe fn test_lasx_xbnz_v() {
     );
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lasx_xbnz_v(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbnz_v(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13882,7 +16185,7 @@ unsafe fn test_lasx_xbnz_w() {
     );
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lasx_xbnz_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbnz_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13893,7 +16196,7 @@ unsafe fn test_lasx_xbz_b() {
     );
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lasx_xbz_b(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13906,7 +16209,7 @@ unsafe fn test_lasx_xbz_d() {
     );
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lasx_xbz_d(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13917,7 +16220,7 @@ unsafe fn test_lasx_xbz_h() {
     );
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lasx_xbz_h(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbz_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13928,7 +16231,7 @@ unsafe fn test_lasx_xbz_v() {
     );
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lasx_xbz_v(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbz_v(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13939,7 +16242,7 @@ unsafe fn test_lasx_xbz_w() {
     );
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lasx_xbz_w(transmute(a))));
+    assert_eq!(r, transmute(lasx_xbz_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lasx")]
@@ -13958,7 +16261,13 @@ unsafe fn test_lasx_xvfcmp_caf_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_caf_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_caf_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -13973,7 +16282,13 @@ unsafe fn test_lasx_xvfcmp_caf_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_caf_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_caf_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -13992,7 +16307,13 @@ unsafe fn test_lasx_xvfcmp_ceq_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_ceq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_ceq_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14007,7 +16328,13 @@ unsafe fn test_lasx_xvfcmp_ceq_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_ceq_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_ceq_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14026,7 +16353,13 @@ unsafe fn test_lasx_xvfcmp_cle_d() {
     );
     let r = i64x4::new(-1, -1, -1, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cle_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cle_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14041,7 +16374,13 @@ unsafe fn test_lasx_xvfcmp_cle_s() {
     );
     let r = i64x4::new(0, -1, -1, -4294967296);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cle_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cle_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14060,7 +16399,13 @@ unsafe fn test_lasx_xvfcmp_clt_d() {
     );
     let r = i64x4::new(0, -1, 0, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_clt_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_clt_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14075,7 +16420,13 @@ unsafe fn test_lasx_xvfcmp_clt_s() {
     );
     let r = i64x4::new(-1, 4294967295, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_clt_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_clt_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14094,7 +16445,13 @@ unsafe fn test_lasx_xvfcmp_cne_d() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cne_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cne_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14109,7 +16466,13 @@ unsafe fn test_lasx_xvfcmp_cne_s() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cne_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cne_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14128,7 +16491,13 @@ unsafe fn test_lasx_xvfcmp_cor_d() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cor_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cor_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14143,7 +16512,13 @@ unsafe fn test_lasx_xvfcmp_cor_s() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cor_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cor_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14162,7 +16537,13 @@ unsafe fn test_lasx_xvfcmp_cueq_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cueq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cueq_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14177,7 +16558,13 @@ unsafe fn test_lasx_xvfcmp_cueq_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cueq_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cueq_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14196,7 +16583,13 @@ unsafe fn test_lasx_xvfcmp_cule_d() {
     );
     let r = i64x4::new(0, -1, -1, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cule_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cule_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14211,7 +16604,13 @@ unsafe fn test_lasx_xvfcmp_cule_s() {
     );
     let r = i64x4::new(-4294967296, 4294967295, 4294967295, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cule_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cule_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14230,7 +16629,13 @@ unsafe fn test_lasx_xvfcmp_cult_d() {
     );
     let r = i64x4::new(0, -1, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cult_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cult_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14245,7 +16650,13 @@ unsafe fn test_lasx_xvfcmp_cult_s() {
     );
     let r = i64x4::new(-1, 0, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cult_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cult_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14264,7 +16675,13 @@ unsafe fn test_lasx_xvfcmp_cun_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cun_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cun_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14283,7 +16700,13 @@ unsafe fn test_lasx_xvfcmp_cune_d() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cune_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cune_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14298,7 +16721,13 @@ unsafe fn test_lasx_xvfcmp_cune_s() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cune_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cune_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14313,7 +16742,13 @@ unsafe fn test_lasx_xvfcmp_cun_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_cun_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_cun_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14332,7 +16767,13 @@ unsafe fn test_lasx_xvfcmp_saf_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_saf_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_saf_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14347,7 +16788,13 @@ unsafe fn test_lasx_xvfcmp_saf_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_saf_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_saf_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14366,7 +16813,13 @@ unsafe fn test_lasx_xvfcmp_seq_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_seq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_seq_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14381,7 +16834,13 @@ unsafe fn test_lasx_xvfcmp_seq_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_seq_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_seq_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14400,7 +16859,13 @@ unsafe fn test_lasx_xvfcmp_sle_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sle_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sle_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14415,7 +16880,13 @@ unsafe fn test_lasx_xvfcmp_sle_s() {
     );
     let r = i64x4::new(0, 4294967295, -1, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sle_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sle_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14434,7 +16905,13 @@ unsafe fn test_lasx_xvfcmp_slt_d() {
     );
     let r = i64x4::new(0, -1, -1, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_slt_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_slt_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14449,7 +16926,13 @@ unsafe fn test_lasx_xvfcmp_slt_s() {
     );
     let r = i64x4::new(0, -4294967296, 4294967295, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_slt_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_slt_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14468,7 +16951,13 @@ unsafe fn test_lasx_xvfcmp_sne_d() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sne_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sne_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14483,7 +16972,13 @@ unsafe fn test_lasx_xvfcmp_sne_s() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sne_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sne_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14502,7 +16997,13 @@ unsafe fn test_lasx_xvfcmp_sor_d() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sor_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sor_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14517,7 +17018,13 @@ unsafe fn test_lasx_xvfcmp_sor_s() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sor_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sor_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14536,7 +17043,13 @@ unsafe fn test_lasx_xvfcmp_sueq_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sueq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sueq_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14551,7 +17064,13 @@ unsafe fn test_lasx_xvfcmp_sueq_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sueq_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sueq_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14570,7 +17089,13 @@ unsafe fn test_lasx_xvfcmp_sule_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sule_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sule_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14585,7 +17110,13 @@ unsafe fn test_lasx_xvfcmp_sule_s() {
     );
     let r = i64x4::new(0, 4294967295, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sule_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sule_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14604,7 +17135,13 @@ unsafe fn test_lasx_xvfcmp_sult_d() {
     );
     let r = i64x4::new(0, -1, 0, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sult_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sult_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14619,7 +17156,13 @@ unsafe fn test_lasx_xvfcmp_sult_s() {
     );
     let r = i64x4::new(-1, 4294967295, -1, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sult_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sult_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14638,7 +17181,13 @@ unsafe fn test_lasx_xvfcmp_sun_d() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sun_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sun_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14657,7 +17206,13 @@ unsafe fn test_lasx_xvfcmp_sune_d() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sune_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sune_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14672,7 +17227,13 @@ unsafe fn test_lasx_xvfcmp_sune_s() {
     );
     let r = i64x4::new(-1, -1, -1, -1);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sune_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sune_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14687,7 +17248,13 @@ unsafe fn test_lasx_xvfcmp_sun_s() {
     );
     let r = i64x4::new(0, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvfcmp_sun_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvfcmp_sun_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14700,7 +17267,10 @@ unsafe fn test_lasx_xvpickve_d_f() {
     );
     let r = i64x4::new(4605596490350167974, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvpickve_d_f::<1>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickve_d_f::<1>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14711,7 +17281,10 @@ unsafe fn test_lasx_xvpickve_w_f() {
     );
     let r = i64x4::new(1040565756, 0, 0, 0);
 
-    assert_eq!(r, transmute(lasx_xvpickve_w_f::<1>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lasx_xvpickve_w_f::<1>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lasx")]
@@ -14756,3 +17329,326 @@ unsafe fn test_lasx_xvrepli_w() {
 
     assert_eq!(r, transmute(lasx_xvrepli_w::<-388>()));
 }
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_cast_128_s() {
+    let a = u32x4::new(1031165056, 1051966120, 1060984374, 1062536919);
+    let r = i64x4::new(4518160082931176576, 4563561318958585398, 1966080, 1966080);
+
+    assert_eq!(
+        r.as_array()[0..2],
+        transmute::<_, i64x4>(lasx_cast_128_s(black_box(transmute(a)))).as_array()[0..2]
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_cast_128_d() {
+    let a = u64x2::new(4604694967937271251, 4600904075476555984);
+    let r = i64x4::new(
+        4604694967937271251,
+        4600904075476555984,
+        2910860781861170785,
+        8314045306847701346,
+    );
+
+    assert_eq!(
+        r.as_array()[0..2],
+        transmute::<_, i64x4>(lasx_cast_128_d(black_box(transmute(a)))).as_array()[0..2]
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_cast_128() {
+    let a = i64x2::new(-5333716211868108402, 2442107533729495827);
+    let r = i64x4::new(
+        -5333716211868108402,
+        2442107533729495827,
+        -1115824375586394527,
+        8314045306157170687,
+    );
+
+    assert_eq!(
+        r.as_array()[0..2],
+        transmute::<_, i64x4>(lasx_cast_128(black_box(transmute(a)))).as_array()[0..2]
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_concat_128_s() {
+    let a = u32x4::new(1032255272, 1059413818, 1058434362, 1041454056);
+    let b = u32x4::new(1047296252, 1059191602, 1051282752, 1026847376);
+    let r = i64x4::new(
+        4550147702272751400,
+        4473011111864986938,
+        4549193291835144444,
+        4410275898954698048,
+    );
+
+    assert_eq!(
+        r,
+        transmute(lasx_concat_128_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_concat_128_d() {
+    let a = u64x2::new(4602341404117999960, 4599751584045405722);
+    let b = u64x2::new(4595947342927040984, 4600308396523102002);
+    let r = i64x4::new(
+        4602341404117999960,
+        4599751584045405722,
+        4595947342927040984,
+        4600308396523102002,
+    );
+
+    assert_eq!(
+        r,
+        transmute(lasx_concat_128_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_concat_128() {
+    let a = i64x2::new(3302609705743394573, 8438855426868306143);
+    let b = i64x2::new(8632034656150002181, 7751541408133090748);
+    let r = i64x4::new(
+        3302609705743394573,
+        8438855426868306143,
+        8632034656150002181,
+        7751541408133090748,
+    );
+
+    assert_eq!(
+        r,
+        transmute(lasx_concat_128(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_extract_128_lo_s() {
+    let a = u32x8::new(
+        1038279272, 1053426270, 1062315532, 1055361088, 1061380448, 1052007748, 1063816577,
+        1061671114,
+    );
+    let r = i64x2::new(4524431379435545192, 4532741359493293580);
+
+    assert_eq!(r, transmute(lasx_extract_128_lo_s(black_box(transmute(a)))));
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_extract_128_hi_s() {
+    let a = u32x8::new(
+        1059517342, 1052723820, 1053176244, 1060336354, 1058221022, 1064684502, 1061072013,
+        1059238420,
+    );
+    let r = i64x2::new(4572785117706267614, 4549394373627784333);
+
+    assert_eq!(r, transmute(lasx_extract_128_hi_s(black_box(transmute(a)))));
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_extract_128_lo_d() {
+    let a = u64x4::new(
+        4606487981487128637,
+        4592443779247846248,
+        4605637448543526041,
+        4604126872543611047,
+    );
+    let r = i64x2::new(4606487981487128637, 4592443779247846248);
+
+    assert_eq!(r, transmute(lasx_extract_128_lo_d(black_box(transmute(a)))));
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_extract_128_hi_d() {
+    let a = u64x4::new(
+        4595075050683709816,
+        4603388454656549851,
+        4603881047625519227,
+        4604218419306666352,
+    );
+    let r = i64x2::new(4603881047625519227, 4604218419306666352);
+
+    assert_eq!(r, transmute(lasx_extract_128_hi_d(black_box(transmute(a)))));
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_extract_128_lo() {
+    let a = i64x4::new(
+        1690990426210778543,
+        -1056924033489771427,
+        1791197928200737608,
+        2648792885519901423,
+    );
+    let r = i64x2::new(1690990426210778543, -1056924033489771427);
+
+    assert_eq!(r, transmute(lasx_extract_128_lo(black_box(transmute(a)))));
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_extract_128_hi() {
+    let a = i64x4::new(
+        1400282616691463341,
+        6677577875527300174,
+        -1903780563362068813,
+        -7449796170151383489,
+    );
+    let r = i64x2::new(-1903780563362068813, -7449796170151383489);
+
+    assert_eq!(r, transmute(lasx_extract_128_hi(black_box(transmute(a)))));
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_insert_128_lo_s() {
+    let a = u32x8::new(
+        1063338913, 1017815328, 1065051130, 1040694156, 1059596680, 1048796526, 1058020845,
+        1057822131,
+    );
+    let b = u32x4::new(1052930766, 1021556992, 1050709482, 1059704809);
+    let r = i64x4::new(
+        4387553872693064398,
+        4551397499119635946,
+        4504546780388010376,
+        4543311458688048621,
+    );
+
+    assert_eq!(
+        r,
+        transmute(lasx_insert_128_lo_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_insert_128_hi_s() {
+    let a = u32x8::new(
+        1018863744, 1064221149, 1048659080, 1057450774, 1049935896, 1034170664, 1059759433,
+        1057849762,
+    );
+    let b = u32x4::new(1060332648, 1063149600, 1051087106, 1060582348);
+    let r = i64x4::new(
+        4570795031685406848,
+        4541716492508546184,
+        4566192763815814248,
+        4555166500425978114,
+    );
+
+    assert_eq!(
+        r,
+        transmute(lasx_insert_128_hi_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_insert_128_lo_d() {
+    let a = u64x4::new(
+        4601319519422109044,
+        4601506273633970188,
+        4605118087882201940,
+        4605125059076454256,
+    );
+    let b = u64x2::new(4587489919640425888, 4591909120489567808);
+    let r = i64x4::new(
+        4587489919640425888,
+        4591909120489567808,
+        4605118087882201940,
+        4605125059076454256,
+    );
+
+    assert_eq!(
+        r,
+        transmute(lasx_insert_128_lo_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_insert_128_hi_d() {
+    let a = u64x4::new(
+        4604690660177752777,
+        4593824994203592700,
+        4599958775071728504,
+        4604125324674373728,
+    );
+    let b = u64x2::new(4601718173474385938, 4591758028383494760);
+    let r = i64x4::new(
+        4604690660177752777,
+        4593824994203592700,
+        4601718173474385938,
+        4591758028383494760,
+    );
+
+    assert_eq!(
+        r,
+        transmute(lasx_insert_128_hi_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_insert_128_lo() {
+    let a = i64x4::new(
+        8159968186698006293,
+        5648210958959948409,
+        603295919044368378,
+        -4396186135186039276,
+    );
+    let b = i64x2::new(-6258666140812668387, 5822982556977506382);
+    let r = i64x4::new(
+        -6258666140812668387,
+        5822982556977506382,
+        603295919044368378,
+        -4396186135186039276,
+    );
+
+    assert_eq!(
+        r,
+        transmute(lasx_insert_128_lo(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
+
+#[simd_test(enable = "lasx")]
+unsafe fn test_lasx_insert_128_hi() {
+    let a = i64x4::new(
+        2981835982487038158,
+        5258378092714202875,
+        5115371338527125146,
+        -6993491475145500537,
+    );
+    let b = i64x2::new(1176776599938765863, -7502655081590988207);
+    let r = i64x4::new(
+        2981835982487038158,
+        5258378092714202875,
+        1176776599938765863,
+        -7502655081590988207,
+    );
+
+    assert_eq!(
+        r,
+        transmute(lasx_insert_128_hi(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
+}
diff --git a/crates/core_arch/src/loongarch64/lsx/generated.rs b/crates/core_arch/src/loongarch64/lsx/generated.rs
index 764e69ca05..41ced43454 100644
--- a/crates/core_arch/src/loongarch64/lsx/generated.rs
+++ b/crates/core_arch/src/loongarch64/lsx/generated.rs
@@ -7,42 +7,10 @@
 // ```
 
 use crate::mem::transmute;
-use super::types::*;
+use super::super::*;
 
 #[allow(improper_ctypes)]
 unsafe extern "unadjusted" {
-    #[link_name = "llvm.loongarch.lsx.vsll.b"]
-    fn __lsx_vsll_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsll.h"]
-    fn __lsx_vsll_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsll.w"]
-    fn __lsx_vsll_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsll.d"]
-    fn __lsx_vsll_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vslli.b"]
-    fn __lsx_vslli_b(a: __v16i8, b: u32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vslli.h"]
-    fn __lsx_vslli_h(a: __v8i16, b: u32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vslli.w"]
-    fn __lsx_vslli_w(a: __v4i32, b: u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vslli.d"]
-    fn __lsx_vslli_d(a: __v2i64, b: u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsra.b"]
-    fn __lsx_vsra_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsra.h"]
-    fn __lsx_vsra_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsra.w"]
-    fn __lsx_vsra_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsra.d"]
-    fn __lsx_vsra_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsrai.b"]
-    fn __lsx_vsrai_b(a: __v16i8, b: u32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsrai.h"]
-    fn __lsx_vsrai_h(a: __v8i16, b: u32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsrai.w"]
-    fn __lsx_vsrai_w(a: __v4i32, b: u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsrai.d"]
-    fn __lsx_vsrai_d(a: __v2i64, b: u32) -> __v2i64;
     #[link_name = "llvm.loongarch.lsx.vsrar.b"]
     fn __lsx_vsrar_b(a: __v16i8, b: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vsrar.h"]
@@ -59,22 +27,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vsrari_w(a: __v4i32, b: u32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vsrari.d"]
     fn __lsx_vsrari_d(a: __v2i64, b: u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsrl.b"]
-    fn __lsx_vsrl_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsrl.h"]
-    fn __lsx_vsrl_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsrl.w"]
-    fn __lsx_vsrl_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsrl.d"]
-    fn __lsx_vsrl_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsrli.b"]
-    fn __lsx_vsrli_b(a: __v16i8, b: u32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsrli.h"]
-    fn __lsx_vsrli_h(a: __v8i16, b: u32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsrli.w"]
-    fn __lsx_vsrli_w(a: __v4i32, b: u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsrli.d"]
-    fn __lsx_vsrli_d(a: __v2i64, b: u32) -> __v2i64;
     #[link_name = "llvm.loongarch.lsx.vsrlr.b"]
     fn __lsx_vsrlr_b(a: __v16i8, b: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vsrlr.h"]
@@ -91,14 +43,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vsrlri_w(a: __v4i32, b: u32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vsrlri.d"]
     fn __lsx_vsrlri_d(a: __v2i64, b: u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vbitclr.b"]
-    fn __lsx_vbitclr_b(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vbitclr.h"]
-    fn __lsx_vbitclr_h(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vbitclr.w"]
-    fn __lsx_vbitclr_w(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vbitclr.d"]
-    fn __lsx_vbitclr_d(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vbitclri.b"]
     fn __lsx_vbitclri_b(a: __v16u8, b: u32) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vbitclri.h"]
@@ -107,14 +51,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vbitclri_w(a: __v4u32, b: u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vbitclri.d"]
     fn __lsx_vbitclri_d(a: __v2u64, b: u32) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vbitset.b"]
-    fn __lsx_vbitset_b(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vbitset.h"]
-    fn __lsx_vbitset_h(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vbitset.w"]
-    fn __lsx_vbitset_w(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vbitset.d"]
-    fn __lsx_vbitset_d(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vbitseti.b"]
     fn __lsx_vbitseti_b(a: __v16u8, b: u32) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vbitseti.h"]
@@ -123,14 +59,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vbitseti_w(a: __v4u32, b: u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vbitseti.d"]
     fn __lsx_vbitseti_d(a: __v2u64, b: u32) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vbitrev.b"]
-    fn __lsx_vbitrev_b(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vbitrev.h"]
-    fn __lsx_vbitrev_h(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vbitrev.w"]
-    fn __lsx_vbitrev_w(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vbitrev.d"]
-    fn __lsx_vbitrev_d(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vbitrevi.b"]
     fn __lsx_vbitrevi_b(a: __v16u8, b: u32) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vbitrevi.h"]
@@ -139,30 +67,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vbitrevi_w(a: __v4u32, b: u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vbitrevi.d"]
     fn __lsx_vbitrevi_d(a: __v2u64, b: u32) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vadd.b"]
-    fn __lsx_vadd_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vadd.h"]
-    fn __lsx_vadd_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vadd.w"]
-    fn __lsx_vadd_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vadd.d"]
-    fn __lsx_vadd_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vaddi.bu"]
-    fn __lsx_vaddi_bu(a: __v16i8, b: u32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vaddi.hu"]
-    fn __lsx_vaddi_hu(a: __v8i16, b: u32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vaddi.wu"]
-    fn __lsx_vaddi_wu(a: __v4i32, b: u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vaddi.du"]
-    fn __lsx_vaddi_du(a: __v2i64, b: u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsub.b"]
-    fn __lsx_vsub_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsub.h"]
-    fn __lsx_vsub_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsub.w"]
-    fn __lsx_vsub_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsub.d"]
-    fn __lsx_vsub_d(a: __v2i64, b: __v2i64) -> __v2i64;
     #[link_name = "llvm.loongarch.lsx.vsubi.bu"]
     fn __lsx_vsubi_bu(a: __v16i8, b: u32) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vsubi.hu"]
@@ -171,150 +75,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vsubi_wu(a: __v4i32, b: u32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vsubi.du"]
     fn __lsx_vsubi_du(a: __v2i64, b: u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vmax.b"]
-    fn __lsx_vmax_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vmax.h"]
-    fn __lsx_vmax_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vmax.w"]
-    fn __lsx_vmax_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vmax.d"]
-    fn __lsx_vmax_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vmaxi.b"]
-    fn __lsx_vmaxi_b(a: __v16i8, b: i32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vmaxi.h"]
-    fn __lsx_vmaxi_h(a: __v8i16, b: i32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vmaxi.w"]
-    fn __lsx_vmaxi_w(a: __v4i32, b: i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vmaxi.d"]
-    fn __lsx_vmaxi_d(a: __v2i64, b: i32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vmax.bu"]
-    fn __lsx_vmax_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vmax.hu"]
-    fn __lsx_vmax_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vmax.wu"]
-    fn __lsx_vmax_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vmax.du"]
-    fn __lsx_vmax_du(a: __v2u64, b: __v2u64) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vmaxi.bu"]
-    fn __lsx_vmaxi_bu(a: __v16u8, b: u32) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vmaxi.hu"]
-    fn __lsx_vmaxi_hu(a: __v8u16, b: u32) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vmaxi.wu"]
-    fn __lsx_vmaxi_wu(a: __v4u32, b: u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vmaxi.du"]
-    fn __lsx_vmaxi_du(a: __v2u64, b: u32) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vmin.b"]
-    fn __lsx_vmin_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vmin.h"]
-    fn __lsx_vmin_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vmin.w"]
-    fn __lsx_vmin_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vmin.d"]
-    fn __lsx_vmin_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vmini.b"]
-    fn __lsx_vmini_b(a: __v16i8, b: i32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vmini.h"]
-    fn __lsx_vmini_h(a: __v8i16, b: i32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vmini.w"]
-    fn __lsx_vmini_w(a: __v4i32, b: i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vmini.d"]
-    fn __lsx_vmini_d(a: __v2i64, b: i32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vmin.bu"]
-    fn __lsx_vmin_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vmin.hu"]
-    fn __lsx_vmin_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vmin.wu"]
-    fn __lsx_vmin_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vmin.du"]
-    fn __lsx_vmin_du(a: __v2u64, b: __v2u64) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vmini.bu"]
-    fn __lsx_vmini_bu(a: __v16u8, b: u32) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vmini.hu"]
-    fn __lsx_vmini_hu(a: __v8u16, b: u32) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vmini.wu"]
-    fn __lsx_vmini_wu(a: __v4u32, b: u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vmini.du"]
-    fn __lsx_vmini_du(a: __v2u64, b: u32) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vseq.b"]
-    fn __lsx_vseq_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vseq.h"]
-    fn __lsx_vseq_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vseq.w"]
-    fn __lsx_vseq_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vseq.d"]
-    fn __lsx_vseq_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vseqi.b"]
-    fn __lsx_vseqi_b(a: __v16i8, b: i32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vseqi.h"]
-    fn __lsx_vseqi_h(a: __v8i16, b: i32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vseqi.w"]
-    fn __lsx_vseqi_w(a: __v4i32, b: i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vseqi.d"]
-    fn __lsx_vseqi_d(a: __v2i64, b: i32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vslti.b"]
-    fn __lsx_vslti_b(a: __v16i8, b: i32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vslt.b"]
-    fn __lsx_vslt_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vslt.h"]
-    fn __lsx_vslt_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vslt.w"]
-    fn __lsx_vslt_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vslt.d"]
-    fn __lsx_vslt_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vslti.h"]
-    fn __lsx_vslti_h(a: __v8i16, b: i32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vslti.w"]
-    fn __lsx_vslti_w(a: __v4i32, b: i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vslti.d"]
-    fn __lsx_vslti_d(a: __v2i64, b: i32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vslt.bu"]
-    fn __lsx_vslt_bu(a: __v16u8, b: __v16u8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vslt.hu"]
-    fn __lsx_vslt_hu(a: __v8u16, b: __v8u16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vslt.wu"]
-    fn __lsx_vslt_wu(a: __v4u32, b: __v4u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vslt.du"]
-    fn __lsx_vslt_du(a: __v2u64, b: __v2u64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vslti.bu"]
-    fn __lsx_vslti_bu(a: __v16u8, b: u32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vslti.hu"]
-    fn __lsx_vslti_hu(a: __v8u16, b: u32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vslti.wu"]
-    fn __lsx_vslti_wu(a: __v4u32, b: u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vslti.du"]
-    fn __lsx_vslti_du(a: __v2u64, b: u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsle.b"]
-    fn __lsx_vsle_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsle.h"]
-    fn __lsx_vsle_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsle.w"]
-    fn __lsx_vsle_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsle.d"]
-    fn __lsx_vsle_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vslei.b"]
-    fn __lsx_vslei_b(a: __v16i8, b: i32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vslei.h"]
-    fn __lsx_vslei_h(a: __v8i16, b: i32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vslei.w"]
-    fn __lsx_vslei_w(a: __v4i32, b: i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vslei.d"]
-    fn __lsx_vslei_d(a: __v2i64, b: i32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsle.bu"]
-    fn __lsx_vsle_bu(a: __v16u8, b: __v16u8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsle.hu"]
-    fn __lsx_vsle_hu(a: __v8u16, b: __v8u16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsle.wu"]
-    fn __lsx_vsle_wu(a: __v4u32, b: __v4u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsle.du"]
-    fn __lsx_vsle_du(a: __v2u64, b: __v2u64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vslei.bu"]
-    fn __lsx_vslei_bu(a: __v16u8, b: u32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vslei.hu"]
-    fn __lsx_vslei_hu(a: __v8u16, b: u32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vslei.wu"]
-    fn __lsx_vslei_wu(a: __v4u32, b: u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vslei.du"]
-    fn __lsx_vslei_du(a: __v2u64, b: u32) -> __v2i64;
     #[link_name = "llvm.loongarch.lsx.vsat.b"]
     fn __lsx_vsat_b(a: __v16i8, b: u32) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vsat.h"]
@@ -331,30 +91,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vsat_wu(a: __v4u32, b: u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vsat.du"]
     fn __lsx_vsat_du(a: __v2u64, b: u32) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vadda.b"]
-    fn __lsx_vadda_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vadda.h"]
-    fn __lsx_vadda_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vadda.w"]
-    fn __lsx_vadda_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vadda.d"]
-    fn __lsx_vadda_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsadd.b"]
-    fn __lsx_vsadd_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vsadd.h"]
-    fn __lsx_vsadd_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vsadd.w"]
-    fn __lsx_vsadd_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vsadd.d"]
-    fn __lsx_vsadd_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vsadd.bu"]
-    fn __lsx_vsadd_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vsadd.hu"]
-    fn __lsx_vsadd_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vsadd.wu"]
-    fn __lsx_vsadd_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vsadd.du"]
-    fn __lsx_vsadd_du(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vavg.b"]
     fn __lsx_vavg_b(a: __v16i8, b: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vavg.h"]
@@ -387,78 +123,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vavgr_wu(a: __v4u32, b: __v4u32) -> __v4u32;
     #[link_name = "llvm.loongarch.lsx.vavgr.du"]
     fn __lsx_vavgr_du(a: __v2u64, b: __v2u64) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vssub.b"]
-    fn __lsx_vssub_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vssub.h"]
-    fn __lsx_vssub_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vssub.w"]
-    fn __lsx_vssub_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vssub.d"]
-    fn __lsx_vssub_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vssub.bu"]
-    fn __lsx_vssub_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vssub.hu"]
-    fn __lsx_vssub_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vssub.wu"]
-    fn __lsx_vssub_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vssub.du"]
-    fn __lsx_vssub_du(a: __v2u64, b: __v2u64) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vabsd.b"]
-    fn __lsx_vabsd_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vabsd.h"]
-    fn __lsx_vabsd_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vabsd.w"]
-    fn __lsx_vabsd_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vabsd.d"]
-    fn __lsx_vabsd_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vabsd.bu"]
-    fn __lsx_vabsd_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vabsd.hu"]
-    fn __lsx_vabsd_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vabsd.wu"]
-    fn __lsx_vabsd_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vabsd.du"]
-    fn __lsx_vabsd_du(a: __v2u64, b: __v2u64) -> __v2u64;
-    #[link_name = "llvm.loongarch.lsx.vmul.b"]
-    fn __lsx_vmul_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vmul.h"]
-    fn __lsx_vmul_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vmul.w"]
-    fn __lsx_vmul_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vmul.d"]
-    fn __lsx_vmul_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vmadd.b"]
-    fn __lsx_vmadd_b(a: __v16i8, b: __v16i8, c: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vmadd.h"]
-    fn __lsx_vmadd_h(a: __v8i16, b: __v8i16, c: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vmadd.w"]
-    fn __lsx_vmadd_w(a: __v4i32, b: __v4i32, c: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vmadd.d"]
-    fn __lsx_vmadd_d(a: __v2i64, b: __v2i64, c: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vmsub.b"]
-    fn __lsx_vmsub_b(a: __v16i8, b: __v16i8, c: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vmsub.h"]
-    fn __lsx_vmsub_h(a: __v8i16, b: __v8i16, c: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vmsub.w"]
-    fn __lsx_vmsub_w(a: __v4i32, b: __v4i32, c: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vmsub.d"]
-    fn __lsx_vmsub_d(a: __v2i64, b: __v2i64, c: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vdiv.b"]
-    fn __lsx_vdiv_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vdiv.h"]
-    fn __lsx_vdiv_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vdiv.w"]
-    fn __lsx_vdiv_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vdiv.d"]
-    fn __lsx_vdiv_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vdiv.bu"]
-    fn __lsx_vdiv_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vdiv.hu"]
-    fn __lsx_vdiv_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vdiv.wu"]
-    fn __lsx_vdiv_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vdiv.du"]
-    fn __lsx_vdiv_du(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vhaddw.h.b"]
     fn __lsx_vhaddw_h_b(a: __v16i8, b: __v16i8) -> __v8i16;
     #[link_name = "llvm.loongarch.lsx.vhaddw.w.h"]
@@ -483,22 +147,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vhsubw_wu_hu(a: __v8u16, b: __v8u16) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vhsubw.du.wu"]
     fn __lsx_vhsubw_du_wu(a: __v4u32, b: __v4u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vmod.b"]
-    fn __lsx_vmod_b(a: __v16i8, b: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vmod.h"]
-    fn __lsx_vmod_h(a: __v8i16, b: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vmod.w"]
-    fn __lsx_vmod_w(a: __v4i32, b: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vmod.d"]
-    fn __lsx_vmod_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vmod.bu"]
-    fn __lsx_vmod_bu(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vmod.hu"]
-    fn __lsx_vmod_hu(a: __v8u16, b: __v8u16) -> __v8u16;
-    #[link_name = "llvm.loongarch.lsx.vmod.wu"]
-    fn __lsx_vmod_wu(a: __v4u32, b: __v4u32) -> __v4u32;
-    #[link_name = "llvm.loongarch.lsx.vmod.du"]
-    fn __lsx_vmod_du(a: __v2u64, b: __v2u64) -> __v2u64;
     #[link_name = "llvm.loongarch.lsx.vreplve.b"]
     fn __lsx_vreplve_b(a: __v16i8, b: i32) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vreplve.h"]
@@ -569,20 +217,12 @@ unsafe extern "unadjusted" {
     fn __lsx_vshuf_w(a: __v4i32, b: __v4i32, c: __v4i32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vshuf.d"]
     fn __lsx_vshuf_d(a: __v2i64, b: __v2i64, c: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vand.v"]
-    fn __lsx_vand_v(a: __v16u8, b: __v16u8) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vandi.b"]
     fn __lsx_vandi_b(a: __v16u8, b: u32) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vor.v"]
-    fn __lsx_vor_v(a: __v16u8, b: __v16u8) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vori.b"]
     fn __lsx_vori_b(a: __v16u8, b: u32) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vnor.v"]
-    fn __lsx_vnor_v(a: __v16u8, b: __v16u8) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vnori.b"]
     fn __lsx_vnori_b(a: __v16u8, b: u32) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vxor.v"]
-    fn __lsx_vxor_v(a: __v16u8, b: __v16u8) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vxori.b"]
     fn __lsx_vxori_b(a: __v16u8, b: u32) -> __v16u8;
     #[link_name = "llvm.loongarch.lsx.vbitsel.v"]
@@ -595,22 +235,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vshuf4i_h(a: __v8i16, b: u32) -> __v8i16;
     #[link_name = "llvm.loongarch.lsx.vshuf4i.w"]
     fn __lsx_vshuf4i_w(a: __v4i32, b: u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vreplgr2vr.b"]
-    fn __lsx_vreplgr2vr_b(a: i32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vreplgr2vr.h"]
-    fn __lsx_vreplgr2vr_h(a: i32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vreplgr2vr.w"]
-    fn __lsx_vreplgr2vr_w(a: i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vreplgr2vr.d"]
-    fn __lsx_vreplgr2vr_d(a: i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vpcnt.b"]
-    fn __lsx_vpcnt_b(a: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vpcnt.h"]
-    fn __lsx_vpcnt_h(a: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vpcnt.w"]
-    fn __lsx_vpcnt_w(a: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vpcnt.d"]
-    fn __lsx_vpcnt_d(a: __v2i64) -> __v2i64;
     #[link_name = "llvm.loongarch.lsx.vclo.b"]
     fn __lsx_vclo_b(a: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vclo.h"]
@@ -619,54 +243,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vclo_w(a: __v4i32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vclo.d"]
     fn __lsx_vclo_d(a: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vclz.b"]
-    fn __lsx_vclz_b(a: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vclz.h"]
-    fn __lsx_vclz_h(a: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vclz.w"]
-    fn __lsx_vclz_w(a: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vclz.d"]
-    fn __lsx_vclz_d(a: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vpickve2gr.b"]
-    fn __lsx_vpickve2gr_b(a: __v16i8, b: u32) -> i32;
-    #[link_name = "llvm.loongarch.lsx.vpickve2gr.h"]
-    fn __lsx_vpickve2gr_h(a: __v8i16, b: u32) -> i32;
-    #[link_name = "llvm.loongarch.lsx.vpickve2gr.w"]
-    fn __lsx_vpickve2gr_w(a: __v4i32, b: u32) -> i32;
-    #[link_name = "llvm.loongarch.lsx.vpickve2gr.d"]
-    fn __lsx_vpickve2gr_d(a: __v2i64, b: u32) -> i64;
-    #[link_name = "llvm.loongarch.lsx.vpickve2gr.bu"]
-    fn __lsx_vpickve2gr_bu(a: __v16i8, b: u32) -> u32;
-    #[link_name = "llvm.loongarch.lsx.vpickve2gr.hu"]
-    fn __lsx_vpickve2gr_hu(a: __v8i16, b: u32) -> u32;
-    #[link_name = "llvm.loongarch.lsx.vpickve2gr.wu"]
-    fn __lsx_vpickve2gr_wu(a: __v4i32, b: u32) -> u32;
-    #[link_name = "llvm.loongarch.lsx.vpickve2gr.du"]
-    fn __lsx_vpickve2gr_du(a: __v2i64, b: u32) -> u64;
-    #[link_name = "llvm.loongarch.lsx.vinsgr2vr.b"]
-    fn __lsx_vinsgr2vr_b(a: __v16i8, b: i32, c: u32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vinsgr2vr.h"]
-    fn __lsx_vinsgr2vr_h(a: __v8i16, b: i32, c: u32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vinsgr2vr.w"]
-    fn __lsx_vinsgr2vr_w(a: __v4i32, b: i32, c: u32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vinsgr2vr.d"]
-    fn __lsx_vinsgr2vr_d(a: __v2i64, b: i64, c: u32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vfadd.s"]
-    fn __lsx_vfadd_s(a: __v4f32, b: __v4f32) -> __v4f32;
-    #[link_name = "llvm.loongarch.lsx.vfadd.d"]
-    fn __lsx_vfadd_d(a: __v2f64, b: __v2f64) -> __v2f64;
-    #[link_name = "llvm.loongarch.lsx.vfsub.s"]
-    fn __lsx_vfsub_s(a: __v4f32, b: __v4f32) -> __v4f32;
-    #[link_name = "llvm.loongarch.lsx.vfsub.d"]
-    fn __lsx_vfsub_d(a: __v2f64, b: __v2f64) -> __v2f64;
-    #[link_name = "llvm.loongarch.lsx.vfmul.s"]
-    fn __lsx_vfmul_s(a: __v4f32, b: __v4f32) -> __v4f32;
-    #[link_name = "llvm.loongarch.lsx.vfmul.d"]
-    fn __lsx_vfmul_d(a: __v2f64, b: __v2f64) -> __v2f64;
-    #[link_name = "llvm.loongarch.lsx.vfdiv.s"]
-    fn __lsx_vfdiv_s(a: __v4f32, b: __v4f32) -> __v4f32;
-    #[link_name = "llvm.loongarch.lsx.vfdiv.d"]
-    fn __lsx_vfdiv_d(a: __v2f64, b: __v2f64) -> __v2f64;
     #[link_name = "llvm.loongarch.lsx.vfcvt.h.s"]
     fn __lsx_vfcvt_h_s(a: __v4f32, b: __v4f32) -> __v8i16;
     #[link_name = "llvm.loongarch.lsx.vfcvt.s.d"]
@@ -691,10 +267,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vfclass_s(a: __v4f32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vfclass.d"]
     fn __lsx_vfclass_d(a: __v2f64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vfsqrt.s"]
-    fn __lsx_vfsqrt_s(a: __v4f32) -> __v4f32;
-    #[link_name = "llvm.loongarch.lsx.vfsqrt.d"]
-    fn __lsx_vfsqrt_d(a: __v2f64) -> __v2f64;
     #[link_name = "llvm.loongarch.lsx.vfrecip.s"]
     fn __lsx_vfrecip_s(a: __v4f32) -> __v4f32;
     #[link_name = "llvm.loongarch.lsx.vfrecip.d"]
@@ -751,16 +323,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vffint_s_wu(a: __v4u32) -> __v4f32;
     #[link_name = "llvm.loongarch.lsx.vffint.d.lu"]
     fn __lsx_vffint_d_lu(a: __v2u64) -> __v2f64;
-    #[link_name = "llvm.loongarch.lsx.vandn.v"]
-    fn __lsx_vandn_v(a: __v16u8, b: __v16u8) -> __v16u8;
-    #[link_name = "llvm.loongarch.lsx.vneg.b"]
-    fn __lsx_vneg_b(a: __v16i8) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vneg.h"]
-    fn __lsx_vneg_h(a: __v8i16) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vneg.w"]
-    fn __lsx_vneg_w(a: __v4i32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vneg.d"]
-    fn __lsx_vneg_d(a: __v2i64) -> __v2i64;
     #[link_name = "llvm.loongarch.lsx.vmuh.b"]
     fn __lsx_vmuh_b(a: __v16i8, b: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vmuh.h"]
@@ -887,22 +449,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vsigncov_w(a: __v4i32, b: __v4i32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vsigncov.d"]
     fn __lsx_vsigncov_d(a: __v2i64, b: __v2i64) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vfmadd.s"]
-    fn __lsx_vfmadd_s(a: __v4f32, b: __v4f32, c: __v4f32) -> __v4f32;
-    #[link_name = "llvm.loongarch.lsx.vfmadd.d"]
-    fn __lsx_vfmadd_d(a: __v2f64, b: __v2f64, c: __v2f64) -> __v2f64;
-    #[link_name = "llvm.loongarch.lsx.vfmsub.s"]
-    fn __lsx_vfmsub_s(a: __v4f32, b: __v4f32, c: __v4f32) -> __v4f32;
-    #[link_name = "llvm.loongarch.lsx.vfmsub.d"]
-    fn __lsx_vfmsub_d(a: __v2f64, b: __v2f64, c: __v2f64) -> __v2f64;
-    #[link_name = "llvm.loongarch.lsx.vfnmadd.s"]
-    fn __lsx_vfnmadd_s(a: __v4f32, b: __v4f32, c: __v4f32) -> __v4f32;
-    #[link_name = "llvm.loongarch.lsx.vfnmadd.d"]
-    fn __lsx_vfnmadd_d(a: __v2f64, b: __v2f64, c: __v2f64) -> __v2f64;
-    #[link_name = "llvm.loongarch.lsx.vfnmsub.s"]
-    fn __lsx_vfnmsub_s(a: __v4f32, b: __v4f32, c: __v4f32) -> __v4f32;
-    #[link_name = "llvm.loongarch.lsx.vfnmsub.d"]
-    fn __lsx_vfnmsub_d(a: __v2f64, b: __v2f64, c: __v2f64) -> __v2f64;
     #[link_name = "llvm.loongarch.lsx.vftintrne.w.s"]
     fn __lsx_vftintrne_w_s(a: __v4f32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vftintrne.l.d"]
@@ -1323,8 +869,6 @@ unsafe extern "unadjusted" {
     fn __lsx_vssrln_h_w(a: __v4i32, b: __v4i32) -> __v8i16;
     #[link_name = "llvm.loongarch.lsx.vssrln.w.d"]
     fn __lsx_vssrln_w_d(a: __v2i64, b: __v2i64) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vorn.v"]
-    fn __lsx_vorn_v(a: __v16i8, b: __v16i8) -> __v16i8;
     #[link_name = "llvm.loongarch.lsx.vldi"]
     fn __lsx_vldi(a: i32) -> __v2i64;
     #[link_name = "llvm.loongarch.lsx.vshuf.b"]
@@ -1443,1904 +987,560 @@ unsafe extern "unadjusted" {
     fn __lsx_vfcmp_sune_s(a: __v4f32, b: __v4f32) -> __v4i32;
     #[link_name = "llvm.loongarch.lsx.vfcmp.sun.s"]
     fn __lsx_vfcmp_sun_s(a: __v4f32, b: __v4f32) -> __v4i32;
-    #[link_name = "llvm.loongarch.lsx.vrepli.b"]
-    fn __lsx_vrepli_b(a: i32) -> __v16i8;
-    #[link_name = "llvm.loongarch.lsx.vrepli.d"]
-    fn __lsx_vrepli_d(a: i32) -> __v2i64;
-    #[link_name = "llvm.loongarch.lsx.vrepli.h"]
-    fn __lsx_vrepli_h(a: i32) -> __v8i16;
-    #[link_name = "llvm.loongarch.lsx.vrepli.w"]
-    fn __lsx_vrepli_w(a: i32) -> __v4i32;
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsll_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsll_b(transmute(a), transmute(b))) }
+pub fn lsx_vsrar_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vsrar_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsll_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsll_h(transmute(a), transmute(b))) }
+pub fn lsx_vsrar_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vsrar_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsll_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsll_w(transmute(a), transmute(b))) }
+pub fn lsx_vsrar_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vsrar_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsll_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsll_d(transmute(a), transmute(b))) }
+pub fn lsx_vsrar_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vsrar_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslli_b<const IMM3: u32>(a: m128i) -> m128i {
+pub fn lsx_vsrari_b<const IMM3: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vslli_b(transmute(a), IMM3)) }
+    unsafe { transmute(__lsx_vsrari_b(transmute(a), IMM3)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslli_h<const IMM4: u32>(a: m128i) -> m128i {
+pub fn lsx_vsrari_h<const IMM4: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vslli_h(transmute(a), IMM4)) }
+    unsafe { transmute(__lsx_vsrari_h(transmute(a), IMM4)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslli_w<const IMM5: u32>(a: m128i) -> m128i {
+pub fn lsx_vsrari_w<const IMM5: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vslli_w(transmute(a), IMM5)) }
+    unsafe { transmute(__lsx_vsrari_w(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslli_d<const IMM6: u32>(a: m128i) -> m128i {
+pub fn lsx_vsrari_d<const IMM6: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vslli_d(transmute(a), IMM6)) }
+    unsafe { transmute(__lsx_vsrari_d(transmute(a), IMM6)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsra_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsra_b(transmute(a), transmute(b))) }
+pub fn lsx_vsrlr_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vsrlr_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsra_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsra_h(transmute(a), transmute(b))) }
+pub fn lsx_vsrlr_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vsrlr_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsra_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsra_w(transmute(a), transmute(b))) }
+pub fn lsx_vsrlr_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vsrlr_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsra_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsra_d(transmute(a), transmute(b))) }
+pub fn lsx_vsrlr_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vsrlr_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrai_b<const IMM3: u32>(a: m128i) -> m128i {
+pub fn lsx_vsrlri_b<const IMM3: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vsrai_b(transmute(a), IMM3)) }
+    unsafe { transmute(__lsx_vsrlri_b(transmute(a), IMM3)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrai_h<const IMM4: u32>(a: m128i) -> m128i {
+pub fn lsx_vsrlri_h<const IMM4: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vsrai_h(transmute(a), IMM4)) }
+    unsafe { transmute(__lsx_vsrlri_h(transmute(a), IMM4)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrai_w<const IMM5: u32>(a: m128i) -> m128i {
+pub fn lsx_vsrlri_w<const IMM5: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsrai_w(transmute(a), IMM5)) }
+    unsafe { transmute(__lsx_vsrlri_w(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrai_d<const IMM6: u32>(a: m128i) -> m128i {
+pub fn lsx_vsrlri_d<const IMM6: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vsrai_d(transmute(a), IMM6)) }
+    unsafe { transmute(__lsx_vsrlri_d(transmute(a), IMM6)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrar_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrar_b(transmute(a), transmute(b))) }
+pub fn lsx_vbitclri_b<const IMM3: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM3, 3);
+    unsafe { transmute(__lsx_vbitclri_b(transmute(a), IMM3)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrar_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrar_h(transmute(a), transmute(b))) }
+pub fn lsx_vbitclri_h<const IMM4: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM4, 4);
+    unsafe { transmute(__lsx_vbitclri_h(transmute(a), IMM4)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrar_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrar_w(transmute(a), transmute(b))) }
+pub fn lsx_vbitclri_w<const IMM5: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM5, 5);
+    unsafe { transmute(__lsx_vbitclri_w(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrar_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrar_d(transmute(a), transmute(b))) }
+pub fn lsx_vbitclri_d<const IMM6: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM6, 6);
+    unsafe { transmute(__lsx_vbitclri_d(transmute(a), IMM6)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrari_b<const IMM3: u32>(a: m128i) -> m128i {
+pub fn lsx_vbitseti_b<const IMM3: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vsrari_b(transmute(a), IMM3)) }
+    unsafe { transmute(__lsx_vbitseti_b(transmute(a), IMM3)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrari_h<const IMM4: u32>(a: m128i) -> m128i {
+pub fn lsx_vbitseti_h<const IMM4: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vsrari_h(transmute(a), IMM4)) }
+    unsafe { transmute(__lsx_vbitseti_h(transmute(a), IMM4)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrari_w<const IMM5: u32>(a: m128i) -> m128i {
+pub fn lsx_vbitseti_w<const IMM5: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsrari_w(transmute(a), IMM5)) }
+    unsafe { transmute(__lsx_vbitseti_w(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrari_d<const IMM6: u32>(a: m128i) -> m128i {
+pub fn lsx_vbitseti_d<const IMM6: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vsrari_d(transmute(a), IMM6)) }
+    unsafe { transmute(__lsx_vbitseti_d(transmute(a), IMM6)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrl_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrl_b(transmute(a), transmute(b))) }
+pub fn lsx_vbitrevi_b<const IMM3: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM3, 3);
+    unsafe { transmute(__lsx_vbitrevi_b(transmute(a), IMM3)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrl_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrl_h(transmute(a), transmute(b))) }
+pub fn lsx_vbitrevi_h<const IMM4: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM4, 4);
+    unsafe { transmute(__lsx_vbitrevi_h(transmute(a), IMM4)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrl_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrl_w(transmute(a), transmute(b))) }
+pub fn lsx_vbitrevi_w<const IMM5: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM5, 5);
+    unsafe { transmute(__lsx_vbitrevi_w(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrl_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrl_d(transmute(a), transmute(b))) }
+pub fn lsx_vbitrevi_d<const IMM6: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM6, 6);
+    unsafe { transmute(__lsx_vbitrevi_d(transmute(a), IMM6)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrli_b<const IMM3: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vsrli_b(transmute(a), IMM3)) }
+pub fn lsx_vsubi_bu<const IMM5: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM5, 5);
+    unsafe { transmute(__lsx_vsubi_bu(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrli_h<const IMM4: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vsrli_h(transmute(a), IMM4)) }
+pub fn lsx_vsubi_hu<const IMM5: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM5, 5);
+    unsafe { transmute(__lsx_vsubi_hu(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrli_w<const IMM5: u32>(a: m128i) -> m128i {
+pub fn lsx_vsubi_wu<const IMM5: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsrli_w(transmute(a), IMM5)) }
+    unsafe { transmute(__lsx_vsubi_wu(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrli_d<const IMM6: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vsrli_d(transmute(a), IMM6)) }
+pub fn lsx_vsubi_du<const IMM5: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM5, 5);
+    unsafe { transmute(__lsx_vsubi_du(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrlr_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrlr_b(transmute(a), transmute(b))) }
+pub fn lsx_vsat_b<const IMM3: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM3, 3);
+    unsafe { transmute(__lsx_vsat_b(transmute(a), IMM3)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrlr_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrlr_h(transmute(a), transmute(b))) }
+pub fn lsx_vsat_h<const IMM4: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM4, 4);
+    unsafe { transmute(__lsx_vsat_h(transmute(a), IMM4)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrlr_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrlr_w(transmute(a), transmute(b))) }
+pub fn lsx_vsat_w<const IMM5: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM5, 5);
+    unsafe { transmute(__lsx_vsat_w(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrlr_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsrlr_d(transmute(a), transmute(b))) }
+pub fn lsx_vsat_d<const IMM6: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM6, 6);
+    unsafe { transmute(__lsx_vsat_d(transmute(a), IMM6)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrlri_b<const IMM3: u32>(a: m128i) -> m128i {
+pub fn lsx_vsat_bu<const IMM3: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vsrlri_b(transmute(a), IMM3)) }
+    unsafe { transmute(__lsx_vsat_bu(transmute(a), IMM3)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrlri_h<const IMM4: u32>(a: m128i) -> m128i {
+pub fn lsx_vsat_hu<const IMM4: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vsrlri_h(transmute(a), IMM4)) }
+    unsafe { transmute(__lsx_vsat_hu(transmute(a), IMM4)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrlri_w<const IMM5: u32>(a: m128i) -> m128i {
+pub fn lsx_vsat_wu<const IMM5: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsrlri_w(transmute(a), IMM5)) }
+    unsafe { transmute(__lsx_vsat_wu(transmute(a), IMM5)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsrlri_d<const IMM6: u32>(a: m128i) -> m128i {
+pub fn lsx_vsat_du<const IMM6: u32>(a: m128i) -> m128i {
     static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vsrlri_d(transmute(a), IMM6)) }
+    unsafe { transmute(__lsx_vsat_du(transmute(a), IMM6)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclr_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitclr_b(transmute(a), transmute(b))) }
+pub fn lsx_vavg_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavg_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclr_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitclr_h(transmute(a), transmute(b))) }
+pub fn lsx_vavg_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavg_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclr_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitclr_w(transmute(a), transmute(b))) }
+pub fn lsx_vavg_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavg_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclr_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitclr_d(transmute(a), transmute(b))) }
+pub fn lsx_vavg_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavg_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclri_b<const IMM3: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vbitclri_b(transmute(a), IMM3)) }
+pub fn lsx_vavg_bu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavg_bu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclri_h<const IMM4: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vbitclri_h(transmute(a), IMM4)) }
+pub fn lsx_vavg_hu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavg_hu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclri_w<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vbitclri_w(transmute(a), IMM5)) }
+pub fn lsx_vavg_wu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavg_wu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitclri_d<const IMM6: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vbitclri_d(transmute(a), IMM6)) }
+pub fn lsx_vavg_du(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavg_du(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitset_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitset_b(transmute(a), transmute(b))) }
+pub fn lsx_vavgr_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavgr_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitset_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitset_h(transmute(a), transmute(b))) }
+pub fn lsx_vavgr_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavgr_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitset_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitset_w(transmute(a), transmute(b))) }
+pub fn lsx_vavgr_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavgr_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitset_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitset_d(transmute(a), transmute(b))) }
+pub fn lsx_vavgr_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavgr_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitseti_b<const IMM3: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vbitseti_b(transmute(a), IMM3)) }
+pub fn lsx_vavgr_bu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavgr_bu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitseti_h<const IMM4: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vbitseti_h(transmute(a), IMM4)) }
+pub fn lsx_vavgr_hu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavgr_hu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitseti_w<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vbitseti_w(transmute(a), IMM5)) }
+pub fn lsx_vavgr_wu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavgr_wu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitseti_d<const IMM6: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vbitseti_d(transmute(a), IMM6)) }
+pub fn lsx_vavgr_du(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vavgr_du(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrev_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitrev_b(transmute(a), transmute(b))) }
+pub fn lsx_vhaddw_h_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhaddw_h_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrev_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitrev_h(transmute(a), transmute(b))) }
+pub fn lsx_vhaddw_w_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhaddw_w_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrev_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitrev_w(transmute(a), transmute(b))) }
+pub fn lsx_vhaddw_d_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhaddw_d_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrev_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitrev_d(transmute(a), transmute(b))) }
+pub fn lsx_vhaddw_hu_bu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhaddw_hu_bu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrevi_b<const IMM3: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vbitrevi_b(transmute(a), IMM3)) }
+pub fn lsx_vhaddw_wu_hu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhaddw_wu_hu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrevi_h<const IMM4: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vbitrevi_h(transmute(a), IMM4)) }
+pub fn lsx_vhaddw_du_wu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhaddw_du_wu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrevi_w<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vbitrevi_w(transmute(a), IMM5)) }
+pub fn lsx_vhsubw_h_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhsubw_h_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitrevi_d<const IMM6: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vbitrevi_d(transmute(a), IMM6)) }
+pub fn lsx_vhsubw_w_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhsubw_w_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadd_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadd_b(transmute(a), transmute(b))) }
+pub fn lsx_vhsubw_d_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhsubw_d_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadd_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadd_h(transmute(a), transmute(b))) }
+pub fn lsx_vhsubw_hu_bu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhsubw_hu_bu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadd_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadd_w(transmute(a), transmute(b))) }
+pub fn lsx_vhsubw_wu_hu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhsubw_wu_hu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadd_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadd_d(transmute(a), transmute(b))) }
+pub fn lsx_vhsubw_du_wu(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vhsubw_du_wu(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vaddi_bu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vaddi_bu(transmute(a), IMM5)) }
+pub fn lsx_vreplve_b(a: m128i, b: i32) -> m128i {
+    unsafe { transmute(__lsx_vreplve_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vaddi_hu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vaddi_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vaddi_wu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vaddi_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vaddi_du<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vaddi_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsub_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsub_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsub_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsub_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsub_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsub_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsub_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsub_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsubi_bu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsubi_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsubi_hu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsubi_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsubi_wu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsubi_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsubi_du<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsubi_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmax_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmax_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmax_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmax_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmax_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmax_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmax_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmax_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmaxi_b<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vmaxi_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmaxi_h<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vmaxi_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmaxi_w<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vmaxi_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmaxi_d<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vmaxi_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmax_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmax_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmax_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmax_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmax_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmax_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmax_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmax_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmaxi_bu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vmaxi_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmaxi_hu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vmaxi_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmaxi_wu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vmaxi_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmaxi_du<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vmaxi_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmin_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmin_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmin_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmin_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmin_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmin_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmin_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmin_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmini_b<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vmini_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmini_h<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vmini_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmini_w<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vmini_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmini_d<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vmini_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmin_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmin_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmin_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmin_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmin_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmin_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmin_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmin_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmini_bu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vmini_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmini_hu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vmini_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmini_wu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vmini_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmini_du<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vmini_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vseq_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vseq_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vseq_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vseq_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vseq_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vseq_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vseq_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vseq_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vseqi_b<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vseqi_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vseqi_h<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vseqi_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vseqi_w<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vseqi_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vseqi_d<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vseqi_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslti_b<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vslti_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslt_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vslt_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslt_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vslt_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslt_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vslt_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslt_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vslt_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslti_h<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vslti_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslti_w<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vslti_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslti_d<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vslti_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslt_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vslt_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslt_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vslt_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslt_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vslt_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslt_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vslt_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslti_bu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vslti_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslti_hu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vslti_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslti_wu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vslti_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslti_du<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vslti_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsle_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsle_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsle_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsle_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsle_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsle_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsle_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsle_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslei_b<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vslei_b(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslei_h<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vslei_h(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslei_w<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vslei_w(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslei_d<const IMM_S5: i32>(a: m128i) -> m128i {
-    static_assert_simm_bits!(IMM_S5, 5);
-    unsafe { transmute(__lsx_vslei_d(transmute(a), IMM_S5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsle_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsle_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsle_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsle_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsle_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsle_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsle_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsle_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslei_bu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vslei_bu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslei_hu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vslei_hu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslei_wu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vslei_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vslei_du<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vslei_du(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsat_b<const IMM3: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vsat_b(transmute(a), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsat_h<const IMM4: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vsat_h(transmute(a), IMM4)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsat_w<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsat_w(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsat_d<const IMM6: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vsat_d(transmute(a), IMM6)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsat_bu<const IMM3: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vsat_bu(transmute(a), IMM3)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsat_hu<const IMM4: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vsat_hu(transmute(a), IMM4)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsat_wu<const IMM5: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM5, 5);
-    unsafe { transmute(__lsx_vsat_wu(transmute(a), IMM5)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsat_du<const IMM6: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM6, 6);
-    unsafe { transmute(__lsx_vsat_du(transmute(a), IMM6)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadda_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadda_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadda_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadda_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadda_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadda_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vadda_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vadda_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vsadd_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vsadd_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavg_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavg_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavg_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavg_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavg_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavg_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavg_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavg_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavg_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavg_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavg_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavg_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavg_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavg_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavg_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavg_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavgr_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavgr_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavgr_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavgr_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavgr_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavgr_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavgr_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavgr_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavgr_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavgr_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavgr_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavgr_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavgr_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavgr_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vavgr_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vavgr_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vssub_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vssub_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vabsd_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vabsd_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmul_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmul_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmul_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmul_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmul_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmul_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmul_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmul_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmadd_b(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmadd_b(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmadd_h(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmadd_h(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmadd_w(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmadd_w(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmadd_d(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmadd_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmsub_b(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmsub_b(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmsub_h(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmsub_h(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmsub_w(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmsub_w(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmsub_d(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmsub_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vdiv_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vdiv_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vdiv_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vdiv_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vdiv_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vdiv_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vdiv_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vdiv_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vdiv_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vdiv_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vdiv_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vdiv_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vdiv_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vdiv_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vdiv_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vdiv_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhaddw_h_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhaddw_h_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhaddw_w_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhaddw_w_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhaddw_d_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhaddw_d_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhaddw_hu_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhaddw_hu_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhaddw_wu_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhaddw_wu_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhaddw_du_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhaddw_du_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhsubw_h_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhsubw_h_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhsubw_w_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhsubw_w_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhsubw_d_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhsubw_d_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhsubw_hu_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhsubw_hu_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhsubw_wu_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhsubw_wu_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vhsubw_du_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vhsubw_du_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmod_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmod_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmod_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmod_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmod_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmod_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmod_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmod_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmod_bu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmod_bu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmod_hu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmod_hu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmod_wu(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmod_wu(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vmod_du(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vmod_du(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vreplve_b(a: m128i, b: i32) -> m128i {
-    unsafe { transmute(__lsx_vreplve_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vreplve_h(a: m128i, b: i32) -> m128i {
-    unsafe { transmute(__lsx_vreplve_h(transmute(a), transmute(b))) }
+pub fn lsx_vreplve_h(a: m128i, b: i32) -> m128i {
+    unsafe { transmute(__lsx_vreplve_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
@@ -3386,583 +1586,307 @@ pub fn lsx_vreplvei_w<const IMM2: u32>(a: m128i) -> m128i {
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vreplvei_d<const IMM1: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM1, 1);
-    unsafe { transmute(__lsx_vreplvei_d(transmute(a), IMM1)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickev_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickev_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickev_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickev_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickev_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickev_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickev_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickev_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickod_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickod_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickod_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickod_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickod_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickod_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickod_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpickod_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vilvh_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vilvh_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vilvh_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vilvh_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vilvh_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vilvh_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vilvh_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vilvh_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vilvl_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vilvl_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vilvl_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vilvl_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vilvl_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vilvl_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vilvl_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vilvl_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpackev_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpackev_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpackev_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpackev_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpackev_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpackev_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpackev_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpackev_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpackod_b(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpackod_b(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpackod_h(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpackod_h(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpackod_w(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpackod_w(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpackod_d(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpackod_d(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vshuf_h(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vshuf_h(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vshuf_w(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vshuf_w(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vshuf_d(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vshuf_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vand_v(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vand_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vandi_b<const IMM8: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lsx_vandi_b(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vor_v(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vor_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vori_b<const IMM8: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lsx_vori_b(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vnor_v(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vnor_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vnori_b<const IMM8: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lsx_vnori_b(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vxor_v(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vxor_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vxori_b<const IMM8: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lsx_vxori_b(transmute(a), IMM8)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitsel_v(a: m128i, b: m128i, c: m128i) -> m128i {
-    unsafe { transmute(__lsx_vbitsel_v(transmute(a), transmute(b), transmute(c))) }
+pub fn lsx_vreplvei_d<const IMM1: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM1, 1);
+    unsafe { transmute(__lsx_vreplvei_d(transmute(a), IMM1)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vbitseli_b<const IMM8: u32>(a: m128i, b: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lsx_vbitseli_b(transmute(a), transmute(b), IMM8)) }
+pub fn lsx_vpickev_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpickev_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vshuf4i_b<const IMM8: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lsx_vshuf4i_b(transmute(a), IMM8)) }
+pub fn lsx_vpickev_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpickev_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vshuf4i_h<const IMM8: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lsx_vshuf4i_h(transmute(a), IMM8)) }
+pub fn lsx_vpickev_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpickev_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vshuf4i_w<const IMM8: u32>(a: m128i) -> m128i {
-    static_assert_uimm_bits!(IMM8, 8);
-    unsafe { transmute(__lsx_vshuf4i_w(transmute(a), IMM8)) }
+pub fn lsx_vpickev_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpickev_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vreplgr2vr_b(a: i32) -> m128i {
-    unsafe { transmute(__lsx_vreplgr2vr_b(transmute(a))) }
+pub fn lsx_vpickod_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpickod_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vreplgr2vr_h(a: i32) -> m128i {
-    unsafe { transmute(__lsx_vreplgr2vr_h(transmute(a))) }
+pub fn lsx_vpickod_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpickod_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vreplgr2vr_w(a: i32) -> m128i {
-    unsafe { transmute(__lsx_vreplgr2vr_w(transmute(a))) }
+pub fn lsx_vpickod_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpickod_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vreplgr2vr_d(a: i64) -> m128i {
-    unsafe { transmute(__lsx_vreplgr2vr_d(transmute(a))) }
+pub fn lsx_vpickod_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpickod_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpcnt_b(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpcnt_b(transmute(a))) }
+pub fn lsx_vilvh_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vilvh_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpcnt_h(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpcnt_h(transmute(a))) }
+pub fn lsx_vilvh_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vilvh_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpcnt_w(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpcnt_w(transmute(a))) }
+pub fn lsx_vilvh_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vilvh_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpcnt_d(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vpcnt_d(transmute(a))) }
+pub fn lsx_vilvh_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vilvh_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vclo_b(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vclo_b(transmute(a))) }
+pub fn lsx_vilvl_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vilvl_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vclo_h(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vclo_h(transmute(a))) }
+pub fn lsx_vilvl_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vilvl_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vclo_w(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vclo_w(transmute(a))) }
+pub fn lsx_vilvl_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vilvl_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vclo_d(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vclo_d(transmute(a))) }
+pub fn lsx_vilvl_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vilvl_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vclz_b(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vclz_b(transmute(a))) }
+pub fn lsx_vpackev_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpackev_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vclz_h(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vclz_h(transmute(a))) }
+pub fn lsx_vpackev_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpackev_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vclz_w(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vclz_w(transmute(a))) }
+pub fn lsx_vpackev_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpackev_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vclz_d(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vclz_d(transmute(a))) }
+pub fn lsx_vpackev_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpackev_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickve2gr_b<const IMM4: u32>(a: m128i) -> i32 {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vpickve2gr_b(transmute(a), IMM4)) }
+pub fn lsx_vpackod_b(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpackod_b(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickve2gr_h<const IMM3: u32>(a: m128i) -> i32 {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vpickve2gr_h(transmute(a), IMM3)) }
+pub fn lsx_vpackod_h(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpackod_h(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickve2gr_w<const IMM2: u32>(a: m128i) -> i32 {
-    static_assert_uimm_bits!(IMM2, 2);
-    unsafe { transmute(__lsx_vpickve2gr_w(transmute(a), IMM2)) }
+pub fn lsx_vpackod_w(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpackod_w(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickve2gr_d<const IMM1: u32>(a: m128i) -> i64 {
-    static_assert_uimm_bits!(IMM1, 1);
-    unsafe { transmute(__lsx_vpickve2gr_d(transmute(a), IMM1)) }
+pub fn lsx_vpackod_d(a: m128i, b: m128i) -> m128i {
+    unsafe { transmute(__lsx_vpackod_d(transmute(a), transmute(b))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickve2gr_bu<const IMM4: u32>(a: m128i) -> u32 {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vpickve2gr_bu(transmute(a), IMM4)) }
+pub fn lsx_vshuf_h(a: m128i, b: m128i, c: m128i) -> m128i {
+    unsafe { transmute(__lsx_vshuf_h(transmute(a), transmute(b), transmute(c))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickve2gr_hu<const IMM3: u32>(a: m128i) -> u32 {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vpickve2gr_hu(transmute(a), IMM3)) }
+pub fn lsx_vshuf_w(a: m128i, b: m128i, c: m128i) -> m128i {
+    unsafe { transmute(__lsx_vshuf_w(transmute(a), transmute(b), transmute(c))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickve2gr_wu<const IMM2: u32>(a: m128i) -> u32 {
-    static_assert_uimm_bits!(IMM2, 2);
-    unsafe { transmute(__lsx_vpickve2gr_wu(transmute(a), IMM2)) }
+pub fn lsx_vshuf_d(a: m128i, b: m128i, c: m128i) -> m128i {
+    unsafe { transmute(__lsx_vshuf_d(transmute(a), transmute(b), transmute(c))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vpickve2gr_du<const IMM1: u32>(a: m128i) -> u64 {
-    static_assert_uimm_bits!(IMM1, 1);
-    unsafe { transmute(__lsx_vpickve2gr_du(transmute(a), IMM1)) }
+pub fn lsx_vandi_b<const IMM8: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lsx_vandi_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vinsgr2vr_b<const IMM4: u32>(a: m128i, b: i32) -> m128i {
-    static_assert_uimm_bits!(IMM4, 4);
-    unsafe { transmute(__lsx_vinsgr2vr_b(transmute(a), transmute(b), IMM4)) }
+pub fn lsx_vori_b<const IMM8: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lsx_vori_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vinsgr2vr_h<const IMM3: u32>(a: m128i, b: i32) -> m128i {
-    static_assert_uimm_bits!(IMM3, 3);
-    unsafe { transmute(__lsx_vinsgr2vr_h(transmute(a), transmute(b), IMM3)) }
+pub fn lsx_vnori_b<const IMM8: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lsx_vnori_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(2)]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vinsgr2vr_w<const IMM2: u32>(a: m128i, b: i32) -> m128i {
-    static_assert_uimm_bits!(IMM2, 2);
-    unsafe { transmute(__lsx_vinsgr2vr_w(transmute(a), transmute(b), IMM2)) }
+pub fn lsx_vxori_b<const IMM8: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lsx_vxori_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vinsgr2vr_d<const IMM1: u32>(a: m128i, b: i64) -> m128i {
-    static_assert_uimm_bits!(IMM1, 1);
-    unsafe { transmute(__lsx_vinsgr2vr_d(transmute(a), transmute(b), IMM1)) }
+pub fn lsx_vbitsel_v(a: m128i, b: m128i, c: m128i) -> m128i {
+    unsafe { transmute(__lsx_vbitsel_v(transmute(a), transmute(b), transmute(c))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfadd_s(a: m128, b: m128) -> m128 {
-    unsafe { transmute(__lsx_vfadd_s(transmute(a), transmute(b))) }
+pub fn lsx_vbitseli_b<const IMM8: u32>(a: m128i, b: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lsx_vbitseli_b(transmute(a), transmute(b), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfadd_d(a: m128d, b: m128d) -> m128d {
-    unsafe { transmute(__lsx_vfadd_d(transmute(a), transmute(b))) }
+pub fn lsx_vshuf4i_b<const IMM8: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lsx_vshuf4i_b(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfsub_s(a: m128, b: m128) -> m128 {
-    unsafe { transmute(__lsx_vfsub_s(transmute(a), transmute(b))) }
+pub fn lsx_vshuf4i_h<const IMM8: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lsx_vshuf4i_h(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
+#[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfsub_d(a: m128d, b: m128d) -> m128d {
-    unsafe { transmute(__lsx_vfsub_d(transmute(a), transmute(b))) }
+pub fn lsx_vshuf4i_w<const IMM8: u32>(a: m128i) -> m128i {
+    static_assert_uimm_bits!(IMM8, 8);
+    unsafe { transmute(__lsx_vshuf4i_w(transmute(a), IMM8)) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfmul_s(a: m128, b: m128) -> m128 {
-    unsafe { transmute(__lsx_vfmul_s(transmute(a), transmute(b))) }
+pub fn lsx_vclo_b(a: m128i) -> m128i {
+    unsafe { transmute(__lsx_vclo_b(transmute(a))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfmul_d(a: m128d, b: m128d) -> m128d {
-    unsafe { transmute(__lsx_vfmul_d(transmute(a), transmute(b))) }
+pub fn lsx_vclo_h(a: m128i) -> m128i {
+    unsafe { transmute(__lsx_vclo_h(transmute(a))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfdiv_s(a: m128, b: m128) -> m128 {
-    unsafe { transmute(__lsx_vfdiv_s(transmute(a), transmute(b))) }
+pub fn lsx_vclo_w(a: m128i) -> m128i {
+    unsafe { transmute(__lsx_vclo_w(transmute(a))) }
 }
 
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfdiv_d(a: m128d, b: m128d) -> m128d {
-    unsafe { transmute(__lsx_vfdiv_d(transmute(a), transmute(b))) }
+pub fn lsx_vclo_d(a: m128i) -> m128i {
+    unsafe { transmute(__lsx_vclo_d(transmute(a))) }
 }
 
 #[inline]
@@ -4049,20 +1973,6 @@ pub fn lsx_vfclass_d(a: m128d) -> m128i {
     unsafe { transmute(__lsx_vfclass_d(transmute(a))) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfsqrt_s(a: m128) -> m128 {
-    unsafe { transmute(__lsx_vfsqrt_s(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfsqrt_d(a: m128d) -> m128d {
-    unsafe { transmute(__lsx_vfsqrt_d(transmute(a))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -4259,41 +2169,6 @@ pub fn lsx_vffint_d_lu(a: m128i) -> m128d {
     unsafe { transmute(__lsx_vffint_d_lu(transmute(a))) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vandn_v(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vandn_v(transmute(a), transmute(b))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vneg_b(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vneg_b(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vneg_h(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vneg_h(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vneg_w(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vneg_w(transmute(a))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vneg_d(a: m128i) -> m128i {
-    unsafe { transmute(__lsx_vneg_d(transmute(a))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -4765,62 +2640,6 @@ pub fn lsx_vsigncov_d(a: m128i, b: m128i) -> m128i {
     unsafe { transmute(__lsx_vsigncov_d(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfmadd_s(a: m128, b: m128, c: m128) -> m128 {
-    unsafe { transmute(__lsx_vfmadd_s(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfmadd_d(a: m128d, b: m128d, c: m128d) -> m128d {
-    unsafe { transmute(__lsx_vfmadd_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfmsub_s(a: m128, b: m128, c: m128) -> m128 {
-    unsafe { transmute(__lsx_vfmsub_s(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfmsub_d(a: m128d, b: m128d, c: m128d) -> m128d {
-    unsafe { transmute(__lsx_vfmsub_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfnmadd_s(a: m128, b: m128, c: m128) -> m128 {
-    unsafe { transmute(__lsx_vfnmadd_s(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfnmadd_d(a: m128d, b: m128d, c: m128d) -> m128d {
-    unsafe { transmute(__lsx_vfnmadd_d(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfnmsub_s(a: m128, b: m128, c: m128) -> m128 {
-    unsafe { transmute(__lsx_vfnmsub_s(transmute(a), transmute(b), transmute(c))) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vfnmsub_d(a: m128d, b: m128d, c: m128d) -> m128d {
-    unsafe { transmute(__lsx_vfnmsub_d(transmute(a), transmute(b), transmute(c))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
@@ -5052,7 +2871,7 @@ pub fn lsx_vfrintrm_d(a: m128d) -> m128d {
 pub unsafe fn lsx_vstelm_b<const IMM_S8: i32, const IMM4: u32>(a: m128i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S8, 8);
     static_assert_uimm_bits!(IMM4, 4);
-    transmute(__lsx_vstelm_b(transmute(a), mem_addr, IMM_S8, IMM4))
+    __lsx_vstelm_b(transmute(a), mem_addr, IMM_S8, IMM4)
 }
 
 #[inline]
@@ -5062,7 +2881,7 @@ pub unsafe fn lsx_vstelm_b<const IMM_S8: i32, const IMM4: u32>(a: m128i, mem_add
 pub unsafe fn lsx_vstelm_h<const IMM_S8: i32, const IMM3: u32>(a: m128i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S8, 8);
     static_assert_uimm_bits!(IMM3, 3);
-    transmute(__lsx_vstelm_h(transmute(a), mem_addr, IMM_S8, IMM3))
+    __lsx_vstelm_h(transmute(a), mem_addr, IMM_S8, IMM3)
 }
 
 #[inline]
@@ -5072,7 +2891,7 @@ pub unsafe fn lsx_vstelm_h<const IMM_S8: i32, const IMM3: u32>(a: m128i, mem_add
 pub unsafe fn lsx_vstelm_w<const IMM_S8: i32, const IMM2: u32>(a: m128i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S8, 8);
     static_assert_uimm_bits!(IMM2, 2);
-    transmute(__lsx_vstelm_w(transmute(a), mem_addr, IMM_S8, IMM2))
+    __lsx_vstelm_w(transmute(a), mem_addr, IMM_S8, IMM2)
 }
 
 #[inline]
@@ -5082,7 +2901,7 @@ pub unsafe fn lsx_vstelm_w<const IMM_S8: i32, const IMM2: u32>(a: m128i, mem_add
 pub unsafe fn lsx_vstelm_d<const IMM_S8: i32, const IMM1: u32>(a: m128i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S8, 8);
     static_assert_uimm_bits!(IMM1, 1);
-    transmute(__lsx_vstelm_d(transmute(a), mem_addr, IMM_S8, IMM1))
+    __lsx_vstelm_d(transmute(a), mem_addr, IMM_S8, IMM1)
 }
 
 #[inline]
@@ -6376,7 +4195,7 @@ pub unsafe fn lsx_vld<const IMM_S12: i32>(mem_addr: *const i8) -> m128i {
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn lsx_vst<const IMM_S12: i32>(a: m128i, mem_addr: *mut i8) {
     static_assert_simm_bits!(IMM_S12, 12);
-    transmute(__lsx_vst(transmute(a), mem_addr, IMM_S12))
+    __lsx_vst(transmute(a), mem_addr, IMM_S12)
 }
 
 #[inline]
@@ -6421,13 +4240,6 @@ pub fn lsx_vssrln_w_d(a: m128i, b: m128i) -> m128i {
     unsafe { transmute(__lsx_vssrln_w_d(transmute(a), transmute(b))) }
 }
 
-#[inline]
-#[target_feature(enable = "lsx")]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vorn_v(a: m128i, b: m128i) -> m128i {
-    unsafe { transmute(__lsx_vorn_v(transmute(a), transmute(b))) }
-}
-
 #[inline]
 #[target_feature(enable = "lsx")]
 #[rustc_legacy_const_generics(0)]
@@ -6455,7 +4267,7 @@ pub unsafe fn lsx_vldx(mem_addr: *const i8, b: i64) -> m128i {
 #[target_feature(enable = "lsx")]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn lsx_vstx(a: m128i, mem_addr: *mut i8, b: i64) {
-    transmute(__lsx_vstx(transmute(a), mem_addr, transmute(b)))
+    __lsx_vstx(transmute(a), mem_addr, transmute(b))
 }
 
 #[inline]
@@ -6842,39 +4654,3 @@ pub fn lsx_vfcmp_sune_s(a: m128, b: m128) -> m128i {
 pub fn lsx_vfcmp_sun_s(a: m128, b: m128) -> m128i {
     unsafe { transmute(__lsx_vfcmp_sun_s(transmute(a), transmute(b))) }
 }
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(0)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vrepli_b<const IMM_S10: i32>() -> m128i {
-    static_assert_simm_bits!(IMM_S10, 10);
-    unsafe { transmute(__lsx_vrepli_b(IMM_S10)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(0)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vrepli_d<const IMM_S10: i32>() -> m128i {
-    static_assert_simm_bits!(IMM_S10, 10);
-    unsafe { transmute(__lsx_vrepli_d(IMM_S10)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(0)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vrepli_h<const IMM_S10: i32>() -> m128i {
-    static_assert_simm_bits!(IMM_S10, 10);
-    unsafe { transmute(__lsx_vrepli_h(IMM_S10)) }
-}
-
-#[inline]
-#[target_feature(enable = "lsx")]
-#[rustc_legacy_const_generics(0)]
-#[unstable(feature = "stdarch_loongarch", issue = "117427")]
-pub fn lsx_vrepli_w<const IMM_S10: i32>() -> m128i {
-    static_assert_simm_bits!(IMM_S10, 10);
-    unsafe { transmute(__lsx_vrepli_w(IMM_S10)) }
-}
diff --git a/crates/core_arch/src/loongarch64/lsx/mod.rs b/crates/core_arch/src/loongarch64/lsx/mod.rs
index 67a08985a9..0d353746ea 100644
--- a/crates/core_arch/src/loongarch64/lsx/mod.rs
+++ b/crates/core_arch/src/loongarch64/lsx/mod.rs
@@ -16,6 +16,13 @@ mod generated;
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub use self::generated::*;
 
+#[rustfmt::skip]
+mod portable;
+
+#[rustfmt::skip]
+#[unstable(feature = "stdarch_loongarch", issue = "117427")]
+pub use self::portable::*;
+
 #[rustfmt::skip]
 #[cfg(test)]
 mod tests;
diff --git a/crates/core_arch/src/loongarch64/lsx/portable.rs b/crates/core_arch/src/loongarch64/lsx/portable.rs
new file mode 100644
index 0000000000..16f0058235
--- /dev/null
+++ b/crates/core_arch/src/loongarch64/lsx/portable.rs
@@ -0,0 +1,247 @@
+//! LoongArch64 LSX intrinsics - intrinsics::simd implementation
+
+use super::super::{simd as ls, simd::*, *};
+use crate::core_arch::simd::{self as cs, *};
+use crate::intrinsics::simd as is;
+use crate::mem::transmute;
+
+impl_vv!("lsx", lsx_vpcnt_b, is::simd_ctpop, m128i, i8x16);
+impl_vv!("lsx", lsx_vpcnt_h, is::simd_ctpop, m128i, i16x8);
+impl_vv!("lsx", lsx_vpcnt_w, is::simd_ctpop, m128i, i32x4);
+impl_vv!("lsx", lsx_vpcnt_d, is::simd_ctpop, m128i, i64x2);
+impl_vv!("lsx", lsx_vclz_b, is::simd_ctlz, m128i, i8x16);
+impl_vv!("lsx", lsx_vclz_h, is::simd_ctlz, m128i, i16x8);
+impl_vv!("lsx", lsx_vclz_w, is::simd_ctlz, m128i, i32x4);
+impl_vv!("lsx", lsx_vclz_d, is::simd_ctlz, m128i, i64x2);
+impl_vv!("lsx", lsx_vneg_b, is::simd_neg, m128i, i8x16);
+impl_vv!("lsx", lsx_vneg_h, is::simd_neg, m128i, i16x8);
+impl_vv!("lsx", lsx_vneg_w, is::simd_neg, m128i, i32x4);
+impl_vv!("lsx", lsx_vneg_d, is::simd_neg, m128i, i64x2);
+impl_vv!("lsx", lsx_vfsqrt_s, is::simd_fsqrt, m128, f32x4);
+impl_vv!("lsx", lsx_vfsqrt_d, is::simd_fsqrt, m128d, f64x2);
+
+impl_gv!("lsx", lsx_vreplgr2vr_b, ls::simd_splat, m128i, i8x16, i32);
+impl_gv!("lsx", lsx_vreplgr2vr_h, ls::simd_splat, m128i, i16x8, i32);
+impl_gv!("lsx", lsx_vreplgr2vr_w, ls::simd_splat, m128i, i32x4, i32);
+impl_gv!("lsx", lsx_vreplgr2vr_d, ls::simd_splat, m128i, i64x2, i64);
+
+impl_sv!("lsx", lsx_vrepli_b, ls::simd_splat, m128i, i8x16, 10);
+impl_sv!("lsx", lsx_vrepli_h, ls::simd_splat, m128i, i16x8, 10);
+impl_sv!("lsx", lsx_vrepli_w, ls::simd_splat, m128i, i32x4, 10);
+impl_sv!("lsx", lsx_vrepli_d, ls::simd_splat, m128i, i64x2, 10);
+
+impl_vvv!("lsx", lsx_vadd_b, is::simd_add, m128i, i8x16);
+impl_vvv!("lsx", lsx_vadd_h, is::simd_add, m128i, i16x8);
+impl_vvv!("lsx", lsx_vadd_w, is::simd_add, m128i, i32x4);
+impl_vvv!("lsx", lsx_vadd_d, is::simd_add, m128i, i64x2);
+impl_vvv!("lsx", lsx_vsub_b, is::simd_sub, m128i, i8x16);
+impl_vvv!("lsx", lsx_vsub_h, is::simd_sub, m128i, i16x8);
+impl_vvv!("lsx", lsx_vsub_w, is::simd_sub, m128i, i32x4);
+impl_vvv!("lsx", lsx_vsub_d, is::simd_sub, m128i, i64x2);
+impl_vvv!("lsx", lsx_vmax_b, cs::simd_imax, m128i, i8x16);
+impl_vvv!("lsx", lsx_vmax_h, cs::simd_imax, m128i, i16x8);
+impl_vvv!("lsx", lsx_vmax_w, cs::simd_imax, m128i, i32x4);
+impl_vvv!("lsx", lsx_vmax_d, cs::simd_imax, m128i, i64x2);
+impl_vvv!("lsx", lsx_vmax_bu, cs::simd_imax, m128i, u8x16);
+impl_vvv!("lsx", lsx_vmax_hu, cs::simd_imax, m128i, u16x8);
+impl_vvv!("lsx", lsx_vmax_wu, cs::simd_imax, m128i, u32x4);
+impl_vvv!("lsx", lsx_vmax_du, cs::simd_imax, m128i, u64x2);
+impl_vvv!("lsx", lsx_vmin_b, cs::simd_imin, m128i, i8x16);
+impl_vvv!("lsx", lsx_vmin_h, cs::simd_imin, m128i, i16x8);
+impl_vvv!("lsx", lsx_vmin_w, cs::simd_imin, m128i, i32x4);
+impl_vvv!("lsx", lsx_vmin_d, cs::simd_imin, m128i, i64x2);
+impl_vvv!("lsx", lsx_vmin_bu, cs::simd_imin, m128i, u8x16);
+impl_vvv!("lsx", lsx_vmin_hu, cs::simd_imin, m128i, u16x8);
+impl_vvv!("lsx", lsx_vmin_wu, cs::simd_imin, m128i, u32x4);
+impl_vvv!("lsx", lsx_vmin_du, cs::simd_imin, m128i, u64x2);
+impl_vvv!("lsx", lsx_vseq_b, is::simd_eq, m128i, i8x16);
+impl_vvv!("lsx", lsx_vseq_h, is::simd_eq, m128i, i16x8);
+impl_vvv!("lsx", lsx_vseq_w, is::simd_eq, m128i, i32x4);
+impl_vvv!("lsx", lsx_vseq_d, is::simd_eq, m128i, i64x2);
+impl_vvv!("lsx", lsx_vslt_b, is::simd_lt, m128i, i8x16);
+impl_vvv!("lsx", lsx_vslt_h, is::simd_lt, m128i, i16x8);
+impl_vvv!("lsx", lsx_vslt_w, is::simd_lt, m128i, i32x4);
+impl_vvv!("lsx", lsx_vslt_d, is::simd_lt, m128i, i64x2);
+impl_vvv!("lsx", lsx_vslt_bu, is::simd_lt, m128i, u8x16);
+impl_vvv!("lsx", lsx_vslt_hu, is::simd_lt, m128i, u16x8);
+impl_vvv!("lsx", lsx_vslt_wu, is::simd_lt, m128i, u32x4);
+impl_vvv!("lsx", lsx_vslt_du, is::simd_lt, m128i, u64x2);
+impl_vvv!("lsx", lsx_vsle_b, is::simd_le, m128i, i8x16);
+impl_vvv!("lsx", lsx_vsle_h, is::simd_le, m128i, i16x8);
+impl_vvv!("lsx", lsx_vsle_w, is::simd_le, m128i, i32x4);
+impl_vvv!("lsx", lsx_vsle_d, is::simd_le, m128i, i64x2);
+impl_vvv!("lsx", lsx_vsle_bu, is::simd_le, m128i, u8x16);
+impl_vvv!("lsx", lsx_vsle_hu, is::simd_le, m128i, u16x8);
+impl_vvv!("lsx", lsx_vsle_wu, is::simd_le, m128i, u32x4);
+impl_vvv!("lsx", lsx_vsle_du, is::simd_le, m128i, u64x2);
+impl_vvv!("lsx", lsx_vmul_b, is::simd_mul, m128i, i8x16);
+impl_vvv!("lsx", lsx_vmul_h, is::simd_mul, m128i, i16x8);
+impl_vvv!("lsx", lsx_vmul_w, is::simd_mul, m128i, i32x4);
+impl_vvv!("lsx", lsx_vmul_d, is::simd_mul, m128i, i64x2);
+impl_vvv!("lsx", lsx_vdiv_b, is::simd_div, m128i, i8x16);
+impl_vvv!("lsx", lsx_vdiv_h, is::simd_div, m128i, i16x8);
+impl_vvv!("lsx", lsx_vdiv_w, is::simd_div, m128i, i32x4);
+impl_vvv!("lsx", lsx_vdiv_d, is::simd_div, m128i, i64x2);
+impl_vvv!("lsx", lsx_vdiv_bu, is::simd_div, m128i, u8x16);
+impl_vvv!("lsx", lsx_vdiv_hu, is::simd_div, m128i, u16x8);
+impl_vvv!("lsx", lsx_vdiv_wu, is::simd_div, m128i, u32x4);
+impl_vvv!("lsx", lsx_vdiv_du, is::simd_div, m128i, u64x2);
+impl_vvv!("lsx", lsx_vmod_b, is::simd_rem, m128i, i8x16);
+impl_vvv!("lsx", lsx_vmod_h, is::simd_rem, m128i, i16x8);
+impl_vvv!("lsx", lsx_vmod_w, is::simd_rem, m128i, i32x4);
+impl_vvv!("lsx", lsx_vmod_d, is::simd_rem, m128i, i64x2);
+impl_vvv!("lsx", lsx_vmod_bu, is::simd_rem, m128i, u8x16);
+impl_vvv!("lsx", lsx_vmod_hu, is::simd_rem, m128i, u16x8);
+impl_vvv!("lsx", lsx_vmod_wu, is::simd_rem, m128i, u32x4);
+impl_vvv!("lsx", lsx_vmod_du, is::simd_rem, m128i, u64x2);
+impl_vvv!("lsx", lsx_vand_v, is::simd_and, m128i, u8x16);
+impl_vvv!("lsx", lsx_vandn_v, ls::simd_andn, m128i, u8x16);
+impl_vvv!("lsx", lsx_vor_v, is::simd_or, m128i, u8x16);
+impl_vvv!("lsx", lsx_vorn_v, ls::simd_orn, m128i, u8x16);
+impl_vvv!("lsx", lsx_vnor_v, ls::simd_nor, m128i, u8x16);
+impl_vvv!("lsx", lsx_vxor_v, is::simd_xor, m128i, u8x16);
+impl_vvv!("lsx", lsx_vfadd_s, is::simd_add, m128, f32x4);
+impl_vvv!("lsx", lsx_vfadd_d, is::simd_add, m128d, f64x2);
+impl_vvv!("lsx", lsx_vfsub_s, is::simd_sub, m128, f32x4);
+impl_vvv!("lsx", lsx_vfsub_d, is::simd_sub, m128d, f64x2);
+impl_vvv!("lsx", lsx_vfmul_s, is::simd_mul, m128, f32x4);
+impl_vvv!("lsx", lsx_vfmul_d, is::simd_mul, m128d, f64x2);
+impl_vvv!("lsx", lsx_vfdiv_s, is::simd_div, m128, f32x4);
+impl_vvv!("lsx", lsx_vfdiv_d, is::simd_div, m128d, f64x2);
+impl_vvv!("lsx", lsx_vsll_b, ls::simd_shl, m128i, i8x16);
+impl_vvv!("lsx", lsx_vsll_h, ls::simd_shl, m128i, i16x8);
+impl_vvv!("lsx", lsx_vsll_w, ls::simd_shl, m128i, i32x4);
+impl_vvv!("lsx", lsx_vsll_d, ls::simd_shl, m128i, i64x2);
+impl_vvv!("lsx", lsx_vsra_b, ls::simd_shr, m128i, i8x16);
+impl_vvv!("lsx", lsx_vsra_h, ls::simd_shr, m128i, i16x8);
+impl_vvv!("lsx", lsx_vsra_w, ls::simd_shr, m128i, i32x4);
+impl_vvv!("lsx", lsx_vsra_d, ls::simd_shr, m128i, i64x2);
+impl_vvv!("lsx", lsx_vsrl_b, ls::simd_shr, m128i, u8x16);
+impl_vvv!("lsx", lsx_vsrl_h, ls::simd_shr, m128i, u16x8);
+impl_vvv!("lsx", lsx_vsrl_w, ls::simd_shr, m128i, u32x4);
+impl_vvv!("lsx", lsx_vsrl_d, ls::simd_shr, m128i, u64x2);
+impl_vvv!("lsx", lsx_vbitclr_b, ls::simd_bitclr, m128i, u8x16);
+impl_vvv!("lsx", lsx_vbitclr_h, ls::simd_bitclr, m128i, u16x8);
+impl_vvv!("lsx", lsx_vbitclr_w, ls::simd_bitclr, m128i, u32x4);
+impl_vvv!("lsx", lsx_vbitclr_d, ls::simd_bitclr, m128i, u64x2);
+impl_vvv!("lsx", lsx_vbitset_b, ls::simd_bitset, m128i, u8x16);
+impl_vvv!("lsx", lsx_vbitset_h, ls::simd_bitset, m128i, u16x8);
+impl_vvv!("lsx", lsx_vbitset_w, ls::simd_bitset, m128i, u32x4);
+impl_vvv!("lsx", lsx_vbitset_d, ls::simd_bitset, m128i, u64x2);
+impl_vvv!("lsx", lsx_vbitrev_b, ls::simd_bitrev, m128i, u8x16);
+impl_vvv!("lsx", lsx_vbitrev_h, ls::simd_bitrev, m128i, u16x8);
+impl_vvv!("lsx", lsx_vbitrev_w, ls::simd_bitrev, m128i, u32x4);
+impl_vvv!("lsx", lsx_vbitrev_d, ls::simd_bitrev, m128i, u64x2);
+impl_vvv!("lsx", lsx_vsadd_b, is::simd_saturating_add, m128i, i8x16);
+impl_vvv!("lsx", lsx_vsadd_h, is::simd_saturating_add, m128i, i16x8);
+impl_vvv!("lsx", lsx_vsadd_w, is::simd_saturating_add, m128i, i32x4);
+impl_vvv!("lsx", lsx_vsadd_d, is::simd_saturating_add, m128i, i64x2);
+impl_vvv!("lsx", lsx_vsadd_bu, is::simd_saturating_add, m128i, u8x16);
+impl_vvv!("lsx", lsx_vsadd_hu, is::simd_saturating_add, m128i, u16x8);
+impl_vvv!("lsx", lsx_vsadd_wu, is::simd_saturating_add, m128i, u32x4);
+impl_vvv!("lsx", lsx_vsadd_du, is::simd_saturating_add, m128i, u64x2);
+impl_vvv!("lsx", lsx_vssub_b, is::simd_saturating_sub, m128i, i8x16);
+impl_vvv!("lsx", lsx_vssub_h, is::simd_saturating_sub, m128i, i16x8);
+impl_vvv!("lsx", lsx_vssub_w, is::simd_saturating_sub, m128i, i32x4);
+impl_vvv!("lsx", lsx_vssub_d, is::simd_saturating_sub, m128i, i64x2);
+impl_vvv!("lsx", lsx_vssub_bu, is::simd_saturating_sub, m128i, u8x16);
+impl_vvv!("lsx", lsx_vssub_hu, is::simd_saturating_sub, m128i, u16x8);
+impl_vvv!("lsx", lsx_vssub_wu, is::simd_saturating_sub, m128i, u32x4);
+impl_vvv!("lsx", lsx_vssub_du, is::simd_saturating_sub, m128i, u64x2);
+impl_vvv!("lsx", lsx_vadda_b, ls::simd_adda, m128i, i8x16);
+impl_vvv!("lsx", lsx_vadda_h, ls::simd_adda, m128i, i16x8);
+impl_vvv!("lsx", lsx_vadda_w, ls::simd_adda, m128i, i32x4);
+impl_vvv!("lsx", lsx_vadda_d, ls::simd_adda, m128i, i64x2);
+impl_vvv!("lsx", lsx_vabsd_b, ls::simd_absd, m128i, i8x16);
+impl_vvv!("lsx", lsx_vabsd_h, ls::simd_absd, m128i, i16x8);
+impl_vvv!("lsx", lsx_vabsd_w, ls::simd_absd, m128i, i32x4);
+impl_vvv!("lsx", lsx_vabsd_d, ls::simd_absd, m128i, i64x2);
+impl_vvv!("lsx", lsx_vabsd_bu, ls::simd_absd, m128i, u8x16);
+impl_vvv!("lsx", lsx_vabsd_hu, ls::simd_absd, m128i, u16x8);
+impl_vvv!("lsx", lsx_vabsd_wu, ls::simd_absd, m128i, u32x4);
+impl_vvv!("lsx", lsx_vabsd_du, ls::simd_absd, m128i, u64x2);
+
+impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16);
+impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8);
+impl_vuv!("lsx", lsx_vslli_w, is::simd_shl, m128i, i32x4);
+impl_vuv!("lsx", lsx_vslli_d, is::simd_shl, m128i, i64x2);
+impl_vuv!("lsx", lsx_vsrai_b, is::simd_shr, m128i, i8x16);
+impl_vuv!("lsx", lsx_vsrai_h, is::simd_shr, m128i, i16x8);
+impl_vuv!("lsx", lsx_vsrai_w, is::simd_shr, m128i, i32x4);
+impl_vuv!("lsx", lsx_vsrai_d, is::simd_shr, m128i, i64x2);
+impl_vuv!("lsx", lsx_vsrli_b, is::simd_shr, m128i, u8x16);
+impl_vuv!("lsx", lsx_vsrli_h, is::simd_shr, m128i, u16x8);
+impl_vuv!("lsx", lsx_vsrli_w, is::simd_shr, m128i, u32x4);
+impl_vuv!("lsx", lsx_vsrli_d, is::simd_shr, m128i, u64x2);
+impl_vuv!("lsx", lsx_vaddi_bu, is::simd_add, m128i, u8x16, 5);
+impl_vuv!("lsx", lsx_vaddi_hu, is::simd_add, m128i, u16x8, 5);
+impl_vuv!("lsx", lsx_vaddi_wu, is::simd_add, m128i, u32x4, 5);
+impl_vuv!("lsx", lsx_vaddi_du, is::simd_add, m128i, u64x2, 5);
+impl_vuv!("lsx", lsx_vslti_bu, is::simd_lt, m128i, u8x16, 5);
+impl_vuv!("lsx", lsx_vslti_hu, is::simd_lt, m128i, u16x8, 5);
+impl_vuv!("lsx", lsx_vslti_wu, is::simd_lt, m128i, u32x4, 5);
+impl_vuv!("lsx", lsx_vslti_du, is::simd_lt, m128i, u64x2, 5);
+impl_vuv!("lsx", lsx_vslei_bu, is::simd_le, m128i, u8x16, 5);
+impl_vuv!("lsx", lsx_vslei_hu, is::simd_le, m128i, u16x8, 5);
+impl_vuv!("lsx", lsx_vslei_wu, is::simd_le, m128i, u32x4, 5);
+impl_vuv!("lsx", lsx_vslei_du, is::simd_le, m128i, u64x2, 5);
+impl_vuv!("lsx", lsx_vmaxi_bu, cs::simd_imax, m128i, u8x16, 5);
+impl_vuv!("lsx", lsx_vmaxi_hu, cs::simd_imax, m128i, u16x8, 5);
+impl_vuv!("lsx", lsx_vmaxi_wu, cs::simd_imax, m128i, u32x4, 5);
+impl_vuv!("lsx", lsx_vmaxi_du, cs::simd_imax, m128i, u64x2, 5);
+impl_vuv!("lsx", lsx_vmini_bu, cs::simd_imin, m128i, u8x16, 5);
+impl_vuv!("lsx", lsx_vmini_hu, cs::simd_imin, m128i, u16x8, 5);
+impl_vuv!("lsx", lsx_vmini_wu, cs::simd_imin, m128i, u32x4, 5);
+impl_vuv!("lsx", lsx_vmini_du, cs::simd_imin, m128i, u64x2, 5);
+
+impl_vug!("lsx", lsx_vpickve2gr_b, is::simd_extract, m128i, i8x16, i32, 4);
+impl_vug!("lsx", lsx_vpickve2gr_h, is::simd_extract, m128i, i16x8, i32, 3);
+impl_vug!("lsx", lsx_vpickve2gr_w, is::simd_extract, m128i, i32x4, i32, 2);
+impl_vug!("lsx", lsx_vpickve2gr_d, is::simd_extract, m128i, i64x2, i64, 1);
+impl_vug!("lsx", lsx_vpickve2gr_bu, is::simd_extract, m128i, u8x16, u32, 4);
+impl_vug!("lsx", lsx_vpickve2gr_hu, is::simd_extract, m128i, u16x8, u32, 3);
+impl_vug!("lsx", lsx_vpickve2gr_wu, is::simd_extract, m128i, u32x4, u32, 2);
+impl_vug!("lsx", lsx_vpickve2gr_du, is::simd_extract, m128i, u64x2, u64, 1);
+
+impl_vsv!("lsx", lsx_vseqi_b, is::simd_eq, m128i, i8x16, 5);
+impl_vsv!("lsx", lsx_vseqi_h, is::simd_eq, m128i, i16x8, 5);
+impl_vsv!("lsx", lsx_vseqi_w, is::simd_eq, m128i, i32x4, 5);
+impl_vsv!("lsx", lsx_vseqi_d, is::simd_eq, m128i, i64x2, 5);
+impl_vsv!("lsx", lsx_vslti_b, is::simd_lt, m128i, i8x16, 5);
+impl_vsv!("lsx", lsx_vslti_h, is::simd_lt, m128i, i16x8, 5);
+impl_vsv!("lsx", lsx_vslti_w, is::simd_lt, m128i, i32x4, 5);
+impl_vsv!("lsx", lsx_vslti_d, is::simd_lt, m128i, i64x2, 5);
+impl_vsv!("lsx", lsx_vslei_b, is::simd_le, m128i, i8x16, 5);
+impl_vsv!("lsx", lsx_vslei_h, is::simd_le, m128i, i16x8, 5);
+impl_vsv!("lsx", lsx_vslei_w, is::simd_le, m128i, i32x4, 5);
+impl_vsv!("lsx", lsx_vslei_d, is::simd_le, m128i, i64x2, 5);
+impl_vsv!("lsx", lsx_vmaxi_b, cs::simd_imax, m128i, i8x16, 5);
+impl_vsv!("lsx", lsx_vmaxi_h, cs::simd_imax, m128i, i16x8, 5);
+impl_vsv!("lsx", lsx_vmaxi_w, cs::simd_imax, m128i, i32x4, 5);
+impl_vsv!("lsx", lsx_vmaxi_d, cs::simd_imax, m128i, i64x2, 5);
+impl_vsv!("lsx", lsx_vmini_b, cs::simd_imin, m128i, i8x16, 5);
+impl_vsv!("lsx", lsx_vmini_h, cs::simd_imin, m128i, i16x8, 5);
+impl_vsv!("lsx", lsx_vmini_w, cs::simd_imin, m128i, i32x4, 5);
+impl_vsv!("lsx", lsx_vmini_d, cs::simd_imin, m128i, i64x2, 5);
+
+impl_vvvv!("lsx", lsx_vmadd_b, ls::simd_madd, m128i, i8x16);
+impl_vvvv!("lsx", lsx_vmadd_h, ls::simd_madd, m128i, i16x8);
+impl_vvvv!("lsx", lsx_vmadd_w, ls::simd_madd, m128i, i32x4);
+impl_vvvv!("lsx", lsx_vmadd_d, ls::simd_madd, m128i, i64x2);
+impl_vvvv!("lsx", lsx_vmsub_b, ls::simd_msub, m128i, i8x16);
+impl_vvvv!("lsx", lsx_vmsub_h, ls::simd_msub, m128i, i16x8);
+impl_vvvv!("lsx", lsx_vmsub_w, ls::simd_msub, m128i, i32x4);
+impl_vvvv!("lsx", lsx_vmsub_d, ls::simd_msub, m128i, i64x2);
+impl_vvvv!("lsx", lsx_vfmadd_s, is::simd_fma, m128, f32x4);
+impl_vvvv!("lsx", lsx_vfmadd_d, is::simd_fma, m128d, f64x2);
+impl_vvvv!("lsx", lsx_vfmsub_s, ls::simd_fmsub, m128, f32x4);
+impl_vvvv!("lsx", lsx_vfmsub_d, ls::simd_fmsub, m128d, f64x2);
+impl_vvvv!("lsx", lsx_vfnmadd_s, ls::simd_fnmadd, m128, f32x4);
+impl_vvvv!("lsx", lsx_vfnmadd_d, ls::simd_fnmadd, m128d, f64x2);
+impl_vvvv!("lsx", lsx_vfnmsub_s, ls::simd_fnmsub, m128, f32x4);
+impl_vvvv!("lsx", lsx_vfnmsub_d, ls::simd_fnmsub, m128d, f64x2);
+
+impl_vugv!("lsx", lsx_vinsgr2vr_b, is::simd_insert, m128i, i8x16, i32, 4);
+impl_vugv!("lsx", lsx_vinsgr2vr_h, is::simd_insert, m128i, i16x8, i32, 3);
+impl_vugv!("lsx", lsx_vinsgr2vr_w, is::simd_insert, m128i, i32x4, i32, 2);
+impl_vugv!("lsx", lsx_vinsgr2vr_d, is::simd_insert, m128i, i64x2, i64, 1);
diff --git a/crates/core_arch/src/loongarch64/lsx/tests.rs b/crates/core_arch/src/loongarch64/lsx/tests.rs
index 5670bd4378..748e2b597a 100644
--- a/crates/core_arch/src/loongarch64/lsx/tests.rs
+++ b/crates/core_arch/src/loongarch64/lsx/tests.rs
@@ -5,6 +5,7 @@ use crate::{
     core_arch::{loongarch64::*, simd::*},
     mem::transmute,
 };
+use std::hint::black_box;
 use stdarch_test::simd_test;
 
 #[simd_test(enable = "lsx")]
@@ -17,7 +18,10 @@ unsafe fn test_lsx_vsll_b() {
     );
     let r = i64x2::new(70990221811840, -3257029622096690968);
 
-    assert_eq!(r, transmute(lsx_vsll_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsll_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -26,7 +30,10 @@ unsafe fn test_lsx_vsll_h() {
     let b = i16x8::new(-10317, -20778, -9962, -8975, 25298, 12929, -13803, -18669);
     let r = i64x2::new(-5063658964307128392, -3539825456407336052);
 
-    assert_eq!(r, transmute(lsx_vsll_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsll_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -35,7 +42,10 @@ unsafe fn test_lsx_vsll_w() {
     let b = i32x4::new(82237029, -819106294, -96895338, -456101700);
     let r = i64x2::new(-7163824029380778240, 2305843009528266752);
 
-    assert_eq!(r, transmute(lsx_vsll_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsll_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -44,7 +54,10 @@ unsafe fn test_lsx_vsll_d() {
     let b = i64x2::new(8592669249977019309, -1379694176202045825);
     let r = i64x2::new(1790743801833193472, 0);
 
-    assert_eq!(r, transmute(lsx_vsll_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsll_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -54,7 +67,7 @@ unsafe fn test_lsx_vslli_b() {
     );
     let r = i64x2::new(-2780807324588213414, -3708578564830607166);
 
-    assert_eq!(r, transmute(lsx_vslli_b::<0>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslli_b::<0>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -62,7 +75,7 @@ unsafe fn test_lsx_vslli_h() {
     let a = i16x8::new(18469, -14840, 23655, -3474, 7467, 2798, -15418, 26847);
     let r = i64x2::new(-7241759886206301888, 4017476402818337472);
 
-    assert_eq!(r, transmute(lsx_vslli_h::<6>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslli_h::<6>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -70,7 +83,7 @@ unsafe fn test_lsx_vslli_w() {
     let a = i32x4::new(20701902, -1777432355, 6349179, 1747667894);
     let r = i64x2::new(4189319625752393728, -5967594959501136896);
 
-    assert_eq!(r, transmute(lsx_vslli_w::<10>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslli_w::<10>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -78,7 +91,7 @@ unsafe fn test_lsx_vslli_d() {
     let a = i64x2::new(-5896889635782282086, -8807609320972692839);
     let r = i64x2::new(-4233027607937510592, -5142337165482896608);
 
-    assert_eq!(r, transmute(lsx_vslli_d::<5>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslli_d::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -91,7 +104,10 @@ unsafe fn test_lsx_vsra_b() {
     );
     let r = i64x2::new(-1080315035391229440, 720022881735668484);
 
-    assert_eq!(r, transmute(lsx_vsra_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsra_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -100,7 +116,10 @@ unsafe fn test_lsx_vsra_h() {
     let b = i16x8::new(14017, 3796, 23987, -27244, -13363, 21333, -10262, 23633);
     let r = i64x2::new(164116464290576704, -1935703552267190275);
 
-    assert_eq!(r, transmute(lsx_vsra_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsra_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -109,7 +128,10 @@ unsafe fn test_lsx_vsra_w() {
     let b = i32x4::new(-670772992, 2044335288, -1224858031, 520588790);
     let r = i64x2::new(-210763200496, 1619202657181);
 
-    assert_eq!(r, transmute(lsx_vsra_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsra_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -118,7 +140,10 @@ unsafe fn test_lsx_vsra_d() {
     let b = i64x2::new(4251079558060308329, 4657697142994416829);
     let r = i64x2::new(-623956, 3);
 
-    assert_eq!(r, transmute(lsx_vsra_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsra_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -128,7 +153,7 @@ unsafe fn test_lsx_vsrai_b() {
     );
     let r = i64x2::new(-2018743940785760257, -2093355901512246518);
 
-    assert_eq!(r, transmute(lsx_vsrai_b::<2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrai_b::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -136,7 +161,7 @@ unsafe fn test_lsx_vsrai_h() {
     let a = i16x8::new(-22502, -7299, 19084, -21578, -28082, 20851, 23456, 15524);
     let r = i64x2::new(-1688828385492998, 844446405361657);
 
-    assert_eq!(r, transmute(lsx_vsrai_h::<12>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrai_h::<12>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -144,7 +169,7 @@ unsafe fn test_lsx_vsrai_w() {
     let a = i32x4::new(743537539, 1831641900, -1639033567, -984629971);
     let r = i64x2::new(30008936499988, -16131897170029);
 
-    assert_eq!(r, transmute(lsx_vsrai_w::<18>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrai_w::<18>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -152,7 +177,7 @@ unsafe fn test_lsx_vsrai_d() {
     let a = i64x2::new(-8375997486414293750, 1714581574012370587);
     let r = i64x2::new(-476121, 97462);
 
-    assert_eq!(r, transmute(lsx_vsrai_d::<44>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrai_d::<44>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -165,7 +190,13 @@ unsafe fn test_lsx_vsrar_b() {
     );
     let r = i64x2::new(139917463134404866, 143840305941130491);
 
-    assert_eq!(r, transmute(lsx_vsrar_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrar_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -174,7 +205,13 @@ unsafe fn test_lsx_vsrar_h() {
     let b = i16x8::new(-26450, 2176, 31587, 2222, 13726, 30172, 1067, -14273);
     let r = i64x2::new(-287115463426050, 42950131714);
 
-    assert_eq!(r, transmute(lsx_vsrar_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrar_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -183,7 +220,13 @@ unsafe fn test_lsx_vsrar_w() {
     let b = i32x4::new(-1532076758, 940127488, 1781366421, 1497262222);
     let r = i64x2::new(7179867468326627830, 560544771735247);
 
-    assert_eq!(r, transmute(lsx_vsrar_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrar_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -192,7 +235,13 @@ unsafe fn test_lsx_vsrar_d() {
     let b = i64x2::new(3571440266112779495, -725943254065719378);
     let r = i64x2::new(-890187, -17811);
 
-    assert_eq!(r, transmute(lsx_vsrar_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrar_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -202,7 +251,7 @@ unsafe fn test_lsx_vsrari_b() {
     );
     let r = i64x2::new(867219992078845182, -503291487652282122);
 
-    assert_eq!(r, transmute(lsx_vsrari_b::<3>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrari_b::<3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -210,7 +259,7 @@ unsafe fn test_lsx_vsrari_h() {
     let a = i16x8::new(29939, -1699, 12357, 30805, -30883, 31936, 15701, -11818);
     let r = i64x2::new(4222154715365391, -1688815499411471);
 
-    assert_eq!(r, transmute(lsx_vsrari_h::<11>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrari_h::<11>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -218,7 +267,7 @@ unsafe fn test_lsx_vsrari_w() {
     let a = i32x4::new(588196178, -1058764534, 1325397591, 1169671026);
     let r = i64x2::new(-4294967295, 4294967297);
 
-    assert_eq!(r, transmute(lsx_vsrari_w::<30>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrari_w::<30>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -226,7 +275,7 @@ unsafe fn test_lsx_vsrari_d() {
     let a = i64x2::new(-2795326946470057100, 6746045132217841338);
     let r = i64x2::new(-174707934154378569, 421627820763615084);
 
-    assert_eq!(r, transmute(lsx_vsrari_d::<4>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrari_d::<4>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -239,7 +288,10 @@ unsafe fn test_lsx_vsrl_b() {
     );
     let r = i64x2::new(1300161376517358116, 72917012339034650);
 
-    assert_eq!(r, transmute(lsx_vsrl_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrl_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -248,7 +300,10 @@ unsafe fn test_lsx_vsrl_h() {
     let b = i16x8::new(16605, -13577, -26644, -17739, 11000, -29283, -15971, 20169);
     let r = i64x2::new(468374382728249347, 20829178341621860);
 
-    assert_eq!(r, transmute(lsx_vsrl_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrl_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -257,7 +312,10 @@ unsafe fn test_lsx_vsrl_w() {
     let b = i32x4::new(1777885221, -1725401090, 1849724045, -1051851102);
     let r = i64x2::new(12953227061, 1599606693325790121);
 
-    assert_eq!(r, transmute(lsx_vsrl_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrl_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -266,7 +324,10 @@ unsafe fn test_lsx_vsrl_d() {
     let b = i64x2::new(-7903128394835365398, 7601347629202818185);
     let r = i64x2::new(649044, 1572171616025062);
 
-    assert_eq!(r, transmute(lsx_vsrl_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrl_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -276,7 +337,7 @@ unsafe fn test_lsx_vsrli_b() {
     );
     let r = i64x2::new(1952909805632365845, 3971107439766933248);
 
-    assert_eq!(r, transmute(lsx_vsrli_b::<2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrli_b::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -284,7 +345,7 @@ unsafe fn test_lsx_vsrli_h() {
     let a = i16x8::new(29545, 354, 27695, 20915, -32766, -24491, 10641, 20310);
     let r = i64x2::new(11259230996660281, 10977609996304448);
 
-    assert_eq!(r, transmute(lsx_vsrli_h::<9>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrli_h::<9>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -292,7 +353,7 @@ unsafe fn test_lsx_vsrli_w() {
     let a = i32x4::new(627703601, 922874410, -234412645, -1216101872);
     let r = i64x2::new(3870813506329215, 12913695352717769);
 
-    assert_eq!(r, transmute(lsx_vsrli_w::<10>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrli_w::<10>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -300,7 +361,7 @@ unsafe fn test_lsx_vsrli_d() {
     let a = i64x2::new(1407685950714554203, -6076144426076800688);
     let r = i64x2::new(9, 85);
 
-    assert_eq!(r, transmute(lsx_vsrli_d::<57>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrli_d::<57>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -313,7 +374,13 @@ unsafe fn test_lsx_vsrlr_b() {
     );
     let r = i64x2::new(3317746744565237249, 144420860932066826);
 
-    assert_eq!(r, transmute(lsx_vsrlr_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrlr_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -322,7 +389,13 @@ unsafe fn test_lsx_vsrlr_h() {
     let b = i16x8::new(19500, -26403, -1282, 12290, -18989, 25105, -24347, 6707);
     let r = i64x2::new(1991716935204929539, 311033695131730530);
 
-    assert_eq!(r, transmute(lsx_vsrlr_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrlr_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -331,7 +404,13 @@ unsafe fn test_lsx_vsrlr_w() {
     let b = i32x4::new(1830015593, -1452673200, 962662328, -252736055);
     let r = i64x2::new(7864089021084, 20473000998469780);
 
-    assert_eq!(r, transmute(lsx_vsrlr_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrlr_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -340,7 +419,13 @@ unsafe fn test_lsx_vsrlr_d() {
     let b = i64x2::new(-1543621369665313706, 8544381131364512650);
     let r = i64x2::new(1428972826343, 4256393046182047);
 
-    assert_eq!(r, transmute(lsx_vsrlr_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrlr_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -350,7 +435,7 @@ unsafe fn test_lsx_vsrlri_b() {
     );
     let r = i64x2::new(93866580842851436, 1896906350202744602);
 
-    assert_eq!(r, transmute(lsx_vsrlri_b::<1>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrlri_b::<1>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -358,7 +443,7 @@ unsafe fn test_lsx_vsrlri_h() {
     let a = i16x8::new(-18045, 1968, 22966, 3692, 2010, -17108, 3373, -30706);
     let r = i64x2::new(1039304252363684227, -8642956144778934310);
 
-    assert_eq!(r, transmute(lsx_vsrlri_h::<0>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrlri_h::<0>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -366,7 +451,7 @@ unsafe fn test_lsx_vsrlri_w() {
     let a = i32x4::new(1306456564, -1401620667, -839707416, -1634862919);
     let r = i64x2::new(1553353645217275455, 1428132662790218397);
 
-    assert_eq!(r, transmute(lsx_vsrlri_w::<3>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrlri_w::<3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -374,7 +459,7 @@ unsafe fn test_lsx_vsrlri_d() {
     let a = i64x2::new(-3683179565838693027, 6160461828074490983);
     let r = i64x2::new(205, 85);
 
-    assert_eq!(r, transmute(lsx_vsrlri_d::<56>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsrlri_d::<56>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -387,7 +472,13 @@ unsafe fn test_lsx_vbitclr_b() {
     );
     let r = i64x2::new(-7325372782311046420, -5316383129963115396);
 
-    assert_eq!(r, transmute(lsx_vbitclr_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitclr_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -396,7 +487,13 @@ unsafe fn test_lsx_vbitclr_h() {
     let b = u16x8::new(26587, 57597, 34751, 38678, 23919, 45729, 62569, 5978);
     let r = i64x2::new(-5495443997997256700, -3317648531059028099);
 
-    assert_eq!(r, transmute(lsx_vbitclr_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitclr_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -405,7 +502,13 @@ unsafe fn test_lsx_vbitclr_w() {
     let b = u32x4::new(1968231094, 2827365864, 4097273355, 4016923215);
     let r = i64x2::new(-7626667807832507452, 546969093373761021);
 
-    assert_eq!(r, transmute(lsx_vbitclr_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitclr_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -414,7 +517,13 @@ unsafe fn test_lsx_vbitclr_d() {
     let b = u64x2::new(5723204188033770667, 2981956604140378920);
     let r = i64x2::new(-1242851545812588193, -5509634528458855560);
 
-    assert_eq!(r, transmute(lsx_vbitclr_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitclr_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -424,7 +533,7 @@ unsafe fn test_lsx_vbitclri_b() {
     );
     let r = i64x2::new(7503621968728299154, -6865556469255070542);
 
-    assert_eq!(r, transmute(lsx_vbitclri_b::<0>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitclri_b::<0>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -432,7 +541,7 @@ unsafe fn test_lsx_vbitclri_h() {
     let a = u16x8::new(17366, 58985, 22108, 45942, 27326, 19605, 9632, 32322);
     let r = i64x2::new(-5515130134779575338, 8809640793386347198);
 
-    assert_eq!(r, transmute(lsx_vbitclri_h::<10>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitclri_h::<10>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -440,7 +549,7 @@ unsafe fn test_lsx_vbitclri_w() {
     let a = u32x4::new(718858183, 3771164920, 1842485081, 896350597);
     let r = i64x2::new(-2249714073768237625, 3849796501707560281);
 
-    assert_eq!(r, transmute(lsx_vbitclri_w::<9>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitclri_w::<9>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -448,7 +557,7 @@ unsafe fn test_lsx_vbitclri_d() {
     let a = u64x2::new(10838658690401820648, 3833745076866321369);
     let r = i64x2::new(-7608085933063544856, 3833744527110507481);
 
-    assert_eq!(r, transmute(lsx_vbitclri_d::<39>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitclri_d::<39>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -461,7 +570,13 @@ unsafe fn test_lsx_vbitset_b() {
     );
     let r = i64x2::new(-7941579666116909337, -8620998056061183460);
 
-    assert_eq!(r, transmute(lsx_vbitset_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitset_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -470,7 +585,13 @@ unsafe fn test_lsx_vbitset_h() {
     let b = u16x8::new(64512, 23847, 57770, 47705, 8024, 31966, 14493, 50266);
     let r = i64x2::new(8218739538452480967, 9190693790629616954);
 
-    assert_eq!(r, transmute(lsx_vbitset_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitset_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -479,7 +600,13 @@ unsafe fn test_lsx_vbitset_w() {
     let b = u32x4::new(3259082048, 1303228302, 1429001720, 209615081);
     let r = i64x2::new(5472281065241838073, -4235320193476931022);
 
-    assert_eq!(r, transmute(lsx_vbitset_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitset_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -488,7 +615,13 @@ unsafe fn test_lsx_vbitset_d() {
     let b = u64x2::new(12687331714071910183, 1753585392879336372);
     let r = i64x2::new(8117422612773760492, 5031452210401715131);
 
-    assert_eq!(r, transmute(lsx_vbitset_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitset_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -498,7 +631,7 @@ unsafe fn test_lsx_vbitseti_b() {
     );
     let r = i64x2::new(6185254145054243811, 5860546440891134157);
 
-    assert_eq!(r, transmute(lsx_vbitseti_b::<6>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitseti_b::<6>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -506,7 +639,7 @@ unsafe fn test_lsx_vbitseti_h() {
     let a = u16x8::new(15222, 59961, 52253, 2908, 61562, 41309, 63627, 4191);
     let r = i64x2::new(819316619673811830, 1179934905985921146);
 
-    assert_eq!(r, transmute(lsx_vbitseti_h::<1>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitseti_h::<1>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -514,7 +647,7 @@ unsafe fn test_lsx_vbitseti_w() {
     let a = u32x4::new(3788412756, 1863556832, 1913138259, 1199998627);
     let r = i64x2::new(8012922850722617172, 5162962059379878995);
 
-    assert_eq!(r, transmute(lsx_vbitseti_w::<21>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitseti_w::<21>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -522,7 +655,7 @@ unsafe fn test_lsx_vbitseti_d() {
     let a = u64x2::new(10744510173660993785, 16946223211744108759);
     let r = i64x2::new(-7702233900048557831, -1500520861831225129);
 
-    assert_eq!(r, transmute(lsx_vbitseti_d::<27>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitseti_d::<27>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -535,7 +668,13 @@ unsafe fn test_lsx_vbitrev_b() {
     );
     let r = i64x2::new(7553563628828981794, -3550669970358088907);
 
-    assert_eq!(r, transmute(lsx_vbitrev_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitrev_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -544,7 +683,13 @@ unsafe fn test_lsx_vbitrev_h() {
     let b = u16x8::new(21347, 23131, 57157, 13786, 34463, 33445, 23964, 48087);
     let r = i64x2::new(-2253077037977362312, -1686202867067838120);
 
-    assert_eq!(r, transmute(lsx_vbitrev_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitrev_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -553,7 +698,13 @@ unsafe fn test_lsx_vbitrev_w() {
     let b = u32x4::new(3330530584, 4153020036, 822570638, 2652744506);
     let r = i64x2::new(4583672484591007782, 3195058299616182309);
 
-    assert_eq!(r, transmute(lsx_vbitrev_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitrev_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -562,7 +713,13 @@ unsafe fn test_lsx_vbitrev_d() {
     let b = u64x2::new(10942298949673565895, 12884740754463765660);
     let r = i64x2::new(-2430080033105247697, -384636561250515393);
 
-    assert_eq!(r, transmute(lsx_vbitrev_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vbitrev_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -572,7 +729,7 @@ unsafe fn test_lsx_vbitrevi_b() {
     );
     let r = i64x2::new(8727320563398842300, 7658903196653594166);
 
-    assert_eq!(r, transmute(lsx_vbitrevi_b::<2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitrevi_b::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -580,7 +737,7 @@ unsafe fn test_lsx_vbitrevi_h() {
     let a = u16x8::new(15083, 24599, 61212, 12408, 48399, 59833, 45416, 58826);
     let r = i64x2::new(8104420064785562347, -6500117680329458417);
 
-    assert_eq!(r, transmute(lsx_vbitrevi_h::<14>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitrevi_h::<14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -588,7 +745,7 @@ unsafe fn test_lsx_vbitrevi_w() {
     let a = u32x4::new(1200613355, 1418062686, 3847355950, 3312937419);
     let r = i64x2::new(6099540060505368555, -4226793400815190482);
 
-    assert_eq!(r, transmute(lsx_vbitrevi_w::<21>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitrevi_w::<21>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -596,7 +753,7 @@ unsafe fn test_lsx_vbitrevi_d() {
     let a = u64x2::new(295858379748270823, 1326723086853575042);
     let r = i64x2::new(295858379748254439, 1326723086853591426);
 
-    assert_eq!(r, transmute(lsx_vbitrevi_d::<14>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbitrevi_d::<14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -609,7 +766,10 @@ unsafe fn test_lsx_vadd_b() {
     );
     let r = i64x2::new(5228548393274527852, 1107461330348121713);
 
-    assert_eq!(r, transmute(lsx_vadd_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vadd_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -618,7 +778,10 @@ unsafe fn test_lsx_vadd_h() {
     let b = i16x8::new(-25040, 15453, -28080, -31322, -24429, -12453, -18073, 27019);
     let r = i64x2::new(1938006946753467667, 3264410328302682781);
 
-    assert_eq!(r, transmute(lsx_vadd_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vadd_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -627,7 +790,10 @@ unsafe fn test_lsx_vadd_w() {
     let b = i32x4::new(-1169804484, 389773725, -731843701, -1825112934);
     let r = i64x2::new(-2841313158179161935, -1386205072290870384);
 
-    assert_eq!(r, transmute(lsx_vadd_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vadd_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -636,7 +802,10 @@ unsafe fn test_lsx_vadd_d() {
     let b = i64x2::new(7093939531558864473, 4047047970310912233);
     let r = i64x2::new(-204689461315224217, -5456447511965942904);
 
-    assert_eq!(r, transmute(lsx_vadd_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vadd_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -646,7 +815,7 @@ unsafe fn test_lsx_vaddi_bu() {
     );
     let r = i64x2::new(-7790681010872578420, 298548864442153210);
 
-    assert_eq!(r, transmute(lsx_vaddi_bu::<10>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vaddi_bu::<10>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -654,7 +823,7 @@ unsafe fn test_lsx_vaddi_hu() {
     let a = i16x8::new(-16986, -28417, 11657, 16608, -30167, 18602, 8897, -854);
     let r = i64x2::new(4681541984598867390, -233585914045887935);
 
-    assert_eq!(r, transmute(lsx_vaddi_hu::<24>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vaddi_hu::<24>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -662,7 +831,7 @@ unsafe fn test_lsx_vaddi_wu() {
     let a = i32x4::new(1142343549, 56714754, -180143297, 408668191);
     let r = i64x2::new(243588023362963327, 1755216527965240129);
 
-    assert_eq!(r, transmute(lsx_vaddi_wu::<2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vaddi_wu::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -670,7 +839,7 @@ unsafe fn test_lsx_vaddi_du() {
     let a = i64x2::new(4516502893749962130, 9158051921593642947);
     let r = i64x2::new(4516502893749962139, 9158051921593642956);
 
-    assert_eq!(r, transmute(lsx_vaddi_du::<9>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vaddi_du::<9>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -683,7 +852,10 @@ unsafe fn test_lsx_vsub_b() {
     );
     let r = i64x2::new(-4051929421319416371, 8737463450488952169);
 
-    assert_eq!(r, transmute(lsx_vsub_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsub_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -692,7 +864,10 @@ unsafe fn test_lsx_vsub_h() {
     let b = i16x8::new(15368, 16207, 9677, 21447, -29583, -22036, 1845, 15671);
     let r = i64x2::new(-913983189443969573, 2742472381424198215);
 
-    assert_eq!(r, transmute(lsx_vsub_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsub_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -701,7 +876,10 @@ unsafe fn test_lsx_vsub_w() {
     let b = i32x4::new(617176389, -1376778690, 1463940361, 620446698);
     let r = i64x2::new(-7247543435452521192, -8067077040042720878);
 
-    assert_eq!(r, transmute(lsx_vsub_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsub_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -710,7 +888,10 @@ unsafe fn test_lsx_vsub_d() {
     let b = i64x2::new(1314101702815749241, 7673634401554993450);
     let r = i64x2::new(5925090640479842026, 5645651807574135757);
 
-    assert_eq!(r, transmute(lsx_vsub_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsub_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -720,7 +901,7 @@ unsafe fn test_lsx_vsubi_bu() {
     );
     let r = i64x2::new(-8192169673836457574, 4758493248402185941);
 
-    assert_eq!(r, transmute(lsx_vsubi_bu::<19>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsubi_bu::<19>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -728,7 +909,7 @@ unsafe fn test_lsx_vsubi_hu() {
     let a = i16x8::new(13272, -26858, -235, 16054, 29698, 1377, 4604, -3878);
     let r = i64x2::new(4514576075959186376, -1096043853912116238);
 
-    assert_eq!(r, transmute(lsx_vsubi_hu::<16>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsubi_hu::<16>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -736,7 +917,7 @@ unsafe fn test_lsx_vsubi_wu() {
     let a = i32x4::new(1277091145, -2076591216, -1523555105, -945754023);
     let r = i64x2::new(-8918891362898748088, -4061982600368986914);
 
-    assert_eq!(r, transmute(lsx_vsubi_wu::<1>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsubi_wu::<1>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -744,7 +925,7 @@ unsafe fn test_lsx_vsubi_du() {
     let a = i64x2::new(-8248876128472283209, -2119651236628000925);
     let r = i64x2::new(-8248876128472283234, -2119651236628000950);
 
-    assert_eq!(r, transmute(lsx_vsubi_du::<25>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsubi_du::<25>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -757,7 +938,10 @@ unsafe fn test_lsx_vmax_b() {
     );
     let r = i64x2::new(1260734548147228113, 7591133008682590587);
 
-    assert_eq!(r, transmute(lsx_vmax_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmax_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -766,7 +950,10 @@ unsafe fn test_lsx_vmax_h() {
     let b = i16x8::new(25637, -11569, -23103, 6983, -17125, 5183, -709, 5986);
     let r = i64x2::new(1965654441534120997, 1684966995419662474);
 
-    assert_eq!(r, transmute(lsx_vmax_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmax_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -775,7 +962,10 @@ unsafe fn test_lsx_vmax_w() {
     let b = i32x4::new(643859790, -389733899, -1309288060, 1934346522);
     let r = i64x2::new(-1673894349703707314, 8307955054730158361);
 
-    assert_eq!(r, transmute(lsx_vmax_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmax_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -784,7 +974,10 @@ unsafe fn test_lsx_vmax_d() {
     let b = i64x2::new(-6137495199657896371, 2160025776787809810);
     let r = i64x2::new(-990960773872867733, 6406870358170165030);
 
-    assert_eq!(r, transmute(lsx_vmax_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmax_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -794,7 +987,7 @@ unsafe fn test_lsx_vmaxi_b() {
     );
     let r = i64x2::new(5908253215318699518, 1728939149412407162);
 
-    assert_eq!(r, transmute(lsx_vmaxi_b::<-2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmaxi_b::<-2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -802,7 +995,7 @@ unsafe fn test_lsx_vmaxi_h() {
     let a = i16x8::new(-14059, 19536, 15816, 28251, 23079, -10486, -11781, 25565);
     let r = i64x2::new(7952017497535807498, 7195907822558272039);
 
-    assert_eq!(r, transmute(lsx_vmaxi_h::<10>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmaxi_h::<10>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -810,7 +1003,7 @@ unsafe fn test_lsx_vmaxi_w() {
     let a = i32x4::new(-1136628686, -168033999, -2082324641, -1789957469);
     let r = i64x2::new(55834574861, 55834574861);
 
-    assert_eq!(r, transmute(lsx_vmaxi_w::<13>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmaxi_w::<13>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -818,7 +1011,7 @@ unsafe fn test_lsx_vmaxi_d() {
     let a = i64x2::new(-490958606840895025, -602287987736508723);
     let r = i64x2::new(-5, -5);
 
-    assert_eq!(r, transmute(lsx_vmaxi_d::<-5>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmaxi_d::<-5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -831,7 +1024,13 @@ unsafe fn test_lsx_vmax_bu() {
     );
     let r = i64x2::new(-5712542810735052010, 4588590651995571688);
 
-    assert_eq!(r, transmute(lsx_vmax_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmax_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -840,7 +1039,13 @@ unsafe fn test_lsx_vmax_hu() {
     let b = u16x8::new(61508, 27224, 11696, 15294, 30725, 4809, 55995, 24012);
     let r = i64x2::new(6366821095949791300, 6759017637785204741);
 
-    assert_eq!(r, transmute(lsx_vmax_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmax_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -849,7 +1054,13 @@ unsafe fn test_lsx_vmax_wu() {
     let b = u32x4::new(2856502284, 546582019, 3814541188, 2370198139);
     let r = i64x2::new(2347551899043152908, -8266820577849948284);
 
-    assert_eq!(r, transmute(lsx_vmax_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmax_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -858,7 +1069,13 @@ unsafe fn test_lsx_vmax_du() {
     let b = u64x2::new(15559502733477870114, 3537017767853389449);
     let r = i64x2::new(-1341110034690820781, -6520089917898609068);
 
-    assert_eq!(r, transmute(lsx_vmax_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmax_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -868,7 +1085,7 @@ unsafe fn test_lsx_vmaxi_bu() {
     );
     let r = i64x2::new(-1045930669804428840, -8076220938123067729);
 
-    assert_eq!(r, transmute(lsx_vmaxi_bu::<27>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmaxi_bu::<27>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -876,7 +1093,7 @@ unsafe fn test_lsx_vmaxi_hu() {
     let a = u16x8::new(56394, 18974, 59, 64239, 15178, 38205, 20044, 21066);
     let r = i64x2::new(-365072790147113910, 5929637950214978378);
 
-    assert_eq!(r, transmute(lsx_vmaxi_hu::<23>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmaxi_hu::<23>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -884,7 +1101,7 @@ unsafe fn test_lsx_vmaxi_wu() {
     let a = u32x4::new(2234002286, 3837532269, 3218694441, 2956128392);
     let r = i64x2::new(-1964668478775874706, -5750269304073789143);
 
-    assert_eq!(r, transmute(lsx_vmaxi_wu::<15>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmaxi_wu::<15>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -892,7 +1109,7 @@ unsafe fn test_lsx_vmaxi_du() {
     let a = u64x2::new(3145066433415682744, 697260191203805367);
     let r = i64x2::new(3145066433415682744, 697260191203805367);
 
-    assert_eq!(r, transmute(lsx_vmaxi_du::<15>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmaxi_du::<15>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -905,7 +1122,10 @@ unsafe fn test_lsx_vmin_b() {
     );
     let r = i64x2::new(1870285769536668398, -8941449826914199819);
 
-    assert_eq!(r, transmute(lsx_vmin_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmin_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -914,7 +1134,10 @@ unsafe fn test_lsx_vmin_h() {
     let b = i16x8::new(-5519, 15267, -28304, -5842, 32145, 6582, -9646, -24918);
     let r = i64x2::new(-1644216902720689551, -7013553423522578637);
 
-    assert_eq!(r, transmute(lsx_vmin_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmin_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -923,7 +1146,10 @@ unsafe fn test_lsx_vmin_w() {
     let b = i32x4::new(-425011290, -2104111279, 175390640, 571448257);
     let r = i64x2::new(-9037089126579775578, 2454351575346593712);
 
-    assert_eq!(r, transmute(lsx_vmin_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmin_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -932,7 +1158,10 @@ unsafe fn test_lsx_vmin_d() {
     let b = i64x2::new(7269804448576860985, -2384075780126369706);
     let r = i64x2::new(5262417572890363865, -2384075780126369706);
 
-    assert_eq!(r, transmute(lsx_vmin_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmin_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -942,7 +1171,7 @@ unsafe fn test_lsx_vmini_b() {
     );
     let r = i64x2::new(-1187557278141451540, -940475489144045070);
 
-    assert_eq!(r, transmute(lsx_vmini_b::<-14>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmini_b::<-14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -950,7 +1179,7 @@ unsafe fn test_lsx_vmini_h() {
     let a = i16x8::new(26119, -26421, -26720, 11534, 11181, -13024, -9525, -1565);
     let r = i64x2::new(-677708916064259, -440267769697468419);
 
-    assert_eq!(r, transmute(lsx_vmini_h::<-3>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmini_h::<-3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -958,7 +1187,7 @@ unsafe fn test_lsx_vmini_w() {
     let a = i32x4::new(1937226480, -56354461, -210581139, 118641668);
     let r = i64x2::new(-242040566978707451, 25559222637);
 
-    assert_eq!(r, transmute(lsx_vmini_w::<5>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmini_w::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -966,7 +1195,7 @@ unsafe fn test_lsx_vmini_d() {
     let a = i64x2::new(-6839357499730806877, 2982085289136510651);
     let r = i64x2::new(-6839357499730806877, 11);
 
-    assert_eq!(r, transmute(lsx_vmini_d::<11>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmini_d::<11>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -979,7 +1208,13 @@ unsafe fn test_lsx_vmin_bu() {
     );
     let r = i64x2::new(3617816997909406996, 4784078933357220137);
 
-    assert_eq!(r, transmute(lsx_vmin_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmin_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -988,7 +1223,13 @@ unsafe fn test_lsx_vmin_hu() {
     let b = u16x8::new(30424, 14541, 7654, 46014, 42452, 14971, 14903, 13871);
     let r = i64x2::new(-5494921620712753448, 3904403410832303572);
 
-    assert_eq!(r, transmute(lsx_vmin_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmin_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -997,7 +1238,13 @@ unsafe fn test_lsx_vmin_wu() {
     let b = u32x4::new(1456829356, 2264966310, 1587887390, 645429404);
     let r = i64x2::new(-8718787844260924500, 2772098183187911585);
 
-    assert_eq!(r, transmute(lsx_vmin_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmin_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1006,7 +1253,13 @@ unsafe fn test_lsx_vmin_du() {
     let b = u64x2::new(15079551366517035256, 13891052596545854864);
     let r = i64x2::new(6641707046382446478, 5750385968612732680);
 
-    assert_eq!(r, transmute(lsx_vmin_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmin_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1016,7 +1269,7 @@ unsafe fn test_lsx_vmini_bu() {
     );
     let r = i64x2::new(361700864190383365, 361700864190317829);
 
-    assert_eq!(r, transmute(lsx_vmini_bu::<5>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmini_bu::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1024,7 +1277,7 @@ unsafe fn test_lsx_vmini_hu() {
     let a = u16x8::new(51791, 41830, 16737, 31634, 36341, 58491, 48701, 8690);
     let r = i64x2::new(5066626891382802, 5066626891382802);
 
-    assert_eq!(r, transmute(lsx_vmini_hu::<18>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmini_hu::<18>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1032,7 +1285,7 @@ unsafe fn test_lsx_vmini_wu() {
     let a = u32x4::new(1158888991, 2639721369, 556001789, 2902942998);
     let r = i64x2::new(77309411346, 77309411346);
 
-    assert_eq!(r, transmute(lsx_vmini_wu::<18>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmini_wu::<18>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1040,7 +1293,7 @@ unsafe fn test_lsx_vmini_du() {
     let a = u64x2::new(17903595768445663391, 13119300660970895532);
     let r = i64x2::new(13, 13);
 
-    assert_eq!(r, transmute(lsx_vmini_du::<13>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmini_du::<13>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1053,7 +1306,10 @@ unsafe fn test_lsx_vseq_b() {
     );
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vseq_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vseq_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1062,7 +1318,10 @@ unsafe fn test_lsx_vseq_h() {
     let b = i16x8::new(-7387, -24074, 15709, -4629, 30465, -9504, -21403, -30287);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vseq_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vseq_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1071,7 +1330,10 @@ unsafe fn test_lsx_vseq_w() {
     let b = i32x4::new(-493722413, -522973881, -1254416384, -884207273);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vseq_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vseq_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1080,7 +1342,10 @@ unsafe fn test_lsx_vseq_d() {
     let b = i64x2::new(3023654898382436999, 1783520577741396523);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vseq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vseq_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1090,7 +1355,7 @@ unsafe fn test_lsx_vseqi_b() {
     );
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vseqi_b::<12>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vseqi_b::<12>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1098,7 +1363,7 @@ unsafe fn test_lsx_vseqi_h() {
     let a = i16x8::new(-3205, 25452, 20774, 22065, -8424, 16590, -15971, -14154);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vseqi_h::<-1>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vseqi_h::<-1>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1106,7 +1371,7 @@ unsafe fn test_lsx_vseqi_w() {
     let a = i32x4::new(199798215, -798304779, -1812193878, -1830438161);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vseqi_w::<11>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vseqi_w::<11>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1114,7 +1379,7 @@ unsafe fn test_lsx_vseqi_d() {
     let a = i64x2::new(-7376858177879278972, 1947027764115386661);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vseqi_d::<3>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vseqi_d::<3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1124,7 +1389,7 @@ unsafe fn test_lsx_vslti_b() {
     );
     let r = i64x2::new(-1099511627776, 1095216660480);
 
-    assert_eq!(r, transmute(lsx_vslti_b::<-4>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslti_b::<-4>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1137,7 +1402,10 @@ unsafe fn test_lsx_vslt_b() {
     );
     let r = i64x2::new(-72056494526365441, -280375465148416);
 
-    assert_eq!(r, transmute(lsx_vslt_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vslt_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1146,7 +1414,10 @@ unsafe fn test_lsx_vslt_h() {
     let b = i16x8::new(-10624, 12762, 31216, 13253, 2299, -12591, -8652, -22348);
     let r = i64x2::new(-4294967296, 65535);
 
-    assert_eq!(r, transmute(lsx_vslt_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vslt_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1155,7 +1426,10 @@ unsafe fn test_lsx_vslt_w() {
     let b = i32x4::new(-1849021639, -756143028, 54274044, 646446450);
     let r = i64x2::new(-4294967296, -1);
 
-    assert_eq!(r, transmute(lsx_vslt_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vslt_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1164,7 +1438,10 @@ unsafe fn test_lsx_vslt_d() {
     let b = i64x2::new(1481173131774551907, 270656941607020532);
     let r = i64x2::new(-1, 0);
 
-    assert_eq!(r, transmute(lsx_vslt_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vslt_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1172,7 +1449,7 @@ unsafe fn test_lsx_vslti_h() {
     let a = i16x8::new(-8902, 5527, 17224, -27356, 4424, 28839, 29975, 18805);
     let r = i64x2::new(-281474976645121, 0);
 
-    assert_eq!(r, transmute(lsx_vslti_h::<14>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslti_h::<14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1180,7 +1457,7 @@ unsafe fn test_lsx_vslti_w() {
     let a = i32x4::new(995282502, -1964668207, -996118772, 1812234755);
     let r = i64x2::new(-4294967296, 4294967295);
 
-    assert_eq!(r, transmute(lsx_vslti_w::<14>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslti_w::<14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1188,7 +1465,7 @@ unsafe fn test_lsx_vslti_d() {
     let a = i64x2::new(1441753618400573134, 3878439049744730841);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vslti_d::<14>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslti_d::<14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1201,7 +1478,13 @@ unsafe fn test_lsx_vslt_bu() {
     );
     let r = i64x2::new(-281474959998721, -72057589742960896);
 
-    assert_eq!(r, transmute(lsx_vslt_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vslt_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1210,7 +1493,13 @@ unsafe fn test_lsx_vslt_hu() {
     let b = u16x8::new(513, 13075, 20319, 44422, 12609, 18638, 20227, 21354);
     let r = i64x2::new(281474976645120, -281474976645121);
 
-    assert_eq!(r, transmute(lsx_vslt_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vslt_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1219,7 +1508,13 @@ unsafe fn test_lsx_vslt_wu() {
     let b = u32x4::new(1402243125, 1129899238, 2591537060, 4152171743);
     let r = i64x2::new(4294967295, -1);
 
-    assert_eq!(r, transmute(lsx_vslt_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vslt_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1228,7 +1523,13 @@ unsafe fn test_lsx_vslt_du() {
     let b = u64x2::new(835355141719377733, 10472626544222695938);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vslt_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vslt_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1238,7 +1539,7 @@ unsafe fn test_lsx_vslti_bu() {
     );
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vslti_bu::<7>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslti_bu::<7>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1246,7 +1547,7 @@ unsafe fn test_lsx_vslti_hu() {
     let a = u16x8::new(60550, 12178, 30950, 44771, 25514, 35987, 55940, 21614);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vslti_hu::<2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslti_hu::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1254,7 +1555,7 @@ unsafe fn test_lsx_vslti_wu() {
     let a = u32x4::new(912580668, 18660032, 3405726641, 4033549497);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vslti_wu::<8>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslti_wu::<8>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1262,7 +1563,7 @@ unsafe fn test_lsx_vslti_du() {
     let a = u64x2::new(17196150830761730262, 5893061291971214149);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vslti_du::<14>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslti_du::<14>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1275,7 +1576,10 @@ unsafe fn test_lsx_vsle_b() {
     );
     let r = i64x2::new(281470681808895, 280375465148415);
 
-    assert_eq!(r, transmute(lsx_vsle_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsle_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1284,7 +1588,10 @@ unsafe fn test_lsx_vsle_h() {
     let b = i16x8::new(-30602, -9535, 10944, 3343, -1093, 6600, -19453, -4561);
     let r = i64x2::new(281470681743360, -281470681808896);
 
-    assert_eq!(r, transmute(lsx_vsle_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsle_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1293,7 +1600,10 @@ unsafe fn test_lsx_vsle_w() {
     let b = i32x4::new(-1810853975, 2021418524, 215198844, 1124361386);
     let r = i64x2::new(-4294967296, -4294967296);
 
-    assert_eq!(r, transmute(lsx_vsle_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsle_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1302,7 +1612,10 @@ unsafe fn test_lsx_vsle_d() {
     let b = i64x2::new(71694374951002423, -4307912969104303925);
     let r = i64x2::new(-1, 0);
 
-    assert_eq!(r, transmute(lsx_vsle_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsle_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1312,7 +1625,7 @@ unsafe fn test_lsx_vslei_b() {
     );
     let r = i64x2::new(72056494526365440, 280375465082880);
 
-    assert_eq!(r, transmute(lsx_vslei_b::<3>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslei_b::<3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1320,7 +1633,7 @@ unsafe fn test_lsx_vslei_h() {
     let a = i16x8::new(31276, -16628, -30006, -20587, 2104, -30062, 18261, -6449);
     let r = i64x2::new(-65536, -281470681808896);
 
-    assert_eq!(r, transmute(lsx_vslei_h::<-3>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslei_h::<-3>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1328,7 +1641,7 @@ unsafe fn test_lsx_vslei_w() {
     let a = i32x4::new(-1890390435, 1289536678, 1490122113, 2120063492);
     let r = i64x2::new(4294967295, 0);
 
-    assert_eq!(r, transmute(lsx_vslei_w::<-16>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslei_w::<-16>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1336,7 +1649,7 @@ unsafe fn test_lsx_vslei_d() {
     let a = i64x2::new(-123539898448811963, 8007480165241051883);
     let r = i64x2::new(-1, 0);
 
-    assert_eq!(r, transmute(lsx_vslei_d::<8>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslei_d::<8>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1349,7 +1662,13 @@ unsafe fn test_lsx_vsle_bu() {
     );
     let r = i64x2::new(1095216660480, 72057594021150720);
 
-    assert_eq!(r, transmute(lsx_vsle_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsle_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1358,7 +1677,13 @@ unsafe fn test_lsx_vsle_hu() {
     let b = u16x8::new(50529, 35111, 24746, 62465, 21587, 30574, 11054, 11653);
     let r = i64x2::new(-4294967296, 281474976710655);
 
-    assert_eq!(r, transmute(lsx_vsle_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsle_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1367,7 +1692,13 @@ unsafe fn test_lsx_vsle_wu() {
     let b = u32x4::new(1321018603, 1091195011, 3525236625, 4061062671);
     let r = i64x2::new(0, -1);
 
-    assert_eq!(r, transmute(lsx_vsle_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsle_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1376,7 +1707,13 @@ unsafe fn test_lsx_vsle_du() {
     let b = u64x2::new(16044633718831874991, 3531311371811276914);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vsle_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsle_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1386,7 +1723,7 @@ unsafe fn test_lsx_vslei_bu() {
     );
     let r = i64x2::new(71776119061217280, 280375465082880);
 
-    assert_eq!(r, transmute(lsx_vslei_bu::<18>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslei_bu::<18>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1394,7 +1731,7 @@ unsafe fn test_lsx_vslei_hu() {
     let a = u16x8::new(1430, 10053, 35528, 28458, 2394, 22098, 40236, 20853);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vslei_hu::<10>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslei_hu::<10>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1402,7 +1739,7 @@ unsafe fn test_lsx_vslei_wu() {
     let a = u32x4::new(3289026584, 3653636092, 2919866047, 2895662832);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vslei_wu::<2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslei_wu::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1410,7 +1747,7 @@ unsafe fn test_lsx_vslei_du() {
     let a = u64x2::new(17462377852989253439, 17741928456729041079);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vslei_du::<12>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vslei_du::<12>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1420,7 +1757,7 @@ unsafe fn test_lsx_vsat_b() {
     );
     let r = i64x2::new(-2964542792447819074, 3186937137643144200);
 
-    assert_eq!(r, transmute(lsx_vsat_b::<7>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsat_b::<7>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1428,7 +1765,7 @@ unsafe fn test_lsx_vsat_h() {
     let a = i16x8::new(-22234, -8008, -23350, 13768, 26313, -27447, -3569, 6025);
     let r = i64x2::new(576451960371214336, 576451960371152895);
 
-    assert_eq!(r, transmute(lsx_vsat_h::<11>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsat_h::<11>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1436,7 +1773,7 @@ unsafe fn test_lsx_vsat_w() {
     let a = i32x4::new(-84179653, 874415975, 1823119516, 1667850968);
     let r = i64x2::new(137438953440, 133143986207);
 
-    assert_eq!(r, transmute(lsx_vsat_w::<5>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsat_w::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1444,7 +1781,7 @@ unsafe fn test_lsx_vsat_d() {
     let a = i64x2::new(6859869867233872152, 2514172105675226457);
     let r = i64x2::new(262143, 262143);
 
-    assert_eq!(r, transmute(lsx_vsat_d::<18>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsat_d::<18>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1454,7 +1791,7 @@ unsafe fn test_lsx_vsat_bu() {
     );
     let r = i64x2::new(2125538672170008439, 6577605268441825038);
 
-    assert_eq!(r, transmute(lsx_vsat_bu::<6>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsat_bu::<6>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1462,7 +1799,7 @@ unsafe fn test_lsx_vsat_hu() {
     let a = u16x8::new(36681, 34219, 6160, 8687, 4544, 20195, 35034, 916);
     let r = i64x2::new(287953294993589247, 257835472485549055);
 
-    assert_eq!(r, transmute(lsx_vsat_hu::<9>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsat_hu::<9>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1470,7 +1807,7 @@ unsafe fn test_lsx_vsat_wu() {
     let a = u32x4::new(1758000759, 4138051566, 2705324001, 3927640324);
     let r = i64x2::new(70364449226751, 70364449226751);
 
-    assert_eq!(r, transmute(lsx_vsat_wu::<13>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsat_wu::<13>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1478,7 +1815,7 @@ unsafe fn test_lsx_vsat_du() {
     let a = u64x2::new(1953136817312581670, 2606878300382729363);
     let r = i64x2::new(9007199254740991, 9007199254740991);
 
-    assert_eq!(r, transmute(lsx_vsat_du::<52>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsat_du::<52>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -1491,7 +1828,13 @@ unsafe fn test_lsx_vadda_b() {
     );
     let r = i64x2::new(8248499858970022011, 8535863472581999270);
 
-    assert_eq!(r, transmute(lsx_vadda_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vadda_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1500,7 +1843,13 @@ unsafe fn test_lsx_vadda_h() {
     let b = i16x8::new(-21543, 21720, 14529, -19143, -28953, 13450, 8037, 29413);
     let r = i64x2::new(-8646732423142600033, 8924050915627474398);
 
-    assert_eq!(r, transmute(lsx_vadda_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vadda_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1509,7 +1858,13 @@ unsafe fn test_lsx_vadda_w() {
     let b = i32x4::new(287041349, 249467792, 312776520, 1314435078);
     let r = i64x2::new(8345875378983299469, 6092442344252138029);
 
-    assert_eq!(r, transmute(lsx_vadda_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vadda_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1518,7 +1873,13 @@ unsafe fn test_lsx_vadda_d() {
     let b = i64x2::new(-4324432602362661920, 6402427893748093984);
     let r = i64x2::new(6071741662385212188, -5328622052402301597);
 
-    assert_eq!(r, transmute(lsx_vadda_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vadda_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1531,7 +1892,13 @@ unsafe fn test_lsx_vsadd_b() {
     );
     let r = i64x2::new(-3422653801050278697, 1909270979770548186);
 
-    assert_eq!(r, transmute(lsx_vsadd_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsadd_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1540,7 +1907,13 @@ unsafe fn test_lsx_vsadd_h() {
     let b = i16x8::new(26970, 17131, 15547, -7614, -8479, 22338, 3567, -22299);
     let r = i64x2::new(6720170624686097630, -304244782337649222);
 
-    assert_eq!(r, transmute(lsx_vsadd_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsadd_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1549,7 +1922,13 @@ unsafe fn test_lsx_vsadd_w() {
     let b = i32x4::new(-1026388582, 222487110, 501504960, -1863994162);
     let r = i64x2::new(-6565289918505943040, -6915373914453178024);
 
-    assert_eq!(r, transmute(lsx_vsadd_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsadd_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1558,7 +1937,13 @@ unsafe fn test_lsx_vsadd_d() {
     let b = i64x2::new(-6599608819082608284, -5088169537193133686);
     let r = i64x2::new(-8567396806692999839, -9223372036854775808);
 
-    assert_eq!(r, transmute(lsx_vsadd_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1571,7 +1956,13 @@ unsafe fn test_lsx_vsadd_bu() {
     );
     let r = i64x2::new(-5404438145481572386, -7318352348905473);
 
-    assert_eq!(r, transmute(lsx_vsadd_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsadd_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1580,7 +1971,13 @@ unsafe fn test_lsx_vsadd_hu() {
     let b = u16x8::new(31219, 59227, 25607, 62798, 18845, 3238, 19902, 24978);
     let r = i64x2::new(-8740258447361, -136834913009665);
 
-    assert_eq!(r, transmute(lsx_vsadd_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsadd_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1589,7 +1986,13 @@ unsafe fn test_lsx_vsadd_wu() {
     let b = u32x4::new(3676524021, 3894343575, 904432536, 1616820031);
     let r = i64x2::new(-1, -7583652642497232897);
 
-    assert_eq!(r, transmute(lsx_vsadd_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsadd_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1598,7 +2001,13 @@ unsafe fn test_lsx_vsadd_du() {
     let b = u64x2::new(11054638512585704882, 3549000132135395099);
     let r = i64x2::new(-3651327027786652925, -623479558932885349);
 
-    assert_eq!(r, transmute(lsx_vsadd_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsadd_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1611,7 +2020,10 @@ unsafe fn test_lsx_vavg_b() {
     );
     let r = i64x2::new(-152206416164856247, 4369276355735447089);
 
-    assert_eq!(r, transmute(lsx_vavg_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavg_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1620,7 +2032,10 @@ unsafe fn test_lsx_vavg_h() {
     let b = i16x8::new(-3088, -25854, -32552, -8417, 7808, -12495, 22032, -5168);
     let r = i64x2::new(696836182083297626, -4337760619710117321);
 
-    assert_eq!(r, transmute(lsx_vavg_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavg_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1629,7 +2044,10 @@ unsafe fn test_lsx_vavg_w() {
     let b = i32x4::new(-324844828, -1580060766, -1909832882, 328273785);
     let r = i64x2::new(475428188150908257, 4521676108535152711);
 
-    assert_eq!(r, transmute(lsx_vavg_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavg_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1638,7 +2056,10 @@ unsafe fn test_lsx_vavg_d() {
     let b = i64x2::new(3169904420607189220, 5159962511251707672);
     let r = i64x2::new(2328313764472338215, 5669256157716045974);
 
-    assert_eq!(r, transmute(lsx_vavg_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavg_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1651,7 +2072,13 @@ unsafe fn test_lsx_vavg_bu() {
     );
     let r = i64x2::new(-5663745084945885565, 2801126043194071837);
 
-    assert_eq!(r, transmute(lsx_vavg_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavg_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1660,7 +2087,13 @@ unsafe fn test_lsx_vavg_hu() {
     let b = u16x8::new(44835, 36733, 12115, 42874, 4819, 12201, 27397, 25394);
     let r = i64x2::new(-4196978047981735086, -6439149718662907396);
 
-    assert_eq!(r, transmute(lsx_vavg_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavg_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1669,7 +2102,13 @@ unsafe fn test_lsx_vavg_wu() {
     let b = u32x4::new(160886383, 26081142, 459122380, 2523086630);
     let r = i64x2::new(123816739188229069, -5586965600173345916);
 
-    assert_eq!(r, transmute(lsx_vavg_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavg_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1678,7 +2117,13 @@ unsafe fn test_lsx_vavg_du() {
     let b = u64x2::new(9749063966076740681, 5963120178993456389);
     let r = i64x2::new(-7770235857859936532, 7939635441364553211);
 
-    assert_eq!(r, transmute(lsx_vavg_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavg_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1691,7 +2136,13 @@ unsafe fn test_lsx_vavgr_b() {
     );
     let r = i64x2::new(1883712581662731545, -1226681417271426582);
 
-    assert_eq!(r, transmute(lsx_vavgr_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavgr_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1700,7 +2151,13 @@ unsafe fn test_lsx_vavgr_h() {
     let b = i16x8::new(-9758, -8332, 20577, 31066, 31120, 14788, -22323, 16722);
     let r = i64x2::new(3801916629507170613, 3994084079587580569);
 
-    assert_eq!(r, transmute(lsx_vavgr_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavgr_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1709,7 +2166,13 @@ unsafe fn test_lsx_vavgr_w() {
     let b = i32x4::new(1278058715, -155858446, -195547847, -750518746);
     let r = i64x2::new(4040594005688324125, -5795079921582298726);
 
-    assert_eq!(r, transmute(lsx_vavgr_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavgr_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1718,7 +2181,13 @@ unsafe fn test_lsx_vavgr_d() {
     let b = i64x2::new(8758126674980055299, -7441643514470614533);
     let r = i64x2::new(3399991646978312393, -1904131665097658207);
 
-    assert_eq!(r, transmute(lsx_vavgr_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavgr_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1731,7 +2200,13 @@ unsafe fn test_lsx_vavgr_bu() {
     );
     let r = i64x2::new(9122444831751176042, 6010164553039771699);
 
-    assert_eq!(r, transmute(lsx_vavgr_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavgr_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1740,7 +2215,13 @@ unsafe fn test_lsx_vavgr_hu() {
     let b = u16x8::new(26111, 34713, 61420, 23702, 29204, 9543, 62786, 7043);
     let r = i64x2::new(7022187818705851223, 4754859411904311722);
 
-    assert_eq!(r, transmute(lsx_vavgr_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavgr_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1749,7 +2230,13 @@ unsafe fn test_lsx_vavgr_wu() {
     let b = u32x4::new(1930150361, 3668628165, 2983921396, 2410913126);
     let r = i64x2::new(-5401180487351753235, 8140240017388800980);
 
-    assert_eq!(r, transmute(lsx_vavgr_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavgr_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1758,7 +2245,13 @@ unsafe fn test_lsx_vavgr_du() {
     let b = u64x2::new(8650759135311802962, 11380630663742852932);
     let r = i64x2::new(6046550632940509412, 8095423581736830430);
 
-    assert_eq!(r, transmute(lsx_vavgr_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vavgr_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1771,7 +2264,13 @@ unsafe fn test_lsx_vssub_b() {
     );
     let r = i64x2::new(628822736562549631, -9187601072510296593);
 
-    assert_eq!(r, transmute(lsx_vssub_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssub_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1780,7 +2279,13 @@ unsafe fn test_lsx_vssub_h() {
     let b = i16x8::new(-26027, 6118, -13204, 25080, 12458, 8441, 24701, 11617);
     let r = i64x2::new(-9223231300041015297, 1942699741282756937);
 
-    assert_eq!(r, transmute(lsx_vssub_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssub_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1789,7 +2294,13 @@ unsafe fn test_lsx_vssub_w() {
     let b = i32x4::new(-1808829767, 2144666490, 146236682, 1180114488);
     let r = i64x2::new(-9223372035405031217, -177933965588659662);
 
-    assert_eq!(r, transmute(lsx_vssub_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssub_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1798,7 +2309,13 @@ unsafe fn test_lsx_vssub_d() {
     let b = i64x2::new(-2293337525465880409, 5736255249834646932);
     let r = i64x2::new(2921430482628531027, -4208815595153969049);
 
-    assert_eq!(r, transmute(lsx_vssub_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1811,7 +2328,13 @@ unsafe fn test_lsx_vssub_bu() {
     );
     let r = i64x2::new(1441151919413273782, 87960930222283);
 
-    assert_eq!(r, transmute(lsx_vssub_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssub_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1820,7 +2343,13 @@ unsafe fn test_lsx_vssub_hu() {
     let b = u16x8::new(50468, 33060, 15257, 59071, 59343, 21993, 42978, 20097);
     let r = i64x2::new(902801202201243247, -7922957643493867520);
 
-    assert_eq!(r, transmute(lsx_vssub_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssub_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1829,7 +2358,13 @@ unsafe fn test_lsx_vssub_wu() {
     let b = u32x4::new(31483972, 3489479082, 152079374, 1875131600);
     let r = i64x2::new(66202020638834260, 1378022115978010238);
 
-    assert_eq!(r, transmute(lsx_vssub_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssub_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1838,7 +2373,13 @@ unsafe fn test_lsx_vssub_du() {
     let b = u64x2::new(6460869225596371206, 16765308520486969885);
     let r = i64x2::new(8426906920692365065, 0);
 
-    assert_eq!(r, transmute(lsx_vssub_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssub_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1851,7 +2392,13 @@ unsafe fn test_lsx_vabsd_b() {
     );
     let r = i64x2::new(4230359294854509733, 2116586434120326452);
 
-    assert_eq!(r, transmute(lsx_vabsd_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vabsd_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1860,7 +2407,13 @@ unsafe fn test_lsx_vabsd_h() {
     let b = i16x8::new(9346, 27961, 21592, 10762, -6831, 17219, 14968, -1750);
     let r = i64x2::new(4018377481144584593, 2994052849949411737);
 
-    assert_eq!(r, transmute(lsx_vabsd_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vabsd_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1869,7 +2422,13 @@ unsafe fn test_lsx_vabsd_w() {
     let b = i32x4::new(-638463360, -1154268425, 818053243, -1766966029);
     let r = i64x2::new(4346218292750542585, 1613133471209364690);
 
-    assert_eq!(r, transmute(lsx_vabsd_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vabsd_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1878,7 +2437,13 @@ unsafe fn test_lsx_vabsd_d() {
     let b = i64x2::new(-8533946706796471089, 1165272962517390961);
     let r = i64x2::new(7188249046367538699, 8146605509049538382);
 
-    assert_eq!(r, transmute(lsx_vabsd_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vabsd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1891,7 +2456,13 @@ unsafe fn test_lsx_vabsd_bu() {
     );
     let r = i64x2::new(2316568964225934796, 5350198762417854927);
 
-    assert_eq!(r, transmute(lsx_vabsd_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vabsd_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1900,7 +2471,13 @@ unsafe fn test_lsx_vabsd_hu() {
     let b = u16x8::new(42102, 40052, 6807, 16289, 29686, 38061, 42843, 26642);
     let r = i64x2::new(-6889746235852116468, 1175584127230950722);
 
-    assert_eq!(r, transmute(lsx_vabsd_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vabsd_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1909,7 +2486,13 @@ unsafe fn test_lsx_vabsd_wu() {
     let b = u32x4::new(3008439409, 976530727, 1726048801, 4235308512);
     let r = i64x2::new(-5056055741505581388, 103751774096297765);
 
-    assert_eq!(r, transmute(lsx_vabsd_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vabsd_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1918,7 +2501,13 @@ unsafe fn test_lsx_vabsd_du() {
     let b = u64x2::new(305704565845198935, 18327726360649467511);
     let r = i64x2::new(-4540227154002526968, -1590034053554043722);
 
-    assert_eq!(r, transmute(lsx_vabsd_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vabsd_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1931,7 +2520,10 @@ unsafe fn test_lsx_vmul_b() {
     );
     let r = i64x2::new(-836412611799730432, -7959044669412588992);
 
-    assert_eq!(r, transmute(lsx_vmul_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmul_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1940,7 +2532,10 @@ unsafe fn test_lsx_vmul_h() {
     let b = i16x8::new(-18582, -25667, 17674, 8424, -17121, -21798, 28934, -353);
     let r = i64x2::new(-7419436171490628650, 3947512047518358605);
 
-    assert_eq!(r, transmute(lsx_vmul_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmul_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1949,7 +2544,10 @@ unsafe fn test_lsx_vmul_w() {
     let b = i32x4::new(1754730718, 782084571, 894216679, -1895747372);
     let r = i64x2::new(6602438528086061106, 4680306660704041039);
 
-    assert_eq!(r, transmute(lsx_vmul_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmul_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1958,7 +2556,10 @@ unsafe fn test_lsx_vmul_d() {
     let b = i64x2::new(8096709215426138432, -5454415917204378153);
     let r = i64x2::new(-1062747544199352000, -649255846668983579);
 
-    assert_eq!(r, transmute(lsx_vmul_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmul_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -1976,7 +2577,11 @@ unsafe fn test_lsx_vmadd_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmadd_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmadd_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -1989,7 +2594,11 @@ unsafe fn test_lsx_vmadd_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmadd_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmadd_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2002,7 +2611,11 @@ unsafe fn test_lsx_vmadd_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmadd_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmadd_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2015,7 +2628,11 @@ unsafe fn test_lsx_vmadd_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmadd_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2034,7 +2651,11 @@ unsafe fn test_lsx_vmsub_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmsub_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmsub_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2047,7 +2668,11 @@ unsafe fn test_lsx_vmsub_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmsub_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmsub_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2060,7 +2685,11 @@ unsafe fn test_lsx_vmsub_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmsub_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmsub_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2073,7 +2702,11 @@ unsafe fn test_lsx_vmsub_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmsub_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmsub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2087,7 +2720,10 @@ unsafe fn test_lsx_vdiv_b() {
     );
     let r = i64x2::new(720575944674246657, 281475060530176);
 
-    assert_eq!(r, transmute(lsx_vdiv_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vdiv_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2096,7 +2732,10 @@ unsafe fn test_lsx_vdiv_h() {
     let b = i16x8::new(-11221, 24673, 19931, 3799, -3251, -21373, -13758, -31286);
     let r = i64x2::new(-1125904201744385, 281470681743353);
 
-    assert_eq!(r, transmute(lsx_vdiv_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vdiv_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2105,7 +2744,10 @@ unsafe fn test_lsx_vdiv_w() {
     let b = i32x4::new(-775731190, 1887886939, 1001718213, 1135075421);
     let r = i64x2::new(4294967295, 4294967297);
 
-    assert_eq!(r, transmute(lsx_vdiv_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vdiv_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2114,7 +2756,10 @@ unsafe fn test_lsx_vdiv_d() {
     let b = i64x2::new(-9175012156877545557, -6390704898809702209);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vdiv_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vdiv_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2127,7 +2772,13 @@ unsafe fn test_lsx_vdiv_bu() {
     );
     let r = i64x2::new(261, 72058702139687425);
 
-    assert_eq!(r, transmute(lsx_vdiv_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vdiv_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2136,7 +2787,13 @@ unsafe fn test_lsx_vdiv_hu() {
     let b = u16x8::new(25282, 44917, 13706, 63351, 58837, 46710, 29092, 57823);
     let r = i64x2::new(4294967297, 0);
 
-    assert_eq!(r, transmute(lsx_vdiv_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vdiv_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2145,7 +2802,13 @@ unsafe fn test_lsx_vdiv_wu() {
     let b = u32x4::new(1130189258, 1211056894, 2357258312, 3855913706);
     let r = i64x2::new(1, 1);
 
-    assert_eq!(r, transmute(lsx_vdiv_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vdiv_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2154,7 +2817,13 @@ unsafe fn test_lsx_vdiv_du() {
     let b = u64x2::new(14945948123666054968, 10864054932328247404);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vdiv_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vdiv_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2167,7 +2836,13 @@ unsafe fn test_lsx_vhaddw_h_b() {
     );
     let r = i64x2::new(13791943145684950, -562821104926904);
 
-    assert_eq!(r, transmute(lsx_vhaddw_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhaddw_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2178,7 +2853,13 @@ unsafe fn test_lsx_vhaddw_w_h() {
     );
     let r = i64x2::new(56307021213062, 183021441324639);
 
-    assert_eq!(r, transmute(lsx_vhaddw_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhaddw_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2187,7 +2868,13 @@ unsafe fn test_lsx_vhaddw_d_w() {
     let b = i32x4::new(-1119468785, -1334232049, -1752131604, -2016112631);
     let r = i64x2::new(-2502031305, -1217615295);
 
-    assert_eq!(r, transmute(lsx_vhaddw_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhaddw_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2200,7 +2887,13 @@ unsafe fn test_lsx_vhaddw_hu_bu() {
     );
     let r = i64x2::new(45601115212087520, 21110838012870921);
 
-    assert_eq!(r, transmute(lsx_vhaddw_hu_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhaddw_hu_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2209,7 +2902,13 @@ unsafe fn test_lsx_vhaddw_wu_hu() {
     let b = u16x8::new(40369, 53005, 64424, 35720, 9231, 19965, 20662, 8208);
     let r = i64x2::new(411432097222434, 312888367535410);
 
-    assert_eq!(r, transmute(lsx_vhaddw_wu_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhaddw_wu_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2218,7 +2917,13 @@ unsafe fn test_lsx_vhaddw_du_wu() {
     let b = u32x4::new(728838120, 1267673009, 2659634151, 2264611356);
     let r = i64x2::new(4172122985, 4839922613);
 
-    assert_eq!(r, transmute(lsx_vhaddw_du_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhaddw_du_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2231,7 +2936,13 @@ unsafe fn test_lsx_vhsubw_h_b() {
     );
     let r = i64x2::new(-4503363402989617, -31243430355664844);
 
-    assert_eq!(r, transmute(lsx_vhsubw_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhsubw_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2240,7 +2951,13 @@ unsafe fn test_lsx_vhsubw_w_h() {
     let b = i16x8::new(-14204, -13312, 8240, -4455, -6362, -4711, -30790, -15773);
     let r = i64x2::new(70059506530916, 60275571046613);
 
-    assert_eq!(r, transmute(lsx_vhsubw_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhsubw_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2249,7 +2966,13 @@ unsafe fn test_lsx_vhsubw_d_w() {
     let b = i32x4::new(-1671723008, 870456702, 264823818, 13322401);
     let r = i64x2::new(-201438605, 449141316);
 
-    assert_eq!(r, transmute(lsx_vhsubw_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhsubw_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2262,7 +2985,13 @@ unsafe fn test_lsx_vhsubw_hu_bu() {
     );
     let r = i64x2::new(-62206416523952172, 42783380429340790);
 
-    assert_eq!(r, transmute(lsx_vhsubw_hu_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhsubw_hu_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2271,7 +3000,13 @@ unsafe fn test_lsx_vhsubw_wu_hu() {
     let b = u16x8::new(5212, 32159, 36502, 59290, 7604, 229, 35511, 47443);
     let r = i64x2::new(24696062008394, -147484881944276);
 
-    assert_eq!(r, transmute(lsx_vhsubw_wu_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhsubw_wu_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2280,7 +3015,13 @@ unsafe fn test_lsx_vhsubw_du_wu() {
     let b = u32x4::new(1383087137, 2403951939, 360532131, 3513614550);
     let r = i64x2::new(-601935499, 31776736);
 
-    assert_eq!(r, transmute(lsx_vhsubw_du_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhsubw_du_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2293,7 +3034,10 @@ unsafe fn test_lsx_vmod_b() {
     );
     let r = i64x2::new(2804691417388804007, -2461515231199824166);
 
-    assert_eq!(r, transmute(lsx_vmod_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmod_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2302,7 +3046,10 @@ unsafe fn test_lsx_vmod_h() {
     let b = i16x8::new(1550, 9221, -12080, 14553, -24847, 28286, 1074, 192);
     let r = i64x2::new(3930282117007147005, -10982007906888970);
 
-    assert_eq!(r, transmute(lsx_vmod_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmod_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2311,7 +3058,10 @@ unsafe fn test_lsx_vmod_w() {
     let b = i32x4::new(344507881, 1692387020, -1397506903, -1257953510);
     let r = i64x2::new(-5027973877095011085, 2553570821342119010);
 
-    assert_eq!(r, transmute(lsx_vmod_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmod_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2320,7 +3070,10 @@ unsafe fn test_lsx_vmod_d() {
     let b = i64x2::new(4636642606889723746, -259899475747531088);
     let r = i64x2::new(-1381676014874400835, -257849503742906530);
 
-    assert_eq!(r, transmute(lsx_vmod_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmod_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2333,7 +3086,13 @@ unsafe fn test_lsx_vmod_bu() {
     );
     let r = i64x2::new(7287961163701724026, 4745974892933063220);
 
-    assert_eq!(r, transmute(lsx_vmod_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmod_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2342,7 +3101,13 @@ unsafe fn test_lsx_vmod_hu() {
     let b = u16x8::new(15317, 24954, 61354, 3720, 21471, 6193, 8193, 35745);
     let r = i64x2::new(315403234587388856, 7101062794264266609);
 
-    assert_eq!(r, transmute(lsx_vmod_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmod_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2351,7 +3116,13 @@ unsafe fn test_lsx_vmod_wu() {
     let b = u32x4::new(49228057, 2249712923, 358897384, 1782599598);
     let r = i64x2::new(1070413902953059662, 3340025749258890964);
 
-    assert_eq!(r, transmute(lsx_vmod_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmod_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2360,7 +3131,13 @@ unsafe fn test_lsx_vmod_du() {
     let b = u64x2::new(16850073055169051895, 16069565262862467484);
     let r = i64x2::new(7747010922784437137, 20234676239478699);
 
-    assert_eq!(r, transmute(lsx_vmod_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmod_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2370,7 +3147,7 @@ unsafe fn test_lsx_vreplve_b() {
     );
     let r = i64x2::new(-2893606913523066921, -2893606913523066921);
 
-    assert_eq!(r, transmute(lsx_vreplve_b(transmute(a), -8)));
+    assert_eq!(r, transmute(lsx_vreplve_b(black_box(transmute(a)), -8)));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2378,7 +3155,7 @@ unsafe fn test_lsx_vreplve_h() {
     let a = i16x8::new(-29429, -23495, 8705, -7614, -25353, 11887, -25989, -12818);
     let r = i64x2::new(-3607719825936298514, -3607719825936298514);
 
-    assert_eq!(r, transmute(lsx_vreplve_h(transmute(a), 7)));
+    assert_eq!(r, transmute(lsx_vreplve_h(black_box(transmute(a)), 7)));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2386,7 +3163,7 @@ unsafe fn test_lsx_vreplve_w() {
     let a = i32x4::new(1584940676, 95787593, -1655264847, 682404402);
     let r = i64x2::new(411404579393346121, 411404579393346121);
 
-    assert_eq!(r, transmute(lsx_vreplve_w(transmute(a), -3)));
+    assert_eq!(r, transmute(lsx_vreplve_w(black_box(transmute(a)), -3)));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2394,7 +3171,7 @@ unsafe fn test_lsx_vreplve_d() {
     let a = i64x2::new(7614424214598615675, -7096892795239148002);
     let r = i64x2::new(7614424214598615675, 7614424214598615675);
 
-    assert_eq!(r, transmute(lsx_vreplve_d(transmute(a), 0)));
+    assert_eq!(r, transmute(lsx_vreplve_d(black_box(transmute(a)), 0)));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2404,7 +3181,7 @@ unsafe fn test_lsx_vreplvei_b() {
     );
     let r = i64x2::new(-2097865012304223518, -2097865012304223518);
 
-    assert_eq!(r, transmute(lsx_vreplvei_b::<5>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vreplvei_b::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2412,7 +3189,7 @@ unsafe fn test_lsx_vreplvei_h() {
     let a = i16x8::new(-15455, -4410, 5029, 25863, -23170, 26570, 27423, -834);
     let r = i64x2::new(7719006069021698847, 7719006069021698847);
 
-    assert_eq!(r, transmute(lsx_vreplvei_h::<6>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vreplvei_h::<6>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2420,7 +3197,7 @@ unsafe fn test_lsx_vreplvei_w() {
     let a = i32x4::new(1843143434, 491125746, -328585251, -1996512058);
     let r = i64x2::new(7916240772710277898, 7916240772710277898);
 
-    assert_eq!(r, transmute(lsx_vreplvei_w::<0>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vreplvei_w::<0>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2428,7 +3205,7 @@ unsafe fn test_lsx_vreplvei_d() {
     let a = i64x2::new(4333963848299154309, -8310246545782080694);
     let r = i64x2::new(-8310246545782080694, -8310246545782080694);
 
-    assert_eq!(r, transmute(lsx_vreplvei_d::<1>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vreplvei_d::<1>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2441,7 +3218,13 @@ unsafe fn test_lsx_vpickev_b() {
     );
     let r = i64x2::new(3921750152141124833, -933322373843017127);
 
-    assert_eq!(r, transmute(lsx_vpickev_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickev_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2450,7 +3233,13 @@ unsafe fn test_lsx_vpickev_h() {
     let b = i16x8::new(-5248, -1786, -21768, 23214, -4223, 23538, -24936, -32316);
     let r = i64x2::new(-7018596679058658432, 139073165196191894);
 
-    assert_eq!(r, transmute(lsx_vpickev_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickev_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2459,7 +3248,13 @@ unsafe fn test_lsx_vpickev_w() {
     let b = i32x4::new(-1187277846, -787064901, -980229113, 1746235326);
     let r = i64x2::new(-4210051979814398998, -769258006856513132);
 
-    assert_eq!(r, transmute(lsx_vpickev_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickev_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2468,7 +3263,13 @@ unsafe fn test_lsx_vpickev_d() {
     let b = i64x2::new(6574352346370076190, -3979792156310826694);
     let r = i64x2::new(6574352346370076190, 1789073368466131160);
 
-    assert_eq!(r, transmute(lsx_vpickev_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickev_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2481,7 +3282,13 @@ unsafe fn test_lsx_vpickod_b() {
     );
     let r = i64x2::new(8220640377280882872, -6083110277645985532);
 
-    assert_eq!(r, transmute(lsx_vpickod_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickod_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2490,7 +3297,13 @@ unsafe fn test_lsx_vpickod_h() {
     let b = i16x8::new(12047, 25024, -10709, -28077, 24357, 19934, 10289, 28546);
     let r = i64x2::new(8035070303515402688, 6167254016163165900);
 
-    assert_eq!(r, transmute(lsx_vpickod_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickod_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2499,7 +3312,13 @@ unsafe fn test_lsx_vpickod_w() {
     let b = i32x4::new(-99240403, 314407358, 543396756, 1976776696);
     let r = i64x2::new(8490191261129341374, -7045044594236590438);
 
-    assert_eq!(r, transmute(lsx_vpickod_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickod_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2508,7 +3327,13 @@ unsafe fn test_lsx_vpickod_d() {
     let b = i64x2::new(-4197243771252175958, -543692393753629390);
     let r = i64x2::new(-543692393753629390, -7578696032343374601);
 
-    assert_eq!(r, transmute(lsx_vpickod_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickod_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2521,7 +3346,13 @@ unsafe fn test_lsx_vilvh_b() {
     );
     let r = i64x2::new(1211180715666052671, -2634368371891034045);
 
-    assert_eq!(r, transmute(lsx_vilvh_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vilvh_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2530,7 +3361,13 @@ unsafe fn test_lsx_vilvh_h() {
     let b = i16x8::new(23768, -31845, 28689, 14757, 9499, 7795, -13573, -10011);
     let r = i64x2::new(-4714953853167983333, 4564918175499275003);
 
-    assert_eq!(r, transmute(lsx_vilvh_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vilvh_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2539,7 +3376,13 @@ unsafe fn test_lsx_vilvh_w() {
     let b = i32x4::new(-737076987, 38515006, 602108871, -63099569);
     let r = i64x2::new(-5365723764939852857, -1200522227779556017);
 
-    assert_eq!(r, transmute(lsx_vilvh_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vilvh_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2548,7 +3391,13 @@ unsafe fn test_lsx_vilvh_d() {
     let b = i64x2::new(-2160658667838026389, 1449429407527660400);
     let r = i64x2::new(1449429407527660400, 5375050218784453679);
 
-    assert_eq!(r, transmute(lsx_vilvh_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vilvh_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2561,7 +3410,13 @@ unsafe fn test_lsx_vilvl_b() {
     );
     let r = i64x2::new(6945744258789947856, 8515979671552484861);
 
-    assert_eq!(r, transmute(lsx_vilvl_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vilvl_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2570,7 +3425,13 @@ unsafe fn test_lsx_vilvl_h() {
     let b = i16x8::new(11601, 6788, 3174, -4208, -25999, -25660, -4591, 7133);
     let r = i64x2::new(-6560589601043632815, -2260825085889541018);
 
-    assert_eq!(r, transmute(lsx_vilvl_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vilvl_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2579,7 +3440,13 @@ unsafe fn test_lsx_vilvl_w() {
     let b = i32x4::new(486029703, 1245981961, 112180197, 1939621508);
     let r = i64x2::new(-4282490222245561977, 7435326725564935433);
 
-    assert_eq!(r, transmute(lsx_vilvl_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vilvl_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2588,7 +3455,13 @@ unsafe fn test_lsx_vilvl_d() {
     let b = i64x2::new(3142531875873363679, 736682102982019415);
     let r = i64x2::new(3142531875873363679, 7063413230460842607);
 
-    assert_eq!(r, transmute(lsx_vilvl_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vilvl_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2601,7 +3474,13 @@ unsafe fn test_lsx_vpackev_b() {
     );
     let r = i64x2::new(-1928363389519380677, -1882898104368665381);
 
-    assert_eq!(r, transmute(lsx_vpackev_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpackev_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2610,7 +3489,13 @@ unsafe fn test_lsx_vpackev_h() {
     let b = i16x8::new(-9444, 5210, -14402, 17972, 16606, 2450, 5123, 14727);
     let r = i64x2::new(7533052947329899292, 1461440082551914718);
 
-    assert_eq!(r, transmute(lsx_vpackev_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpackev_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2619,7 +3504,13 @@ unsafe fn test_lsx_vpackev_w() {
     let b = i32x4::new(-872903277, 1255047449, -2110158279, 682925573);
     let r = i64x2::new(5636997704425442707, -8345976908349339079);
 
-    assert_eq!(r, transmute(lsx_vpackev_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpackev_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2628,7 +3519,13 @@ unsafe fn test_lsx_vpackev_d() {
     let b = i64x2::new(-9119315954224042738, -4563700463464702181);
     let r = i64x2::new(-9119315954224042738, 7118943335298607169);
 
-    assert_eq!(r, transmute(lsx_vpackev_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpackev_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2641,7 +3538,13 @@ unsafe fn test_lsx_vpackod_b() {
     );
     let r = i64x2::new(4389351353151377653, -4315624792288929032);
 
-    assert_eq!(r, transmute(lsx_vpackod_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpackod_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2650,7 +3553,13 @@ unsafe fn test_lsx_vpackod_h() {
     let b = i16x8::new(-23247, 17928, -13353, -20146, 5696, 22071, -10728, -30262);
     let r = i64x2::new(-4433598883325590008, -9178747487946648009);
 
-    assert_eq!(r, transmute(lsx_vpackod_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpackod_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2659,7 +3568,13 @@ unsafe fn test_lsx_vpackod_w() {
     let b = i32x4::new(445270781, 793617340, -1461557030, -22199234);
     let r = i64x2::new(51238874735551420, 6731566319615689790);
 
-    assert_eq!(r, transmute(lsx_vpackod_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpackod_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2668,7 +3583,13 @@ unsafe fn test_lsx_vpackod_d() {
     let b = i64x2::new(9039771682296134623, -6404442538060227683);
     let r = i64x2::new(-6404442538060227683, -4670773907187480618);
 
-    assert_eq!(r, transmute(lsx_vpackod_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpackod_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2680,7 +3601,11 @@ unsafe fn test_lsx_vshuf_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vshuf_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vshuf_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2693,7 +3618,11 @@ unsafe fn test_lsx_vshuf_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vshuf_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vshuf_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2706,7 +3635,11 @@ unsafe fn test_lsx_vshuf_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vshuf_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vshuf_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2720,7 +3653,10 @@ unsafe fn test_lsx_vand_v() {
     );
     let r = i64x2::new(244105884219744360, -9223116804091473582);
 
-    assert_eq!(r, transmute(lsx_vand_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vand_v(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2730,7 +3666,7 @@ unsafe fn test_lsx_vandi_b() {
     );
     let r = i64x2::new(-8135737750142058361, -7666517314596397435);
 
-    assert_eq!(r, transmute(lsx_vandi_b::<159>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vandi_b::<159>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2743,7 +3679,10 @@ unsafe fn test_lsx_vor_v() {
     );
     let r = i64x2::new(-2351582766212852737, -4924766118269159990);
 
-    assert_eq!(r, transmute(lsx_vor_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vor_v(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2753,7 +3692,7 @@ unsafe fn test_lsx_vori_b() {
     );
     let r = i64x2::new(-589140355308650538, -3179554720060804109);
 
-    assert_eq!(r, transmute(lsx_vori_b::<210>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vori_b::<210>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2766,7 +3705,10 @@ unsafe fn test_lsx_vnor_v() {
     );
     let r = i64x2::new(3036560889408918025, 7823034030269427744);
 
-    assert_eq!(r, transmute(lsx_vnor_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vnor_v(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2776,7 +3718,7 @@ unsafe fn test_lsx_vnori_b() {
     );
     let r = i64x2::new(5227628601268782144, 596802560304890884);
 
-    assert_eq!(r, transmute(lsx_vnori_b::<51>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vnori_b::<51>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2789,7 +3731,10 @@ unsafe fn test_lsx_vxor_v() {
     );
     let r = i64x2::new(8732028225622312747, 6858262329367852470);
 
-    assert_eq!(r, transmute(lsx_vxor_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vxor_v(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2799,7 +3744,7 @@ unsafe fn test_lsx_vxori_b() {
     );
     let r = i64x2::new(3478586993001400570, 4687744515358339026);
 
-    assert_eq!(r, transmute(lsx_vxori_b::<225>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vxori_b::<225>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2817,7 +3762,11 @@ unsafe fn test_lsx_vbitsel_v() {
 
     assert_eq!(
         r,
-        transmute(lsx_vbitsel_v(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vbitsel_v(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -2833,7 +3782,10 @@ unsafe fn test_lsx_vbitseli_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vbitseli_b::<65>(transmute(a), transmute(b)))
+        transmute(lsx_vbitseli_b::<65>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -2844,7 +3796,7 @@ unsafe fn test_lsx_vshuf4i_b() {
     );
     let r = i64x2::new(3937170420478429898, -3347145886530736916);
 
-    assert_eq!(r, transmute(lsx_vshuf4i_b::<234>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vshuf4i_b::<234>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2852,7 +3804,7 @@ unsafe fn test_lsx_vshuf4i_h() {
     let a = i16x8::new(27707, -1094, -15784, -28387, 31634, -12323, -30387, -11480);
     let r = i64x2::new(-7989953385787032646, -3231104182470389795);
 
-    assert_eq!(r, transmute(lsx_vshuf4i_h::<209>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vshuf4i_h::<209>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2860,35 +3812,38 @@ unsafe fn test_lsx_vshuf4i_w() {
     let a = i32x4::new(768986805, -1036149600, -1196682940, -214444511);
     let r = i64x2::new(3302773179299516085, -5139714087882845884);
 
-    assert_eq!(r, transmute(lsx_vshuf4i_w::<160>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vshuf4i_w::<160>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
 unsafe fn test_lsx_vreplgr2vr_b() {
     let r = i64x2::new(795741901218843403, 795741901218843403);
 
-    assert_eq!(r, transmute(lsx_vreplgr2vr_b(970839819)));
+    assert_eq!(r, transmute(lsx_vreplgr2vr_b(black_box(970839819))));
 }
 
 #[simd_test(enable = "lsx")]
 unsafe fn test_lsx_vreplgr2vr_h() {
     let r = i64x2::new(-6504141532176800324, -6504141532176800324);
 
-    assert_eq!(r, transmute(lsx_vreplgr2vr_h(93693372)));
+    assert_eq!(r, transmute(lsx_vreplgr2vr_h(black_box(93693372))));
 }
 
 #[simd_test(enable = "lsx")]
 unsafe fn test_lsx_vreplgr2vr_w() {
     let r = i64x2::new(-6737078705572473188, -6737078705572473188);
 
-    assert_eq!(r, transmute(lsx_vreplgr2vr_w(-1568598372)));
+    assert_eq!(r, transmute(lsx_vreplgr2vr_w(black_box(-1568598372))));
 }
 
 #[simd_test(enable = "lsx")]
 unsafe fn test_lsx_vreplgr2vr_d() {
     let r = i64x2::new(5000134708087557572, 5000134708087557572);
 
-    assert_eq!(r, transmute(lsx_vreplgr2vr_d(5000134708087557572)));
+    assert_eq!(
+        r,
+        transmute(lsx_vreplgr2vr_d(black_box(5000134708087557572)))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -2898,7 +3853,7 @@ unsafe fn test_lsx_vpcnt_b() {
     );
     let r = i64x2::new(217867142450840068, 145528077781566722);
 
-    assert_eq!(r, transmute(lsx_vpcnt_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_vpcnt_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2906,7 +3861,7 @@ unsafe fn test_lsx_vpcnt_h() {
     let a = i16x8::new(-512, 10388, -21267, -27094, 1085, -26444, -29360, -11576);
     let r = i64x2::new(1970367786975239, 1970350607237126);
 
-    assert_eq!(r, transmute(lsx_vpcnt_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_vpcnt_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2914,7 +3869,7 @@ unsafe fn test_lsx_vpcnt_w() {
     let a = i32x4::new(1399276601, -2094725994, -100739325, -1239551533);
     let r = i64x2::new(47244640271, 81604378645);
 
-    assert_eq!(r, transmute(lsx_vpcnt_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_vpcnt_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2922,7 +3877,7 @@ unsafe fn test_lsx_vpcnt_d() {
     let a = i64x2::new(-4470823169399930539, 3184270543884128372);
     let r = i64x2::new(29, 25);
 
-    assert_eq!(r, transmute(lsx_vpcnt_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vpcnt_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2932,7 +3887,7 @@ unsafe fn test_lsx_vclo_b() {
     );
     let r = i64x2::new(72057594071547904, 3311470116864);
 
-    assert_eq!(r, transmute(lsx_vclo_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_vclo_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2940,7 +3895,7 @@ unsafe fn test_lsx_vclo_h() {
     let a = i16x8::new(-5432, 27872, -9150, 27393, 25236, 1028, -21312, -25189);
     let r = i64x2::new(8589934595, 281479271677952);
 
-    assert_eq!(r, transmute(lsx_vclo_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_vclo_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2948,7 +3903,7 @@ unsafe fn test_lsx_vclo_w() {
     let a = i32x4::new(1214322611, -1755838761, -1222326743, -1511364419);
     let r = i64x2::new(4294967296, 4294967297);
 
-    assert_eq!(r, transmute(lsx_vclo_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_vclo_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2956,7 +3911,7 @@ unsafe fn test_lsx_vclo_d() {
     let a = i64x2::new(-249299854527467825, -459308653408461862);
     let r = i64x2::new(6, 5);
 
-    assert_eq!(r, transmute(lsx_vclo_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vclo_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2966,7 +3921,7 @@ unsafe fn test_lsx_vclz_b() {
     );
     let r = i64x2::new(144116287587483648, 72903118479688195);
 
-    assert_eq!(r, transmute(lsx_vclz_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_vclz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2974,7 +3929,7 @@ unsafe fn test_lsx_vclz_h() {
     let a = i16x8::new(1222, 32426, 3164, -10763, 10189, -4197, -21841, -28676);
     let r = i64x2::new(17179934725, 2);
 
-    assert_eq!(r, transmute(lsx_vclz_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_vclz_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2982,7 +3937,7 @@ unsafe fn test_lsx_vclz_w() {
     let a = i32x4::new(-490443689, -1039971379, -217310592, -1921086575);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vclz_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_vclz_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -2990,7 +3945,7 @@ unsafe fn test_lsx_vclz_d() {
     let a = i64x2::new(4630351532137644314, -6587611980764816064);
     let r = i64x2::new(1, 0);
 
-    assert_eq!(r, transmute(lsx_vclz_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vclz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3000,7 +3955,10 @@ unsafe fn test_lsx_vpickve2gr_b() {
     );
     let r: i32 = 51;
 
-    assert_eq!(r, transmute(lsx_vpickve2gr_b::<15>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickve2gr_b::<15>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3008,7 +3966,7 @@ unsafe fn test_lsx_vpickve2gr_h() {
     let a = i16x8::new(-12924, 31013, 18171, 20404, 21226, 14128, -6255, 26521);
     let r: i32 = 21226;
 
-    assert_eq!(r, transmute(lsx_vpickve2gr_h::<4>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vpickve2gr_h::<4>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3016,7 +3974,7 @@ unsafe fn test_lsx_vpickve2gr_w() {
     let a = i32x4::new(-1559379275, 2065542381, -1882161334, 1502157419);
     let r: i32 = -1882161334;
 
-    assert_eq!(r, transmute(lsx_vpickve2gr_w::<2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vpickve2gr_w::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3024,7 +3982,7 @@ unsafe fn test_lsx_vpickve2gr_d() {
     let a = i64x2::new(-6941380853339482104, 8405634758774935528);
     let r: i64 = -6941380853339482104;
 
-    assert_eq!(r, transmute(lsx_vpickve2gr_d::<0>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vpickve2gr_d::<0>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3034,7 +3992,10 @@ unsafe fn test_lsx_vpickve2gr_bu() {
     );
     let r: u32 = 199;
 
-    assert_eq!(r, transmute(lsx_vpickve2gr_bu::<8>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickve2gr_bu::<8>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3042,7 +4003,10 @@ unsafe fn test_lsx_vpickve2gr_hu() {
     let a = i16x8::new(25003, 5139, -12977, 7550, -12177, 19294, -2216, 12693);
     let r: u32 = 25003;
 
-    assert_eq!(r, transmute(lsx_vpickve2gr_hu::<0>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickve2gr_hu::<0>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3050,7 +4014,10 @@ unsafe fn test_lsx_vpickve2gr_wu() {
     let a = i32x4::new(-295894883, 551663550, -710853968, 82692774);
     let r: u32 = 3999072413;
 
-    assert_eq!(r, transmute(lsx_vpickve2gr_wu::<0>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickve2gr_wu::<0>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3058,7 +4025,10 @@ unsafe fn test_lsx_vpickve2gr_du() {
     let a = i64x2::new(748282319555413922, -1352335765832355666);
     let r: u64 = 748282319555413922;
 
-    assert_eq!(r, transmute(lsx_vpickve2gr_du::<0>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lsx_vpickve2gr_du::<0>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3070,7 +4040,7 @@ unsafe fn test_lsx_vinsgr2vr_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vinsgr2vr_b::<14>(transmute(a), 1333652061))
+        transmute(lsx_vinsgr2vr_b::<14>(black_box(transmute(a)), 1333652061))
     );
 }
 
@@ -3079,7 +4049,10 @@ unsafe fn test_lsx_vinsgr2vr_h() {
     let a = i16x8::new(-20591, 7819, 25287, -11296, 4604, 28833, -1306, 6418);
     let r = i64x2::new(-3179432729573085295, 1806782266980897276);
 
-    assert_eq!(r, transmute(lsx_vinsgr2vr_h::<5>(transmute(a), -987420193)));
+    assert_eq!(
+        r,
+        transmute(lsx_vinsgr2vr_h::<5>(black_box(transmute(a)), -987420193))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3087,7 +4060,10 @@ unsafe fn test_lsx_vinsgr2vr_w() {
     let a = i32x4::new(1608179655, 886830932, -621638499, 2021214690);
     let r = i64x2::new(3808909851629379527, 8681050995079237782);
 
-    assert_eq!(r, transmute(lsx_vinsgr2vr_w::<2>(transmute(a), -960507754)));
+    assert_eq!(
+        r,
+        transmute(lsx_vinsgr2vr_w::<2>(black_box(transmute(a)), -960507754))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3095,7 +4071,10 @@ unsafe fn test_lsx_vinsgr2vr_d() {
     let a = i64x2::new(-6562091001143116290, -2425423285843953307);
     let r = i64x2::new(-6562091001143116290, -233659266);
 
-    assert_eq!(r, transmute(lsx_vinsgr2vr_d::<1>(transmute(a), -233659266)));
+    assert_eq!(
+        r,
+        transmute(lsx_vinsgr2vr_d::<1>(black_box(transmute(a)), -233659266))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3104,7 +4083,13 @@ unsafe fn test_lsx_vfadd_s() {
     let b = u32x4::new(1050272808, 1054022924, 1064036136, 1063113730);
     let r = i64x2::new(4588396142719948771, 4567018621615066847);
 
-    assert_eq!(r, transmute(lsx_vfadd_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfadd_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3113,7 +4098,13 @@ unsafe fn test_lsx_vfadd_d() {
     let b = u64x2::new(4605819027271079334, 4601207158507578498);
     let r = i64x2::new(4608685566198055604, 4608371493448991663);
 
-    assert_eq!(r, transmute(lsx_vfadd_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3122,7 +4113,13 @@ unsafe fn test_lsx_vfsub_s() {
     let b = u32x4::new(1063475462, 1045836432, 1065150677, 1042376676);
     let r = i64x2::new(4532926601401089072, 4475386505810184670);
 
-    assert_eq!(r, transmute(lsx_vfsub_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfsub_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3131,7 +4128,13 @@ unsafe fn test_lsx_vfsub_d() {
     let b = u64x2::new(4605973926398825814, 4600156145303017004);
     let r = i64x2::new(-4622342180736116526, 4603750919602422881);
 
-    assert_eq!(r, transmute(lsx_vfsub_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfsub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3140,7 +4143,13 @@ unsafe fn test_lsx_vfmul_s() {
     let b = u32x4::new(1065241951, 1044285812, 1050678216, 1009264512);
     let r = i64x2::new(4471727895898079441, 4289440988347233543);
 
-    assert_eq!(r, transmute(lsx_vfmul_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmul_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3149,7 +4158,13 @@ unsafe fn test_lsx_vfmul_d() {
     let b = u64x2::new(4605208047666947899, 4599634375243914522);
     let r = i64x2::new(4591550625791030606, 4595475933048682142);
 
-    assert_eq!(r, transmute(lsx_vfmul_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmul_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3158,7 +4173,13 @@ unsafe fn test_lsx_vfdiv_s() {
     let b = u32x4::new(1055538538, 1042248668, 1061233585, 1063649172);
     let r = i64x2::new(4613180427594946541, 4523223175100126088);
 
-    assert_eq!(r, transmute(lsx_vfdiv_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfdiv_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3167,7 +4188,13 @@ unsafe fn test_lsx_vfdiv_d() {
     let b = u64x2::new(4606326032528596062, 4601783079746725386);
     let r = i64x2::new(4592460108638699314, 4612120084672695832);
 
-    assert_eq!(r, transmute(lsx_vfdiv_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfdiv_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3176,7 +4203,13 @@ unsafe fn test_lsx_vfcvt_h_s() {
     let b = u32x4::new(1049501482, 1043939972, 1042291392, 1041250232);
     let r = i64x2::new(3495410141992989809, 3873441386606634666);
 
-    assert_eq!(r, transmute(lsx_vfcvt_h_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcvt_h_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3185,7 +4218,13 @@ unsafe fn test_lsx_vfcvt_s_d() {
     let b = u64x2::new(4600251021237488420, 4593890179408150924);
     let r = i64x2::new(4469319308295208818, 4496796258465732597);
 
-    assert_eq!(r, transmute(lsx_vfcvt_s_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcvt_s_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3194,7 +4233,13 @@ unsafe fn test_lsx_vfmin_s() {
     let b = u32x4::new(1060093085, 1026130528, 1057322097, 1057646773);
     let r = i64x2::new(4407197060203522560, 4542558301798153756);
 
-    assert_eq!(r, transmute(lsx_vfmin_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmin_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3203,7 +4248,13 @@ unsafe fn test_lsx_vfmin_d() {
     let b = u64x2::new(4584808359801648672, 4602712060570539582);
     let r = i64x2::new(4584808359801648672, 4602712060570539582);
 
-    assert_eq!(r, transmute(lsx_vfmin_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmin_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3212,7 +4263,13 @@ unsafe fn test_lsx_vfmina_s() {
     let b = u32x4::new(1049119234, 1058336224, 1057046116, 1029386720);
     let r = i64x2::new(4519411155382848002, 4421182298393539560);
 
-    assert_eq!(r, transmute(lsx_vfmina_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmina_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3221,7 +4278,13 @@ unsafe fn test_lsx_vfmina_d() {
     let b = u64x2::new(4599088744110071826, 4598732503789588496);
     let r = i64x2::new(4599088744110071826, 4598732503789588496);
 
-    assert_eq!(r, transmute(lsx_vfmina_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmina_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3230,7 +4293,13 @@ unsafe fn test_lsx_vfmax_s() {
     let b = u32x4::new(1042175760, 1040826492, 1059132266, 1050815434);
     let r = i64x2::new(4557520760982391874, 4573984521684325226);
 
-    assert_eq!(r, transmute(lsx_vfmax_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmax_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3239,7 +4308,13 @@ unsafe fn test_lsx_vfmax_d() {
     let b = u64x2::new(4593616624275112016, 4605244843740986156);
     let r = i64x2::new(4606275407710467505, 4605244843740986156);
 
-    assert_eq!(r, transmute(lsx_vfmax_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmax_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3248,7 +4323,13 @@ unsafe fn test_lsx_vfmaxa_s() {
     let b = u32x4::new(1064739422, 1055122552, 1049654310, 1057411362);
     let r = i64x2::new(4531716855176798814, 4541547219258471462);
 
-    assert_eq!(r, transmute(lsx_vfmaxa_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmaxa_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3257,7 +4338,13 @@ unsafe fn test_lsx_vfmaxa_d() {
     let b = u64x2::new(4603647289310579471, 4603999027307573908);
     let r = i64x2::new(4603647289310579471, 4606304546706191737);
 
-    assert_eq!(r, transmute(lsx_vfmaxa_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfmaxa_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3265,7 +4352,7 @@ unsafe fn test_lsx_vfclass_s() {
     let a = u32x4::new(1059786314, 1058231666, 1061513647, 1038650488);
     let r = i64x2::new(549755814016, 549755814016);
 
-    assert_eq!(r, transmute(lsx_vfclass_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfclass_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3273,7 +4360,7 @@ unsafe fn test_lsx_vfclass_d() {
     let a = u64x2::new(4601724705608768104, 4601126152607382566);
     let r = i64x2::new(128, 128);
 
-    assert_eq!(r, transmute(lsx_vfclass_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfclass_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3281,7 +4368,7 @@ unsafe fn test_lsx_vfsqrt_s() {
     let a = u32x4::new(1055398716, 1050305974, 995168768, 1064901995);
     let r = i64x2::new(4543169501430832482, 4574681629207255333);
 
-    assert_eq!(r, transmute(lsx_vfsqrt_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfsqrt_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3289,7 +4376,7 @@ unsafe fn test_lsx_vfsqrt_d() {
     let a = u64x2::new(4605784293613801157, 4602267946351406890);
     let r = i64x2::new(4606453893731357485, 4604397310232711799);
 
-    assert_eq!(r, transmute(lsx_vfsqrt_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfsqrt_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3297,7 +4384,7 @@ unsafe fn test_lsx_vfrecip_s() {
     let a = u32x4::new(1003452672, 1050811504, 1044295808, 1064402913);
     let r = i64x2::new(4632552602764963931, 4577820515916044016);
 
-    assert_eq!(r, transmute(lsx_vfrecip_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrecip_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3305,7 +4392,7 @@ unsafe fn test_lsx_vfrecip_d() {
     let a = u64x2::new(4598634931235673106, 4598630619264835010);
     let r = i64x2::new(4615355353482170689, 4615362460048142095);
 
-    assert_eq!(r, transmute(lsx_vfrecip_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrecip_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx,frecipe")]
@@ -3313,7 +4400,7 @@ unsafe fn test_lsx_vfrecipe_s() {
     let a = u32x4::new(1057583779, 1062308847, 1060089100, 1048454688);
     let r = i64x2::new(4583644530211711115, 4647978179615164140);
 
-    assert_eq!(r, transmute(lsx_vfrecipe_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrecipe_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx,frecipe")]
@@ -3321,7 +4408,7 @@ unsafe fn test_lsx_vfrecipe_d() {
     let a = u64x2::new(4605515926442181274, 4605369703273365674);
     let r = i64x2::new(4608204937770303488, 4608317161507651584);
 
-    assert_eq!(r, transmute(lsx_vfrecipe_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrecipe_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx,frecipe")]
@@ -3329,7 +4416,7 @@ unsafe fn test_lsx_vfrsqrte_s() {
     let a = u32x4::new(1064377488, 1055815904, 1056897740, 1064016656);
     let r = i64x2::new(4592421282989204764, 4577184195020153336);
 
-    assert_eq!(r, transmute(lsx_vfrsqrte_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrsqrte_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx,frecipe")]
@@ -3337,7 +4424,7 @@ unsafe fn test_lsx_vfrsqrte_d() {
     let a = u64x2::new(4602766865443628663, 4605323203937791867);
     let r = i64x2::new(4608986772678901760, 4607734355383549952);
 
-    assert_eq!(r, transmute(lsx_vfrsqrte_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrsqrte_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3345,7 +4432,7 @@ unsafe fn test_lsx_vfrint_s() {
     let a = u32x4::new(1062138521, 1056849108, 1034089720, 1038314384);
     let r = i64x2::new(1065353216, 0);
 
-    assert_eq!(r, transmute(lsx_vfrint_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrint_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3353,7 +4440,7 @@ unsafe fn test_lsx_vfrint_d() {
     let a = u64x2::new(4598620052333442366, 4603262362368837514);
     let r = i64x2::new(0, 4607182418800017408);
 
-    assert_eq!(r, transmute(lsx_vfrint_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrint_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3361,7 +4448,7 @@ unsafe fn test_lsx_vfrsqrt_s() {
     let a = u32x4::new(1058614029, 1050504950, 1013814976, 1062355001);
     let r = i64x2::new(4604601921912011494, 4579384257679777264);
 
-    assert_eq!(r, transmute(lsx_vfrsqrt_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrsqrt_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3369,7 +4456,7 @@ unsafe fn test_lsx_vfrsqrt_d() {
     let a = u64x2::new(4602924191185043139, 4606088351077917251);
     let r = i64x2::new(4608881149202581394, 4607483676176768181);
 
-    assert_eq!(r, transmute(lsx_vfrsqrt_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrsqrt_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3377,7 +4464,7 @@ unsafe fn test_lsx_vflogb_s() {
     let a = u32x4::new(1053488512, 1061429282, 1064965594, 1061326585);
     let r = i64x2::new(-4647714812225126400, -4647714812233515008);
 
-    assert_eq!(r, transmute(lsx_vflogb_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vflogb_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3385,7 +4472,7 @@ unsafe fn test_lsx_vflogb_d() {
     let a = u64x2::new(4589481276789128632, 4599408395082246526);
     let r = i64x2::new(-4607182418800017408, -4611686018427387904);
 
-    assert_eq!(r, transmute(lsx_vflogb_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vflogb_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3393,7 +4480,7 @@ unsafe fn test_lsx_vfcvth_s_h() {
     let a = i16x8::new(29550, -13884, 689, -1546, 24006, -19112, -12769, 1779);
     let r = i64x2::new(-4707668984349540352, 4097818267320836096);
 
-    assert_eq!(r, transmute(lsx_vfcvth_s_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfcvth_s_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3401,7 +4488,7 @@ unsafe fn test_lsx_vfcvth_d_s() {
     let a = u32x4::new(1051543000, 1042275304, 1038283216, 1063876621);
     let r = i64x2::new(4592649323212177408, 4606389677895712768);
 
-    assert_eq!(r, transmute(lsx_vfcvth_d_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfcvth_d_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3409,7 +4496,7 @@ unsafe fn test_lsx_vfcvtl_s_h() {
     let a = i16x8::new(-21951, -13772, -17190, 9566, -19227, 9682, 13427, -30861);
     let r = i64x2::new(-4519784435355738112, 4371798972740354048);
 
-    assert_eq!(r, transmute(lsx_vfcvtl_s_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfcvtl_s_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3417,7 +4504,7 @@ unsafe fn test_lsx_vfcvtl_d_s() {
     let a = u32x4::new(1059809930, 1051084496, 1062618346, 1058273673);
     let r = i64x2::new(4604206389789720576, 4599521958080544768);
 
-    assert_eq!(r, transmute(lsx_vfcvtl_d_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfcvtl_d_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3425,7 +4512,7 @@ unsafe fn test_lsx_vftint_w_s() {
     let a = u32x4::new(1064738153, 1040181800, 1064331056, 1050732566);
     let r = i64x2::new(1, 1);
 
-    assert_eq!(r, transmute(lsx_vftint_w_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftint_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3433,7 +4520,7 @@ unsafe fn test_lsx_vftint_l_d() {
     let a = u64x2::new(4602244632405616462, 4606437548563176328);
     let r = i64x2::new(0, 1);
 
-    assert_eq!(r, transmute(lsx_vftint_l_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftint_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3441,7 +4528,7 @@ unsafe fn test_lsx_vftint_wu_s() {
     let a = u32x4::new(1051598962, 1051261298, 1059326008, 1057784192);
     let r = i64x2::new(0, 4294967297);
 
-    assert_eq!(r, transmute(lsx_vftint_wu_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftint_wu_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3449,7 +4536,7 @@ unsafe fn test_lsx_vftint_lu_d() {
     let a = u64x2::new(4605561240422589260, 4595241299507769712);
     let r = i64x2::new(1, 0);
 
-    assert_eq!(r, transmute(lsx_vftint_lu_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftint_lu_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3457,7 +4544,7 @@ unsafe fn test_lsx_vftintrz_w_s() {
     let a = u32x4::new(1027659872, 1064207676, 1058472873, 1055740014);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrz_w_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrz_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3465,7 +4552,7 @@ unsafe fn test_lsx_vftintrz_l_d() {
     let a = u64x2::new(4605051539601556532, 4605129242354661923);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrz_l_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrz_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3473,7 +4560,7 @@ unsafe fn test_lsx_vftintrz_wu_s() {
     let a = u32x4::new(1060876751, 1053710034, 1057340881, 1055555596);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrz_wu_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrz_wu_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3481,7 +4568,7 @@ unsafe fn test_lsx_vftintrz_lu_d() {
     let a = u64x2::new(4598711097624940956, 4598268778109474002);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrz_lu_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrz_lu_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3489,7 +4576,7 @@ unsafe fn test_lsx_vffint_s_w() {
     let a = i32x4::new(81337967, 1396520141, 2124859806, 1655115736);
     let r = i64x2::new(5667351778062705614, 5676028806041521555);
 
-    assert_eq!(r, transmute(lsx_vffint_s_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_vffint_s_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3497,7 +4584,7 @@ unsafe fn test_lsx_vffint_d_l() {
     let a = i64x2::new(-1543454772280682525, -7672333112582708041);
     let r = i64x2::new(-4344448119835677720, -4333977527979901593);
 
-    assert_eq!(r, transmute(lsx_vffint_d_l(transmute(a))));
+    assert_eq!(r, transmute(lsx_vffint_d_l(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3505,7 +4592,7 @@ unsafe fn test_lsx_vffint_s_wu() {
     let a = u32x4::new(2224947834, 194720725, 2248289069, 1131100007);
     let r = i64x2::new(5564675890493038082, 5658445755393114667);
 
-    assert_eq!(r, transmute(lsx_vffint_s_wu(transmute(a))));
+    assert_eq!(r, transmute(lsx_vffint_s_wu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3513,7 +4600,7 @@ unsafe fn test_lsx_vffint_d_lu() {
     let a = u64x2::new(11793247389644223387, 1356636411353166515);
     let r = i64x2::new(4892164017273962878, 4878194157796724979);
 
-    assert_eq!(r, transmute(lsx_vffint_d_lu(transmute(a))));
+    assert_eq!(r, transmute(lsx_vffint_d_lu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3526,7 +4613,13 @@ unsafe fn test_lsx_vandn_v() {
     );
     let r = i64x2::new(184648152262214664, 2315143230533931624);
 
-    assert_eq!(r, transmute(lsx_vandn_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vandn_v(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3536,7 +4629,7 @@ unsafe fn test_lsx_vneg_b() {
     );
     let r = i64x2::new(-6195839201974406282, 3566844512212398771);
 
-    assert_eq!(r, transmute(lsx_vneg_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_vneg_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3544,7 +4637,7 @@ unsafe fn test_lsx_vneg_h() {
     let a = i16x8::new(-6540, 25893, -2534, 29805, -28719, -16331, -20168, 14650);
     let r = i64x2::new(-8389350794815923828, -4123521786840387537);
 
-    assert_eq!(r, transmute(lsx_vneg_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_vneg_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3552,7 +4645,7 @@ unsafe fn test_lsx_vneg_w() {
     let a = i32x4::new(-927815384, -898911982, 716171852, -2025175544);
     let r = i64x2::new(3860797565600356056, 8698062733717804468);
 
-    assert_eq!(r, transmute(lsx_vneg_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_vneg_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3560,7 +4653,7 @@ unsafe fn test_lsx_vneg_d() {
     let a = i64x2::new(4241851098775470984, 2487122929432859927);
     let r = i64x2::new(-4241851098775470984, -2487122929432859927);
 
-    assert_eq!(r, transmute(lsx_vneg_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vneg_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3573,7 +4666,10 @@ unsafe fn test_lsx_vmuh_b() {
     );
     let r = i64x2::new(931993372669836524, 2017024359980467698);
 
-    assert_eq!(r, transmute(lsx_vmuh_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmuh_b(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3582,7 +4678,10 @@ unsafe fn test_lsx_vmuh_h() {
     let b = i16x8::new(-446, -16863, 19467, -13578, -9673, -26572, -7864, 9855);
     let r = i64x2::new(-1422322400225984462, -842721997477184351);
 
-    assert_eq!(r, transmute(lsx_vmuh_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmuh_h(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3591,7 +4690,10 @@ unsafe fn test_lsx_vmuh_w() {
     let b = i32x4::new(-1684820454, 449222301, 1106076122, 431017950);
     let r = i64x2::new(-950505610786872114, 420439596918869732);
 
-    assert_eq!(r, transmute(lsx_vmuh_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmuh_w(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3600,7 +4702,10 @@ unsafe fn test_lsx_vmuh_d() {
     let b = i64x2::new(-1208434038665242614, -6078343251861677818);
     let r = i64x2::new(-121343209662433286, 284995587689374477);
 
-    assert_eq!(r, transmute(lsx_vmuh_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmuh_d(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3613,7 +4718,13 @@ unsafe fn test_lsx_vmuh_bu() {
     );
     let r = i64x2::new(8725461799780227590, -3369022092985820632);
 
-    assert_eq!(r, transmute(lsx_vmuh_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmuh_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3622,7 +4733,13 @@ unsafe fn test_lsx_vmuh_hu() {
     let b = u16x8::new(14769, 6489, 58866, 5997, 46648, 26325, 42186, 26942);
     let r = i64x2::new(1572068217944938757, 4366267597274655896);
 
-    assert_eq!(r, transmute(lsx_vmuh_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmuh_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3631,7 +4748,13 @@ unsafe fn test_lsx_vmuh_wu() {
     let b = u32x4::new(1981234883, 1290836259, 1284878577, 702668871);
     let r = i64x2::new(4011887256539048298, 960560772888018584);
 
-    assert_eq!(r, transmute(lsx_vmuh_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmuh_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3640,7 +4763,13 @@ unsafe fn test_lsx_vmuh_du() {
     let b = u64x2::new(14805542397189366587, 10025341254588295994);
     let r = i64x2::new(-9132083796568587258, 2493261783600858707);
 
-    assert_eq!(r, transmute(lsx_vmuh_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmuh_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3650,7 +4779,7 @@ unsafe fn test_lsx_vsllwil_h_b() {
     );
     let r = i64x2::new(-990777899147527584, 126109727303143360);
 
-    assert_eq!(r, transmute(lsx_vsllwil_h_b::<5>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsllwil_h_b::<5>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3658,7 +4787,7 @@ unsafe fn test_lsx_vsllwil_w_h() {
     let a = i16x8::new(25135, -4241, 25399, -32451, 5597, -16847, 3192, -14694);
     let r = i64x2::new(-9326057613926912, -71360503652913664);
 
-    assert_eq!(r, transmute(lsx_vsllwil_w_h::<9>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsllwil_w_h::<9>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3666,7 +4795,7 @@ unsafe fn test_lsx_vsllwil_d_w() {
     let a = i32x4::new(1472328927, -2106442262, 379100488, -607174188);
     let r = i64x2::new(6030659284992, -8627987505152);
 
-    assert_eq!(r, transmute(lsx_vsllwil_d_w::<12>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vsllwil_d_w::<12>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -3676,7 +4805,10 @@ unsafe fn test_lsx_vsllwil_hu_bu() {
     );
     let r = i64x2::new(6953679870551405312, 6809531147446388736);
 
-    assert_eq!(r, transmute(lsx_vsllwil_hu_bu::<7>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsllwil_hu_bu::<7>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3684,7 +4816,10 @@ unsafe fn test_lsx_vsllwil_wu_hu() {
     let a = u16x8::new(370, 47410, 29611, 6206, 10390, 34658, 65264, 5264);
     let r = i64x2::new(52127846272954880, 6823569169558272);
 
-    assert_eq!(r, transmute(lsx_vsllwil_wu_hu::<8>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsllwil_wu_hu::<8>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3692,7 +4827,10 @@ unsafe fn test_lsx_vsllwil_du_wu() {
     let a = u32x4::new(3249798491, 4098547305, 1101510259, 3478509641);
     let r = i64x2::new(13630642809995264, 17190553355550720);
 
-    assert_eq!(r, transmute(lsx_vsllwil_du_wu::<22>(transmute(a))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsllwil_du_wu::<22>(black_box(transmute(a))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3701,7 +4839,13 @@ unsafe fn test_lsx_vsran_b_h() {
     let b = i16x8::new(-12507, -16997, -17826, 5682, -298, -28572, -8117, -13478);
     let r = i64x2::new(-864943573596831881, 0);
 
-    assert_eq!(r, transmute(lsx_vsran_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsran_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3710,7 +4854,13 @@ unsafe fn test_lsx_vsran_h_w() {
     let b = i32x4::new(-52337348, -677553123, -58200260, -1473338606);
     let r = i64x2::new(1267763303694925820, 0);
 
-    assert_eq!(r, transmute(lsx_vsran_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsran_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3719,7 +4869,13 @@ unsafe fn test_lsx_vsran_w_d() {
     let b = i64x2::new(-8585295495893484131, -2657141976436452013);
     let r = i64x2::new(-5882350952887806270, 0);
 
-    assert_eq!(r, transmute(lsx_vsran_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsran_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3728,7 +4884,13 @@ unsafe fn test_lsx_vssran_b_h() {
     let b = i16x8::new(9459, 15241, 22170, 28027, 5348, 14784, 22613, -9469);
     let r = i64x2::new(9187483431610086528, 0);
 
-    assert_eq!(r, transmute(lsx_vssran_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssran_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3737,7 +4899,13 @@ unsafe fn test_lsx_vssran_h_w() {
     let b = i32x4::new(2070726003, -944816867, -160621862, -1222036466);
     let r = i64x2::new(-5219109151313101350, 0);
 
-    assert_eq!(r, transmute(lsx_vssran_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssran_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3746,7 +4914,13 @@ unsafe fn test_lsx_vssran_w_d() {
     let b = i64x2::new(-7078666005882550400, -2564990402652718339);
     let r = i64x2::new(-15032385536, 0);
 
-    assert_eq!(r, transmute(lsx_vssran_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssran_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3755,7 +4929,13 @@ unsafe fn test_lsx_vssran_bu_h() {
     let b = u16x8::new(2372, 26267, 4722, 47876, 44857, 55242, 45998, 51450);
     let r = i64x2::new(47227865344, 0);
 
-    assert_eq!(r, transmute(lsx_vssran_bu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssran_bu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3764,7 +4944,13 @@ unsafe fn test_lsx_vssran_hu_w() {
     let b = u32x4::new(2085279153, 2679576985, 2935643238, 3797496208);
     let r = i64x2::new(281470684234479, 0);
 
-    assert_eq!(r, transmute(lsx_vssran_hu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssran_hu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3773,7 +4959,13 @@ unsafe fn test_lsx_vssran_wu_d() {
     let b = u64x2::new(3904652404244024971, 4230656884168675704);
     let r = i64x2::new(536870912000, 0);
 
-    assert_eq!(r, transmute(lsx_vssran_wu_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssran_wu_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3782,7 +4974,13 @@ unsafe fn test_lsx_vsrarn_b_h() {
     let b = i16x8::new(-19071, -903, 11542, -25909, 24111, 14882, -27192, -8283);
     let r = i64x2::new(7076043428318610384, 0);
 
-    assert_eq!(r, transmute(lsx_vsrarn_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrarn_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3791,7 +4989,13 @@ unsafe fn test_lsx_vsrarn_h_w() {
     let b = i32x4::new(-1571698573, 1467958613, -1857488008, 424713310);
     let r = i64x2::new(498163119212, 0);
 
-    assert_eq!(r, transmute(lsx_vsrarn_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrarn_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3800,7 +5004,13 @@ unsafe fn test_lsx_vsrarn_w_d() {
     let b = i64x2::new(-8645668865455529235, -3129277582817496880);
     let r = i64x2::new(-8628090759335017621, 0);
 
-    assert_eq!(r, transmute(lsx_vsrarn_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrarn_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3809,7 +5019,13 @@ unsafe fn test_lsx_vssrarn_b_h() {
     let b = i16x8::new(24298, 2343, 24641, 20910, 3142, -1171, 25850, 15932);
     let r = i64x2::new(-148338468081139694, 0);
 
-    assert_eq!(r, transmute(lsx_vssrarn_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrarn_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3818,7 +5034,13 @@ unsafe fn test_lsx_vssrarn_h_w() {
     let b = i32x4::new(1911424854, -931292983, -1710824608, -1179580317);
     let r = i64x2::new(-9223231301513904204, 0);
 
-    assert_eq!(r, transmute(lsx_vssrarn_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrarn_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3827,7 +5049,13 @@ unsafe fn test_lsx_vssrarn_w_d() {
     let b = i64x2::new(2843689038926761304, -6830262024912907383);
     let r = i64x2::new(-9223372034707292161, 0);
 
-    assert_eq!(r, transmute(lsx_vssrarn_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrarn_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3836,7 +5064,13 @@ unsafe fn test_lsx_vssrarn_bu_h() {
     let b = u16x8::new(60210, 40155, 14296, 25577, 1550, 1674, 5330, 10645);
     let r = i64x2::new(10999415373897, 0);
 
-    assert_eq!(r, transmute(lsx_vssrarn_bu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrarn_bu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3845,7 +5079,13 @@ unsafe fn test_lsx_vssrarn_hu_w() {
     let b = u32x4::new(3570029841, 3229468238, 1070101998, 3159433736);
     let r = i64x2::new(281474976645120, 0);
 
-    assert_eq!(r, transmute(lsx_vssrarn_hu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrarn_hu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3854,7 +5094,13 @@ unsafe fn test_lsx_vssrarn_wu_d() {
     let b = u64x2::new(1112771813772164907, 646071836375127186);
     let r = i64x2::new(963446, 0);
 
-    assert_eq!(r, transmute(lsx_vssrarn_wu_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrarn_wu_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3863,7 +5109,13 @@ unsafe fn test_lsx_vsrln_b_h() {
     let b = i16x8::new(-11667, 13077, -23656, 5150, -23771, -31329, 20729, 15169);
     let r = i64x2::new(23363148983015937, 0);
 
-    assert_eq!(r, transmute(lsx_vsrln_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrln_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3872,7 +5124,13 @@ unsafe fn test_lsx_vsrln_h_w() {
     let b = i32x4::new(1775989751, -1602688801, -801213995, -1801759515);
     let r = i64x2::new(-7033214568759295968, 0);
 
-    assert_eq!(r, transmute(lsx_vsrln_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrln_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3881,7 +5139,13 @@ unsafe fn test_lsx_vsrln_w_d() {
     let b = i64x2::new(-1428152872702150626, 3907864416256094744);
     let r = i64x2::new(-8718771486483115547, 0);
 
-    assert_eq!(r, transmute(lsx_vsrln_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrln_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3890,7 +5154,13 @@ unsafe fn test_lsx_vssrln_bu_h() {
     let b = u16x8::new(41072, 41125, 44619, 49581, 20733, 905, 47558, 7801);
     let r = i64x2::new(8862857593125412863, 0);
 
-    assert_eq!(r, transmute(lsx_vssrln_bu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrln_bu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3899,7 +5169,13 @@ unsafe fn test_lsx_vssrln_hu_w() {
     let b = u32x4::new(1325069171, 1380839173, 3495604120, 2839043866);
     let r = i64x2::new(16889194387279379, 0);
 
-    assert_eq!(r, transmute(lsx_vssrln_hu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrln_hu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3908,7 +5184,13 @@ unsafe fn test_lsx_vssrln_wu_d() {
     let b = u64x2::new(3908262745817581251, 17131627096934512209);
     let r = i64x2::new(-1, 0);
 
-    assert_eq!(r, transmute(lsx_vssrln_wu_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrln_wu_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3917,7 +5199,13 @@ unsafe fn test_lsx_vsrlrn_b_h() {
     let b = i16x8::new(22830, -27866, -24616, -9547, 11336, 320, 19908, 7056);
     let r = i64x2::new(-4888418841542521598, 0);
 
-    assert_eq!(r, transmute(lsx_vsrlrn_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrlrn_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3926,7 +5214,13 @@ unsafe fn test_lsx_vsrlrn_h_w() {
     let b = i32x4::new(1387862348, 119424523, 185407104, 1890720739);
     let r = i64x2::new(2222313691660711041, 0);
 
-    assert_eq!(r, transmute(lsx_vsrlrn_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrlrn_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3935,7 +5229,13 @@ unsafe fn test_lsx_vsrlrn_w_d() {
     let b = i64x2::new(-8550351213501194562, 7071641301481388656);
     let r = i64x2::new(182866822561795, 0);
 
-    assert_eq!(r, transmute(lsx_vsrlrn_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsrlrn_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3944,7 +5244,13 @@ unsafe fn test_lsx_vssrlrn_bu_h() {
     let b = u16x8::new(51122, 39148, 45511, 57479, 62603, 43668, 5537, 61004);
     let r = i64x2::new(432344477600776959, 0);
 
-    assert_eq!(r, transmute(lsx_vssrlrn_bu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrlrn_bu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3953,7 +5259,13 @@ unsafe fn test_lsx_vssrlrn_hu_w() {
     let b = u32x4::new(1618795892, 3678356443, 862445734, 2115250342);
     let r = i64x2::new(-4293983341, 0);
 
-    assert_eq!(r, transmute(lsx_vssrlrn_hu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrlrn_hu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3962,7 +5274,13 @@ unsafe fn test_lsx_vssrlrn_wu_d() {
     let b = u64x2::new(13406765083608623828, 7214649593148131096);
     let r = i64x2::new(-1, 0);
 
-    assert_eq!(r, transmute(lsx_vssrlrn_wu_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrlrn_wu_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -3977,7 +5295,10 @@ unsafe fn test_lsx_vfrstpi_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfrstpi_b::<28>(transmute(a), transmute(b)))
+        transmute(lsx_vfrstpi_b::<28>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -3987,7 +5308,13 @@ unsafe fn test_lsx_vfrstpi_h() {
     let b = i16x8::new(9590, -8044, 15088, 4172, 1721, 27581, -19895, -25679);
     let r = i64x2::new(-4160352588467724069, 5959935604366651239);
 
-    assert_eq!(r, transmute(lsx_vfrstpi_h::<1>(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfrstpi_h::<1>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4005,7 +5332,11 @@ unsafe fn test_lsx_vfrstp_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfrstp_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfrstp_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4018,7 +5349,11 @@ unsafe fn test_lsx_vfrstp_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfrstp_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfrstp_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4030,7 +5365,10 @@ unsafe fn test_lsx_vshuf4i_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vshuf4i_d::<153>(transmute(a), transmute(b)))
+        transmute(lsx_vshuf4i_d::<153>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -4041,7 +5379,7 @@ unsafe fn test_lsx_vbsrl_v() {
     );
     let r = i64x2::new(4570595419764160432, 56);
 
-    assert_eq!(r, transmute(lsx_vbsrl_v::<7>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbsrl_v::<7>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4051,7 +5389,7 @@ unsafe fn test_lsx_vbsll_v() {
     );
     let r = i64x2::new(0, -1801439850948198400);
 
-    assert_eq!(r, transmute(lsx_vbsll_v::<15>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vbsll_v::<15>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4066,7 +5404,10 @@ unsafe fn test_lsx_vextrins_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vextrins_b::<21>(transmute(a), transmute(b)))
+        transmute(lsx_vextrins_b::<21>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -4078,7 +5419,10 @@ unsafe fn test_lsx_vextrins_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vextrins_h::<33>(transmute(a), transmute(b)))
+        transmute(lsx_vextrins_h::<33>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -4090,7 +5434,10 @@ unsafe fn test_lsx_vextrins_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vextrins_w::<57>(transmute(a), transmute(b)))
+        transmute(lsx_vextrins_w::<57>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -4102,7 +5449,10 @@ unsafe fn test_lsx_vextrins_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vextrins_d::<62>(transmute(a), transmute(b)))
+        transmute(lsx_vextrins_d::<62>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -4113,7 +5463,7 @@ unsafe fn test_lsx_vmskltz_b() {
     );
     let r = i64x2::new(40038, 0);
 
-    assert_eq!(r, transmute(lsx_vmskltz_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmskltz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4121,7 +5471,7 @@ unsafe fn test_lsx_vmskltz_h() {
     let a = i16x8::new(16730, 29121, -23447, -8647, -22303, 21817, 30964, -27069);
     let r = i64x2::new(156, 0);
 
-    assert_eq!(r, transmute(lsx_vmskltz_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmskltz_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4129,7 +5479,7 @@ unsafe fn test_lsx_vmskltz_w() {
     let a = i32x4::new(-657282776, -1247210048, 162595942, 949871015);
     let r = i64x2::new(3, 0);
 
-    assert_eq!(r, transmute(lsx_vmskltz_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmskltz_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4137,7 +5487,7 @@ unsafe fn test_lsx_vmskltz_d() {
     let a = i64x2::new(7728638770319849738, 4250984610820351699);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vmskltz_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmskltz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4150,7 +5500,13 @@ unsafe fn test_lsx_vsigncov_b() {
     );
     let r = i64x2::new(-9074694153930972472, 1986788453588057010);
 
-    assert_eq!(r, transmute(lsx_vsigncov_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsigncov_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4159,7 +5515,13 @@ unsafe fn test_lsx_vsigncov_h() {
     let b = i16x8::new(27367, 4727, -2962, 14937, 26207, -19075, -26630, 10708);
     let r = i64x2::new(-4204122973533661927, -3013866947575178847);
 
-    assert_eq!(r, transmute(lsx_vsigncov_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsigncov_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4168,7 +5530,13 @@ unsafe fn test_lsx_vsigncov_w() {
     let b = i32x4::new(-1719915889, 290419288, 202835952, -1715336967);
     let r = i64x2::new(-1247341342367689359, -7367316170792699888);
 
-    assert_eq!(r, transmute(lsx_vsigncov_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsigncov_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4177,7 +5545,13 @@ unsafe fn test_lsx_vsigncov_d() {
     let b = i64x2::new(-7146260093067324952, -4263419240070336957);
     let r = i64x2::new(-7146260093067324952, 4263419240070336957);
 
-    assert_eq!(r, transmute(lsx_vsigncov_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsigncov_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4189,7 +5563,11 @@ unsafe fn test_lsx_vfmadd_s() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfmadd_s(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfmadd_s(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4202,7 +5580,11 @@ unsafe fn test_lsx_vfmadd_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfmadd_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfmadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4215,7 +5597,11 @@ unsafe fn test_lsx_vfmsub_s() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfmsub_s(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfmsub_s(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4228,7 +5614,11 @@ unsafe fn test_lsx_vfmsub_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfmsub_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfmsub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4241,7 +5631,11 @@ unsafe fn test_lsx_vfnmadd_s() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfnmadd_s(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfnmadd_s(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4254,7 +5648,11 @@ unsafe fn test_lsx_vfnmadd_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfnmadd_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfnmadd_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4267,7 +5665,11 @@ unsafe fn test_lsx_vfnmsub_s() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfnmsub_s(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfnmsub_s(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4280,7 +5682,11 @@ unsafe fn test_lsx_vfnmsub_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vfnmsub_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vfnmsub_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -4289,7 +5695,7 @@ unsafe fn test_lsx_vftintrne_w_s() {
     let a = u32x4::new(1031214064, 1059673230, 1042813024, 1053602874);
     let r = i64x2::new(4294967296, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrne_w_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrne_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4297,7 +5703,7 @@ unsafe fn test_lsx_vftintrne_l_d() {
     let a = u64x2::new(4606989588359571497, 4604713245380178790);
     let r = i64x2::new(1, 1);
 
-    assert_eq!(r, transmute(lsx_vftintrne_l_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrne_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4305,7 +5711,7 @@ unsafe fn test_lsx_vftintrp_w_s() {
     let a = u32x4::new(1061716225, 1050491008, 1064711040, 1065018777);
     let r = i64x2::new(4294967297, 4294967297);
 
-    assert_eq!(r, transmute(lsx_vftintrp_w_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrp_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4313,7 +5719,7 @@ unsafe fn test_lsx_vftintrp_l_d() {
     let a = u64x2::new(4587516915944025472, 4601504548481216392);
     let r = i64x2::new(1, 1);
 
-    assert_eq!(r, transmute(lsx_vftintrp_l_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrp_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4321,7 +5727,7 @@ unsafe fn test_lsx_vftintrm_w_s() {
     let a = u32x4::new(1045772456, 1065200707, 1061587478, 1035467272);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrm_w_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrm_w_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4329,7 +5735,7 @@ unsafe fn test_lsx_vftintrm_l_d() {
     let a = u64x2::new(4597123259408216804, 4594399417822716772);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrm_l_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrm_l_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4338,7 +5744,13 @@ unsafe fn test_lsx_vftint_w_d() {
     let b = u64x2::new(4606905060326467647, 4606985586417166381);
     let r = i64x2::new(4294967297, 0);
 
-    assert_eq!(r, transmute(lsx_vftint_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vftint_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4347,7 +5759,13 @@ unsafe fn test_lsx_vffint_s_l() {
     let b = i64x2::new(5814449889729512723, -111756032377486319);
     let r = i64x2::new(-2610252963668467161, 6669016150524087533);
 
-    assert_eq!(r, transmute(lsx_vffint_s_l(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vffint_s_l(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4356,7 +5774,13 @@ unsafe fn test_lsx_vftintrz_w_d() {
     let b = u64x2::new(4599106720144900270, 4600531579473237336);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrz_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vftintrz_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4365,7 +5789,13 @@ unsafe fn test_lsx_vftintrp_w_d() {
     let b = u64x2::new(4606104970322966899, 4595679410565085836);
     let r = i64x2::new(4294967297, 4294967297);
 
-    assert_eq!(r, transmute(lsx_vftintrp_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vftintrp_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4374,7 +5804,13 @@ unsafe fn test_lsx_vftintrm_w_d() {
     let b = u64x2::new(4606733822200032543, 4589510164179968984);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrm_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vftintrm_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4383,7 +5819,13 @@ unsafe fn test_lsx_vftintrne_w_d() {
     let b = u64x2::new(4599197176714081204, 4605745859931721980);
     let r = i64x2::new(4294967296, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrne_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vftintrne_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4391,7 +5833,7 @@ unsafe fn test_lsx_vftintl_l_s() {
     let a = u32x4::new(1058856635, 1060563398, 1061422616, 1056124918);
     let r = i64x2::new(1, 1);
 
-    assert_eq!(r, transmute(lsx_vftintl_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintl_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4399,7 +5841,7 @@ unsafe fn test_lsx_vftinth_l_s() {
     let a = u32x4::new(1045383680, 1040752748, 1061879518, 1054801708);
     let r = i64x2::new(1, 0);
 
-    assert_eq!(r, transmute(lsx_vftinth_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftinth_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4407,7 +5849,7 @@ unsafe fn test_lsx_vffinth_d_w() {
     let a = i32x4::new(517100418, -188510766, 949226647, -87467194);
     let r = i64x2::new(4741245898611228672, -4497729803343888384);
 
-    assert_eq!(r, transmute(lsx_vffinth_d_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_vffinth_d_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4415,7 +5857,7 @@ unsafe fn test_lsx_vffintl_d_w() {
     let a = i32x4::new(1273684401, -2137528906, -2109294912, -1646387998);
     let r = i64x2::new(4743129027571613696, -4476619782820462592);
 
-    assert_eq!(r, transmute(lsx_vffintl_d_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_vffintl_d_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4423,7 +5865,7 @@ unsafe fn test_lsx_vftintrzl_l_s() {
     let a = u32x4::new(1031186688, 987838976, 1034565688, 1061017371);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrzl_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrzl_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4431,7 +5873,7 @@ unsafe fn test_lsx_vftintrzh_l_s() {
     let a = u32x4::new(1049433828, 1048953580, 1060964637, 1059899586);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrzh_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrzh_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4439,7 +5881,7 @@ unsafe fn test_lsx_vftintrpl_l_s() {
     let a = u32x4::new(1061834803, 1064858941, 1060475110, 1063896216);
     let r = i64x2::new(1, 1);
 
-    assert_eq!(r, transmute(lsx_vftintrpl_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrpl_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4447,7 +5889,7 @@ unsafe fn test_lsx_vftintrph_l_s() {
     let a = u32x4::new(1059691939, 1065187151, 1059017027, 1061117394);
     let r = i64x2::new(1, 1);
 
-    assert_eq!(r, transmute(lsx_vftintrph_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrph_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4455,7 +5897,7 @@ unsafe fn test_lsx_vftintrml_l_s() {
     let a = u32x4::new(1062985651, 1065211455, 1056421466, 1057373572);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrml_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrml_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4463,7 +5905,7 @@ unsafe fn test_lsx_vftintrmh_l_s() {
     let a = u32x4::new(1050224290, 1063763666, 1057677270, 1063622234);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrmh_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrmh_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4471,7 +5913,7 @@ unsafe fn test_lsx_vftintrnel_l_s() {
     let a = u32x4::new(1060174609, 1050974638, 1047193308, 1062040876);
     let r = i64x2::new(1, 0);
 
-    assert_eq!(r, transmute(lsx_vftintrnel_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrnel_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4479,7 +5921,7 @@ unsafe fn test_lsx_vftintrneh_l_s() {
     let a = u32x4::new(1055675382, 1036879184, 1064176794, 1063791852);
     let r = i64x2::new(1, 1);
 
-    assert_eq!(r, transmute(lsx_vftintrneh_l_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vftintrneh_l_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4487,7 +5929,7 @@ unsafe fn test_lsx_vfrintrne_s() {
     let a = u32x4::new(1054667842, 1061395025, 1062986478, 1062529334);
     let r = i64x2::new(4575657221408423936, 4575657222473777152);
 
-    assert_eq!(r, transmute(lsx_vfrintrne_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrintrne_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4495,7 +5937,7 @@ unsafe fn test_lsx_vfrintrne_d() {
     let a = u64x2::new(4603260356641870565, 4601614335120512898);
     let r = i64x2::new(4607182418800017408, 0);
 
-    assert_eq!(r, transmute(lsx_vfrintrne_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrintrne_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4503,7 +5945,7 @@ unsafe fn test_lsx_vfrintrz_s() {
     let a = u32x4::new(1063039577, 1033416832, 1052369306, 1057885024);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfrintrz_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrintrz_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4511,7 +5953,7 @@ unsafe fn test_lsx_vfrintrz_d() {
     let a = u64x2::new(4601515428088814484, 4604735152905786794);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfrintrz_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrintrz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4519,7 +5961,7 @@ unsafe fn test_lsx_vfrintrp_s() {
     let a = u32x4::new(1061968959, 1056597596, 1064869916, 1058742360);
     let r = i64x2::new(4575657222473777152, 4575657222473777152);
 
-    assert_eq!(r, transmute(lsx_vfrintrp_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrintrp_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4527,7 +5969,7 @@ unsafe fn test_lsx_vfrintrp_d() {
     let a = u64x2::new(4603531792479663401, 4587997630530425392);
     let r = i64x2::new(4607182418800017408, 4607182418800017408);
 
-    assert_eq!(r, transmute(lsx_vfrintrp_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrintrp_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4535,7 +5977,7 @@ unsafe fn test_lsx_vfrintrm_s() {
     let a = u32x4::new(1058024441, 1044087184, 1059777964, 1050835426);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfrintrm_s(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrintrm_s(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4543,7 +5985,7 @@ unsafe fn test_lsx_vfrintrm_d() {
     let a = u64x2::new(4589388034824743512, 4606800774570289382);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfrintrm_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vfrintrm_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -4556,7 +5998,7 @@ unsafe fn test_lsx_vstelm_b() {
     ];
     let r = i64x2::new(2624488095427530938, -2742340989646681128);
 
-    lsx_vstelm_b::<0, 0>(transmute(a), o.as_mut_ptr());
+    lsx_vstelm_b::<0, 0>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -4568,7 +6010,7 @@ unsafe fn test_lsx_vstelm_h() {
     ];
     let r = i64x2::new(-5777879910580360821, -8010388107109560809);
 
-    lsx_vstelm_h::<0, 1>(transmute(a), o.as_mut_ptr());
+    lsx_vstelm_h::<0, 1>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -4580,7 +6022,7 @@ unsafe fn test_lsx_vstelm_w() {
     ];
     let r = i64x2::new(-7107014201697162202, -4954294907532227136);
 
-    lsx_vstelm_w::<0, 3>(transmute(a), o.as_mut_ptr());
+    lsx_vstelm_w::<0, 3>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -4592,7 +6034,7 @@ unsafe fn test_lsx_vstelm_d() {
     ];
     let r = i64x2::new(2628828971609511929, -1577551211298588582);
 
-    lsx_vstelm_d::<0, 0>(transmute(a), o.as_mut_ptr());
+    lsx_vstelm_d::<0, 0>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -4602,7 +6044,13 @@ unsafe fn test_lsx_vaddwev_d_w() {
     let b = i32x4::new(-2105551735, -1478351177, 1027048582, -607110700);
     let r = i64x2::new(-3995454036, 2115628395);
 
-    assert_eq!(r, transmute(lsx_vaddwev_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4611,7 +6059,13 @@ unsafe fn test_lsx_vaddwev_w_h() {
     let b = i16x8::new(-17479, -32614, 24343, 25426, -14077, -12419, 10115, 23013);
     let r = i64x2::new(57531086920254, -11304353922851);
 
-    assert_eq!(r, transmute(lsx_vaddwev_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4624,7 +6078,13 @@ unsafe fn test_lsx_vaddwev_h_b() {
     );
     let r = i64x2::new(-6191796646052051, 32369798417022969);
 
-    assert_eq!(r, transmute(lsx_vaddwev_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4633,7 +6093,13 @@ unsafe fn test_lsx_vaddwod_d_w() {
     let b = i32x4::new(420515981, 473447119, 1471756335, 1044924117);
     let r = i64x2::new(126219465, 3020814787);
 
-    assert_eq!(r, transmute(lsx_vaddwod_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4642,7 +6108,13 @@ unsafe fn test_lsx_vaddwod_w_h() {
     let b = i16x8::new(-26581, -22301, 18214, -3616, -24489, 12150, -10765, -24232);
     let r = i64x2::new(-151719719748481, -112154480997307);
 
-    assert_eq!(r, transmute(lsx_vaddwod_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4655,7 +6127,13 @@ unsafe fn test_lsx_vaddwod_h_b() {
     );
     let r = i64x2::new(-18014780768845678, 14636475441676413);
 
-    assert_eq!(r, transmute(lsx_vaddwod_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4664,7 +6142,13 @@ unsafe fn test_lsx_vaddwev_d_wu() {
     let b = u32x4::new(1482213353, 1001198416, 3345983326, 2244256337);
     let r = i64x2::new(4022160583, 4539965521);
 
-    assert_eq!(r, transmute(lsx_vaddwev_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4673,7 +6157,13 @@ unsafe fn test_lsx_vaddwev_w_hu() {
     let b = u16x8::new(28483, 24704, 9817, 62062, 47674, 8032, 29897, 62737);
     let r = i64x2::new(176725019407839, 226649719257774);
 
-    assert_eq!(r, transmute(lsx_vaddwev_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4686,7 +6176,13 @@ unsafe fn test_lsx_vaddwev_h_bu() {
     );
     let r = i64x2::new(85006057160704351, 47850943627526421);
 
-    assert_eq!(r, transmute(lsx_vaddwev_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4695,7 +6191,13 @@ unsafe fn test_lsx_vaddwod_d_wu() {
     let b = u32x4::new(2782520439, 2496077290, 2678772394, 196273109);
     let r = i64x2::new(4147231270, 2289089430);
 
-    assert_eq!(r, transmute(lsx_vaddwod_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4704,7 +6206,13 @@ unsafe fn test_lsx_vaddwod_w_hu() {
     let b = u16x8::new(20353, 34039, 21222, 4948, 58293, 4766, 51360, 37497);
     let r = i64x2::new(82519206727777, 206875689791292);
 
-    assert_eq!(r, transmute(lsx_vaddwod_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4717,7 +6225,13 @@ unsafe fn test_lsx_vaddwod_h_bu() {
     );
     let r = i64x2::new(73466429242409013, 32932877227196635);
 
-    assert_eq!(r, transmute(lsx_vaddwod_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4726,7 +6240,13 @@ unsafe fn test_lsx_vaddwev_d_wu_w() {
     let b = i32x4::new(-1308530150, 1427930358, 1723198474, 1987356336);
     let r = i64x2::new(2478528121, 3014708115);
 
-    assert_eq!(r, transmute(lsx_vaddwev_d_wu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_d_wu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4735,7 +6255,13 @@ unsafe fn test_lsx_vaddwev_w_hu_h() {
     let b = i16x8::new(-11621, -6593, 7431, -1189, -12361, -15174, 16182, -32434);
     let r = i64x2::new(64158221463769, 194716637325930);
 
-    assert_eq!(r, transmute(lsx_vaddwev_w_hu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_w_hu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4748,7 +6274,13 @@ unsafe fn test_lsx_vaddwev_h_bu_b() {
     );
     let r = i64x2::new(71776235037065355, -7880749580746636);
 
-    assert_eq!(r, transmute(lsx_vaddwev_h_bu_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_h_bu_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4757,7 +6289,13 @@ unsafe fn test_lsx_vaddwod_d_wu_w() {
     let b = i32x4::new(-1646368557, 586112311, 376247963, 1048800083);
     let r = i64x2::new(3497092601, 3306080422);
 
-    assert_eq!(r, transmute(lsx_vaddwod_d_wu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_d_wu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4766,7 +6304,13 @@ unsafe fn test_lsx_vaddwod_w_hu_h() {
     let b = i16x8::new(31700, 22725, 14068, -14860, -28839, -14513, -1195, 27082);
     let r = i64x2::new(-10273561712908, 369560461022726);
 
-    assert_eq!(r, transmute(lsx_vaddwod_w_hu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_w_hu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4779,7 +6323,13 @@ unsafe fn test_lsx_vaddwod_h_bu_b() {
     );
     let r = i64x2::new(49259327819481212, 19140654913421439);
 
-    assert_eq!(r, transmute(lsx_vaddwod_h_bu_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_h_bu_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4788,7 +6338,13 @@ unsafe fn test_lsx_vsubwev_d_w() {
     let b = i32x4::new(-2090701374, 629564229, -1170676885, 1069800209);
     let r = i64x2::new(4070621277, 63900397);
 
-    assert_eq!(r, transmute(lsx_vsubwev_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwev_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4797,7 +6353,13 @@ unsafe fn test_lsx_vsubwev_w_h() {
     let b = i16x8::new(-23957, 9416, -29569, -13210, 5333, 8420, 18648, -24201);
     let r = i64x2::new(228187317494294, -105188044063209);
 
-    assert_eq!(r, transmute(lsx_vsubwev_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwev_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4810,7 +6372,13 @@ unsafe fn test_lsx_vsubwev_h_b() {
     );
     let r = i64x2::new(-41939247539617653, -14355228098887689);
 
-    assert_eq!(r, transmute(lsx_vsubwev_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwev_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4819,7 +6387,13 @@ unsafe fn test_lsx_vsubwod_d_w() {
     let b = i32x4::new(1436617964, -45524609, 502994793, -2039550077);
     let r = i64x2::new(-1037882987, 3497647797);
 
-    assert_eq!(r, transmute(lsx_vsubwod_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwod_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4828,7 +6402,13 @@ unsafe fn test_lsx_vsubwod_w_h() {
     let b = i16x8::new(-1276, 12669, 24115, 19617, -26739, 1910, -757, 23994);
     let r = i64x2::new(-158286724709540, -182411556002309);
 
-    assert_eq!(r, transmute(lsx_vsubwod_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwod_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4841,7 +6421,13 @@ unsafe fn test_lsx_vsubwod_h_b() {
     );
     let r = i64x2::new(23925540523802608, 562958549909362);
 
-    assert_eq!(r, transmute(lsx_vsubwod_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwod_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4850,7 +6436,13 @@ unsafe fn test_lsx_vsubwev_d_wu() {
     let b = u32x4::new(1691253880, 1939268473, 1629937431, 2921768539);
     let r = i64x2::new(974418830, 1402878171);
 
-    assert_eq!(r, transmute(lsx_vsubwev_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwev_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4859,7 +6451,13 @@ unsafe fn test_lsx_vsubwev_w_hu() {
     let b = u16x8::new(15957, 42770, 43138, 30319, 50823, 18089, 64120, 18054);
     let r = i64x2::new(-41807211666923, -194858371266981);
 
-    assert_eq!(r, transmute(lsx_vsubwev_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwev_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4872,7 +6470,13 @@ unsafe fn test_lsx_vsubwev_h_bu() {
     );
     let r = i64x2::new(-1407181617889293, 47851128289689387);
 
-    assert_eq!(r, transmute(lsx_vsubwev_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwev_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4881,7 +6485,13 @@ unsafe fn test_lsx_vsubwod_d_wu() {
     let b = u32x4::new(103354715, 19070238, 1662532733, 3761231766);
     let r = i64x2::new(3487028338, -1512426824);
 
-    assert_eq!(r, transmute(lsx_vsubwod_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwod_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4890,7 +6500,13 @@ unsafe fn test_lsx_vsubwod_w_hu() {
     let b = u16x8::new(21739, 45406, 21733, 63910, 6659, 16020, 1211, 637);
     let r = i64x2::new(-93999654264447, 232211701825972);
 
-    assert_eq!(r, transmute(lsx_vsubwod_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwod_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4903,7 +6519,13 @@ unsafe fn test_lsx_vsubwod_h_bu() {
     );
     let r = i64x2::new(-14355150803107815, 14636020195655765);
 
-    assert_eq!(r, transmute(lsx_vsubwod_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwod_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4912,7 +6534,13 @@ unsafe fn test_lsx_vaddwev_q_d() {
     let b = i64x2::new(6738886902337351868, -5985538541381931477);
     let r = i64x2::new(5606769623790009521, 0);
 
-    assert_eq!(r, transmute(lsx_vaddwev_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4921,7 +6549,13 @@ unsafe fn test_lsx_vaddwod_q_d() {
     let b = i64x2::new(-1244049724346527963, -3275029038845457041);
     let r = i64x2::new(-4417812606654001824, -1);
 
-    assert_eq!(r, transmute(lsx_vaddwod_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4930,7 +6564,13 @@ unsafe fn test_lsx_vaddwev_q_du() {
     let b = u64x2::new(6745766838534849346, 15041258018068294402);
     let r = i64x2::new(5074243625310689089, 1);
 
-    assert_eq!(r, transmute(lsx_vaddwev_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4939,7 +6579,13 @@ unsafe fn test_lsx_vaddwod_q_du() {
     let b = u64x2::new(13496765248439164553, 4640846570780442359);
     let r = i64x2::new(-2107214925415534967, 0);
 
-    assert_eq!(r, transmute(lsx_vaddwod_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4948,7 +6594,13 @@ unsafe fn test_lsx_vsubwev_q_d() {
     let b = i64x2::new(8029026411722387723, -2105201823388787841);
     let r = i64x2::new(480269655671735476, 0);
 
-    assert_eq!(r, transmute(lsx_vsubwev_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwev_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4957,7 +6609,13 @@ unsafe fn test_lsx_vsubwod_q_d() {
     let b = i64x2::new(5758437127240728961, 2933507971643343184);
     let r = i64x2::new(-8752278892998837291, -1);
 
-    assert_eq!(r, transmute(lsx_vsubwod_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwod_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4966,7 +6624,13 @@ unsafe fn test_lsx_vsubwev_q_du() {
     let b = u64x2::new(1574118313456291324, 7787456577305510529);
     let r = i64x2::new(-4672772323591679948, 0);
 
-    assert_eq!(r, transmute(lsx_vsubwev_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwev_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4975,7 +6639,13 @@ unsafe fn test_lsx_vsubwod_q_du() {
     let b = u64x2::new(5627376085113520030, 12775637764770549815);
     let r = i64x2::new(6257163948134922640, -1);
 
-    assert_eq!(r, transmute(lsx_vsubwod_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsubwod_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4984,7 +6654,13 @@ unsafe fn test_lsx_vaddwev_q_du_d() {
     let b = i64x2::new(-1159499132550683978, -4257322329662100669);
     let r = i64x2::new(-8502520416635627524, 0);
 
-    assert_eq!(r, transmute(lsx_vaddwev_q_du_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwev_q_du_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -4993,7 +6669,13 @@ unsafe fn test_lsx_vaddwod_q_du_d() {
     let b = i64x2::new(-3902573037873546881, 160140233311333524);
     let r = i64x2::new(286209858134078253, 0);
 
-    assert_eq!(r, transmute(lsx_vaddwod_q_du_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vaddwod_q_du_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5002,7 +6684,13 @@ unsafe fn test_lsx_vmulwev_d_w() {
     let b = i32x4::new(8741677, -276509855, -1214560052, -1338519080);
     let r = i64x2::new(11251431313755612, -2205748716678689436);
 
-    assert_eq!(r, transmute(lsx_vmulwev_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5011,7 +6699,13 @@ unsafe fn test_lsx_vmulwev_w_h() {
     let b = i16x8::new(30661, -20472, 1422, -16868, 4256, 9713, -27765, -7287);
     let r = i64x2::new(-178740441125036345, 469367082934888736);
 
-    assert_eq!(r, transmute(lsx_vmulwev_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5024,7 +6718,13 @@ unsafe fn test_lsx_vmulwev_h_b() {
     );
     let r = i64x2::new(38855607073696482, 823864071118590255);
 
-    assert_eq!(r, transmute(lsx_vmulwev_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5033,7 +6733,13 @@ unsafe fn test_lsx_vmulwod_d_w() {
     let b = i32x4::new(63312847, -1377579771, -2054819244, -1416520586);
     let r = i64x2::new(1549708311038418702, 2478205834807109862);
 
-    assert_eq!(r, transmute(lsx_vmulwod_d_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5042,7 +6748,13 @@ unsafe fn test_lsx_vmulwod_w_h() {
     let b = i16x8::new(23748, 11912, 4946, -23048, 22372, 24702, -24875, -27771);
     let r = i64x2::new(3222038736804363232, 360450672278114574);
 
-    assert_eq!(r, transmute(lsx_vmulwod_w_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5055,7 +6767,13 @@ unsafe fn test_lsx_vmulwod_h_b() {
     );
     let r = i64x2::new(-351280556043402912, 951366355207905332);
 
-    assert_eq!(r, transmute(lsx_vmulwod_h_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5064,7 +6782,13 @@ unsafe fn test_lsx_vmulwev_d_wu() {
     let b = u32x4::new(1769900227, 2256955703, 2342391995, 2407560006);
     let r = i64x2::new(3651844205567962921, 7772247680216328210);
 
-    assert_eq!(r, transmute(lsx_vmulwev_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5073,7 +6797,13 @@ unsafe fn test_lsx_vmulwev_w_hu() {
     let b = u16x8::new(20499, 45056, 20580, 12771, 53914, 60742, 45402, 40547);
     let r = i64x2::new(4070644332601545987, 8033224333626513014);
 
-    assert_eq!(r, transmute(lsx_vmulwev_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5086,7 +6816,13 @@ unsafe fn test_lsx_vmulwev_h_bu() {
     );
     let r = i64x2::new(271910110892810861, 1947809607093856504);
 
-    assert_eq!(r, transmute(lsx_vmulwev_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5095,7 +6831,13 @@ unsafe fn test_lsx_vmulwod_d_wu() {
     let b = u32x4::new(3750239707, 1422851626, 1277923597, 1377279439);
     let r = i64x2::new(2821622727533716246, 3005960862740149995);
 
-    assert_eq!(r, transmute(lsx_vmulwod_d_wu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5104,7 +6846,13 @@ unsafe fn test_lsx_vmulwod_w_hu() {
     let b = u16x8::new(38950, 5357, 36233, 17707, 61077, 61518, 5789, 13317);
     let r = i64x2::new(2460325445475503463, 3109522059894091248);
 
-    assert_eq!(r, transmute(lsx_vmulwod_w_hu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5117,7 +6865,13 @@ unsafe fn test_lsx_vmulwod_h_bu() {
     );
     let r = i64x2::new(7364114643151226902, 6612146073643521312);
 
-    assert_eq!(r, transmute(lsx_vmulwod_h_bu(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5126,7 +6880,13 @@ unsafe fn test_lsx_vmulwev_d_wu_w() {
     let b = i32x4::new(1254729285, 1938836163, -1902169358, -257980375);
     let r = i64x2::new(2295762833698990875, -6669027432954818262);
 
-    assert_eq!(r, transmute(lsx_vmulwev_d_wu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_d_wu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5135,7 +6895,13 @@ unsafe fn test_lsx_vmulwev_w_hu_h() {
     let b = i16x8::new(-30477, -10049, 16428, -30668, 21000, 24834, -3219, -9555);
     let r = i64x2::new(3369342936690107644, -701630285043265176);
 
-    assert_eq!(r, transmute(lsx_vmulwev_w_hu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_w_hu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5148,7 +6914,13 @@ unsafe fn test_lsx_vmulwev_h_bu_b() {
     );
     let r = i64x2::new(-1134643098233554544, -1885853116779133038);
 
-    assert_eq!(r, transmute(lsx_vmulwev_h_bu_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_h_bu_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5157,7 +6929,13 @@ unsafe fn test_lsx_vmulwod_d_wu_w() {
     let b = i32x4::new(1204047391, -1970001586, 608763444, -2082771896);
     let r = i64x2::new(-5967343163181744876, -3673352984882804288);
 
-    assert_eq!(r, transmute(lsx_vmulwod_d_wu_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_d_wu_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5166,7 +6944,13 @@ unsafe fn test_lsx_vmulwod_w_hu_h() {
     let b = i16x8::new(-3735, -12972, -4920, 7170, 11577, 9785, 4896, -537);
     let r = i64x2::new(1024392868267999948, -48053790042385565);
 
-    assert_eq!(r, transmute(lsx_vmulwod_w_hu_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_w_hu_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5179,7 +6963,13 @@ unsafe fn test_lsx_vmulwod_h_bu_b() {
     );
     let r = i64x2::new(1905300476090387090, -3940634277386171400);
 
-    assert_eq!(r, transmute(lsx_vmulwod_h_bu_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_h_bu_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5188,7 +6978,13 @@ unsafe fn test_lsx_vmulwev_q_d() {
     let b = i64x2::new(7023560313675997328, 4368639658790376608);
     let r = i64x2::new(-1409563343912029488, -2779799970834089134);
 
-    assert_eq!(r, transmute(lsx_vmulwev_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5197,7 +6993,13 @@ unsafe fn test_lsx_vmulwod_q_d() {
     let b = i64x2::new(1734538850547798281, 6505001633960390309);
     let r = i64x2::new(655114704133495137, -1013080750363369114);
 
-    assert_eq!(r, transmute(lsx_vmulwod_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5206,7 +7008,13 @@ unsafe fn test_lsx_vmulwev_q_du() {
     let b = u64x2::new(15048173707940873365, 13594773395779002998);
     let r = i64x2::new(-4049323972691826149, 6179334620527225413);
 
-    assert_eq!(r, transmute(lsx_vmulwev_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5215,7 +7023,13 @@ unsafe fn test_lsx_vmulwod_q_du() {
     let b = u64x2::new(16172423495582959833, 11676106279348566952);
     let r = i64x2::new(-66293137947075128, 3694303051148166412);
 
-    assert_eq!(r, transmute(lsx_vmulwod_q_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5224,7 +7038,13 @@ unsafe fn test_lsx_vmulwev_q_du_d() {
     let b = i64x2::new(-7071166739782294817, 8496829998090419991);
     let r = i64x2::new(5234431817964974175, -5931105679667820544);
 
-    assert_eq!(r, transmute(lsx_vmulwev_q_du_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwev_q_du_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5233,7 +7053,13 @@ unsafe fn test_lsx_vmulwod_q_du_d() {
     let b = i64x2::new(-9085162554263782091, -3351642387065053502);
     let r = i64x2::new(-3119502026085414102, -1153233394465180223);
 
-    assert_eq!(r, transmute(lsx_vmulwod_q_du_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vmulwod_q_du_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5242,7 +7068,13 @@ unsafe fn test_lsx_vhaddw_q_d() {
     let b = i64x2::new(9222966760421493517, -8347454331188625422);
     let r = i64x2::new(6438946365641244151, 0);
 
-    assert_eq!(r, transmute(lsx_vhaddw_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhaddw_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5251,7 +7083,13 @@ unsafe fn test_lsx_vhaddw_qu_du() {
     let b = u64x2::new(2141387370256045519, 12417156199252644485);
     let r = i64x2::new(5083013417816990364, 0);
 
-    assert_eq!(r, transmute(lsx_vhaddw_qu_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhaddw_qu_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5260,7 +7098,13 @@ unsafe fn test_lsx_vhsubw_q_d() {
     let b = i64x2::new(-3245503809142406078, 8660213762027125085);
     let r = i64x2::new(817818278178354941, 0);
 
-    assert_eq!(r, transmute(lsx_vhsubw_q_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhsubw_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5269,7 +7113,13 @@ unsafe fn test_lsx_vhsubw_qu_du() {
     let b = u64x2::new(3098179646743711521, 11374525358855478565);
     let r = i64x2::new(-8990580109137044958, 0);
 
-    assert_eq!(r, transmute(lsx_vhsubw_qu_du(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vhsubw_qu_du(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5281,7 +7131,11 @@ unsafe fn test_lsx_vmaddwev_d_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwev_d_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwev_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5294,7 +7148,11 @@ unsafe fn test_lsx_vmaddwev_w_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwev_w_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwev_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5311,7 +7169,11 @@ unsafe fn test_lsx_vmaddwev_h_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwev_h_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwev_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5324,7 +7186,11 @@ unsafe fn test_lsx_vmaddwev_d_wu() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwev_d_wu(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwev_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5337,7 +7203,11 @@ unsafe fn test_lsx_vmaddwev_w_hu() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwev_w_hu(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwev_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5354,7 +7224,11 @@ unsafe fn test_lsx_vmaddwev_h_bu() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwev_h_bu(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwev_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5367,7 +7241,11 @@ unsafe fn test_lsx_vmaddwod_d_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwod_d_w(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwod_d_w(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5380,7 +7258,11 @@ unsafe fn test_lsx_vmaddwod_w_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwod_w_h(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwod_w_h(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5397,7 +7279,11 @@ unsafe fn test_lsx_vmaddwod_h_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwod_h_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwod_h_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5410,7 +7296,11 @@ unsafe fn test_lsx_vmaddwod_d_wu() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwod_d_wu(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwod_d_wu(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5423,7 +7313,11 @@ unsafe fn test_lsx_vmaddwod_w_hu() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwod_w_hu(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwod_w_hu(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5440,7 +7334,11 @@ unsafe fn test_lsx_vmaddwod_h_bu() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwod_h_bu(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwod_h_bu(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5454,9 +7352,9 @@ unsafe fn test_lsx_vmaddwev_d_wu_w() {
     assert_eq!(
         r,
         transmute(lsx_vmaddwev_d_wu_w(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -5471,9 +7369,9 @@ unsafe fn test_lsx_vmaddwev_w_hu_h() {
     assert_eq!(
         r,
         transmute(lsx_vmaddwev_w_hu_h(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -5492,9 +7390,9 @@ unsafe fn test_lsx_vmaddwev_h_bu_b() {
     assert_eq!(
         r,
         transmute(lsx_vmaddwev_h_bu_b(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -5509,9 +7407,9 @@ unsafe fn test_lsx_vmaddwod_d_wu_w() {
     assert_eq!(
         r,
         transmute(lsx_vmaddwod_d_wu_w(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -5526,9 +7424,9 @@ unsafe fn test_lsx_vmaddwod_w_hu_h() {
     assert_eq!(
         r,
         transmute(lsx_vmaddwod_w_hu_h(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -5547,9 +7445,9 @@ unsafe fn test_lsx_vmaddwod_h_bu_b() {
     assert_eq!(
         r,
         transmute(lsx_vmaddwod_h_bu_b(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -5563,7 +7461,11 @@ unsafe fn test_lsx_vmaddwev_q_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwev_q_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwev_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5576,7 +7478,11 @@ unsafe fn test_lsx_vmaddwod_q_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwod_q_d(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwod_q_d(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5589,7 +7495,11 @@ unsafe fn test_lsx_vmaddwev_q_du() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwev_q_du(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwev_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5602,7 +7512,11 @@ unsafe fn test_lsx_vmaddwod_q_du() {
 
     assert_eq!(
         r,
-        transmute(lsx_vmaddwod_q_du(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vmaddwod_q_du(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -5616,9 +7530,9 @@ unsafe fn test_lsx_vmaddwev_q_du_d() {
     assert_eq!(
         r,
         transmute(lsx_vmaddwev_q_du_d(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -5633,9 +7547,9 @@ unsafe fn test_lsx_vmaddwod_q_du_d() {
     assert_eq!(
         r,
         transmute(lsx_vmaddwod_q_du_d(
-            transmute(a),
-            transmute(b),
-            transmute(c)
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
         ))
     );
 }
@@ -5650,7 +7564,13 @@ unsafe fn test_lsx_vrotr_b() {
     );
     let r = i64x2::new(2841128540244802403, -8694309599374351908);
 
-    assert_eq!(r, transmute(lsx_vrotr_b(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vrotr_b(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5659,7 +7579,13 @@ unsafe fn test_lsx_vrotr_h() {
     let b = i16x8::new(-6485, 1418, 8263, -29872, -6491, 3930, -20621, 32531);
     let r = i64x2::new(2742461657407651598, 3308267577913279393);
 
-    assert_eq!(r, transmute(lsx_vrotr_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vrotr_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5668,7 +7594,13 @@ unsafe fn test_lsx_vrotr_w() {
     let b = i32x4::new(1956224189, -1858012941, -1889446514, -2130978943);
     let r = i64x2::new(6458469860191573231, -8548346292466177157);
 
-    assert_eq!(r, transmute(lsx_vrotr_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vrotr_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5677,7 +7609,13 @@ unsafe fn test_lsx_vrotr_d() {
     let b = i64x2::new(4553458262651691654, -5062393334123159235);
     let r = i64x2::new(-3594618648537251961, 7897385285240526033);
 
-    assert_eq!(r, transmute(lsx_vrotr_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vrotr_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5686,7 +7624,10 @@ unsafe fn test_lsx_vadd_q() {
     let b = i64x2::new(114135477458514099, 3481307531297359399);
     let r = i64x2::new(2537705118259771652, 4159381110985057604);
 
-    assert_eq!(r, transmute(lsx_vadd_q(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vadd_q(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5695,7 +7636,10 @@ unsafe fn test_lsx_vsub_q() {
     let b = i64x2::new(-8526086848853095438, -1323481969747305966);
     let r = i64x2::new(-2027679534337857341, -1789445478164204527);
 
-    assert_eq!(r, transmute(lsx_vsub_q(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vsub_q(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -5745,7 +7689,7 @@ unsafe fn test_lsx_vmskgez_b() {
     );
     let r = i64x2::new(24930, 0);
 
-    assert_eq!(r, transmute(lsx_vmskgez_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmskgez_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5755,7 +7699,7 @@ unsafe fn test_lsx_vmsknz_b() {
     );
     let r = i64x2::new(65535, 0);
 
-    assert_eq!(r, transmute(lsx_vmsknz_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_vmsknz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5765,7 +7709,7 @@ unsafe fn test_lsx_vexth_h_b() {
     );
     let r = i64x2::new(-3377613816397739, 32088276197572514);
 
-    assert_eq!(r, transmute(lsx_vexth_h_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_vexth_h_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5773,7 +7717,7 @@ unsafe fn test_lsx_vexth_w_h() {
     let a = i16x8::new(14576, -26514, 14165, -15781, 10106, 1864, 23348, 30478);
     let r = i64x2::new(8005819049850, 130902013270836);
 
-    assert_eq!(r, transmute(lsx_vexth_w_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_vexth_w_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5781,7 +7725,7 @@ unsafe fn test_lsx_vexth_d_w() {
     let a = i32x4::new(863783254, 799653326, -1122161877, -652869192);
     let r = i64x2::new(-1122161877, -652869192);
 
-    assert_eq!(r, transmute(lsx_vexth_d_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_vexth_d_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5789,7 +7733,7 @@ unsafe fn test_lsx_vexth_q_d() {
     let a = i64x2::new(2924262436748867523, 1959694872821330818);
     let r = i64x2::new(1959694872821330818, 0);
 
-    assert_eq!(r, transmute(lsx_vexth_q_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vexth_q_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5799,7 +7743,7 @@ unsafe fn test_lsx_vexth_hu_bu() {
     );
     let r = i64x2::new(61080980486815914, 60235902725652628);
 
-    assert_eq!(r, transmute(lsx_vexth_hu_bu(transmute(a))));
+    assert_eq!(r, transmute(lsx_vexth_hu_bu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5807,7 +7751,7 @@ unsafe fn test_lsx_vexth_wu_hu() {
     let a = u16x8::new(58875, 18924, 17611, 30197, 33869, 53931, 4693, 53025);
     let r = i64x2::new(231631881274445, 227740640875093);
 
-    assert_eq!(r, transmute(lsx_vexth_wu_hu(transmute(a))));
+    assert_eq!(r, transmute(lsx_vexth_wu_hu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5815,7 +7759,7 @@ unsafe fn test_lsx_vexth_du_wu() {
     let a = u32x4::new(3499742961, 2840979237, 2082263829, 1096292547);
     let r = i64x2::new(2082263829, 1096292547);
 
-    assert_eq!(r, transmute(lsx_vexth_du_wu(transmute(a))));
+    assert_eq!(r, transmute(lsx_vexth_du_wu(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5823,7 +7767,7 @@ unsafe fn test_lsx_vexth_qu_du() {
     let a = u64x2::new(14170556367894986991, 14238702840099699193);
     let r = i64x2::new(-4208041233609852423, 0);
 
-    assert_eq!(r, transmute(lsx_vexth_qu_du(transmute(a))));
+    assert_eq!(r, transmute(lsx_vexth_qu_du(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5833,7 +7777,7 @@ unsafe fn test_lsx_vrotri_b() {
     );
     let r = i64x2::new(-2919654548887155519, -96080239582005205);
 
-    assert_eq!(r, transmute(lsx_vrotri_b::<2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vrotri_b::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5841,7 +7785,7 @@ unsafe fn test_lsx_vrotri_h() {
     let a = i16x8::new(-14120, -16812, -19570, -990, 24476, -7640, 20329, 8879);
     let r = i64x2::new(-556925602567188047, 4998607264501841720);
 
-    assert_eq!(r, transmute(lsx_vrotri_h::<15>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vrotri_h::<15>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5849,7 +7793,7 @@ unsafe fn test_lsx_vrotri_w() {
     let a = i32x4::new(-1760224525, -1644621284, 1835781046, -1487934110);
     let r = i64x2::new(2845787365010917052, -6209343103231659283);
 
-    assert_eq!(r, transmute(lsx_vrotri_w::<2>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vrotri_w::<2>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5857,7 +7801,7 @@ unsafe fn test_lsx_vrotri_d() {
     let a = i64x2::new(8884634342417174882, 244175985366916345);
     let r = i64x2::new(-3963790888197019724, 4020656082573561910);
 
-    assert_eq!(r, transmute(lsx_vrotri_d::<52>(transmute(a))));
+    assert_eq!(r, transmute(lsx_vrotri_d::<52>(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5865,7 +7809,7 @@ unsafe fn test_lsx_vextl_q_d() {
     let a = i64x2::new(-5110246490938885255, 377414780188285171);
     let r = i64x2::new(-5110246490938885255, -1);
 
-    assert_eq!(r, transmute(lsx_vextl_q_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_vextl_q_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -5880,7 +7824,10 @@ unsafe fn test_lsx_vsrlni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrlni_b_h::<14>(transmute(a), transmute(b)))
+        transmute(lsx_vsrlni_b_h::<14>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5892,7 +7839,10 @@ unsafe fn test_lsx_vsrlni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrlni_h_w::<26>(transmute(a), transmute(b)))
+        transmute(lsx_vsrlni_h_w::<26>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5904,7 +7854,10 @@ unsafe fn test_lsx_vsrlni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrlni_w_d::<18>(transmute(a), transmute(b)))
+        transmute(lsx_vsrlni_w_d::<18>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5916,7 +7869,10 @@ unsafe fn test_lsx_vsrlni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrlni_d_q::<74>(transmute(a), transmute(b)))
+        transmute(lsx_vsrlni_d_q::<74>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5932,7 +7888,10 @@ unsafe fn test_lsx_vsrlrni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrlrni_b_h::<6>(transmute(a), transmute(b)))
+        transmute(lsx_vsrlrni_b_h::<6>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5944,7 +7903,10 @@ unsafe fn test_lsx_vsrlrni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrlrni_h_w::<6>(transmute(a), transmute(b)))
+        transmute(lsx_vsrlrni_h_w::<6>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5956,7 +7918,10 @@ unsafe fn test_lsx_vsrlrni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrlrni_w_d::<52>(transmute(a), transmute(b)))
+        transmute(lsx_vsrlrni_w_d::<52>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5968,7 +7933,10 @@ unsafe fn test_lsx_vsrlrni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrlrni_d_q::<101>(transmute(a), transmute(b)))
+        transmute(lsx_vsrlrni_d_q::<101>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5984,7 +7952,10 @@ unsafe fn test_lsx_vssrlni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlni_b_h::<13>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlni_b_h::<13>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -5996,7 +7967,10 @@ unsafe fn test_lsx_vssrlni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlni_h_w::<23>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlni_h_w::<23>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6008,7 +7982,10 @@ unsafe fn test_lsx_vssrlni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlni_w_d::<12>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlni_w_d::<12>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6020,7 +7997,10 @@ unsafe fn test_lsx_vssrlni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlni_d_q::<88>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlni_d_q::<88>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6036,7 +8016,10 @@ unsafe fn test_lsx_vssrlni_bu_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlni_bu_h::<13>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlni_bu_h::<13>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6048,7 +8031,10 @@ unsafe fn test_lsx_vssrlni_hu_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlni_hu_w::<9>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlni_hu_w::<9>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6060,7 +8046,10 @@ unsafe fn test_lsx_vssrlni_wu_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlni_wu_d::<59>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlni_wu_d::<59>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6072,7 +8061,10 @@ unsafe fn test_lsx_vssrlni_du_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlni_du_q::<6>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlni_du_q::<6>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6088,7 +8080,10 @@ unsafe fn test_lsx_vssrlrni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlrni_b_h::<0>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlrni_b_h::<0>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6100,7 +8095,10 @@ unsafe fn test_lsx_vssrlrni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlrni_h_w::<28>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlrni_h_w::<28>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6112,7 +8110,10 @@ unsafe fn test_lsx_vssrlrni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlrni_w_d::<1>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlrni_w_d::<1>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6124,7 +8125,10 @@ unsafe fn test_lsx_vssrlrni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlrni_d_q::<60>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlrni_d_q::<60>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6140,7 +8144,10 @@ unsafe fn test_lsx_vssrlrni_bu_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlrni_bu_h::<13>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlrni_bu_h::<13>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6152,7 +8159,10 @@ unsafe fn test_lsx_vssrlrni_hu_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlrni_hu_w::<25>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlrni_hu_w::<25>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6164,7 +8174,10 @@ unsafe fn test_lsx_vssrlrni_wu_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlrni_wu_d::<36>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlrni_wu_d::<36>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6176,7 +8189,10 @@ unsafe fn test_lsx_vssrlrni_du_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrlrni_du_q::<38>(transmute(a), transmute(b)))
+        transmute(lsx_vssrlrni_du_q::<38>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6192,7 +8208,10 @@ unsafe fn test_lsx_vsrani_b_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrani_b_h::<5>(transmute(a), transmute(b)))
+        transmute(lsx_vsrani_b_h::<5>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6204,7 +8223,10 @@ unsafe fn test_lsx_vsrani_h_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrani_h_w::<4>(transmute(a), transmute(b)))
+        transmute(lsx_vsrani_h_w::<4>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6216,7 +8238,10 @@ unsafe fn test_lsx_vsrani_w_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrani_w_d::<24>(transmute(a), transmute(b)))
+        transmute(lsx_vsrani_w_d::<24>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6228,7 +8253,10 @@ unsafe fn test_lsx_vsrani_d_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrani_d_q::<81>(transmute(a), transmute(b)))
+        transmute(lsx_vsrani_d_q::<81>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6244,7 +8272,10 @@ unsafe fn test_lsx_vsrarni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrarni_b_h::<3>(transmute(a), transmute(b)))
+        transmute(lsx_vsrarni_b_h::<3>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6256,7 +8287,10 @@ unsafe fn test_lsx_vsrarni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrarni_h_w::<15>(transmute(a), transmute(b)))
+        transmute(lsx_vsrarni_h_w::<15>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6268,7 +8302,10 @@ unsafe fn test_lsx_vsrarni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrarni_w_d::<59>(transmute(a), transmute(b)))
+        transmute(lsx_vsrarni_w_d::<59>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6280,7 +8317,10 @@ unsafe fn test_lsx_vsrarni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vsrarni_d_q::<0>(transmute(a), transmute(b)))
+        transmute(lsx_vsrarni_d_q::<0>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6296,7 +8336,10 @@ unsafe fn test_lsx_vssrani_b_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrani_b_h::<0>(transmute(a), transmute(b)))
+        transmute(lsx_vssrani_b_h::<0>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6308,7 +8351,10 @@ unsafe fn test_lsx_vssrani_h_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrani_h_w::<28>(transmute(a), transmute(b)))
+        transmute(lsx_vssrani_h_w::<28>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6320,7 +8366,10 @@ unsafe fn test_lsx_vssrani_w_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrani_w_d::<49>(transmute(a), transmute(b)))
+        transmute(lsx_vssrani_w_d::<49>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6332,7 +8381,10 @@ unsafe fn test_lsx_vssrani_d_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrani_d_q::<80>(transmute(a), transmute(b)))
+        transmute(lsx_vssrani_d_q::<80>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6348,7 +8400,10 @@ unsafe fn test_lsx_vssrani_bu_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrani_bu_h::<14>(transmute(a), transmute(b)))
+        transmute(lsx_vssrani_bu_h::<14>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6360,7 +8415,10 @@ unsafe fn test_lsx_vssrani_hu_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrani_hu_w::<23>(transmute(a), transmute(b)))
+        transmute(lsx_vssrani_hu_w::<23>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6372,7 +8430,10 @@ unsafe fn test_lsx_vssrani_wu_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrani_wu_d::<13>(transmute(a), transmute(b)))
+        transmute(lsx_vssrani_wu_d::<13>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6384,7 +8445,10 @@ unsafe fn test_lsx_vssrani_du_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrani_du_q::<33>(transmute(a), transmute(b)))
+        transmute(lsx_vssrani_du_q::<33>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6400,7 +8464,10 @@ unsafe fn test_lsx_vssrarni_b_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrarni_b_h::<2>(transmute(a), transmute(b)))
+        transmute(lsx_vssrarni_b_h::<2>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6412,7 +8479,10 @@ unsafe fn test_lsx_vssrarni_h_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrarni_h_w::<29>(transmute(a), transmute(b)))
+        transmute(lsx_vssrarni_h_w::<29>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6424,7 +8494,10 @@ unsafe fn test_lsx_vssrarni_w_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrarni_w_d::<18>(transmute(a), transmute(b)))
+        transmute(lsx_vssrarni_w_d::<18>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6436,7 +8509,10 @@ unsafe fn test_lsx_vssrarni_d_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrarni_d_q::<70>(transmute(a), transmute(b)))
+        transmute(lsx_vssrarni_d_q::<70>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6452,7 +8528,10 @@ unsafe fn test_lsx_vssrarni_bu_h() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrarni_bu_h::<14>(transmute(a), transmute(b)))
+        transmute(lsx_vssrarni_bu_h::<14>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6464,7 +8543,10 @@ unsafe fn test_lsx_vssrarni_hu_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrarni_hu_w::<13>(transmute(a), transmute(b)))
+        transmute(lsx_vssrarni_hu_w::<13>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6476,7 +8558,10 @@ unsafe fn test_lsx_vssrarni_wu_d() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrarni_wu_d::<15>(transmute(a), transmute(b)))
+        transmute(lsx_vssrarni_wu_d::<15>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6488,7 +8573,10 @@ unsafe fn test_lsx_vssrarni_du_q() {
 
     assert_eq!(
         r,
-        transmute(lsx_vssrarni_du_q::<126>(transmute(a), transmute(b)))
+        transmute(lsx_vssrarni_du_q::<126>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6500,7 +8588,10 @@ unsafe fn test_lsx_vpermi_w() {
 
     assert_eq!(
         r,
-        transmute(lsx_vpermi_w::<158>(transmute(a), transmute(b)))
+        transmute(lsx_vpermi_w::<158>(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
     );
 }
 
@@ -6524,7 +8615,7 @@ unsafe fn test_lsx_vst() {
     ];
     let r = i64x2::new(4153633675232462821, -2083384694265299697);
 
-    lsx_vst::<0>(transmute(a), o.as_mut_ptr());
+    lsx_vst::<0>(black_box(transmute(a)), o.as_mut_ptr());
     assert_eq!(r, transmute(o));
 }
 
@@ -6534,7 +8625,13 @@ unsafe fn test_lsx_vssrlrn_b_h() {
     let b = i16x8::new(17437, 9775, -20467, -31838, 5913, 4238, -7458, 2822);
     let r = i64x2::new(5981906731171643399, 0);
 
-    assert_eq!(r, transmute(lsx_vssrlrn_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrlrn_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6543,7 +8640,13 @@ unsafe fn test_lsx_vssrlrn_h_w() {
     let b = i32x4::new(-2116426818, 1641049288, 712377342, -1572394121);
     let r = i64x2::new(31243728857268226, 0);
 
-    assert_eq!(r, transmute(lsx_vssrlrn_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrlrn_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6552,7 +8655,13 @@ unsafe fn test_lsx_vssrlrn_w_d() {
     let b = i64x2::new(-3890929847852895653, -7819301294522132056);
     let r = i64x2::new(66519777023098879, 0);
 
-    assert_eq!(r, transmute(lsx_vssrlrn_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrlrn_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6561,7 +8670,13 @@ unsafe fn test_lsx_vssrln_b_h() {
     let b = i16x8::new(-14062, -29610, -24609, -8884, -1818, 32133, 29934, -6498);
     let r = i64x2::new(140183437672319, 0);
 
-    assert_eq!(r, transmute(lsx_vssrln_b_h(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrln_b_h(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6570,7 +8685,13 @@ unsafe fn test_lsx_vssrln_h_w() {
     let b = i32x4::new(-1437891045, 1546371535, -1800954476, -1892390372);
     let r = i64x2::new(2820489990832156, 0);
 
-    assert_eq!(r, transmute(lsx_vssrln_h_w(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrln_h_w(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6579,7 +8700,13 @@ unsafe fn test_lsx_vssrln_w_d() {
     let b = i64x2::new(2034490755997557661, -3470252066162700534);
     let r = i64x2::new(9223372034707292159, 0);
 
-    assert_eq!(r, transmute(lsx_vssrln_w_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vssrln_w_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6592,7 +8719,10 @@ unsafe fn test_lsx_vorn_v() {
     );
     let r = i64x2::new(-883973744907789059, -2901520201165080862);
 
-    assert_eq!(r, transmute(lsx_vorn_v(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vorn_v(black_box(transmute(a)), black_box(transmute(b))))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6615,7 +8745,11 @@ unsafe fn test_lsx_vshuf_b() {
 
     assert_eq!(
         r,
-        transmute(lsx_vshuf_b(transmute(a), transmute(b), transmute(c)))
+        transmute(lsx_vshuf_b(
+            black_box(transmute(a)),
+            black_box(transmute(b)),
+            black_box(transmute(c))
+        ))
     );
 }
 
@@ -6639,7 +8773,7 @@ unsafe fn test_lsx_vstx() {
     ];
     let r = i64x2::new(-1493444417618012559, 7191635320606490850);
 
-    lsx_vstx(transmute(a), o.as_mut_ptr(), 0);
+    lsx_vstx(black_box(transmute(a)), o.as_mut_ptr(), 0);
     assert_eq!(r, transmute(o));
 }
 
@@ -6648,7 +8782,7 @@ unsafe fn test_lsx_vextl_qu_du() {
     let a = u64x2::new(14708598110732796778, 2132245682694336458);
     let r = i64x2::new(-3738145962976754838, 0);
 
-    assert_eq!(r, transmute(lsx_vextl_qu_du(transmute(a))));
+    assert_eq!(r, transmute(lsx_vextl_qu_du(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6658,7 +8792,7 @@ unsafe fn test_lsx_bnz_b() {
     );
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lsx_bnz_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_bnz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6666,7 +8800,7 @@ unsafe fn test_lsx_bnz_d() {
     let a = u64x2::new(2935166648440262530, 9853932033129373129);
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lsx_bnz_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_bnz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6674,7 +8808,7 @@ unsafe fn test_lsx_bnz_h() {
     let a = u16x8::new(55695, 60003, 59560, 35123, 25693, 41352, 61626, 42007);
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lsx_bnz_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_bnz_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6684,7 +8818,7 @@ unsafe fn test_lsx_bnz_v() {
     );
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lsx_bnz_v(transmute(a))));
+    assert_eq!(r, transmute(lsx_bnz_v(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6692,7 +8826,7 @@ unsafe fn test_lsx_bnz_w() {
     let a = u32x4::new(1172712391, 4211490091, 1954893853, 1606462106);
     let r: i32 = 1;
 
-    assert_eq!(r, transmute(lsx_bnz_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_bnz_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6702,7 +8836,7 @@ unsafe fn test_lsx_bz_b() {
     );
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lsx_bz_b(transmute(a))));
+    assert_eq!(r, transmute(lsx_bz_b(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6710,7 +8844,7 @@ unsafe fn test_lsx_bz_d() {
     let a = u64x2::new(6051854163594201075, 9957257179760945130);
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lsx_bz_d(transmute(a))));
+    assert_eq!(r, transmute(lsx_bz_d(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6718,7 +8852,7 @@ unsafe fn test_lsx_bz_h() {
     let a = u16x8::new(19470, 29377, 53886, 60432, 20799, 41755, 54479, 52192);
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lsx_bz_h(transmute(a))));
+    assert_eq!(r, transmute(lsx_bz_h(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6728,7 +8862,7 @@ unsafe fn test_lsx_bz_v() {
     );
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lsx_bz_v(transmute(a))));
+    assert_eq!(r, transmute(lsx_bz_v(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6736,7 +8870,7 @@ unsafe fn test_lsx_bz_w() {
     let a = u32x4::new(840335855, 1404686204, 628335401, 1171808080);
     let r: i32 = 0;
 
-    assert_eq!(r, transmute(lsx_bz_w(transmute(a))));
+    assert_eq!(r, transmute(lsx_bz_w(black_box(transmute(a)))));
 }
 
 #[simd_test(enable = "lsx")]
@@ -6745,7 +8879,13 @@ unsafe fn test_lsx_vfcmp_caf_d() {
     let b = u64x2::new(4594845432849836188, 4605165420863530034);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_caf_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_caf_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6754,7 +8894,13 @@ unsafe fn test_lsx_vfcmp_caf_s() {
     let b = u32x4::new(1058412800, 1058762495, 1028487696, 1027290752);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_caf_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_caf_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6763,7 +8909,13 @@ unsafe fn test_lsx_vfcmp_ceq_d() {
     let b = u64x2::new(4605937250150464526, 4596769502461699132);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_ceq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_ceq_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6772,7 +8924,13 @@ unsafe fn test_lsx_vfcmp_ceq_s() {
     let b = u32x4::new(1057471620, 1064008655, 1062698831, 1064822930);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_ceq_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_ceq_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6781,7 +8939,13 @@ unsafe fn test_lsx_vfcmp_cle_d() {
     let b = u64x2::new(4596931282408842596, 4592481315209481584);
     let r = i64x2::new(-1, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cle_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cle_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6790,7 +8954,13 @@ unsafe fn test_lsx_vfcmp_cle_s() {
     let b = u32x4::new(1021993344, 1043028808, 1064182329, 1054794412);
     let r = i64x2::new(-4294967296, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cle_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cle_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6799,7 +8969,13 @@ unsafe fn test_lsx_vfcmp_clt_d() {
     let b = u64x2::new(4603056125735978454, 4595932368389116476);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_clt_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_clt_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6808,7 +8984,13 @@ unsafe fn test_lsx_vfcmp_clt_s() {
     let b = u32x4::new(1040327468, 1040072248, 1063314103, 1061361061);
     let r = i64x2::new(0, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_clt_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_clt_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6817,7 +8999,13 @@ unsafe fn test_lsx_vfcmp_cne_d() {
     let b = u64x2::new(4602354759349431170, 4598595124838935466);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cne_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cne_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6826,7 +9014,13 @@ unsafe fn test_lsx_vfcmp_cne_s() {
     let b = u32x4::new(1063262940, 1058010357, 1052721962, 1061295988);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cne_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cne_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6835,7 +9029,13 @@ unsafe fn test_lsx_vfcmp_cor_d() {
     let b = u64x2::new(4606863361114437050, 4600753700959452152);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cor_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cor_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6844,7 +9044,13 @@ unsafe fn test_lsx_vfcmp_cor_s() {
     let b = u32x4::new(1053615382, 1065255138, 1051565294, 1041776832);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cor_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cor_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6853,7 +9059,13 @@ unsafe fn test_lsx_vfcmp_cueq_d() {
     let b = u64x2::new(4603317345052528721, 4586734343919602352);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cueq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cueq_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6862,7 +9074,13 @@ unsafe fn test_lsx_vfcmp_cueq_s() {
     let b = u32x4::new(1057082822, 1059761998, 1052599998, 1054369118);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cueq_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cueq_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6871,7 +9089,13 @@ unsafe fn test_lsx_vfcmp_cule_d() {
     let b = u64x2::new(4604253448175093958, 4599648167588382448);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cule_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cule_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6880,7 +9104,13 @@ unsafe fn test_lsx_vfcmp_cule_s() {
     let b = u32x4::new(1051100696, 1062219104, 1064568294, 1032521352);
     let r = i64x2::new(-4294967296, 4294967295);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cule_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cule_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6889,7 +9119,13 @@ unsafe fn test_lsx_vfcmp_cult_d() {
     let b = u64x2::new(4602944708025910986, 4606429728449082215);
     let r = i64x2::new(0, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cult_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cult_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6898,7 +9134,13 @@ unsafe fn test_lsx_vfcmp_cult_s() {
     let b = u32x4::new(1030808384, 1044268840, 1050761328, 1037308928);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cult_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cult_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6907,7 +9149,13 @@ unsafe fn test_lsx_vfcmp_cun_d() {
     let b = u64x2::new(4599145506416791474, 4602762942707610466);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cun_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cun_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6916,7 +9164,13 @@ unsafe fn test_lsx_vfcmp_cune_d() {
     let b = u64x2::new(4602895209237804084, 4598685577984089858);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cune_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cune_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6925,7 +9179,13 @@ unsafe fn test_lsx_vfcmp_cune_s() {
     let b = u32x4::new(1049955876, 1032474200, 1023410112, 1050347912);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cune_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cune_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6934,7 +9194,13 @@ unsafe fn test_lsx_vfcmp_cun_s() {
     let b = u32x4::new(1053288920, 1059911123, 1058695573, 1062913175);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_cun_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_cun_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6943,7 +9209,13 @@ unsafe fn test_lsx_vfcmp_saf_d() {
     let b = u64x2::new(4589118818065931376, 4603302333347826011);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_saf_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_saf_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6952,7 +9224,13 @@ unsafe fn test_lsx_vfcmp_saf_s() {
     let b = u32x4::new(1044756936, 1054667546, 1059141760, 1062203553);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_saf_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_saf_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6961,7 +9239,13 @@ unsafe fn test_lsx_vfcmp_seq_d() {
     let b = u64x2::new(4594167956310606988, 4596272126122589228);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_seq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_seq_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6970,7 +9254,13 @@ unsafe fn test_lsx_vfcmp_seq_s() {
     let b = u32x4::new(1057231588, 1051495460, 1057998997, 1049117328);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_seq_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_seq_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6979,7 +9269,13 @@ unsafe fn test_lsx_vfcmp_sle_d() {
     let b = u64x2::new(4603919005855163252, 4594682846653946884);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sle_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sle_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6988,7 +9284,13 @@ unsafe fn test_lsx_vfcmp_sle_s() {
     let b = u32x4::new(1045989468, 1052518900, 1046184640, 1032417352);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sle_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sle_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -6997,7 +9299,13 @@ unsafe fn test_lsx_vfcmp_slt_d() {
     let b = u64x2::new(4600564867142526828, 4585131890265864544);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_slt_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_slt_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7006,7 +9314,13 @@ unsafe fn test_lsx_vfcmp_slt_s() {
     let b = u32x4::new(1063435026, 1062439603, 1060665555, 1059252630);
     let r = i64x2::new(-1, -4294967296);
 
-    assert_eq!(r, transmute(lsx_vfcmp_slt_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_slt_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7015,7 +9329,13 @@ unsafe fn test_lsx_vfcmp_sne_d() {
     let b = u64x2::new(4606789952952688555, 4605380358192261377);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sne_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sne_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7024,7 +9344,13 @@ unsafe fn test_lsx_vfcmp_sne_s() {
     let b = u32x4::new(1055803760, 1063372602, 1062608900, 1054634370);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sne_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sne_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7033,7 +9359,13 @@ unsafe fn test_lsx_vfcmp_sor_d() {
     let b = u64x2::new(4606380175568635560, 4602092067387067462);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sor_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sor_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7042,7 +9374,13 @@ unsafe fn test_lsx_vfcmp_sor_s() {
     let b = u32x4::new(1064534350, 1035771168, 1059142426, 1034677600);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sor_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sor_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7051,7 +9389,13 @@ unsafe fn test_lsx_vfcmp_sueq_d() {
     let b = u64x2::new(4602917609947054533, 4605983209212177197);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sueq_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sueq_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7060,7 +9404,13 @@ unsafe fn test_lsx_vfcmp_sueq_s() {
     let b = u32x4::new(1064871165, 1059796257, 1055456352, 1058662692);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sueq_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sueq_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7069,7 +9419,13 @@ unsafe fn test_lsx_vfcmp_sule_d() {
     let b = u64x2::new(4594044173266256632, 4601549551994738386);
     let r = i64x2::new(0, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sule_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sule_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7078,7 +9434,13 @@ unsafe fn test_lsx_vfcmp_sule_s() {
     let b = u32x4::new(1061061244, 1051874412, 1041025316, 1056018690);
     let r = i64x2::new(4294967295, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sule_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sule_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7087,7 +9449,13 @@ unsafe fn test_lsx_vfcmp_sult_d() {
     let b = u64x2::new(4603848042095479627, 4605032971316970060);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sult_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sult_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7096,7 +9464,13 @@ unsafe fn test_lsx_vfcmp_sult_s() {
     let b = u32x4::new(1053631630, 1064026599, 1058029398, 1041182304);
     let r = i64x2::new(-4294967296, 4294967295);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sult_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sult_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7105,7 +9479,13 @@ unsafe fn test_lsx_vfcmp_sun_d() {
     let b = u64x2::new(4560681020073292800, 4604624347352815433);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sun_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sun_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7114,7 +9494,13 @@ unsafe fn test_lsx_vfcmp_sune_d() {
     let b = u64x2::new(4593947987798339484, 4603656097008761637);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sune_d(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sune_d(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7123,7 +9509,13 @@ unsafe fn test_lsx_vfcmp_sune_s() {
     let b = u32x4::new(1049327168, 1034635272, 1042258196, 1062844003);
     let r = i64x2::new(-1, -1);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sune_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sune_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
@@ -7132,7 +9524,13 @@ unsafe fn test_lsx_vfcmp_sun_s() {
     let b = u32x4::new(1057442863, 1064573466, 1058086753, 1015993248);
     let r = i64x2::new(0, 0);
 
-    assert_eq!(r, transmute(lsx_vfcmp_sun_s(transmute(a), transmute(b))));
+    assert_eq!(
+        r,
+        transmute(lsx_vfcmp_sun_s(
+            black_box(transmute(a)),
+            black_box(transmute(b))
+        ))
+    );
 }
 
 #[simd_test(enable = "lsx")]
diff --git a/crates/core_arch/src/loongarch64/mod.rs b/crates/core_arch/src/loongarch64/mod.rs
index ab968aff20..f464dbd356 100644
--- a/crates/core_arch/src/loongarch64/mod.rs
+++ b/crates/core_arch/src/loongarch64/mod.rs
@@ -2,6 +2,7 @@
 
 mod lasx;
 mod lsx;
+mod simd;
 
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub use self::lasx::*;
@@ -11,7 +12,7 @@ pub use self::lsx::*;
 use crate::arch::asm;
 
 /// Reads the 64-bit stable counter value and the counter ID
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn rdtime_d() -> (i64, isize) {
     let (val, tid): (i64, isize);
@@ -48,21 +49,21 @@ unsafe extern "unadjusted" {
 }
 
 /// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn crc_w_d_w(a: i64, b: i32) -> i32 {
     unsafe { __crc_w_d_w(a, b) }
 }
 
 /// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn crcc_w_d_w(a: i64, b: i32) -> i32 {
     unsafe { __crcc_w_d_w(a, b) }
 }
 
 /// Generates the cache operation instruction
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn cacop<const IMM5: i64, const IMM_S12: i64>(b: i64) {
     static_assert_uimm_bits!(IMM5, 5);
@@ -71,7 +72,7 @@ pub unsafe fn cacop<const IMM5: i64, const IMM_S12: i64>(b: i64) {
 }
 
 /// Reads the CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn csrrd<const IMM14: i32>() -> i64 {
     static_assert_uimm_bits!(IMM14, 14);
@@ -79,7 +80,7 @@ pub unsafe fn csrrd<const IMM14: i32>() -> i64 {
 }
 
 /// Writes the CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn csrwr<const IMM14: i32>(a: i64) -> i64 {
     static_assert_uimm_bits!(IMM14, 14);
@@ -87,7 +88,7 @@ pub unsafe fn csrwr<const IMM14: i32>(a: i64) -> i64 {
 }
 
 /// Exchanges the CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn csrxchg<const IMM14: i32>(a: i64, b: i64) -> i64 {
     static_assert_uimm_bits!(IMM14, 14);
@@ -95,35 +96,35 @@ pub unsafe fn csrxchg<const IMM14: i32>(a: i64, b: i64) -> i64 {
 }
 
 /// Reads the 64-bit IO-CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn iocsrrd_d(a: i32) -> i64 {
     __iocsrrd_d(a)
 }
 
 /// Writes the 64-bit IO-CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn iocsrwr_d(a: i64, b: i32) {
     __iocsrwr_d(a, b)
 }
 
 /// Generates the less-than-or-equal asseration instruction
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn asrtle(a: i64, b: i64) {
     __asrtle(a, b);
 }
 
 /// Generates the greater-than asseration instruction
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn asrtgt(a: i64, b: i64) {
     __asrtgt(a, b);
 }
 
 /// Loads the page table directory entry
-#[inline]
+#[inline(always)]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn lddir<const IMM8: i64>(a: i64) -> i64 {
@@ -132,7 +133,7 @@ pub unsafe fn lddir<const IMM8: i64>(a: i64) -> i64 {
 }
 
 /// Loads the page table entry
-#[inline]
+#[inline(always)]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn ldpte<const IMM8: i64>(a: i64) {
diff --git a/crates/core_arch/src/loongarch64/simd.rs b/crates/core_arch/src/loongarch64/simd.rs
new file mode 100644
index 0000000000..b4ec6881c3
--- /dev/null
+++ b/crates/core_arch/src/loongarch64/simd.rs
@@ -0,0 +1,339 @@
+//! LoongArch64 SIMD helpers
+
+use self as ls;
+use crate::intrinsics::simd as is;
+
+// Internal extension trait for concrete `Simd<T, N>` types.
+//
+// Provides a small set of helper functionality (`Elem` and `splat`)
+// so generic and macro-based code can operate on different SIMD
+// vector types in a uniform way.
+pub(super) const trait SimdExt: Sized {
+    type Elem;
+
+    unsafe fn splat(v: i64) -> Self;
+}
+
+macro_rules! impl_simd_ext {
+    ($v:ident, $e:ty) => {
+        #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+        impl const SimdExt for crate::core_arch::simd::$v {
+            type Elem = $e;
+
+            #[inline(always)]
+            unsafe fn splat(v: i64) -> Self {
+                is::simd_splat(v as Self::Elem)
+            }
+        }
+    };
+}
+
+impl_simd_ext!(i8x16, i8);
+impl_simd_ext!(i8x32, i8);
+impl_simd_ext!(u8x16, u8);
+impl_simd_ext!(u8x32, u8);
+impl_simd_ext!(i16x8, i16);
+impl_simd_ext!(i16x16, i16);
+impl_simd_ext!(u16x8, u16);
+impl_simd_ext!(u16x16, u16);
+impl_simd_ext!(i32x4, i32);
+impl_simd_ext!(i32x8, i32);
+impl_simd_ext!(u32x4, u32);
+impl_simd_ext!(u32x8, u32);
+impl_simd_ext!(i64x2, i64);
+impl_simd_ext!(i64x4, i64);
+impl_simd_ext!(u64x2, u64);
+impl_simd_ext!(u64x4, u64);
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const unsafe fn simd_abs<T: Copy + const SimdExt>(a: T) -> T {
+    let m: T = is::simd_lt(a, ls::simd_splat(0));
+    is::simd_select(m, is::simd_neg(a), a)
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const unsafe fn simd_absd<T: Copy>(a: T, b: T) -> T {
+    let m: T = is::simd_gt(a, b);
+    is::simd_select(m, is::simd_sub(a, b), is::simd_sub(b, a))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const unsafe fn simd_adda<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    is::simd_add(ls::simd_abs(a), ls::simd_abs(b))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_andn<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    is::simd_and(ls::simd_not(a), b)
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_bitclr<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    ls::simd_andn(ls::simd_shl(ls::simd_splat(1), b), a)
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_bitrev<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    is::simd_xor(ls::simd_shl(ls::simd_splat(1), b), a)
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_bitset<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    is::simd_or(ls::simd_shl(ls::simd_splat(1), b), a)
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_fmsub<T: Copy>(a: T, b: T, c: T) -> T {
+    is::simd_fma(a, b, is::simd_neg(c))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_fnmadd<T: Copy>(a: T, b: T, c: T) -> T {
+    is::simd_neg(is::simd_fma(a, b, c))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_fnmsub<T: Copy>(a: T, b: T, c: T) -> T {
+    is::simd_neg(ls::simd_fmsub(a, b, c))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_madd<T: Copy>(a: T, b: T, c: T) -> T {
+    is::simd_add(a, is::simd_mul(b, c))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_msub<T: Copy>(a: T, b: T, c: T) -> T {
+    is::simd_sub(a, is::simd_mul(b, c))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_nor<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    ls::simd_not(is::simd_or(a, b))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_not<T: Copy + const SimdExt>(a: T) -> T {
+    is::simd_xor(a, ls::simd_splat(!0))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_orn<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    is::simd_or(a, ls::simd_not(b))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_shl<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    let m = (size_of::<T::Elem>() * 8 - 1) as i64;
+    is::simd_shl(a, is::simd_and(b, ls::simd_splat(m)))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_shr<T: Copy + const SimdExt>(a: T, b: T) -> T {
+    let m = (size_of::<T::Elem>() * 8 - 1) as i64;
+    is::simd_shr(a, is::simd_and(b, ls::simd_splat(m)))
+}
+
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(super) const unsafe fn simd_splat<T: Copy + const SimdExt>(a: i64) -> T {
+    T::splat(a)
+}
+
+macro_rules! impl_vv {
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ty) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name(a: $oty) -> $oty {
+            unsafe {
+                let a: $ity = transmute(a);
+                let r: $ity = $op(a);
+                transmute(r)
+            }
+        }
+    };
+}
+
+pub(super) use impl_vv;
+
+macro_rules! impl_gv {
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $gty:ty) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name(a: $gty) -> $oty {
+            unsafe {
+                let r: $ity = $op(a.into());
+                transmute(r)
+            }
+        }
+    };
+}
+
+pub(super) use impl_gv;
+
+macro_rules! impl_sv {
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $ibs:expr) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[rustc_legacy_const_generics(0)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name<const IMM: i32>() -> $oty {
+            static_assert_simm_bits!(IMM, $ibs);
+            unsafe {
+                let r: $ity = $op(IMM.into());
+                transmute(r)
+            }
+        }
+    };
+}
+
+pub(super) use impl_sv;
+
+macro_rules! impl_vvv {
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ty) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name(a: $oty, b: $oty) -> $oty {
+            unsafe {
+                let a: $ity = transmute(a);
+                let b: $ity = transmute(b);
+                let r: $ity = $op(a, b);
+                transmute(r)
+            }
+        }
+    };
+}
+
+pub(super) use impl_vvv;
+
+macro_rules! impl_vuv {
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[rustc_legacy_const_generics(1)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name<const IMM: u32>(a: $oty) -> $oty {
+            static_assert_uimm_bits!(IMM, (size_of::<<$ity as SimdExt>::Elem>() * 8).ilog2());
+            unsafe {
+                let a: $ity = transmute(a);
+                let b: $ity = ls::simd_splat(IMM.into());
+                let r: $ity = $op(a, b);
+                transmute(r)
+            }
+        }
+    };
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $ibs:expr) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[rustc_legacy_const_generics(1)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name<const IMM: u32>(a: $oty) -> $oty {
+            static_assert_uimm_bits!(IMM, $ibs);
+            unsafe {
+                let a: $ity = transmute(a);
+                let b: $ity = ls::simd_splat(IMM.into());
+                let r: $ity = $op(a, b);
+                transmute(r)
+            }
+        }
+    };
+}
+
+pub(super) use impl_vuv;
+
+macro_rules! impl_vug {
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $gty:ty, $ibs:expr) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[rustc_legacy_const_generics(1)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name<const IMM: u32>(a: $oty) -> $gty {
+            static_assert_uimm_bits!(IMM, $ibs);
+            unsafe {
+                let a: $ity = transmute(a);
+                let r: <$ity as SimdExt>::Elem = $op(a, IMM);
+                r as $gty
+            }
+        }
+    };
+}
+
+pub(super) use impl_vug;
+
+macro_rules! impl_vsv {
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $ibs:expr) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[rustc_legacy_const_generics(1)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name<const IMM: i32>(a: $oty) -> $oty {
+            static_assert_simm_bits!(IMM, $ibs);
+            unsafe {
+                let a: $ity = transmute(a);
+                let b: $ity = ls::simd_splat(IMM.into());
+                let r: $ity = $op(a, b);
+                transmute(r)
+            }
+        }
+    };
+}
+
+pub(super) use impl_vsv;
+
+macro_rules! impl_vvvv {
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ty) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name(a: $oty, b: $oty, c: $oty) -> $oty {
+            unsafe {
+                let a: $ity = transmute(a);
+                let b: $ity = transmute(b);
+                let c: $ity = transmute(c);
+                let r: $ity = $op(a, b, c);
+                transmute(r)
+            }
+        }
+    };
+}
+
+pub(super) use impl_vvvv;
+
+macro_rules! impl_vugv {
+    ($ft:literal, $name:ident, $op:path, $oty:ty, $ity:ident, $gty:ty, $ibs:expr) => {
+        #[inline]
+        #[target_feature(enable = $ft)]
+        #[rustc_legacy_const_generics(1)]
+        #[unstable(feature = "stdarch_loongarch", issue = "117427")]
+        pub fn $name<const IMM: u32>(a: $oty, b: $gty) -> $oty {
+            static_assert_uimm_bits!(IMM, $ibs);
+            unsafe {
+                let a: $ity = transmute(a);
+                let r: $ity = $op(a, IMM, b as <$ity as SimdExt>::Elem);
+                transmute(r)
+            }
+        }
+    };
+}
+
+pub(super) use impl_vugv;
diff --git a/crates/core_arch/src/loongarch_shared/mod.rs b/crates/core_arch/src/loongarch_shared/mod.rs
index 8991fe8576..948c98df61 100644
--- a/crates/core_arch/src/loongarch_shared/mod.rs
+++ b/crates/core_arch/src/loongarch_shared/mod.rs
@@ -3,7 +3,7 @@
 use crate::arch::asm;
 
 /// Reads the lower 32-bit stable counter value and the counter ID
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn rdtimel_w() -> (i32, isize) {
     let (val, tid): (i32, isize);
@@ -12,7 +12,7 @@ pub fn rdtimel_w() -> (i32, isize) {
 }
 
 /// Reads the upper 32-bit stable counter value and the counter ID
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn rdtimeh_w() -> (i32, isize) {
     let (val, tid): (i32, isize);
@@ -71,49 +71,49 @@ unsafe extern "unadjusted" {
 }
 
 /// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn crc_w_b_w(a: i32, b: i32) -> i32 {
     unsafe { __crc_w_b_w(a, b) }
 }
 
 /// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn crc_w_h_w(a: i32, b: i32) -> i32 {
     unsafe { __crc_w_h_w(a, b) }
 }
 
 /// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320)
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn crc_w_w_w(a: i32, b: i32) -> i32 {
     unsafe { __crc_w_w_w(a, b) }
 }
 
 /// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn crcc_w_b_w(a: i32, b: i32) -> i32 {
     unsafe { __crcc_w_b_w(a, b) }
 }
 
 /// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn crcc_w_h_w(a: i32, b: i32) -> i32 {
     unsafe { __crcc_w_h_w(a, b) }
 }
 
 /// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78)
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn crcc_w_w_w(a: i32, b: i32) -> i32 {
     unsafe { __crcc_w_w_w(a, b) }
 }
 
 /// Generates the memory barrier instruction
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn dbar<const IMM15: i32>() {
     static_assert_uimm_bits!(IMM15, 15);
@@ -121,7 +121,7 @@ pub fn dbar<const IMM15: i32>() {
 }
 
 /// Generates the instruction-fetch barrier instruction
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn ibar<const IMM15: i32>() {
     static_assert_uimm_bits!(IMM15, 15);
@@ -129,7 +129,7 @@ pub fn ibar<const IMM15: i32>() {
 }
 
 /// Moves data from a GPR to the FCSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn movgr2fcsr<const IMM2: i32>(a: i32) {
     static_assert_uimm_bits!(IMM2, 2);
@@ -137,7 +137,7 @@ pub unsafe fn movgr2fcsr<const IMM2: i32>(a: i32) {
 }
 
 /// Moves data from a FCSR to the GPR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn movfcsr2gr<const IMM2: i32>() -> i32 {
     static_assert_uimm_bits!(IMM2, 2);
@@ -145,49 +145,49 @@ pub fn movfcsr2gr<const IMM2: i32>() -> i32 {
 }
 
 /// Reads the 8-bit IO-CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn iocsrrd_b(a: i32) -> i32 {
     __iocsrrd_b(a)
 }
 
 /// Reads the 16-bit IO-CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn iocsrrd_h(a: i32) -> i32 {
     __iocsrrd_h(a)
 }
 
 /// Reads the 32-bit IO-CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn iocsrrd_w(a: i32) -> i32 {
     __iocsrrd_w(a)
 }
 
 /// Writes the 8-bit IO-CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn iocsrwr_b(a: i32, b: i32) {
     __iocsrwr_b(a, b)
 }
 
 /// Writes the 16-bit IO-CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn iocsrwr_h(a: i32, b: i32) {
     __iocsrwr_h(a, b)
 }
 
 /// Writes the 32-bit IO-CSR
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn iocsrwr_w(a: i32, b: i32) {
     __iocsrwr_w(a, b)
 }
 
 /// Generates the breakpoint instruction
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn brk<const IMM15: i32>() {
     static_assert_uimm_bits!(IMM15, 15);
@@ -195,14 +195,14 @@ pub unsafe fn brk<const IMM15: i32>() {
 }
 
 /// Reads the CPU configuration register
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub fn cpucfg(a: i32) -> i32 {
     unsafe { __cpucfg(a) }
 }
 
 /// Generates the syscall instruction
-#[inline]
+#[inline(always)]
 #[unstable(feature = "stdarch_loongarch", issue = "117427")]
 pub unsafe fn syscall<const IMM15: i32>() {
     static_assert_uimm_bits!(IMM15, 15);
diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs
index e00b433536..def2fd682b 100644
--- a/crates/core_arch/src/macros.rs
+++ b/crates/core_arch/src/macros.rs
@@ -14,6 +14,22 @@ macro_rules! static_assert {
     };
 }
 
+#[allow(unused_macros)]
+macro_rules! static_assert_range {
+    ($imm:ident, $min:literal..=$max:literal) => {
+        static_assert!(
+            $min <= $imm && $imm <= $max,
+            concat!(
+                stringify!($imm),
+                " is not in range ",
+                stringify!($min),
+                "-",
+                stringify!($max),
+            )
+        )
+    };
+}
+
 #[allow(unused_macros)]
 macro_rules! static_assert_uimm_bits {
     ($imm:ident, $bits:expr) => {
@@ -26,7 +42,7 @@ macro_rules! static_assert_uimm_bits {
                     stringify!($imm),
                     " doesn't fit in ",
                     stringify!($bits),
-                    " bits",
+                    " bits (unsigned)",
                 )
             )
         }
@@ -42,7 +58,7 @@ macro_rules! static_assert_simm_bits {
                 stringify!($imm),
                 " doesn't fit in ",
                 stringify!($bits),
-                " bits",
+                " bits (signed)",
             )
         )
     };
@@ -90,17 +106,10 @@ macro_rules! types {
         pub struct $name($v [$elem_type; $len]);
 
         impl $name {
-            /// Using `my_simd([x; N])` seemingly fails tests,
-            /// so use this internal helper for it instead.
+            /// Put the same value in every lane.
             #[inline(always)]
             $v fn splat(value: $elem_type) -> $name {
-                #[derive(Copy, Clone)]
-                #[repr(simd)]
-                struct JustOne([$elem_type; 1]);
-                let one = JustOne([value]);
-                // SAFETY: 0 is always in-bounds because we're shuffling
-                // a simd type with exactly one element.
-                unsafe { simd_shuffle!(one, one, [0; $len]) }
+                unsafe { $crate::intrinsics::simd::simd_splat(value) }
             }
 
             /// Returns an array reference containing the entire SIMD vector.
@@ -135,6 +144,22 @@ macro_rules! types {
                 crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array())
             }
         }
+
+        $(#[$stability])+
+        impl crate::convert::From<crate::core_arch::simd::Simd<$elem_type, $len>> for $name {
+            #[inline(always)]
+            fn from(simd: crate::core_arch::simd::Simd<$elem_type, $len>) -> Self {
+                unsafe { crate::mem::transmute(simd) }
+            }
+        }
+
+        $(#[$stability])+
+        impl crate::convert::From<$name> for crate::core_arch::simd::Simd<$elem_type, $len> {
+            #[inline(always)]
+            fn from(simd: $name) -> Self {
+                unsafe { crate::mem::transmute(simd) }
+            }
+        }
     )*);
 }
 
@@ -163,3 +188,190 @@ macro_rules! simd_extract {
     ($x:expr, $idx:expr $(,)?) => {{ $crate::intrinsics::simd::simd_extract($x, const { $idx }) }};
     ($x:expr, $idx:expr, $ty:ty $(,)?) => {{ $crate::intrinsics::simd::simd_extract::<_, $ty>($x, const { $idx }) }};
 }
+
+#[allow(unused)]
+macro_rules! simd_masked_load {
+    ($align:expr, $mask:expr, $ptr:expr, $default:expr) => {
+        $crate::intrinsics::simd::simd_masked_load::<_, _, _, { $align }>($mask, $ptr, $default)
+    };
+}
+
+#[allow(unused)]
+macro_rules! simd_masked_store {
+    ($align:expr, $mask:expr, $ptr:expr, $default:expr) => {
+        $crate::intrinsics::simd::simd_masked_store::<_, _, _, { $align }>($mask, $ptr, $default)
+    };
+}
+
+/// The first N indices `[0, 1, 2, ...]`.
+pub(crate) const fn identity<const N: usize>() -> [u32; N] {
+    let mut out = [0u32; N];
+    let mut i = 0usize;
+    while i < N {
+        out[i] = i as u32;
+        i += 1;
+    }
+    out
+}
+
+/// The first N even indices `[0, 2, 4, ...]`.
+pub(crate) const fn even<const N: usize>() -> [u32; N] {
+    let mut out = [0u32; N];
+    let mut i = 0usize;
+    while i < N {
+        out[i] = (2 * i) as u32;
+        i += 1;
+    }
+    out
+}
+
+/// The first N odd indices `[1, 3, 5, ...]`.
+pub(crate) const fn odd<const N: usize>() -> [u32; N] {
+    let mut out = [0u32; N];
+    let mut i = 0usize;
+    while i < N {
+        out[i] = (2 * i + 1) as u32;
+        i += 1;
+    }
+    out
+}
+
+/// Multiples of N offset by K `[K, K+N, K+2N, ...]`.
+pub(crate) const fn deinterleave_mask<const LANES: usize, const N: usize, const K: usize>()
+-> [u32; LANES] {
+    let mut out = [0u32; LANES];
+    let mut i = 0usize;
+    while i < LANES {
+        out[i] = (i * N + K) as u32;
+        i += 1;
+    }
+    out
+}
+
+#[allow(unused)]
+macro_rules! deinterleaving_load {
+    ($elem:ty, $lanes:literal, 2, $ptr:expr) => {{
+        use $crate::core_arch::macros::deinterleave_mask;
+        use $crate::core_arch::simd::Simd;
+        use $crate::mem::transmute;
+
+        type V = Simd<$elem, $lanes>;
+        type W = Simd<$elem, { $lanes * 2 }>;
+
+        let w: W = $crate::ptr::read_unaligned($ptr as *const W);
+
+        let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 2, 0>());
+        let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 2, 1>());
+
+        transmute((v0, v1))
+    }};
+
+    ($elem:ty, $lanes:literal, 3, $ptr:expr) => {{
+        use $crate::core_arch::macros::deinterleave_mask;
+        use $crate::core_arch::simd::Simd;
+        use $crate::mem::{MaybeUninit, transmute};
+
+        type V = Simd<$elem, $lanes>;
+        type W = Simd<$elem, { $lanes * 3 }>;
+
+        // NOTE: repr(simd) adds padding to make the total size a power of two.
+        // Hence reading W from ptr might read out of bounds.
+        let mut mem = MaybeUninit::<W>::uninit();
+        $crate::ptr::copy_nonoverlapping(
+            $ptr.cast::<$elem>(),
+            mem.as_mut_ptr().cast::<$elem>(),
+            $lanes * 3,
+        );
+        let w = mem.assume_init();
+
+        let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>());
+        let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 1>());
+        let v2: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 2>());
+
+        transmute((v0, v1, v2))
+    }};
+
+    ($elem:ty, $lanes:literal, 4, $ptr:expr) => {{
+        use $crate::core_arch::macros::deinterleave_mask;
+        use $crate::core_arch::simd::Simd;
+        use $crate::mem::transmute;
+
+        type V = Simd<$elem, $lanes>;
+        type W = Simd<$elem, { $lanes * 4 }>;
+
+        let w: W = $crate::ptr::read_unaligned($ptr as *const W);
+
+        let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 4, 0>());
+        let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 4, 1>());
+        let v2: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 4, 2>());
+        let v3: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 4, 3>());
+
+        transmute((v0, v1, v2, v3))
+    }};
+}
+
+#[allow(unused)]
+pub(crate) use deinterleaving_load;
+
+pub(crate) const fn interleave_mask<const LANES: usize, const N: usize, const K: usize>()
+-> [u32; LANES] {
+    let mut out = [0u32; LANES];
+    let mut j = 0usize;
+    while j < LANES {
+        out[j] = ((j % K) * N + j / K) as u32;
+        j += 1;
+    }
+    out
+}
+
+#[allow(unused)]
+macro_rules! interleaving_store {
+    ($elem:ty, $lanes:literal, 2, $ptr:expr, $v:expr) => {{
+        use $crate::core_arch::macros::interleave_mask;
+        use $crate::core_arch::simd::Simd;
+
+        type W = Simd<$elem, { $lanes * 2 }>;
+        let w: W = simd_shuffle!($v.0, $v.1, interleave_mask::<{ $lanes * 2 }, $lanes, 2>());
+        $crate::ptr::write_unaligned($ptr as *mut W, w);
+    }};
+
+    // N = 3
+    ($elem:ty, $lanes:literal, 3, $ptr:expr, $v:expr) => {{
+        use $crate::core_arch::macros::{identity, interleave_mask};
+        use $crate::core_arch::simd::Simd;
+
+        let v0v1: Simd<$elem, { $lanes * 2 }> =
+            simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>());
+        let v2v2: Simd<$elem, { $lanes * 2 }> =
+            simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>());
+
+        type W = Simd<$elem, { $lanes * 3 }>;
+
+        // NOTE: repr(simd) adds padding to make the total size a power of two.
+        // Hence writing W to ptr might write out of bounds.
+        let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>());
+        $crate::ptr::copy_nonoverlapping(
+            (&w as *const W).cast::<$elem>(),
+            $ptr.cast::<$elem>(),
+            $lanes * 3,
+        );
+    }};
+
+    // N = 4
+    ($elem:ty, $lanes:literal, 4, $ptr:expr, $v:expr) => {{
+        use $crate::core_arch::macros::{identity, interleave_mask};
+        use $crate::core_arch::simd::Simd;
+
+        let v0v1: Simd<$elem, { $lanes * 2 }> =
+            simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>());
+        let v2v3: Simd<$elem, { $lanes * 2 }> =
+            simd_shuffle!($v.2, $v.3, identity::<{ $lanes * 2 }>());
+
+        type W = Simd<$elem, { $lanes * 4 }>;
+        let w: W = simd_shuffle!(v0v1, v2v3, interleave_mask::<{ $lanes * 4 }, $lanes, 4>());
+        $crate::ptr::write_unaligned($ptr as *mut W, w);
+    }};
+}
+
+#[allow(unused)]
+pub(crate) use interleaving_store;
diff --git a/crates/core_arch/src/mips/msa.rs b/crates/core_arch/src/mips/msa.rs
index 563e121a7b..bc601baef9 100644
--- a/crates/core_arch/src/mips/msa.rs
+++ b/crates/core_arch/src/mips/msa.rs
@@ -1407,7 +1407,7 @@ pub unsafe fn __msa_addv_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(addvi.b, imm5 = 0b10111))]
+#[cfg_attr(test, assert_instr(addvi.b, IMM5 = 0b10111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_addvi_b<const IMM5: i32>(a: v16i8) -> v16i8 {
@@ -1423,7 +1423,7 @@ pub unsafe fn __msa_addvi_b<const IMM5: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(addvi.h, imm5 = 0b10111))]
+#[cfg_attr(test, assert_instr(addvi.h, IMM5 = 0b10111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_addvi_h<const IMM5: i32>(a: v8i16) -> v8i16 {
@@ -1439,7 +1439,7 @@ pub unsafe fn __msa_addvi_h<const IMM5: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(addvi.w, imm5 = 0b10111))]
+#[cfg_attr(test, assert_instr(addvi.w, IMM5 = 0b10111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_addvi_w<const IMM5: i32>(a: v4i32) -> v4i32 {
@@ -1455,7 +1455,7 @@ pub unsafe fn __msa_addvi_w<const IMM5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(addvi.d, imm5 = 0b10111))]
+#[cfg_attr(test, assert_instr(addvi.d, IMM5 = 0b10111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_addvi_d<const IMM5: i32>(a: v2i64) -> v2i64 {
@@ -1486,7 +1486,7 @@ pub unsafe fn __msa_and_v(a: v16u8, b: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(andi.b, imm8 = 0b10010111))]
+#[cfg_attr(test, assert_instr(andi.b, IMM8 = 0b10010111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_andi_b<const IMM8: i32>(a: v16u8) -> v16u8 {
@@ -1938,7 +1938,7 @@ pub unsafe fn __msa_bclr_d(a: v2u64, b: v2u64) -> v2u64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bclri.b, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(bclri.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bclri_b<const IMM3: i32>(a: v16u8) -> v16u8 {
@@ -1954,7 +1954,7 @@ pub unsafe fn __msa_bclri_b<const IMM3: i32>(a: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bclri.h, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(bclri.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bclri_h<const IMM4: i32>(a: v8u16) -> v8u16 {
@@ -1970,7 +1970,7 @@ pub unsafe fn __msa_bclri_h<const IMM4: i32>(a: v8u16) -> v8u16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bclri.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(bclri.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bclri_w<const IMM5: i32>(a: v4u32) -> v4u32 {
@@ -1986,7 +1986,7 @@ pub unsafe fn __msa_bclri_w<const IMM5: i32>(a: v4u32) -> v4u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bclri.d, imm6 = 0b111111))]
+#[cfg_attr(test, assert_instr(bclri.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bclri_d<const IMM6: i32>(a: v2u64) -> v2u64 {
@@ -2062,7 +2062,7 @@ pub unsafe fn __msa_binsl_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(binsli.b, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(binsli.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_binsli_b<const IMM3: i32>(a: v16u8, b: v16u8) -> v16u8 {
@@ -2078,7 +2078,7 @@ pub unsafe fn __msa_binsli_b<const IMM3: i32>(a: v16u8, b: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(binsli.h, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(binsli.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_binsli_h<const IMM4: i32>(a: v8u16, b: v8u16) -> v8u16 {
@@ -2094,7 +2094,7 @@ pub unsafe fn __msa_binsli_h<const IMM4: i32>(a: v8u16, b: v8u16) -> v8u16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(binsli.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(binsli.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_binsli_w<const IMM5: i32>(a: v4u32, b: v4u32) -> v4u32 {
@@ -2110,7 +2110,7 @@ pub unsafe fn __msa_binsli_w<const IMM5: i32>(a: v4u32, b: v4u32) -> v4u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(binsli.d, imm6 = 0b111111))]
+#[cfg_attr(test, assert_instr(binsli.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_binsli_d<const IMM6: i32>(a: v2u64, b: v2u64) -> v2u64 {
@@ -2186,7 +2186,7 @@ pub unsafe fn __msa_binsr_d(a: v2u64, b: v2u64, c: v2u64) -> v2u64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(binsri.b, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(binsri.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_binsri_b<const IMM3: i32>(a: v16u8, b: v16u8) -> v16u8 {
@@ -2202,7 +2202,7 @@ pub unsafe fn __msa_binsri_b<const IMM3: i32>(a: v16u8, b: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(binsri.h, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(binsri.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_binsri_h<const IMM4: i32>(a: v8u16, b: v8u16) -> v8u16 {
@@ -2218,7 +2218,7 @@ pub unsafe fn __msa_binsri_h<const IMM4: i32>(a: v8u16, b: v8u16) -> v8u16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(binsri.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(binsri.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_binsri_w<const IMM5: i32>(a: v4u32, b: v4u32) -> v4u32 {
@@ -2234,7 +2234,7 @@ pub unsafe fn __msa_binsri_w<const IMM5: i32>(a: v4u32, b: v4u32) -> v4u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(binsri.d, imm6 = 0b111111))]
+#[cfg_attr(test, assert_instr(binsri.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_binsri_d<const IMM6: i32>(a: v2u64, b: v2u64) -> v2u64 {
@@ -2265,7 +2265,7 @@ pub unsafe fn __msa_bmnz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bmnzi.b, imm8 = 0b11111111))]
+#[cfg_attr(test, assert_instr(bmnzi.b, IMM8 = 0b11111111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bmnzi_b<const IMM8: i32>(a: v16u8, b: v16u8) -> v16u8 {
@@ -2296,7 +2296,7 @@ pub unsafe fn __msa_bmz_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bmzi.b, imm8 = 0b11111111))]
+#[cfg_attr(test, assert_instr(bmzi.b, IMM8 = 0b11111111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bmzi_b<const IMM8: i32>(a: v16u8, b: v16u8) -> v16u8 {
@@ -2372,7 +2372,7 @@ pub unsafe fn __msa_bneg_d(a: v2u64, b: v2u64) -> v2u64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bnegi.b, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(bnegi.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bnegi_b<const IMM3: i32>(a: v16u8) -> v16u8 {
@@ -2388,7 +2388,7 @@ pub unsafe fn __msa_bnegi_b<const IMM3: i32>(a: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bnegi.h, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(bnegi.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bnegi_h<const IMM4: i32>(a: v8u16) -> v8u16 {
@@ -2404,7 +2404,7 @@ pub unsafe fn __msa_bnegi_h<const IMM4: i32>(a: v8u16) -> v8u16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bnegi.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(bnegi.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bnegi_w<const IMM5: i32>(a: v4u32) -> v4u32 {
@@ -2420,7 +2420,7 @@ pub unsafe fn __msa_bnegi_w<const IMM5: i32>(a: v4u32) -> v4u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bnegi.d, imm6 = 0b111111))]
+#[cfg_attr(test, assert_instr(bnegi.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bnegi_d<const IMM6: i32>(a: v2u64) -> v2u64 {
@@ -2512,7 +2512,7 @@ pub unsafe fn __msa_bsel_v(a: v16u8, b: v16u8, c: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bseli.b, imm8 = 0b11111111))]
+#[cfg_attr(test, assert_instr(bseli.b, IMM8 = 0b11111111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bseli_b<const IMM8: i32>(a: v16u8, b: v16u8) -> v16u8 {
@@ -2588,7 +2588,7 @@ pub unsafe fn __msa_bset_d(a: v2u64, b: v2u64) -> v2u64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bseti.b, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(bseti.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bseti_b<const IMM3: i32>(a: v16u8) -> v16u8 {
@@ -2604,7 +2604,7 @@ pub unsafe fn __msa_bseti_b<const IMM3: i32>(a: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bseti.h, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(bseti.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bseti_h<const IMM4: i32>(a: v8u16) -> v8u16 {
@@ -2620,7 +2620,7 @@ pub unsafe fn __msa_bseti_h<const IMM4: i32>(a: v8u16) -> v8u16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bseti.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(bseti.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bseti_w<const IMM5: i32>(a: v4u32) -> v4u32 {
@@ -2636,7 +2636,7 @@ pub unsafe fn __msa_bseti_w<const IMM5: i32>(a: v4u32) -> v4u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(bseti.d, imm6 = 0b111111))]
+#[cfg_attr(test, assert_instr(bseti.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_bseti_d<const IMM6: i32>(a: v2u64) -> v2u64 {
@@ -2769,7 +2769,7 @@ pub unsafe fn __msa_ceq_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ceqi.b, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(ceqi.b, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ceqi_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
@@ -2785,7 +2785,7 @@ pub unsafe fn __msa_ceqi_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ceqi.h, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(ceqi.h, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ceqi_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
@@ -2801,7 +2801,7 @@ pub unsafe fn __msa_ceqi_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ceqi.w, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(ceqi.w, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ceqi_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
@@ -2817,7 +2817,7 @@ pub unsafe fn __msa_ceqi_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ceqi.d, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(ceqi.d, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ceqi_d<const IMM_S5: i32>(a: v2i64) -> v2i64 {
@@ -2832,7 +2832,7 @@ pub unsafe fn __msa_ceqi_d<const IMM_S5: i32>(a: v2i64) -> v2i64 {
 /// Can not be tested in user mode
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(cfcmsa, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(cfcmsa, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_cfcmsa<const IMM5: i32>() -> i32 {
@@ -2969,7 +2969,7 @@ pub unsafe fn __msa_cle_u_d(a: v2u64, b: v2u64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clei_s.b, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clei_s.b, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clei_s_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
@@ -2986,7 +2986,7 @@ pub unsafe fn __msa_clei_s_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clei_s.h, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clei_s.h, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clei_s_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
@@ -3003,7 +3003,7 @@ pub unsafe fn __msa_clei_s_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clei_s.w, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clei_s.w, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clei_s_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
@@ -3020,7 +3020,7 @@ pub unsafe fn __msa_clei_s_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clei_s.d, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clei_s.d, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clei_s_d<const IMM_S5: i32>(a: v2i64) -> v2i64 {
@@ -3037,7 +3037,7 @@ pub unsafe fn __msa_clei_s_d<const IMM_S5: i32>(a: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clei_u.b, imm5 = 0b111))]
+#[cfg_attr(test, assert_instr(clei_u.b, IMM5 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clei_u_b<const IMM5: i32>(a: v16u8) -> v16i8 {
@@ -3054,7 +3054,7 @@ pub unsafe fn __msa_clei_u_b<const IMM5: i32>(a: v16u8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clei_u.h, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clei_u.h, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clei_u_h<const IMM5: i32>(a: v8u16) -> v8i16 {
@@ -3071,7 +3071,7 @@ pub unsafe fn __msa_clei_u_h<const IMM5: i32>(a: v8u16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clei_u.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clei_u.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clei_u_w<const IMM5: i32>(a: v4u32) -> v4i32 {
@@ -3088,7 +3088,7 @@ pub unsafe fn __msa_clei_u_w<const IMM5: i32>(a: v4u32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clei_u.d, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clei_u.d, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clei_u_d<const IMM5: i32>(a: v2u64) -> v2i64 {
@@ -3225,7 +3225,7 @@ pub unsafe fn __msa_clt_u_d(a: v2u64, b: v2u64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clti_s.b, imm_s5 = 0b111))]
+#[cfg_attr(test, assert_instr(clti_s.b, IMM_S5 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clti_s_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
@@ -3242,7 +3242,7 @@ pub unsafe fn __msa_clti_s_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clti_s.h, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clti_s.h, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clti_s_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
@@ -3259,7 +3259,7 @@ pub unsafe fn __msa_clti_s_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clti_s.w, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clti_s.w, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clti_s_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
@@ -3276,7 +3276,7 @@ pub unsafe fn __msa_clti_s_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clti_s.d, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clti_s.d, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clti_s_d<const IMM_S5: i32>(a: v2i64) -> v2i64 {
@@ -3293,7 +3293,7 @@ pub unsafe fn __msa_clti_s_d<const IMM_S5: i32>(a: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clti_u.b, imm5 = 0b111))]
+#[cfg_attr(test, assert_instr(clti_u.b, IMM5 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clti_u_b<const IMM5: i32>(a: v16u8) -> v16i8 {
@@ -3310,7 +3310,7 @@ pub unsafe fn __msa_clti_u_b<const IMM5: i32>(a: v16u8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clti_u.h, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clti_u.h, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clti_u_h<const IMM5: i32>(a: v8u16) -> v8i16 {
@@ -3327,7 +3327,7 @@ pub unsafe fn __msa_clti_u_h<const IMM5: i32>(a: v8u16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clti_u.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clti_u.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clti_u_w<const IMM5: i32>(a: v4u32) -> v4i32 {
@@ -3344,7 +3344,7 @@ pub unsafe fn __msa_clti_u_w<const IMM5: i32>(a: v4u32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(clti_u.d, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(clti_u.d, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_clti_u_d<const IMM5: i32>(a: v2u64) -> v2i64 {
@@ -3359,7 +3359,7 @@ pub unsafe fn __msa_clti_u_d<const IMM5: i32>(a: v2u64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(copy_s.b, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(copy_s.b, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_copy_s_b<const IMM4: i32>(a: v16i8) -> i32 {
@@ -3374,7 +3374,7 @@ pub unsafe fn __msa_copy_s_b<const IMM4: i32>(a: v16i8) -> i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(copy_s.h, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(copy_s.h, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_copy_s_h<const IMM3: i32>(a: v8i16) -> i32 {
@@ -3389,7 +3389,7 @@ pub unsafe fn __msa_copy_s_h<const IMM3: i32>(a: v8i16) -> i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(copy_s.w, imm2 = 0b11))]
+#[cfg_attr(test, assert_instr(copy_s.w, IMM2 = 0b11))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_copy_s_w<const IMM2: i32>(a: v4i32) -> i32 {
@@ -3404,7 +3404,7 @@ pub unsafe fn __msa_copy_s_w<const IMM2: i32>(a: v4i32) -> i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(copy_s.d, imm1 = 0b1))]
+#[cfg_attr(test, assert_instr(copy_s.d, IMM1 = 0b1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_copy_s_d<const IMM1: i32>(a: v2i64) -> i64 {
@@ -3419,7 +3419,7 @@ pub unsafe fn __msa_copy_s_d<const IMM1: i32>(a: v2i64) -> i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(copy_u.b, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(copy_u.b, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_copy_u_b<const IMM4: i32>(a: v16i8) -> u32 {
@@ -3434,7 +3434,7 @@ pub unsafe fn __msa_copy_u_b<const IMM4: i32>(a: v16i8) -> u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(copy_u.h, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(copy_u.h, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_copy_u_h<const IMM3: i32>(a: v8i16) -> u32 {
@@ -3449,7 +3449,7 @@ pub unsafe fn __msa_copy_u_h<const IMM3: i32>(a: v8i16) -> u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(copy_u.w, imm2 = 0b11))]
+#[cfg_attr(test, assert_instr(copy_u.w, IMM2 = 0b11))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_copy_u_w<const IMM2: i32>(a: v4i32) -> u32 {
@@ -3464,7 +3464,7 @@ pub unsafe fn __msa_copy_u_w<const IMM2: i32>(a: v4i32) -> u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(copy_u.d, imm1 = 0b1))]
+#[cfg_attr(test, assert_instr(copy_u.d, IMM1 = 0b1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_copy_u_d<const IMM1: i32>(a: v2i64) -> u64 {
@@ -3481,7 +3481,7 @@ pub unsafe fn __msa_copy_u_d<const IMM1: i32>(a: v2i64) -> u64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ctcmsa, imm1 = 0b1))]
+#[cfg_attr(test, assert_instr(ctcmsa, IMM5 = 0b1))]
 #[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ctcmsa<const IMM5: i32>(a: i32) -> () {
@@ -5855,7 +5855,7 @@ pub unsafe fn __msa_ilvr_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(insert.b, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(insert.b, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_insert_b<const IMM4: i32>(a: v16i8, c: i32) -> v16i8 {
@@ -5871,7 +5871,7 @@ pub unsafe fn __msa_insert_b<const IMM4: i32>(a: v16i8, c: i32) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(insert.h, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(insert.h, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_insert_h<const IMM3: i32>(a: v8i16, c: i32) -> v8i16 {
@@ -5887,7 +5887,7 @@ pub unsafe fn __msa_insert_h<const IMM3: i32>(a: v8i16, c: i32) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(insert.w, imm2 = 0b11))]
+#[cfg_attr(test, assert_instr(insert.w, IMM2 = 0b11))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_insert_w<const IMM2: i32>(a: v4i32, c: i32) -> v4i32 {
@@ -5903,7 +5903,7 @@ pub unsafe fn __msa_insert_w<const IMM2: i32>(a: v4i32, c: i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(insert.d, imm1 = 0b1))]
+#[cfg_attr(test, assert_instr(insert.d, IMM1 = 0b1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_insert_d<const IMM1: i32>(a: v2i64, c: i64) -> v2i64 {
@@ -5919,7 +5919,7 @@ pub unsafe fn __msa_insert_d<const IMM1: i32>(a: v2i64, c: i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(insve.b, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(insve.b, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_insve_b<const IMM4: i32>(a: v16i8, c: v16i8) -> v16i8 {
@@ -5935,7 +5935,7 @@ pub unsafe fn __msa_insve_b<const IMM4: i32>(a: v16i8, c: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(insve.h, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(insve.h, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_insve_h<const IMM3: i32>(a: v8i16, c: v8i16) -> v8i16 {
@@ -5951,7 +5951,7 @@ pub unsafe fn __msa_insve_h<const IMM3: i32>(a: v8i16, c: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(insve.w, imm2 = 0b11))]
+#[cfg_attr(test, assert_instr(insve.w, IMM2 = 0b11))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_insve_w<const IMM2: i32>(a: v4i32, c: v4i32) -> v4i32 {
@@ -5967,7 +5967,7 @@ pub unsafe fn __msa_insve_w<const IMM2: i32>(a: v4i32, c: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(insve.d, imm1 = 0b1))]
+#[cfg_attr(test, assert_instr(insve.d, IMM1 = 0b1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_insve_d<const IMM1: i32>(a: v2i64, c: v2i64) -> v2i64 {
@@ -5983,7 +5983,7 @@ pub unsafe fn __msa_insve_d<const IMM1: i32>(a: v2i64, c: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ld.b, imm_s10 = 0b1111111111))]
+#[cfg_attr(test, assert_instr(ld.b, IMM_S10 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ld_b<const IMM_S10: i32>(mem_addr: *mut u8) -> v16i8 {
@@ -5999,7 +5999,7 @@ pub unsafe fn __msa_ld_b<const IMM_S10: i32>(mem_addr: *mut u8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ld.h, imm_s11 = 0b11111111111))]
+#[cfg_attr(test, assert_instr(ld.h, IMM_S11 = -2))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ld_h<const IMM_S11: i32>(mem_addr: *mut u8) -> v8i16 {
@@ -6016,7 +6016,7 @@ pub unsafe fn __msa_ld_h<const IMM_S11: i32>(mem_addr: *mut u8) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ld.w, imm_s12 = 0b111111111111))]
+#[cfg_attr(test, assert_instr(ld.w, IMM_S12 = -4))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ld_w<const IMM_S12: i32>(mem_addr: *mut u8) -> v4i32 {
@@ -6033,7 +6033,7 @@ pub unsafe fn __msa_ld_w<const IMM_S12: i32>(mem_addr: *mut u8) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ld.d, imm_s13 = 0b1111111111111))]
+#[cfg_attr(test, assert_instr(ld.d, IMM_S13 = -8))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ld_d<const IMM_S13: i32>(mem_addr: *mut u8) -> v2i64 {
@@ -6050,7 +6050,7 @@ pub unsafe fn __msa_ld_d<const IMM_S13: i32>(mem_addr: *mut u8) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ldi.b, imm_s10 = 0b1111111111))]
+#[cfg_attr(test, assert_instr(ldi.b, IMM_S10 = -1))]
 #[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ldi_b<const IMM_S10: i32>() -> v16i8 {
@@ -6066,7 +6066,7 @@ pub unsafe fn __msa_ldi_b<const IMM_S10: i32>() -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ldi.h, imm_s10 = 0b1111111111))]
+#[cfg_attr(test, assert_instr(ldi.h, IMM_S10 = -1))]
 #[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ldi_h<const IMM_S10: i32>() -> v8i16 {
@@ -6082,7 +6082,7 @@ pub unsafe fn __msa_ldi_h<const IMM_S10: i32>() -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ldi.w, imm_s10 = 0b1111111111))]
+#[cfg_attr(test, assert_instr(ldi.w, IMM_S10 = -1))]
 #[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ldi_w<const IMM_S10: i32>() -> v4i32 {
@@ -6098,7 +6098,7 @@ pub unsafe fn __msa_ldi_w<const IMM_S10: i32>() -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ldi.d, imm_s10 = 0b1111111111))]
+#[cfg_attr(test, assert_instr(ldi.d, IMM_S10 = -1))]
 #[rustc_legacy_const_generics(0)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ldi_d<const IMM_S10: i32>() -> v2i64 {
@@ -6410,7 +6410,7 @@ pub unsafe fn __msa_max_u_d(a: v2u64, b: v2u64) -> v2u64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(maxi_s.b, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(maxi_s.b, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_maxi_s_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
@@ -6426,7 +6426,7 @@ pub unsafe fn __msa_maxi_s_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(maxi_s.h, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(maxi_s.h, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_maxi_s_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
@@ -6442,7 +6442,7 @@ pub unsafe fn __msa_maxi_s_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(maxi_s.w, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(maxi_s.w, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_maxi_s_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
@@ -6458,7 +6458,7 @@ pub unsafe fn __msa_maxi_s_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(maxi_s.d, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(maxi_s.d, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_maxi_s_d<const IMM_S5: i32>(a: v2i64) -> v2i64 {
@@ -6474,7 +6474,7 @@ pub unsafe fn __msa_maxi_s_d<const IMM_S5: i32>(a: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(maxi_u.b, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(maxi_u.b, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_maxi_u_b<const IMM5: i32>(a: v16u8) -> v16u8 {
@@ -6490,7 +6490,7 @@ pub unsafe fn __msa_maxi_u_b<const IMM5: i32>(a: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(maxi_u.h, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(maxi_u.h, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_maxi_u_h<const IMM5: i32>(a: v8u16) -> v8u16 {
@@ -6506,7 +6506,7 @@ pub unsafe fn __msa_maxi_u_h<const IMM5: i32>(a: v8u16) -> v8u16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(maxi_u.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(maxi_u.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_maxi_u_w<const IMM5: i32>(a: v4u32) -> v4u32 {
@@ -6522,7 +6522,7 @@ pub unsafe fn __msa_maxi_u_w<const IMM5: i32>(a: v4u32) -> v4u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(maxi_u.d, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(maxi_u.d, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_maxi_u_d<const IMM5: i32>(a: v2u64) -> v2u64 {
@@ -6654,7 +6654,7 @@ pub unsafe fn __msa_min_s_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(mini_s.b, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(mini_s.b, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_mini_s_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
@@ -6670,7 +6670,7 @@ pub unsafe fn __msa_mini_s_b<const IMM_S5: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(mini_s.h, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(mini_s.h, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_mini_s_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
@@ -6686,7 +6686,7 @@ pub unsafe fn __msa_mini_s_h<const IMM_S5: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(mini_s.w, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(mini_s.w, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_mini_s_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
@@ -6702,7 +6702,7 @@ pub unsafe fn __msa_mini_s_w<const IMM_S5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(mini_s.d, imm_s5 = 0b11111))]
+#[cfg_attr(test, assert_instr(mini_s.d, IMM_S5 = -1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_mini_s_d<const IMM_S5: i32>(a: v2i64) -> v2i64 {
@@ -6774,7 +6774,7 @@ pub unsafe fn __msa_min_u_d(a: v2u64, b: v2u64) -> v2u64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(mini_u.b, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(mini_u.b, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_mini_u_b<const IMM5: i32>(a: v16u8) -> v16u8 {
@@ -6790,7 +6790,7 @@ pub unsafe fn __msa_mini_u_b<const IMM5: i32>(a: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(mini_u.h, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(mini_u.h, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_mini_u_h<const IMM5: i32>(a: v8u16) -> v8u16 {
@@ -6806,7 +6806,7 @@ pub unsafe fn __msa_mini_u_h<const IMM5: i32>(a: v8u16) -> v8u16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(mini_u.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(mini_u.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_mini_u_w<const IMM5: i32>(a: v4u32) -> v4u32 {
@@ -6822,7 +6822,7 @@ pub unsafe fn __msa_mini_u_w<const IMM5: i32>(a: v4u32) -> v4u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(mini_u.d, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(mini_u.d, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_mini_u_d<const IMM5: i32>(a: v2u64) -> v2u64 {
@@ -7343,7 +7343,7 @@ pub unsafe fn __msa_nor_v(a: v16u8, b: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(nori.b, imm8 = 0b11111111))]
+#[cfg_attr(test, assert_instr(nori.b, IMM8 = 0b11111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_nori_b<const IMM8: i32>(a: v16u8) -> v16u8 {
@@ -7375,7 +7375,7 @@ pub unsafe fn __msa_or_v(a: v16u8, b: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(ori.b, imm8 = 0b11111111))]
+#[cfg_attr(test, assert_instr(ori.b, IMM8 = 0b11111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_ori_b<const IMM8: i32>(a: v16u8) -> v16u8 {
@@ -7555,7 +7555,7 @@ pub unsafe fn __msa_pcnt_d(a: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sat_s.b, imm4 = 0b111))]
+#[cfg_attr(test, assert_instr(sat_s.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sat_s_b<const IMM3: i32>(a: v16i8) -> v16i8 {
@@ -7571,7 +7571,7 @@ pub unsafe fn __msa_sat_s_b<const IMM3: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sat_s.h, imm3 = 0b1111))]
+#[cfg_attr(test, assert_instr(sat_s.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sat_s_h<const IMM4: i32>(a: v8i16) -> v8i16 {
@@ -7587,7 +7587,7 @@ pub unsafe fn __msa_sat_s_h<const IMM4: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sat_s.w, imm2 = 0b11111))]
+#[cfg_attr(test, assert_instr(sat_s.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sat_s_w<const IMM5: i32>(a: v4i32) -> v4i32 {
@@ -7603,7 +7603,7 @@ pub unsafe fn __msa_sat_s_w<const IMM5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sat_s.d, imm1 = 0b111111))]
+#[cfg_attr(test, assert_instr(sat_s.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sat_s_d<const IMM6: i32>(a: v2i64) -> v2i64 {
@@ -7619,7 +7619,7 @@ pub unsafe fn __msa_sat_s_d<const IMM6: i32>(a: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sat_u.b, imm4 = 0b111))]
+#[cfg_attr(test, assert_instr(sat_u.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sat_u_b<const IMM3: i32>(a: v16u8) -> v16u8 {
@@ -7635,7 +7635,7 @@ pub unsafe fn __msa_sat_u_b<const IMM3: i32>(a: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sat_u.h, imm3 = 0b1111))]
+#[cfg_attr(test, assert_instr(sat_u.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sat_u_h<const IMM4: i32>(a: v8u16) -> v8u16 {
@@ -7651,7 +7651,7 @@ pub unsafe fn __msa_sat_u_h<const IMM4: i32>(a: v8u16) -> v8u16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sat_u.w, imm2 = 0b11111))]
+#[cfg_attr(test, assert_instr(sat_u.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sat_u_w<const IMM5: i32>(a: v4u32) -> v4u32 {
@@ -7667,7 +7667,7 @@ pub unsafe fn __msa_sat_u_w<const IMM5: i32>(a: v4u32) -> v4u32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sat_u.d, imm1 = 0b111111))]
+#[cfg_attr(test, assert_instr(sat_u.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sat_u_d<const IMM6: i32>(a: v2u64) -> v2u64 {
@@ -7684,7 +7684,7 @@ pub unsafe fn __msa_sat_u_d<const IMM6: i32>(a: v2u64) -> v2u64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(shf.b, imm8 = 0b11111111))]
+#[cfg_attr(test, assert_instr(shf.b, IMM8 = 0b11111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_shf_b<const IMM8: i32>(a: v16i8) -> v16i8 {
@@ -7701,7 +7701,7 @@ pub unsafe fn __msa_shf_b<const IMM8: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(shf.h, imm8 = 0b11111111))]
+#[cfg_attr(test, assert_instr(shf.h, IMM8 = 0b11111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_shf_h<const IMM8: i32>(a: v8i16) -> v8i16 {
@@ -7718,7 +7718,7 @@ pub unsafe fn __msa_shf_h<const IMM8: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(shf.w, imm8 = 0b11111111))]
+#[cfg_attr(test, assert_instr(shf.w, IMM8 = 0b11111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_shf_w<const IMM8: i32>(a: v4i32) -> v4i32 {
@@ -7823,7 +7823,7 @@ pub unsafe fn __msa_sld_d(a: v2i64, b: v2i64, c: i32) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sldi.b, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(sldi.b, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sldi_b<const IMM4: i32>(a: v16i8, b: v16i8) -> v16i8 {
@@ -7844,7 +7844,7 @@ pub unsafe fn __msa_sldi_b<const IMM4: i32>(a: v16i8, b: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sldi.h, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(sldi.h, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sldi_h<const IMM3: i32>(a: v8i16, b: v8i16) -> v8i16 {
@@ -7865,7 +7865,7 @@ pub unsafe fn __msa_sldi_h<const IMM3: i32>(a: v8i16, b: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sldi.w, imm2 = 0b11))]
+#[cfg_attr(test, assert_instr(sldi.w, IMM2 = 0b11))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sldi_w<const IMM2: i32>(a: v4i32, b: v4i32) -> v4i32 {
@@ -7886,7 +7886,7 @@ pub unsafe fn __msa_sldi_w<const IMM2: i32>(a: v4i32, b: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(sldi.d, imm1 = 0b1))]
+#[cfg_attr(test, assert_instr(sldi.d, IMM1 = 0b1))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_sldi_d<const IMM1: i32>(a: v2i64, b: v2i64) -> v2i64 {
@@ -7962,7 +7962,7 @@ pub unsafe fn __msa_sll_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(slli.b, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(slli.b, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_slli_b<const IMM4: i32>(a: v16i8) -> v16i8 {
@@ -7978,7 +7978,7 @@ pub unsafe fn __msa_slli_b<const IMM4: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(slli.h, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(slli.h, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_slli_h<const IMM3: i32>(a: v8i16) -> v8i16 {
@@ -7994,7 +7994,7 @@ pub unsafe fn __msa_slli_h<const IMM3: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(slli.w, imm2 = 0b11))]
+#[cfg_attr(test, assert_instr(slli.w, IMM2 = 0b11))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_slli_w<const IMM2: i32>(a: v4i32) -> v4i32 {
@@ -8010,7 +8010,7 @@ pub unsafe fn __msa_slli_w<const IMM2: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(slli.d, imm1 = 0b1))]
+#[cfg_attr(test, assert_instr(slli.d, IMM1 = 0b1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_slli_d<const IMM1: i32>(a: v2i64) -> v2i64 {
@@ -8085,7 +8085,7 @@ pub unsafe fn __msa_splat_d(a: v2i64, b: i32) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(splati.b, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(splati.b, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_splati_b<const IMM4: i32>(a: v16i8) -> v16i8 {
@@ -8100,7 +8100,7 @@ pub unsafe fn __msa_splati_b<const IMM4: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(splati.h, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(splati.h, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_splati_h<const IMM3: i32>(a: v8i16) -> v8i16 {
@@ -8115,7 +8115,7 @@ pub unsafe fn __msa_splati_h<const IMM3: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(splati.w, imm2 = 0b11))]
+#[cfg_attr(test, assert_instr(splati.w, IMM2 = 0b11))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_splati_w<const IMM2: i32>(a: v4i32) -> v4i32 {
@@ -8130,7 +8130,7 @@ pub unsafe fn __msa_splati_w<const IMM2: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(splati.d, imm1 = 0b1))]
+#[cfg_attr(test, assert_instr(splati.d, IMM1 = 0b1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_splati_d<const IMM1: i32>(a: v2i64) -> v2i64 {
@@ -8206,7 +8206,7 @@ pub unsafe fn __msa_sra_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srai.b, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(srai.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srai_b<const IMM3: i32>(a: v16i8) -> v16i8 {
@@ -8222,7 +8222,7 @@ pub unsafe fn __msa_srai_b<const IMM3: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srai.h, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(srai.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srai_h<const IMM4: i32>(a: v8i16) -> v8i16 {
@@ -8238,7 +8238,7 @@ pub unsafe fn __msa_srai_h<const IMM4: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srai.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(srai.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srai_w<const IMM5: i32>(a: v4i32) -> v4i32 {
@@ -8254,7 +8254,7 @@ pub unsafe fn __msa_srai_w<const IMM5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srai.d, imm6 = 0b111111))]
+#[cfg_attr(test, assert_instr(srai.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srai_d<const IMM6: i32>(a: v2i64) -> v2i64 {
@@ -8335,7 +8335,7 @@ pub unsafe fn __msa_srar_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srari.b, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(srari.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srari_b<const IMM3: i32>(a: v16i8) -> v16i8 {
@@ -8352,7 +8352,7 @@ pub unsafe fn __msa_srari_b<const IMM3: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srari.h, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(srari.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srari_h<const IMM4: i32>(a: v8i16) -> v8i16 {
@@ -8369,7 +8369,7 @@ pub unsafe fn __msa_srari_h<const IMM4: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srari.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(srari.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srari_w<const IMM5: i32>(a: v4i32) -> v4i32 {
@@ -8386,7 +8386,7 @@ pub unsafe fn __msa_srari_w<const IMM5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srari.d, imm6 = 0b111111))]
+#[cfg_attr(test, assert_instr(srari.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srari_d<const IMM6: i32>(a: v2i64) -> v2i64 {
@@ -8462,7 +8462,7 @@ pub unsafe fn __msa_srl_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srli.b, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(srli.b, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srli_b<const IMM4: i32>(a: v16i8) -> v16i8 {
@@ -8478,7 +8478,7 @@ pub unsafe fn __msa_srli_b<const IMM4: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srli.h, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(srli.h, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srli_h<const IMM3: i32>(a: v8i16) -> v8i16 {
@@ -8494,7 +8494,7 @@ pub unsafe fn __msa_srli_h<const IMM3: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srli.w, imm2 = 0b11))]
+#[cfg_attr(test, assert_instr(srli.w, IMM2 = 0b11))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srli_w<const IMM2: i32>(a: v4i32) -> v4i32 {
@@ -8510,7 +8510,7 @@ pub unsafe fn __msa_srli_w<const IMM2: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srli.d, imm1 = 0b1))]
+#[cfg_attr(test, assert_instr(srli.d, IMM1 = 0b1))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srli_d<const IMM1: i32>(a: v2i64) -> v2i64 {
@@ -8591,7 +8591,7 @@ pub unsafe fn __msa_srlr_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srlri.b, imm3 = 0b111))]
+#[cfg_attr(test, assert_instr(srlri.b, IMM3 = 0b111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srlri_b<const IMM3: i32>(a: v16i8) -> v16i8 {
@@ -8608,7 +8608,7 @@ pub unsafe fn __msa_srlri_b<const IMM3: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srlri.h, imm4 = 0b1111))]
+#[cfg_attr(test, assert_instr(srlri.h, IMM4 = 0b1111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srlri_h<const IMM4: i32>(a: v8i16) -> v8i16 {
@@ -8625,7 +8625,7 @@ pub unsafe fn __msa_srlri_h<const IMM4: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srlri.w, imm5 = 0b11111))]
+#[cfg_attr(test, assert_instr(srlri.w, IMM5 = 0b11111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srlri_w<const IMM5: i32>(a: v4i32) -> v4i32 {
@@ -8642,7 +8642,7 @@ pub unsafe fn __msa_srlri_w<const IMM5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(srlri.d, imm6 = 0b111111))]
+#[cfg_attr(test, assert_instr(srlri.d, IMM6 = 0b111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_srlri_d<const IMM6: i32>(a: v2i64) -> v2i64 {
@@ -8658,7 +8658,7 @@ pub unsafe fn __msa_srlri_d<const IMM6: i32>(a: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(st.b, imm_s10 = 0b1111111111))]
+#[cfg_attr(test, assert_instr(st.b, IMM_S10 = -1))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_st_b<const IMM_S10: i32>(a: v16i8, mem_addr: *mut u8) -> () {
@@ -8674,7 +8674,7 @@ pub unsafe fn __msa_st_b<const IMM_S10: i32>(a: v16i8, mem_addr: *mut u8) -> ()
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(st.h, imm_s11 = 0b11111111111))]
+#[cfg_attr(test, assert_instr(st.h, IMM_S11 = -2))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_st_h<const IMM_S11: i32>(a: v8i16, mem_addr: *mut u8) -> () {
@@ -8691,7 +8691,7 @@ pub unsafe fn __msa_st_h<const IMM_S11: i32>(a: v8i16, mem_addr: *mut u8) -> ()
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(st.w, imm_s12 = 0b111111111111))]
+#[cfg_attr(test, assert_instr(st.w, IMM_S12 = -4))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_st_w<const IMM_S12: i32>(a: v4i32, mem_addr: *mut u8) -> () {
@@ -8708,7 +8708,7 @@ pub unsafe fn __msa_st_w<const IMM_S12: i32>(a: v4i32, mem_addr: *mut u8) -> ()
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(st.d, imm_s13 = 0b1111111111111))]
+#[cfg_attr(test, assert_instr(st.d, IMM_S13 = -8))]
 #[rustc_legacy_const_generics(2)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_st_d<const IMM_S13: i32>(a: v2i64, mem_addr: *mut u8) -> () {
@@ -9021,7 +9021,7 @@ pub unsafe fn __msa_subv_d(a: v2i64, b: v2i64) -> v2i64 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(subvi.b, imm5 = 0b10111))]
+#[cfg_attr(test, assert_instr(subvi.b, IMM5 = 0b10111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_subvi_b<const IMM5: i32>(a: v16i8) -> v16i8 {
@@ -9037,7 +9037,7 @@ pub unsafe fn __msa_subvi_b<const IMM5: i32>(a: v16i8) -> v16i8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(subvi.h, imm5 = 0b10111))]
+#[cfg_attr(test, assert_instr(subvi.h, IMM5 = 0b10111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_subvi_h<const IMM5: i32>(a: v8i16) -> v8i16 {
@@ -9053,7 +9053,7 @@ pub unsafe fn __msa_subvi_h<const IMM5: i32>(a: v8i16) -> v8i16 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(subvi.w, imm5 = 0b10111))]
+#[cfg_attr(test, assert_instr(subvi.w, IMM5 = 0b10111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_subvi_w<const IMM5: i32>(a: v4i32) -> v4i32 {
@@ -9069,7 +9069,7 @@ pub unsafe fn __msa_subvi_w<const IMM5: i32>(a: v4i32) -> v4i32 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(subvi.d, imm5 = 0b10111))]
+#[cfg_attr(test, assert_instr(subvi.d, IMM5 = 0b10111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_subvi_d<const IMM5: i32>(a: v2i64) -> v2i64 {
@@ -9173,7 +9173,7 @@ pub unsafe fn __msa_xor_v(a: v16u8, b: v16u8) -> v16u8 {
 ///
 #[inline]
 #[target_feature(enable = "msa")]
-#[cfg_attr(test, assert_instr(xori.b, imm8 = 0b11111111))]
+#[cfg_attr(test, assert_instr(xori.b, IMM8 = 0b11111111))]
 #[rustc_legacy_const_generics(1)]
 #[unstable(feature = "stdarch_mips", issue = "111198")]
 pub unsafe fn __msa_xori_b<const IMM8: i32>(a: v16u8) -> v16u8 {
@@ -9187,7 +9187,6 @@ mod tests {
         core_arch::{mips::msa::*, simd::*},
         mem,
     };
-    use std::{f32, f64};
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "msa")]
@@ -9603,7 +9602,7 @@ mod tests {
             103, -126, 103, -126
         );
 
-        assert_eq!(r, mem::transmute(__msa_addvi_b(mem::transmute(a), 67)));
+        assert_eq!(r, mem::transmute(__msa_addvi_b::<3>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -9619,7 +9618,7 @@ mod tests {
             -32766, 3279, -97, -124
         );
 
-        assert_eq!(r, mem::transmute(__msa_addvi_h(mem::transmute(a), 67)));
+        assert_eq!(r, mem::transmute(__msa_addvi_h::<3>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -9629,7 +9628,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(103, -2147483646, 103, -2147483645);
 
-        assert_eq!(r, mem::transmute(__msa_addvi_w(mem::transmute(a), 67)));
+        assert_eq!(r, mem::transmute(__msa_addvi_w::<3>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -9639,7 +9638,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(117, -9223372036854775791);
 
-        assert_eq!(r, mem::transmute(__msa_addvi_d(mem::transmute(a), 17)));
+        assert_eq!(r, mem::transmute(__msa_addvi_d::<17>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -9689,7 +9688,7 @@ mod tests {
             4, 5, 4, 5
         );
 
-        assert_eq!(r, mem::transmute(__msa_andi_b(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_andi_b::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -10234,7 +10233,7 @@ mod tests {
             247, 147, 55, 1
         );
 
-        assert_eq!(r, mem::transmute(__msa_bclri_b(mem::transmute(a), 3)));
+        assert_eq!(r, mem::transmute(__msa_bclri_b::<3>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -10244,7 +10243,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u16x8::new(107, 1155, 155, 1, 107, 1155, 155, 1);
 
-        assert_eq!(r, mem::transmute(__msa_bclri_h(mem::transmute(a), 11)));
+        assert_eq!(r, mem::transmute(__msa_bclri_h::<11>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -10254,7 +10253,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u32x4::new(202722547, 102722547, 2722547, 1);
 
-        assert_eq!(r, mem::transmute(__msa_bclri_w(mem::transmute(a), 23)));
+        assert_eq!(r, mem::transmute(__msa_bclri_w::<23>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -10264,7 +10263,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u64x2::new(73672157683, 11110973672157683);
 
-        assert_eq!(r, mem::transmute(__msa_bclri_d(mem::transmute(a), 37)));
+        assert_eq!(r, mem::transmute(__msa_bclri_d::<37>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -10409,7 +10408,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_binsli_b(mem::transmute(a), mem::transmute(b), 5))
+            mem::transmute(__msa_binsli_b::<5>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10433,7 +10432,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_binsli_h(mem::transmute(a), mem::transmute(b), 13))
+            mem::transmute(__msa_binsli_h::<13>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10448,7 +10447,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_binsli_w(mem::transmute(a), mem::transmute(b), 17))
+            mem::transmute(__msa_binsli_w::<17>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10463,7 +10462,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_binsli_d(mem::transmute(a), mem::transmute(b), 48))
+            mem::transmute(__msa_binsli_d::<48>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10609,7 +10608,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_binsri_b(mem::transmute(a), mem::transmute(b), 5))
+            mem::transmute(__msa_binsri_b::<5>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10633,7 +10632,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_binsri_h(mem::transmute(a), mem::transmute(b), 13))
+            mem::transmute(__msa_binsri_h::<13>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10648,7 +10647,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_binsri_w(mem::transmute(a), mem::transmute(b), 17))
+            mem::transmute(__msa_binsri_w::<17>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10663,7 +10662,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_binsri_d(mem::transmute(a), mem::transmute(b), 48))
+            mem::transmute(__msa_binsri_d::<48>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10734,7 +10733,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_bmnzi_b(mem::transmute(a), mem::transmute(b), 7))
+            mem::transmute(__msa_bmnzi_b::<7>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10805,7 +10804,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_bmzi_b(mem::transmute(a), mem::transmute(b), 7))
+            mem::transmute(__msa_bmzi_b::<7>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -10901,7 +10900,7 @@ mod tests {
             34, 116, 111, 239
         );
 
-        assert_eq!(r, mem::transmute(__msa_bnegi_b(mem::transmute(a), 4)));
+        assert_eq!(r, mem::transmute(__msa_bnegi_b::<4>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -10917,7 +10916,7 @@ mod tests {
             30719, 1228, 2148, 2175
         );
 
-        assert_eq!(r, mem::transmute(__msa_bnegi_h(mem::transmute(a), 11)));
+        assert_eq!(r, mem::transmute(__msa_bnegi_h::<11>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -10927,7 +10926,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u32x4::new(16777316, 2130706431, 16777316, 2164260864);
 
-        assert_eq!(r, mem::transmute(__msa_bnegi_w(mem::transmute(a), 24)));
+        assert_eq!(r, mem::transmute(__msa_bnegi_w::<24>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -10937,7 +10936,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u64x2::new(4398046511204, 9223376434901286912);
 
-        assert_eq!(r, mem::transmute(__msa_bnegi_d(mem::transmute(a), 42)));
+        assert_eq!(r, mem::transmute(__msa_bnegi_d::<42>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11066,7 +11065,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_bseli_b(mem::transmute(a), mem::transmute(b), 121))
+            mem::transmute(__msa_bseli_b::<121>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -11162,7 +11161,7 @@ mod tests {
             255, 159, 55, 5
         );
 
-        assert_eq!(r, mem::transmute(__msa_bseti_b(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_bseti_b::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11172,7 +11171,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u16x8::new(255, 159, 55, 5, 255, 159, 55, 5);
 
-        assert_eq!(r, mem::transmute(__msa_bseti_h(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_bseti_h::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11182,7 +11181,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u32x4::new(255, 159, 55, 5);
 
-        assert_eq!(r, mem::transmute(__msa_bseti_w(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_bseti_w::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11192,7 +11191,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u64x2::new(255, 159);
 
-        assert_eq!(r, mem::transmute(__msa_bseti_d(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_bseti_d::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11342,7 +11341,7 @@ mod tests {
             0, 0, -1, 0
         );
 
-        assert_eq!(r, mem::transmute(__msa_ceqi_b(mem::transmute(a), -4)));
+        assert_eq!(r, mem::transmute(__msa_ceqi_b::<-4>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11355,7 +11354,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(0, 0, 0, -1, 0, 0, 0, -1);
 
-        assert_eq!(r, mem::transmute(__msa_ceqi_h(mem::transmute(a), -11)));
+        assert_eq!(r, mem::transmute(__msa_ceqi_h::<-11>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11365,7 +11364,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(0, 0, -1, 0);
 
-        assert_eq!(r, mem::transmute(__msa_ceqi_w(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_ceqi_w::<5>(mem::transmute(a))));
     }
 
     // FIXME: https://reviews.llvm.org/D59884
@@ -11552,7 +11551,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i8x16::new(-1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1, -1, -1, 0, -1);
 
-        assert_eq!(r, mem::transmute(__msa_clei_s_b(mem::transmute(a), -2)));
+        assert_eq!(r, mem::transmute(__msa_clei_s_b::<-2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11565,7 +11564,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(0, 0, 0, -1, 0, 0, 0, -1);
 
-        assert_eq!(r, mem::transmute(__msa_clei_s_h(mem::transmute(a), -1)));
+        assert_eq!(r, mem::transmute(__msa_clei_s_h::<-1>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11575,7 +11574,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(0, 0, -1, 0);
 
-        assert_eq!(r, mem::transmute(__msa_clei_s_w(mem::transmute(a), 6)));
+        assert_eq!(r, mem::transmute(__msa_clei_s_w::<6>(mem::transmute(a))));
     }
 
     // FIXME: https://reviews.llvm.org/D59884
@@ -11608,7 +11607,7 @@ mod tests {
             -1, 0, 0, 0
         );
 
-        assert_eq!(r, mem::transmute(__msa_clei_u_b(mem::transmute(a), 25)));
+        assert_eq!(r, mem::transmute(__msa_clei_u_b::<25>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11621,7 +11620,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(-1, 0, -1, 0, -1, 0, -1, 0);
 
-        assert_eq!(r, mem::transmute(__msa_clei_u_h(mem::transmute(a), 25)));
+        assert_eq!(r, mem::transmute(__msa_clei_u_h::<25>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11631,7 +11630,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(-1, 0, -1, 0);
 
-        assert_eq!(r, mem::transmute(__msa_clei_u_w(mem::transmute(a), 31)));
+        assert_eq!(r, mem::transmute(__msa_clei_u_w::<31>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11641,7 +11640,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(-1, 0);
 
-        assert_eq!(r, mem::transmute(__msa_clei_u_d(mem::transmute(a), 25)));
+        assert_eq!(r, mem::transmute(__msa_clei_u_d::<25>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11811,7 +11810,7 @@ mod tests {
             0, -1, 0, 0
         );
 
-        assert_eq!(r, mem::transmute(__msa_clti_s_b(mem::transmute(a), -5)));
+        assert_eq!(r, mem::transmute(__msa_clti_s_b::<-5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11824,7 +11823,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(-1, 0, 0, 0, -1, 0, 0, 0);
 
-        assert_eq!(r, mem::transmute(__msa_clti_s_h(mem::transmute(a), 15)));
+        assert_eq!(r, mem::transmute(__msa_clti_s_h::<15>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11834,7 +11833,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(-1, 0, -1, 0);
 
-        assert_eq!(r, mem::transmute(__msa_clti_s_w(mem::transmute(a), -10)));
+        assert_eq!(r, mem::transmute(__msa_clti_s_w::<-10>(mem::transmute(a))));
     }
 
     // FIXME: https://reviews.llvm.org/D59884
@@ -11867,7 +11866,7 @@ mod tests {
             -1, 0, 0, 0
         );
 
-        assert_eq!(r, mem::transmute(__msa_clti_u_b(mem::transmute(a), 50)));
+        assert_eq!(r, mem::transmute(__msa_clti_u_b::<3>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11880,7 +11879,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
 
-        assert_eq!(r, mem::transmute(__msa_clti_u_h(mem::transmute(a), 30)));
+        assert_eq!(r, mem::transmute(__msa_clti_u_h::<30>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11890,7 +11889,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(0, 0, 0, 0);
 
-        assert_eq!(r, mem::transmute(__msa_clti_u_w(mem::transmute(a), 10)));
+        assert_eq!(r, mem::transmute(__msa_clti_u_w::<10>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11900,7 +11899,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(-1, 0);
 
-        assert_eq!(r, mem::transmute(__msa_clti_u_d(mem::transmute(a), 10)));
+        assert_eq!(r, mem::transmute(__msa_clti_u_d::<10>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11915,7 +11914,7 @@ mod tests {
         #[rustfmt::skip]
         let r = -100 as i32;
 
-        assert_eq!(r, mem::transmute(__msa_copy_s_b(mem::transmute(a), 12)));
+        assert_eq!(r, mem::transmute(__msa_copy_s_b::<12>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11928,7 +11927,7 @@ mod tests {
         #[rustfmt::skip]
         let r = 32767 as i32;
 
-        assert_eq!(r, mem::transmute(__msa_copy_s_h(mem::transmute(a), 4)));
+        assert_eq!(r, mem::transmute(__msa_copy_s_h::<4>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11937,7 +11936,7 @@ mod tests {
         let a = i32x4::new(100, 2147483647, 5, -2147483647);
         let r = 2147483647 as i32;
 
-        assert_eq!(r, mem::transmute(__msa_copy_s_w(mem::transmute(a), 1)));
+        assert_eq!(r, mem::transmute(__msa_copy_s_w::<1>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11947,7 +11946,7 @@ mod tests {
         #[rustfmt::skip]
         let r = 9223372036854775807 as i64;
 
-        assert_eq!(r, mem::transmute(__msa_copy_s_d(mem::transmute(a), 1)));
+        assert_eq!(r, mem::transmute(__msa_copy_s_d::<1>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11962,7 +11961,7 @@ mod tests {
         #[rustfmt::skip]
         let r = 100 as u32;
 
-        assert_eq!(r, mem::transmute(__msa_copy_u_b(mem::transmute(a), 12)));
+        assert_eq!(r, mem::transmute(__msa_copy_u_b::<12>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11975,7 +11974,7 @@ mod tests {
         #[rustfmt::skip]
         let r = 32767 as u32;
 
-        assert_eq!(r, mem::transmute(__msa_copy_u_h(mem::transmute(a), 4)));
+        assert_eq!(r, mem::transmute(__msa_copy_u_h::<4>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11985,7 +11984,7 @@ mod tests {
         #[rustfmt::skip]
         let r = 2147483647 as u32;
 
-        assert_eq!(r, mem::transmute(__msa_copy_u_w(mem::transmute(a), 1)));
+        assert_eq!(r, mem::transmute(__msa_copy_u_w::<1>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -11995,7 +11994,7 @@ mod tests {
         #[rustfmt::skip]
         let r = 9223372036854775807 as u64;
 
-        assert_eq!(r, mem::transmute(__msa_copy_u_d(mem::transmute(a), 1)));
+        assert_eq!(r, mem::transmute(__msa_copy_u_d::<1>(mem::transmute(a))));
     }
 
     // Can not be tested in user mode
@@ -14619,7 +14618,10 @@ mod tests {
             5, 127, 4, 127
         );
 
-        assert_eq!(r, mem::transmute(__msa_insert_b(mem::transmute(a), 12, 5)));
+        assert_eq!(
+            r,
+            mem::transmute(__msa_insert_b::<12>(mem::transmute(a), 5))
+        );
     }
 
     #[simd_test(enable = "msa")]
@@ -14635,7 +14637,7 @@ mod tests {
             5, 3276, 100, 11
         );
 
-        assert_eq!(r, mem::transmute(__msa_insert_h(mem::transmute(a), 4, 5)));
+        assert_eq!(r, mem::transmute(__msa_insert_h::<4>(mem::transmute(a), 5)));
     }
 
     #[simd_test(enable = "msa")]
@@ -14645,7 +14647,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(100, 7, 5, -2147483647);
 
-        assert_eq!(r, mem::transmute(__msa_insert_w(mem::transmute(a), 1, 7)));
+        assert_eq!(r, mem::transmute(__msa_insert_w::<1>(mem::transmute(a), 7)));
     }
 
     #[simd_test(enable = "msa")]
@@ -14655,7 +14657,10 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(3, 100);
 
-        assert_eq!(r, mem::transmute(__msa_insert_d(mem::transmute(a), 1, 100)));
+        assert_eq!(
+            r,
+            mem::transmute(__msa_insert_d::<1>(mem::transmute(a), 100))
+        );
     }
 
     #[simd_test(enable = "msa")]
@@ -14684,7 +14689,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_insve_b(mem::transmute(a), 12, mem::transmute(b)))
+            mem::transmute(__msa_insve_b::<12>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -14708,7 +14713,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_insve_h(mem::transmute(a), 4, mem::transmute(b)))
+            mem::transmute(__msa_insve_h::<4>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -14723,7 +14728,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_insve_w(mem::transmute(a), 3, mem::transmute(b)))
+            mem::transmute(__msa_insve_w::<3>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -14738,7 +14743,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_insve_d(mem::transmute(a), 1, mem::transmute(b)))
+            mem::transmute(__msa_insve_d::<1>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -14760,7 +14765,7 @@ mod tests {
             25, 26, 27, 28
         );
 
-        assert_eq!(r, mem::transmute(__msa_ld_b(p, 9)));
+        assert_eq!(r, mem::transmute(__msa_ld_b::<9>(p)));
     }
 
     #[simd_test(enable = "msa")]
@@ -14774,7 +14779,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(3, 4, 5, 6, 7, 8, 9, 10);
 
-        assert_eq!(r, mem::transmute(__msa_ld_h(p, -2)));
+        assert_eq!(r, mem::transmute(__msa_ld_h::<-2>(p)));
     }
 
     #[simd_test(enable = "msa")]
@@ -14785,7 +14790,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(2, 3, 4, 5);
 
-        assert_eq!(r, mem::transmute(__msa_ld_w(p, -4)));
+        assert_eq!(r, mem::transmute(__msa_ld_w::<-4>(p)));
     }
 
     #[simd_test(enable = "msa")]
@@ -14796,7 +14801,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(0, 1);
 
-        assert_eq!(r, mem::transmute(__msa_ld_d(p, -32)));
+        assert_eq!(r, mem::transmute(__msa_ld_d::<-32>(p)));
     }
 
     #[simd_test(enable = "msa")]
@@ -14809,7 +14814,7 @@ mod tests {
             -20, -20, -20, -20
         );
 
-        assert_eq!(r, mem::transmute(__msa_ldi_b(-20)));
+        assert_eq!(r, mem::transmute(__msa_ldi_b::<-20>()));
     }
 
     #[simd_test(enable = "msa")]
@@ -14820,7 +14825,7 @@ mod tests {
             255, 255, 255, 255
         );
 
-        assert_eq!(r, mem::transmute(__msa_ldi_h(255)));
+        assert_eq!(r, mem::transmute(__msa_ldi_h::<255>()));
     }
 
     #[simd_test(enable = "msa")]
@@ -14828,7 +14833,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(-509, -509, -509, -509);
 
-        assert_eq!(r, mem::transmute(__msa_ldi_w(-509)));
+        assert_eq!(r, mem::transmute(__msa_ldi_w::<-509>()));
     }
 
     // FIXME: https://reviews.llvm.org/D59884
@@ -15289,7 +15294,7 @@ mod tests {
             1, -16, -6, 8
         );
 
-        assert_eq!(r, mem::transmute(__msa_maxi_s_b(mem::transmute(a), -16)));
+        assert_eq!(r, mem::transmute(__msa_maxi_s_b::<-16>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15299,7 +15304,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(15, 15, 15, 15, 15, 15, 15, 15);
 
-        assert_eq!(r, mem::transmute(__msa_maxi_s_h(mem::transmute(a), 15)));
+        assert_eq!(r, mem::transmute(__msa_maxi_s_h::<15>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15309,7 +15314,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(1, 3, -5, -5);
 
-        assert_eq!(r, mem::transmute(__msa_maxi_s_w(mem::transmute(a), -5)));
+        assert_eq!(r, mem::transmute(__msa_maxi_s_w::<-5>(mem::transmute(a))));
     }
 
     // FIXME: https://reviews.llvm.org/D59884
@@ -15342,7 +15347,7 @@ mod tests {
             5, 5, 6, 8
         );
 
-        assert_eq!(r, mem::transmute(__msa_maxi_u_b(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_maxi_u_b::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15352,7 +15357,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u16x8::new(5, 5, 6, 8, 5, 5, 6, 8);
 
-        assert_eq!(r, mem::transmute(__msa_maxi_u_h(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_maxi_u_h::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15362,7 +15367,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u32x4::new(5, 5, 6, 8);
 
-        assert_eq!(r, mem::transmute(__msa_maxi_u_w(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_maxi_u_w::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15372,7 +15377,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u64x2::new(5, 8);
 
-        assert_eq!(r, mem::transmute(__msa_maxi_u_d(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_maxi_u_d::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15542,7 +15547,7 @@ mod tests {
             -10, -10, -10, -10
         );
 
-        assert_eq!(r, mem::transmute(__msa_mini_s_b(mem::transmute(a), -10)));
+        assert_eq!(r, mem::transmute(__msa_mini_s_b::<-10>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15552,7 +15557,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(-3, -3, -3, -4, -3, -3, -3, -4);
 
-        assert_eq!(r, mem::transmute(__msa_mini_s_h(mem::transmute(a), -3)));
+        assert_eq!(r, mem::transmute(__msa_mini_s_h::<-3>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15562,7 +15567,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(-3, -3, -3, -4);
 
-        assert_eq!(r, mem::transmute(__msa_mini_s_w(mem::transmute(a), -3)));
+        assert_eq!(r, mem::transmute(__msa_mini_s_w::<-3>(mem::transmute(a))));
     }
 
     // FIXME: https://reviews.llvm.org/D59884
@@ -15670,7 +15675,7 @@ mod tests {
             1, 3, 5, 5
         );
 
-        assert_eq!(r, mem::transmute(__msa_mini_u_b(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_mini_u_b::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15680,7 +15685,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u16x8::new(1, 3, 5, 5, 1, 3, 5, 5);
 
-        assert_eq!(r, mem::transmute(__msa_mini_u_h(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_mini_u_h::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15690,7 +15695,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u32x4::new(1, 3, 5, 5);
 
-        assert_eq!(r, mem::transmute(__msa_mini_u_w(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_mini_u_w::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -15700,7 +15705,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u64x2::new(1, 5);
 
-        assert_eq!(r, mem::transmute(__msa_mini_u_d(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_mini_u_d::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16394,7 +16399,7 @@ mod tests {
             242, 241, 240, 235
         );
 
-        assert_eq!(r, mem::transmute(__msa_nori_b(mem::transmute(a), 4)));
+        assert_eq!(r, mem::transmute(__msa_nori_b::<4>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16444,7 +16449,7 @@ mod tests {
             13, 14, 15, 20
         );
 
-        assert_eq!(r, mem::transmute(__msa_ori_b(mem::transmute(a), 4)));
+        assert_eq!(r, mem::transmute(__msa_ori_b::<4>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16670,7 +16675,7 @@ mod tests {
             3, 3, 3, 1
         );
 
-        assert_eq!(r, mem::transmute(__msa_sat_s_b(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_sat_s_b::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16683,7 +16688,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(127, 127, 127, 1, 127, 127, 127, 1);
 
-        assert_eq!(r, mem::transmute(__msa_sat_s_h(mem::transmute(a), 7)));
+        assert_eq!(r, mem::transmute(__msa_sat_s_h::<7>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16693,7 +16698,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(131071, 131071, 131071, 1);
 
-        assert_eq!(r, mem::transmute(__msa_sat_s_w(mem::transmute(a), 17)));
+        assert_eq!(r, mem::transmute(__msa_sat_s_w::<17>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16703,7 +16708,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(137438953471, 1);
 
-        assert_eq!(r, mem::transmute(__msa_sat_s_d(mem::transmute(a), 37)));
+        assert_eq!(r, mem::transmute(__msa_sat_s_d::<37>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16723,7 +16728,7 @@ mod tests {
             7, 7, 7, 1
         );
 
-        assert_eq!(r, mem::transmute(__msa_sat_u_b(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_sat_u_b::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16736,7 +16741,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u16x8::new(255, 255, 155, 1, 255, 255, 155, 1);
 
-        assert_eq!(r, mem::transmute(__msa_sat_u_h(mem::transmute(a), 7)));
+        assert_eq!(r, mem::transmute(__msa_sat_u_h::<7>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16746,7 +16751,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u32x4::new(262143, 262143, 262143, 1);
 
-        assert_eq!(r, mem::transmute(__msa_sat_u_w(mem::transmute(a), 17)));
+        assert_eq!(r, mem::transmute(__msa_sat_u_w::<17>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16756,7 +16761,7 @@ mod tests {
         #[rustfmt::skip]
         let r = u64x2::new(274877906943, 1);
 
-        assert_eq!(r, mem::transmute(__msa_sat_u_d(mem::transmute(a), 37)));
+        assert_eq!(r, mem::transmute(__msa_sat_u_d::<37>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16776,7 +16781,7 @@ mod tests {
             11, 3, 4, 12
         );
 
-        assert_eq!(r, mem::transmute(__msa_shf_b(mem::transmute(a), 120)));
+        assert_eq!(r, mem::transmute(__msa_shf_b::<120>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16789,7 +16794,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(11, 14, 12, 13, 11, 14, 12, 13);
 
-        assert_eq!(r, mem::transmute(__msa_shf_h(mem::transmute(a), 156)));
+        assert_eq!(r, mem::transmute(__msa_shf_h::<156>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16799,7 +16804,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(1, 3, 2, 4);
 
-        assert_eq!(r, mem::transmute(__msa_shf_w(mem::transmute(a), 216)));
+        assert_eq!(r, mem::transmute(__msa_shf_w::<216>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -16903,7 +16908,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_sldi_b(mem::transmute(a), mem::transmute(b), 5))
+            mem::transmute(__msa_sldi_b::<5>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -16918,7 +16923,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_sldi_h(mem::transmute(a), mem::transmute(b), 2))
+            mem::transmute(__msa_sldi_h::<2>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -16933,7 +16938,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_sldi_w(mem::transmute(a), mem::transmute(b), 4))
+            mem::transmute(__msa_sldi_w::<0>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -16948,7 +16953,7 @@ mod tests {
 
         assert_eq!(
             r,
-            mem::transmute(__msa_sldi_d(mem::transmute(a), mem::transmute(b), 2))
+            mem::transmute(__msa_sldi_d::<0>(mem::transmute(a), mem::transmute(b)))
         );
     }
 
@@ -17044,7 +17049,7 @@ mod tests {
             4, 8, 12, 16
         );
 
-        assert_eq!(r, mem::transmute(__msa_slli_b(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_slli_b::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17057,7 +17062,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(4, 8, 12, 16, 4, 8, 12, 16);
 
-        assert_eq!(r, mem::transmute(__msa_slli_h(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_slli_h::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17067,7 +17072,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(4, 8, 12, 16);
 
-        assert_eq!(r, mem::transmute(__msa_slli_w(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_slli_w::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17077,7 +17082,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(2, 4);
 
-        assert_eq!(r, mem::transmute(__msa_slli_d(mem::transmute(a), 1)));
+        assert_eq!(r, mem::transmute(__msa_slli_d::<1>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17150,7 +17155,7 @@ mod tests {
             3, 3, 3, 3
         );
 
-        assert_eq!(r, mem::transmute(__msa_splati_b(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_splati_b::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17163,7 +17168,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
 
-        assert_eq!(r, mem::transmute(__msa_splati_h(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_splati_h::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17173,7 +17178,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(3, 3, 3, 3);
 
-        assert_eq!(r, mem::transmute(__msa_splati_w(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_splati_w::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17183,7 +17188,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(2, 2);
 
-        assert_eq!(r, mem::transmute(__msa_splati_d(mem::transmute(a), 1)));
+        assert_eq!(r, mem::transmute(__msa_splati_d::<1>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17287,7 +17292,7 @@ mod tests {
             31, 31, 13, 0
         );
 
-        assert_eq!(r, mem::transmute(__msa_srai_b(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srai_b::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17300,7 +17305,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(8191, 31, 13, 0, 8191, 31, 13, 0);
 
-        assert_eq!(r, mem::transmute(__msa_srai_h(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srai_h::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17309,7 +17314,7 @@ mod tests {
         let a = i32x4::new(i32::MAX, 125, 55, 1);
         let r = i32x4::new(536870911, 31, 13, 0);
 
-        assert_eq!(r, mem::transmute(__msa_srai_w(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srai_w::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17319,7 +17324,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(2305843009213693951, 13);
 
-        assert_eq!(r, mem::transmute(__msa_srai_d(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srai_d::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17423,7 +17428,7 @@ mod tests {
             31, 32, 14, 0
         );
 
-        assert_eq!(r, mem::transmute(__msa_srari_b(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srari_b::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17433,7 +17438,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i16x8::new(539, 289, 39, 0, 539, 289, 39, 0);
 
-        assert_eq!(r, mem::transmute(__msa_srari_h(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srari_h::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17443,7 +17448,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(52777789, 27777789, 2777789, 0);
 
-        assert_eq!(r, mem::transmute(__msa_srari_w(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srari_w::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17453,7 +17458,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(52777777789, 27777777789);
 
-        assert_eq!(r, mem::transmute(__msa_srari_d(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srari_d::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17554,7 +17559,7 @@ mod tests {
             6, 12, 25, 31
         );
 
-        assert_eq!(r, mem::transmute(__msa_srli_b(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srli_b::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17570,7 +17575,7 @@ mod tests {
             8191, 819, 25, 31
         );
 
-        assert_eq!(r, mem::transmute(__msa_srli_h(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srli_h::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17580,7 +17585,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(25, 536870911, 25, 536870911);
 
-        assert_eq!(r, mem::transmute(__msa_srli_w(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srli_w::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17590,7 +17595,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(50, 4611686018427387903);
 
-        assert_eq!(r, mem::transmute(__msa_srli_d(mem::transmute(a), 1)));
+        assert_eq!(r, mem::transmute(__msa_srli_d::<1>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17690,7 +17695,7 @@ mod tests {
             6, 13, 25, 32
         );
 
-        assert_eq!(r, mem::transmute(__msa_srlri_b(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srlri_b::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17702,7 +17707,7 @@ mod tests {
         );
         let r = i16x8::new(8192, 819, 25, 32, 8192, 819, 25, 32);
 
-        assert_eq!(r, mem::transmute(__msa_srlri_h(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srlri_h::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17712,7 +17717,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(25, 38, 50, 536870912);
 
-        assert_eq!(r, mem::transmute(__msa_srlri_w(mem::transmute(a), 2)));
+        assert_eq!(r, mem::transmute(__msa_srlri_w::<2>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17722,7 +17727,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(50, 4611686018427387904);
 
-        assert_eq!(r, mem::transmute(__msa_srlri_d(mem::transmute(a), 1)));
+        assert_eq!(r, mem::transmute(__msa_srlri_d::<1>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -17748,7 +17753,7 @@ mod tests {
             21, 22, 23, 24,
             25, 26, 27, 28
         ];
-        __msa_st_b(mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0);
+        __msa_st_b::<0>(mem::transmute(a), arr.as_mut_ptr() as *mut u8);
         assert_eq!(arr, r);
     }
 
@@ -17759,7 +17764,7 @@ mod tests {
         let mut arr: [i16; 8] = [0, 0, 0, 0, 0, 0, 0, 0];
         #[rustfmt::skip]
         let r  : [i16; 8] = [13, 14, 15, 16, 17, 18, 19, 20];
-        __msa_st_h(mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0);
+        __msa_st_h::<0>(mem::transmute(a), arr.as_mut_ptr() as *mut u8);
         assert_eq!(arr, r);
     }
 
@@ -17770,7 +17775,7 @@ mod tests {
         let mut arr: [i32; 4] = [0, 0, 0, 0];
         #[rustfmt::skip]
         let r  : [i32; 4] = [13, 14, 15, 16];
-        __msa_st_w(mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0);
+        __msa_st_w::<0>(mem::transmute(a), arr.as_mut_ptr() as *mut u8);
         assert_eq!(arr, r);
     }
 
@@ -17781,7 +17786,7 @@ mod tests {
         let mut arr: [i64; 2] = [0, 0];
         #[rustfmt::skip]
         let r : [i64; 2] = [13, 14];
-        __msa_st_d(mem::transmute(a), arr.as_mut_ptr() as *mut u8, 0);
+        __msa_st_d::<0>(mem::transmute(a), arr.as_mut_ptr() as *mut u8);
         assert_eq!(arr, r);
     }
 
@@ -18195,7 +18200,7 @@ mod tests {
             95, 122, 45, 123
         );
 
-        assert_eq!(r, mem::transmute(__msa_subvi_b(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_subvi_b::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -18211,7 +18216,7 @@ mod tests {
             32762, 3271, -105, 32763
         );
 
-        assert_eq!(r, mem::transmute(__msa_subvi_h(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_subvi_h::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -18221,7 +18226,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i32x4::new(95, 145, 195, 2147483642);
 
-        assert_eq!(r, mem::transmute(__msa_subvi_w(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_subvi_w::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -18231,7 +18236,7 @@ mod tests {
         #[rustfmt::skip]
         let r = i64x2::new(95, 9223372036854775802);
 
-        assert_eq!(r, mem::transmute(__msa_subvi_d(mem::transmute(a), 5)));
+        assert_eq!(r, mem::transmute(__msa_subvi_d::<5>(mem::transmute(a))));
     }
 
     #[simd_test(enable = "msa")]
@@ -18393,6 +18398,6 @@ mod tests {
             9, 10, 11, 20
         );
 
-        assert_eq!(r, mem::transmute(__msa_xori_b(mem::transmute(a), 4)));
+        assert_eq!(r, mem::transmute(__msa_xori_b::<4>(mem::transmute(a))));
     }
 }
diff --git a/crates/core_arch/src/mod.rs b/crates/core_arch/src/mod.rs
index 2105cca1b4..2483d07b23 100644
--- a/crates/core_arch/src/mod.rs
+++ b/crates/core_arch/src/mod.rs
@@ -5,6 +5,11 @@
 #[macro_use]
 mod macros;
 
+#[cfg(test)]
+mod test;
+#[cfg(test)]
+use test::assert_eq_const;
+
 #[cfg(any(target_arch = "riscv32", target_arch = "riscv64", doc))]
 mod riscv_shared;
 
@@ -274,6 +279,16 @@ pub mod arch {
         pub use crate::core_arch::nvptx::*;
     }
 
+    /// Platform-specific intrinsics for the `amdgpu` platform.
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "amdgpu", doc))]
+    #[doc(cfg(target_arch = "amdgpu"))]
+    #[unstable(feature = "stdarch_amdgpu", issue = "149988")]
+    pub mod amdgpu {
+        pub use crate::core_arch::amdgpu::*;
+    }
+
     /// Platform-specific intrinsics for the `loongarch32` platform.
     ///
     /// See the [module documentation](../index.html) for more details.
@@ -305,6 +320,19 @@ pub mod arch {
     pub mod s390x {
         pub use crate::core_arch::s390x::*;
     }
+
+    /// Platform-specific intrinsics for the `hexagon` platform.
+    ///
+    /// This module provides intrinsics for the Qualcomm Hexagon DSP architecture,
+    /// including the Hexagon Vector Extensions (HVX).
+    ///
+    /// See the [module documentation](../index.html) for more details.
+    #[cfg(any(target_arch = "hexagon", doc))]
+    #[doc(cfg(target_arch = "hexagon"))]
+    #[unstable(feature = "stdarch_hexagon", issue = "151523")]
+    pub mod hexagon {
+        pub use crate::core_arch::hexagon::*;
+    }
 }
 
 #[cfg(any(target_arch = "x86", target_arch = "x86_64", doc))]
@@ -349,6 +377,10 @@ mod powerpc64;
 #[doc(cfg(target_arch = "nvptx64"))]
 mod nvptx;
 
+#[cfg(any(target_arch = "amdgpu", doc))]
+#[doc(cfg(target_arch = "amdgpu"))]
+mod amdgpu;
+
 #[cfg(any(target_arch = "loongarch32", doc))]
 #[doc(cfg(target_arch = "loongarch32"))]
 mod loongarch32;
@@ -360,3 +392,7 @@ mod loongarch64;
 #[cfg(any(target_arch = "s390x", doc))]
 #[doc(cfg(target_arch = "s390x"))]
 mod s390x;
+
+#[cfg(any(target_arch = "hexagon", doc))]
+#[doc(cfg(target_arch = "hexagon"))]
+mod hexagon;
diff --git a/crates/core_arch/src/nvptx/mod.rs b/crates/core_arch/src/nvptx/mod.rs
index 8d16dfb53d..b63a5d01a7 100644
--- a/crates/core_arch/src/nvptx/mod.rs
+++ b/crates/core_arch/src/nvptx/mod.rs
@@ -23,32 +23,34 @@ unsafe extern "C" {
     #[link_name = "llvm.nvvm.barrier0"]
     fn syncthreads() -> ();
     #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
-    fn block_dim_x() -> i32;
+    fn block_dim_x() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
-    fn block_dim_y() -> i32;
+    fn block_dim_y() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
-    fn block_dim_z() -> i32;
+    fn block_dim_z() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
-    fn block_idx_x() -> i32;
+    fn block_idx_x() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
-    fn block_idx_y() -> i32;
+    fn block_idx_y() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
-    fn block_idx_z() -> i32;
+    fn block_idx_z() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
-    fn grid_dim_x() -> i32;
+    fn grid_dim_x() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
-    fn grid_dim_y() -> i32;
+    fn grid_dim_y() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
-    fn grid_dim_z() -> i32;
+    fn grid_dim_z() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
-    fn thread_idx_x() -> i32;
+    fn thread_idx_x() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
-    fn thread_idx_y() -> i32;
+    fn thread_idx_y() -> u32;
     #[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
-    fn thread_idx_z() -> i32;
+    fn thread_idx_z() -> u32;
 }
 
 /// Synchronizes all threads in the block.
+///
+#[doc = include_str!("../amdgpu/intrinsic_is_convergent.md")]
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
 pub unsafe fn _syncthreads() -> () {
@@ -58,84 +60,84 @@ pub unsafe fn _syncthreads() -> () {
 /// x-th thread-block dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_dim_x() -> i32 {
+pub unsafe fn _block_dim_x() -> u32 {
     block_dim_x()
 }
 
 /// y-th thread-block dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_dim_y() -> i32 {
+pub unsafe fn _block_dim_y() -> u32 {
     block_dim_y()
 }
 
 /// z-th thread-block dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_dim_z() -> i32 {
+pub unsafe fn _block_dim_z() -> u32 {
     block_dim_z()
 }
 
 /// x-th thread-block index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_idx_x() -> i32 {
+pub unsafe fn _block_idx_x() -> u32 {
     block_idx_x()
 }
 
 /// y-th thread-block index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_idx_y() -> i32 {
+pub unsafe fn _block_idx_y() -> u32 {
     block_idx_y()
 }
 
 /// z-th thread-block index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _block_idx_z() -> i32 {
+pub unsafe fn _block_idx_z() -> u32 {
     block_idx_z()
 }
 
 /// x-th block-grid dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _grid_dim_x() -> i32 {
+pub unsafe fn _grid_dim_x() -> u32 {
     grid_dim_x()
 }
 
 /// y-th block-grid dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _grid_dim_y() -> i32 {
+pub unsafe fn _grid_dim_y() -> u32 {
     grid_dim_y()
 }
 
 /// z-th block-grid dimension.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _grid_dim_z() -> i32 {
+pub unsafe fn _grid_dim_z() -> u32 {
     grid_dim_z()
 }
 
 /// x-th thread index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _thread_idx_x() -> i32 {
+pub unsafe fn _thread_idx_x() -> u32 {
     thread_idx_x()
 }
 
 /// y-th thread index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _thread_idx_y() -> i32 {
+pub unsafe fn _thread_idx_y() -> u32 {
     thread_idx_y()
 }
 
 /// z-th thread index.
 #[inline]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
-pub unsafe fn _thread_idx_z() -> i32 {
+pub unsafe fn _thread_idx_z() -> u32 {
     thread_idx_z()
 }
 
diff --git a/crates/core_arch/src/nvptx/packed.rs b/crates/core_arch/src/nvptx/packed.rs
index 856aeea4b6..1c7e81268f 100644
--- a/crates/core_arch/src/nvptx/packed.rs
+++ b/crates/core_arch/src/nvptx/packed.rs
@@ -99,7 +99,7 @@ pub unsafe fn f16x2_neg(a: f16x2) -> f16x2 {
 #[cfg_attr(test, assert_instr(min.f16x2))]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
 pub unsafe fn f16x2_min(a: f16x2, b: f16x2) -> f16x2 {
-    simd_fmin(a, b)
+    simd_minimum_number_nsz(a, b)
 }
 
 /// Find the minimum of two values, NaNs pass through.
@@ -123,7 +123,7 @@ pub unsafe fn f16x2_min_nan(a: f16x2, b: f16x2) -> f16x2 {
 #[cfg_attr(test, assert_instr(max.f16x2))]
 #[unstable(feature = "stdarch_nvptx", issue = "111199")]
 pub unsafe fn f16x2_max(a: f16x2, b: f16x2) -> f16x2 {
-    simd_fmax(a, b)
+    simd_maximum_number_nsz(a, b)
 }
 
 /// Find the maximum of two values, NaNs pass through.
diff --git a/crates/core_arch/src/powerpc/altivec.rs b/crates/core_arch/src/powerpc/altivec.rs
index a7bbf35ed8..78ec39f91f 100644
--- a/crates/core_arch/src/powerpc/altivec.rs
+++ b/crates/core_arch/src/powerpc/altivec.rs
@@ -47,6 +47,54 @@ types! {
     pub struct vector_float(4 x f32);
 }
 
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+impl From<m8x16> for vector_bool_char {
+    #[inline]
+    fn from(value: m8x16) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+impl From<vector_bool_char> for m8x16 {
+    #[inline]
+    fn from(value: vector_bool_char) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+impl From<m16x8> for vector_bool_short {
+    #[inline]
+    fn from(value: m16x8) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+impl From<vector_bool_short> for m16x8 {
+    #[inline]
+    fn from(value: vector_bool_short) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+impl From<m32x4> for vector_bool_int {
+    #[inline]
+    fn from(value: m32x4) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+impl From<vector_bool_int> for m32x4 {
+    #[inline]
+    fn from(value: vector_bool_int) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
 #[allow(improper_ctypes)]
 unsafe extern "C" {
     #[link_name = "llvm.ppc.altivec.lvx"]
@@ -129,8 +177,6 @@ unsafe extern "C" {
         b: vector_signed_short,
         c: vector_signed_int,
     ) -> vector_signed_int;
-    #[link_name = "llvm.ppc.altivec.vnmsubfp"]
-    fn vnmsubfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float;
     #[link_name = "llvm.ppc.altivec.vsum2sws"]
     fn vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
     #[link_name = "llvm.ppc.altivec.vsum4ubs"]
@@ -364,17 +410,46 @@ unsafe extern "C" {
     fn vrfin(a: vector_float) -> vector_float;
 }
 
-impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, f32x4 }
-
-impl_neg! { i8x16 : 0 }
-impl_neg! { i16x8 : 0 }
-impl_neg! { i32x4 : 0 }
-impl_neg! { f32x4 : 0f32 }
-
 #[macro_use]
 mod sealed {
     use super::*;
 
+    #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+    pub trait VectorNeg {
+        unsafe fn vec_neg(self) -> Self;
+    }
+
+    macro_rules! impl_neg {
+        ($($v:ty)*) => {
+            $(
+                #[unstable(feature = "stdarch_powerpc", issue = "111145")]
+                impl VectorNeg for $v {
+                    #[inline]
+                    #[target_feature(enable = "altivec")]
+                    unsafe fn vec_neg(self) -> Self {
+                        simd_neg(self)
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_neg! {
+        vector_signed_char
+        vector_unsigned_char
+        vector_bool_char
+
+        vector_signed_short
+        vector_unsigned_short
+        vector_bool_short
+
+        vector_signed_int
+        vector_unsigned_int
+        vector_bool_int
+
+        vector_float
+    }
+
     #[unstable(feature = "stdarch_powerpc", issue = "111145")]
     pub trait VectorInsert {
         type Scalar;
@@ -1380,7 +1455,7 @@ mod sealed {
             #[inline]
             #[target_feature(enable = "altivec")]
             unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) {
-                v.vec_max(-v)
+                v.vec_max(simd_neg(v))
             }
 
             impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) }
@@ -1428,7 +1503,7 @@ mod sealed {
     #[cfg_attr(test, assert_instr(vspltb, IMM4 = 15))]
     unsafe fn vspltb<const IMM4: u32>(a: vector_signed_char) -> vector_signed_char {
         static_assert_uimm_bits!(IMM4, 4);
-        simd_shuffle(a, a, const { u32x16::from_array([IMM4; 16]) })
+        simd_shuffle(a, a, const { u32x16::splat(IMM4) })
     }
 
     #[inline]
@@ -1436,7 +1511,7 @@ mod sealed {
     #[cfg_attr(test, assert_instr(vsplth, IMM3 = 7))]
     unsafe fn vsplth<const IMM3: u32>(a: vector_signed_short) -> vector_signed_short {
         static_assert_uimm_bits!(IMM3, 3);
-        simd_shuffle(a, a, const { u32x8::from_array([IMM3; 8]) })
+        simd_shuffle(a, a, const { u32x8::splat(IMM3) })
     }
 
     #[inline]
@@ -1445,7 +1520,7 @@ mod sealed {
     #[cfg_attr(all(test, target_feature = "vsx"), assert_instr(xxspltw, IMM2 = 3))]
     unsafe fn vspltw<const IMM2: u32>(a: vector_signed_int) -> vector_signed_int {
         static_assert_uimm_bits!(IMM2, 2);
-        simd_shuffle(a, a, const { u32x4::from_array([IMM2; 4]) })
+        simd_shuffle(a, a, const { u32x4::splat(IMM2) })
     }
 
     #[unstable(feature = "stdarch_powerpc", issue = "111145")]
@@ -1852,9 +1927,9 @@ mod sealed {
 
     #[inline]
     #[target_feature(enable = "altivec")]
-    #[cfg_attr(test, assert_instr(vnmsubfp))]
-    unsafe fn vec_vnmsubfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
-        vnmsubfp(a, b, c)
+    #[cfg_attr(test, assert_instr(xvnmsubasp))]
+    pub unsafe fn vec_vnmsubfp(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
+        simd_neg(simd_fma(a, b, simd_neg(c)))
     }
 
     #[inline]
@@ -3220,7 +3295,7 @@ mod sealed {
         unsafe fn vec_round(self) -> Self;
     }
 
-    test_impl! { vec_vrfin(a: vector_float) -> vector_float [vrfin, xvrspic] }
+    test_impl! { vec_vrfin(a: vector_float) -> vector_float [vrfin, vrfin] }
 
     #[unstable(feature = "stdarch_powerpc", issue = "111145")]
     impl VectorRound for vector_float {
@@ -4032,6 +4107,14 @@ pub unsafe fn vec_mfvscr() -> vector_unsigned_short {
     mfvscr()
 }
 
+/// Vector Negate
+#[inline]
+#[target_feature(enable = "altivec")]
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+pub unsafe fn vec_neg<T: sealed::VectorNeg>(a: T) -> T {
+    a.vec_neg()
+}
+
 /// Vector add.
 #[inline]
 #[target_feature(enable = "altivec")]
@@ -4244,7 +4327,7 @@ pub unsafe fn vec_madd(a: vector_float, b: vector_float, c: vector_float) -> vec
 #[target_feature(enable = "altivec")]
 #[unstable(feature = "stdarch_powerpc", issue = "111145")]
 pub unsafe fn vec_nmsub(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
-    vnmsubfp(a, b, c)
+    sealed::vec_vnmsubfp(a, b, c)
 }
 
 /// Vector Select
@@ -4616,22 +4699,22 @@ mod tests {
         };
         { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty) = transmute($ty::new($($a),+));
-                let b: s_t_l!($ty) = transmute($ty::new($($b),+));
+            fn $name() {
+                let a: s_t_l!($ty) = $ty::from_array([$($a),+]).into();
+                let b: s_t_l!($ty) = $ty::from_array([$($b),+]).into();
 
-                let d = $ty_out::new($($d),+);
-                let r : $ty_out = transmute($fn(a, b));
+                let d = $ty_out::from_array([$($d),+]);
+                let r = $ty_out::from(unsafe { $fn(a, b) });
                 assert_eq!(d, r);
             }
          };
          { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($b:expr),+], $d:expr } => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty) = transmute($ty::new($($a),+));
-                let b: s_t_l!($ty) = transmute($ty::new($($b),+));
+            fn $name() {
+                let a: s_t_l!($ty) = $ty::from_array([$($a),+]).into();
+                let b: s_t_l!($ty) = $ty::from_array([$($b),+]).into();
 
-                let r : $ty_out = transmute($fn(a, b));
+                let r = $ty_out::from(unsafe { $fn(a, b) });
                 assert_eq!($d, r);
             }
          }
@@ -4640,12 +4723,12 @@ mod tests {
     macro_rules! test_vec_1 {
         { $name: ident, $fn:ident, f32x4, [$($a:expr),+], ~[$($d:expr),+] } => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a: vector_float = transmute(f32x4::new($($a),+));
+            fn $name() {
+                let a = vector_float::from(f32x4::new($($a),+));
 
-                let d: vector_float = transmute(f32x4::new($($d),+));
-                let r = transmute(vec_cmple(vec_abs(vec_sub($fn(a), d)), vec_splats(f32::EPSILON)));
-                let e = m32x4::new(true, true, true, true);
+                let d = vector_float::from(f32x4::new($($d),+));
+                let r = m32x4::from(unsafe { vec_cmple(vec_abs(vec_sub($fn(a), d)), vec_splats(f32::EPSILON)) });
+                let e = m32x4::splat(true);
                 assert_eq!(e, r);
             }
         };
@@ -4654,18 +4737,18 @@ mod tests {
         };
         { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($d:expr),+] } => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty) = transmute($ty::new($($a),+));
+            fn $name() {
+                let a: s_t_l!($ty) = $ty::new($($a),+).into();
 
                 let d = $ty_out::new($($d),+);
-                let r : $ty_out = transmute($fn(a));
+                let r = $ty_out::from(unsafe { $fn(a) });
                 assert_eq!(d, r);
             }
         }
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_ld() {
+    fn test_vec_ld() {
         let pat = [
             u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
             u8x16::new(
@@ -4674,14 +4757,14 @@ mod tests {
         ];
 
         for off in 0..16 {
-            let v: u8x16 = transmute(vec_ld(0, (pat.as_ptr() as *const u8).offset(off)));
+            let v = u8x16::from(unsafe { vec_ld(0, (pat.as_ptr() as *const u8).offset(off)) });
             assert_eq!(
                 v,
                 u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
             );
         }
         for off in 16..32 {
-            let v: u8x16 = transmute(vec_ld(0, (pat.as_ptr() as *const u8).offset(off)));
+            let v = u8x16::from(unsafe { vec_ld(0, (pat.as_ptr() as *const u8).offset(off)) });
             assert_eq!(
                 v,
                 u8x16::new(
@@ -4692,7 +4775,7 @@ mod tests {
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_xl() {
+    fn test_vec_xl() {
         let pat = [
             u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
             u8x16::new(
@@ -4701,23 +4784,25 @@ mod tests {
         ];
 
         for off in 0..16 {
-            let val: u8x16 = transmute(vec_xl(0, (pat.as_ptr() as *const u8).offset(off)));
+            let val = u8x16::from(unsafe { vec_xl(0, (pat.as_ptr() as *const u8).offset(off)) });
             for i in 0..16 {
-                let v = val.extract(i);
+                let v = val.extract_dyn(i);
                 assert_eq!(off as usize + i, v as usize);
             }
         }
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_xst() {
-        let v: vector_unsigned_char = transmute(u8x16::new(
+    fn test_vec_xst() {
+        let v = vector_unsigned_char::from(u8x16::new(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
         ));
 
         for off in 0..16 {
             let mut buf = [0u8; 32];
-            vec_xst(v, 0, (buf.as_mut_ptr() as *mut u8).offset(off));
+            unsafe {
+                vec_xst(v, 0, (buf.as_mut_ptr() as *mut u8).offset(off));
+            }
             for i in 0..16 {
                 assert_eq!(i as u8, buf[off as usize..][i]);
             }
@@ -4725,7 +4810,7 @@ mod tests {
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_ldl() {
+    fn test_vec_ldl() {
         let pat = [
             u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
             u8x16::new(
@@ -4734,14 +4819,14 @@ mod tests {
         ];
 
         for off in 0..16 {
-            let v: u8x16 = transmute(vec_ldl(0, (pat.as_ptr() as *const u8).offset(off)));
+            let v = u8x16::from(unsafe { vec_ldl(0, (pat.as_ptr() as *const u8).offset(off)) });
             assert_eq!(
                 v,
                 u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
             );
         }
         for off in 16..32 {
-            let v: u8x16 = transmute(vec_ldl(0, (pat.as_ptr() as *const u8).offset(off)));
+            let v = u8x16::from(unsafe { vec_ldl(0, (pat.as_ptr() as *const u8).offset(off)) });
             assert_eq!(
                 v,
                 u8x16::new(
@@ -4752,31 +4837,31 @@ mod tests {
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_lde_u8() {
+    fn test_vec_lde_u8() {
         let pat = [u8x16::new(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
         )];
         for off in 0..16 {
-            let v: u8x16 = transmute(vec_lde(off, pat.as_ptr() as *const u8));
-            assert_eq!(off as u8, v.extract(off as _));
+            let v = u8x16::from(unsafe { vec_lde(off, pat.as_ptr() as *const u8) });
+            assert_eq!(off as u8, v.extract_dyn(off as _));
         }
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_lde_u16() {
+    fn test_vec_lde_u16() {
         let pat = [u16x8::new(0, 1, 2, 3, 4, 5, 6, 7)];
         for off in 0..8 {
-            let v: u16x8 = transmute(vec_lde(off * 2, pat.as_ptr() as *const u16));
-            assert_eq!(off as u16, v.extract(off as _));
+            let v = u16x8::from(unsafe { vec_lde(off * 2, pat.as_ptr() as *const u16) });
+            assert_eq!(off as u16, v.extract_dyn(off as _));
         }
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_lde_u32() {
+    fn test_vec_lde_u32() {
         let pat = [u32x4::new(0, 1, 2, 3)];
         for off in 0..4 {
-            let v: u32x4 = transmute(vec_lde(off * 4, pat.as_ptr() as *const u32));
-            assert_eq!(off as u32, v.extract(off as _));
+            let v = u32x4::from(unsafe { vec_lde(off * 4, pat.as_ptr() as *const u32) });
+            assert_eq!(off as u32, v.extract_dyn(off as _));
         }
     }
 
@@ -5781,9 +5866,9 @@ mod tests {
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_cmpb() {
-        let a: vector_float = transmute(f32x4::new(0.1, 0.5, 0.6, 0.9));
-        let b: vector_float = transmute(f32x4::new(-0.1, 0.5, -0.6, 0.9));
+    fn test_vec_cmpb() {
+        let a = vector_float::from(f32x4::new(0.1, 0.5, 0.6, 0.9));
+        let b = vector_float::from(f32x4::new(-0.1, 0.5, -0.6, 0.9));
         let d = i32x4::new(
             -0b10000000000000000000000000000000,
             0,
@@ -5791,15 +5876,15 @@ mod tests {
             0,
         );
 
-        assert_eq!(d, transmute(vec_cmpb(a, b)));
+        assert_eq!(d, i32x4::from(unsafe { vec_cmpb(a, b) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_ceil() {
-        let a: vector_float = transmute(f32x4::new(0.1, 0.5, 0.6, 0.9));
+    fn test_vec_ceil() {
+        let a = vector_float::from(f32x4::new(0.1, 0.5, 0.6, 0.9));
         let d = f32x4::new(1.0, 1.0, 1.0, 1.0);
 
-        assert_eq!(d, transmute(vec_ceil(a)));
+        assert_eq!(d, f32x4::from(unsafe { vec_ceil(a) }));
     }
 
     test_vec_2! { test_vec_andc, vec_andc, i32x4,
@@ -5889,11 +5974,11 @@ mod tests {
     macro_rules! test_vec_abs {
         { $name: ident, $ty: ident, $a: expr, $d: expr } => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a = vec_splats($a);
-                let a: s_t_l!($ty) = vec_abs(a);
+            fn $name() {
+                let a = unsafe { vec_splats($a) };
+                let a: s_t_l!($ty) = unsafe { vec_abs(a) };
                 let d = $ty::splat($d);
-                assert_eq!(d, transmute(a));
+                assert_eq!(d, $ty::from(a));
             }
         }
     }
@@ -5906,11 +5991,11 @@ mod tests {
     macro_rules! test_vec_abss {
         { $name: ident, $ty: ident, $a: expr, $d: expr } => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a = vec_splats($a);
-                let a: s_t_l!($ty) = vec_abss(a);
+            fn $name() {
+                let a = unsafe { vec_splats($a) };
+                let a: s_t_l!($ty) = unsafe { vec_abss(a) };
                 let d = $ty::splat($d);
-                assert_eq!(d, transmute(a));
+                assert_eq!(d, $ty::from(a));
             }
         }
     }
@@ -5922,10 +6007,10 @@ mod tests {
     macro_rules! test_vec_splats {
         { $name: ident, $ty: ident, $a: expr } => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty) = vec_splats($a);
+            fn $name() {
+                let a: s_t_l!($ty) = unsafe { vec_splats($a) };
                 let d = $ty::splat($a);
-                assert_eq!(d, transmute(a));
+                assert_eq!(d, $ty::from(a));
             }
         }
     }
@@ -5941,10 +6026,10 @@ mod tests {
     macro_rules! test_vec_splat {
         { $name: ident, $fun: ident, $ty: ident, $a: expr, $b: expr} => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a = $fun::<$a>();
+            fn $name() {
+                let a = unsafe { $fun::<$a>() };
                 let d = $ty::splat($b);
-                assert_eq!(d, transmute(a));
+                assert_eq!(d, $ty::from(a));
             }
         }
     }
@@ -6036,12 +6121,12 @@ mod tests {
     macro_rules! test_vec_min {
         { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty) = transmute($ty::new($($a),+));
-                let b: s_t_l!($ty) = transmute($ty::new($($b),+));
+            fn $name() {
+                let a: s_t_l!($ty) = $ty::new($($a),+).into();
+                let b: s_t_l!($ty) = $ty::new($($b),+).into();
 
                 let d = $ty::new($($d),+);
-                let r : $ty = transmute(vec_min(a, b));
+                let r = $ty::from(unsafe { vec_min(a, b) });
                 assert_eq!(d, r);
             }
          }
@@ -6080,12 +6165,12 @@ mod tests {
     macro_rules! test_vec_max {
         { $name: ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty) = transmute($ty::new($($a),+));
-                let b: s_t_l!($ty) = transmute($ty::new($($b),+));
+            fn $name() {
+                let a: s_t_l!($ty) = $ty::new($($a),+).into();
+                let b: s_t_l!($ty) = $ty::new($($b),+).into();
 
                 let d = $ty::new($($d),+);
-                let r : $ty = transmute(vec_max(a, b));
+                let r = $ty::from(unsafe { vec_max(a, b) });
                 assert_eq!(d, r);
             }
          }
@@ -6126,13 +6211,13 @@ mod tests {
          $shorttype:ident, $longtype:ident,
          [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a: $longtype = transmute($shorttype::new($($a),+));
-                let b: $longtype = transmute($shorttype::new($($b),+));
-                let c: vector_unsigned_char = transmute(u8x16::new($($c),+));
-                let d = $shorttype::new($($d),+);
+            fn $name() {
+                let a = $longtype::from($shorttype::from_array([$($a),+]));
+                let b = $longtype::from($shorttype::from_array([$($b),+]));
+                let c = vector_unsigned_char::from(u8x16::from_array([$($c),+]));
+                let d = $shorttype::from_array([$($d),+]);
 
-                let r: $shorttype = transmute(vec_perm(a, b, c));
+                let r = $shorttype::from(unsafe { vec_perm(a, b, c) });
                 assert_eq!(d, r);
             }
         }
@@ -6212,8 +6297,8 @@ mod tests {
     [0.0, 1.0, 1.0, 1.1]}
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_madds() {
-        let a: vector_signed_short = transmute(i16x8::new(
+    fn test_vec_madds() {
+        let a = vector_signed_short::from(i16x8::new(
             0 * 256,
             1 * 256,
             2 * 256,
@@ -6223,19 +6308,19 @@ mod tests {
             6 * 256,
             7 * 256,
         ));
-        let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
-        let c: vector_signed_short = transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
+        let b = vector_signed_short::from(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c = vector_signed_short::from(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
 
         let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, 21);
 
-        assert_eq!(d, transmute(vec_madds(a, b, c)));
+        assert_eq!(d, i16x8::from(unsafe { vec_madds(a, b, c) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_madd_float() {
-        let a: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
-        let b: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
-        let c: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
+    fn test_vec_madd_float() {
+        let a = vector_float::from(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let b = vector_float::from(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let c = vector_float::from(f32x4::new(0.1, 0.2, 0.3, 0.4));
         let d = f32x4::new(
             0.1 * 0.1 + 0.1,
             0.2 * 0.2 + 0.2,
@@ -6243,26 +6328,26 @@ mod tests {
             0.4 * 0.4 + 0.4,
         );
 
-        assert_eq!(d, transmute(vec_madd(a, b, c)));
+        assert_eq!(d, f32x4::from(unsafe { vec_madd(a, b, c) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_nmsub_float() {
-        let a: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
-        let b: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
-        let c: vector_float = transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
+    fn test_vec_nmsub_float() {
+        let a = vector_float::from(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let b = vector_float::from(f32x4::new(0.1, 0.2, 0.3, 0.4));
+        let c = vector_float::from(f32x4::new(0.1, 0.2, 0.3, 0.4));
         let d = f32x4::new(
             -(0.1 * 0.1 - 0.1),
             -(0.2 * 0.2 - 0.2),
             -(0.3 * 0.3 - 0.3),
             -(0.4 * 0.4 - 0.4),
         );
-        assert_eq!(d, transmute(vec_nmsub(a, b, c)));
+        assert_eq!(d, f32x4::from(unsafe { vec_nmsub(a, b, c) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_mradds() {
-        let a: vector_signed_short = transmute(i16x8::new(
+    fn test_vec_mradds() {
+        let a = vector_signed_short::from(i16x8::new(
             0 * 256,
             1 * 256,
             2 * 256,
@@ -6272,25 +6357,25 @@ mod tests {
             6 * 256,
             7 * 256,
         ));
-        let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
-        let c: vector_signed_short = transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, i16::MAX - 1));
+        let b = vector_signed_short::from(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c = vector_signed_short::from(i16x8::new(0, 1, 2, 3, 4, 5, 6, i16::MAX - 1));
 
         let d = i16x8::new(0, 3, 6, 9, 12, 15, 18, i16::MAX);
 
-        assert_eq!(d, transmute(vec_mradds(a, b, c)));
+        assert_eq!(d, i16x8::from(unsafe { vec_mradds(a, b, c) }));
     }
 
     macro_rules! test_vec_mladd {
         {$name:ident, $sa:ident, $la:ident, $sbc:ident, $lbc:ident, $sd:ident,
             [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
             #[simd_test(enable = "altivec")]
-            unsafe fn $name() {
-                let a: $la = transmute($sa::new($($a),+));
-                let b: $lbc = transmute($sbc::new($($b),+));
-                let c = transmute($sbc::new($($c),+));
+            fn $name() {
+                let a = $la::from($sa::new($($a),+));
+                let b = $lbc::from($sbc::new($($b),+));
+                let c = $sbc::new($($c),+).into();
                 let d = $sd::new($($d),+);
 
-                assert_eq!(d, transmute(vec_mladd(a, b, c)));
+                assert_eq!(d, $sd::from(unsafe { vec_mladd(a, b, c) }));
             }
         }
     }
@@ -6298,24 +6383,24 @@ mod tests {
     test_vec_mladd! { test_vec_mladd_u16x8_u16x8, u16x8, vector_unsigned_short, u16x8, vector_unsigned_short, u16x8,
         [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
     }
-    test_vec_mladd! { test_vec_mladd_u16x8_i16x8, u16x8, vector_unsigned_short, i16x8, vector_unsigned_short, i16x8,
+    test_vec_mladd! { test_vec_mladd_u16x8_i16x8, u16x8, vector_unsigned_short, i16x8, vector_signed_short, i16x8,
         [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
     }
     test_vec_mladd! { test_vec_mladd_i16x8_u16x8, i16x8, vector_signed_short, u16x8, vector_unsigned_short, i16x8,
         [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
     }
-    test_vec_mladd! { test_vec_mladd_i16x8_i16x8, i16x8, vector_signed_short, i16x8, vector_unsigned_short, i16x8,
+    test_vec_mladd! { test_vec_mladd_i16x8_i16x8, i16x8, vector_signed_short, i16x8, vector_signed_short, i16x8,
         [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 1, 2, 3, 4, 5, 6, 7], [0, 2, 6, 12, 20, 30, 42, 56]
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_msum_unsigned_char() {
-        let a: vector_unsigned_char =
-            transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
-        let b: vector_unsigned_char = transmute(u8x16::new(
+    fn test_vec_msum_unsigned_char() {
+        let a =
+            vector_unsigned_char::from(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
+        let b = vector_unsigned_char::from(u8x16::new(
             255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
         ));
-        let c: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3));
+        let c = vector_unsigned_int::from(u32x4::new(0, 1, 2, 3));
         let d = u32x4::new(
             (0 + 1 + 2 + 3) * 255 + 0,
             (4 + 5 + 6 + 7) * 255 + 1,
@@ -6323,17 +6408,17 @@ mod tests {
             (4 + 5 + 6 + 7) * 255 + 3,
         );
 
-        assert_eq!(d, transmute(vec_msum(a, b, c)));
+        assert_eq!(d, u32x4::from(unsafe { vec_msum(a, b, c) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_msum_signed_char() {
-        let a: vector_signed_char = transmute(i8x16::new(
+    fn test_vec_msum_signed_char() {
+        let a = vector_signed_char::from(i8x16::new(
             0, -1, 2, -3, 1, -1, 1, -1, 0, 1, 2, 3, 4, -5, -6, -7,
         ));
-        let b: vector_unsigned_char =
-            transmute(i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1));
-        let c: vector_signed_int = transmute(u32x4::new(0, 1, 2, 3));
+        let b =
+            vector_unsigned_char::from(u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1));
+        let c = vector_signed_int::from(i32x4::new(0, 1, 2, 3));
         let d = i32x4::new(
             (0 - 1 + 2 - 3) + 0,
             (0) + 1,
@@ -6341,11 +6426,12 @@ mod tests {
             (4 - 5 - 6 - 7) + 3,
         );
 
-        assert_eq!(d, transmute(vec_msum(a, b, c)));
+        assert_eq!(d, i32x4::from(unsafe { vec_msum(a, b, c) }));
     }
+
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_msum_unsigned_short() {
-        let a: vector_unsigned_short = transmute(u16x8::new(
+    fn test_vec_msum_unsigned_short() {
+        let a = vector_unsigned_short::from(u16x8::new(
             0 * 256,
             1 * 256,
             2 * 256,
@@ -6355,9 +6441,8 @@ mod tests {
             6 * 256,
             7 * 256,
         ));
-        let b: vector_unsigned_short =
-            transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
-        let c: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3));
+        let b = vector_unsigned_short::from(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c = vector_unsigned_int::from(u32x4::new(0, 1, 2, 3));
         let d = u32x4::new(
             (0 + 1) * 256 * 256 + 0,
             (2 + 3) * 256 * 256 + 1,
@@ -6365,12 +6450,12 @@ mod tests {
             (6 + 7) * 256 * 256 + 3,
         );
 
-        assert_eq!(d, transmute(vec_msum(a, b, c)));
+        assert_eq!(d, u32x4::from(unsafe { vec_msum(a, b, c) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_msum_signed_short() {
-        let a: vector_signed_short = transmute(i16x8::new(
+    fn test_vec_msum_signed_short() {
+        let a = vector_signed_short::from(i16x8::new(
             0 * 256,
             -1 * 256,
             2 * 256,
@@ -6380,8 +6465,8 @@ mod tests {
             6 * 256,
             -7 * 256,
         ));
-        let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
-        let c: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3));
+        let b = vector_signed_short::from(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c = vector_signed_int::from(i32x4::new(0, 1, 2, 3));
         let d = i32x4::new(
             (0 - 1) * 256 * 256 + 0,
             (2 - 3) * 256 * 256 + 1,
@@ -6389,12 +6474,12 @@ mod tests {
             (6 - 7) * 256 * 256 + 3,
         );
 
-        assert_eq!(d, transmute(vec_msum(a, b, c)));
+        assert_eq!(d, i32x4::from(unsafe { vec_msum(a, b, c) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_msums_unsigned() {
-        let a: vector_unsigned_short = transmute(u16x8::new(
+    fn test_vec_msums_unsigned() {
+        let a = vector_unsigned_short::from(u16x8::new(
             0 * 256,
             1 * 256,
             2 * 256,
@@ -6404,9 +6489,8 @@ mod tests {
             6 * 256,
             7 * 256,
         ));
-        let b: vector_unsigned_short =
-            transmute(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
-        let c: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3));
+        let b = vector_unsigned_short::from(u16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c = vector_unsigned_int::from(u32x4::new(0, 1, 2, 3));
         let d = u32x4::new(
             (0 + 1) * 256 * 256 + 0,
             (2 + 3) * 256 * 256 + 1,
@@ -6414,12 +6498,12 @@ mod tests {
             (6 + 7) * 256 * 256 + 3,
         );
 
-        assert_eq!(d, transmute(vec_msums(a, b, c)));
+        assert_eq!(d, u32x4::from(unsafe { vec_msums(a, b, c) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_msums_signed() {
-        let a: vector_signed_short = transmute(i16x8::new(
+    fn test_vec_msums_signed() {
+        let a = vector_signed_short::from(i16x8::new(
             0 * 256,
             -1 * 256,
             2 * 256,
@@ -6429,8 +6513,8 @@ mod tests {
             6 * 256,
             -7 * 256,
         ));
-        let b: vector_signed_short = transmute(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
-        let c: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3));
+        let b = vector_signed_short::from(i16x8::new(256, 256, 256, 256, 256, 256, 256, 256));
+        let c = vector_signed_int::from(i32x4::new(0, 1, 2, 3));
         let d = i32x4::new(
             (0 - 1) * 256 * 256 + 0,
             (2 - 3) * 256 * 256 + 1,
@@ -6438,23 +6522,23 @@ mod tests {
             (6 - 7) * 256 * 256 + 3,
         );
 
-        assert_eq!(d, transmute(vec_msums(a, b, c)));
+        assert_eq!(d, i32x4::from(unsafe { vec_msums(a, b, c) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_sum2s() {
-        let a: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3));
-        let b: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3));
+    fn test_vec_sum2s() {
+        let a = vector_signed_int::from(i32x4::new(0, 1, 2, 3));
+        let b = vector_signed_int::from(i32x4::new(0, 1, 2, 3));
         let d = i32x4::new(0, 0 + 1 + 1, 0, 2 + 3 + 3);
 
-        assert_eq!(d, transmute(vec_sum2s(a, b)));
+        assert_eq!(d, i32x4::from(unsafe { vec_sum2s(a, b) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_sum4s_unsigned_char() {
-        let a: vector_unsigned_char =
-            transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
-        let b: vector_unsigned_int = transmute(u32x4::new(0, 1, 2, 3));
+    fn test_vec_sum4s_unsigned_char() {
+        let a =
+            vector_unsigned_char::from(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
+        let b = vector_unsigned_int::from(u32x4::new(0, 1, 2, 3));
         let d = u32x4::new(
             0 + 1 + 2 + 3 + 0,
             4 + 5 + 6 + 7 + 1,
@@ -6462,13 +6546,13 @@ mod tests {
             4 + 5 + 6 + 7 + 3,
         );
 
-        assert_eq!(d, transmute(vec_sum4s(a, b)));
+        assert_eq!(d, u32x4::from(unsafe { vec_sum4s(a, b) }));
     }
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_sum4s_signed_char() {
-        let a: vector_signed_char =
-            transmute(i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
-        let b: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3));
+    fn test_vec_sum4s_signed_char() {
+        let a =
+            vector_signed_char::from(i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
+        let b = vector_signed_int::from(i32x4::new(0, 1, 2, 3));
         let d = i32x4::new(
             0 + 1 + 2 + 3 + 0,
             4 + 5 + 6 + 7 + 1,
@@ -6476,110 +6560,111 @@ mod tests {
             4 + 5 + 6 + 7 + 3,
         );
 
-        assert_eq!(d, transmute(vec_sum4s(a, b)));
+        assert_eq!(d, i32x4::from(unsafe { vec_sum4s(a, b) }));
     }
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_sum4s_signed_short() {
-        let a: vector_signed_short = transmute(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
-        let b: vector_signed_int = transmute(i32x4::new(0, 1, 2, 3));
+    fn test_vec_sum4s_signed_short() {
+        let a = vector_signed_short::from(i16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
+        let b = vector_signed_int::from(i32x4::new(0, 1, 2, 3));
         let d = i32x4::new(0 + 1 + 0, 2 + 3 + 1, 4 + 5 + 2, 6 + 7 + 3);
 
-        assert_eq!(d, transmute(vec_sum4s(a, b)));
+        assert_eq!(d, i32x4::from(unsafe { vec_sum4s(a, b) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_mule_unsigned_char() {
-        let a: vector_unsigned_char =
-            transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
+    fn test_vec_mule_unsigned_char() {
+        let a =
+            vector_unsigned_char::from(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
         let d = u16x8::new(0 * 0, 2 * 2, 4 * 4, 6 * 6, 0 * 0, 2 * 2, 4 * 4, 6 * 6);
 
-        assert_eq!(d, transmute(vec_mule(a, a)));
+        assert_eq!(d, u16x8::from(unsafe { vec_mule(a, a) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_mule_signed_char() {
-        let a: vector_signed_char = transmute(i8x16::new(
+    fn test_vec_mule_signed_char() {
+        let a = vector_signed_char::from(i8x16::new(
             0, 1, -2, 3, -4, 5, -6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
         ));
         let d = i16x8::new(0 * 0, 2 * 2, 4 * 4, 6 * 6, 0 * 0, 2 * 2, 4 * 4, 6 * 6);
 
-        assert_eq!(d, transmute(vec_mule(a, a)));
+        assert_eq!(d, i16x8::from(unsafe { vec_mule(a, a) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_mule_unsigned_short() {
-        let a: vector_unsigned_short = transmute(u16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
+    fn test_vec_mule_unsigned_short() {
+        let a = vector_unsigned_short::from(u16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
         let d = u32x4::new(0 * 0, 2 * 2, 4 * 4, 6 * 6);
 
-        assert_eq!(d, transmute(vec_mule(a, a)));
+        assert_eq!(d, u32x4::from(unsafe { vec_mule(a, a) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_mule_signed_short() {
-        let a: vector_signed_short = transmute(i16x8::new(0, 1, -2, 3, -4, 5, -6, 7));
+    fn test_vec_mule_signed_short() {
+        let a = vector_signed_short::from(i16x8::new(0, 1, -2, 3, -4, 5, -6, 7));
         let d = i32x4::new(0 * 0, 2 * 2, 4 * 4, 6 * 6);
 
-        assert_eq!(d, transmute(vec_mule(a, a)));
+        assert_eq!(d, i32x4::from(unsafe { vec_mule(a, a) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_mulo_unsigned_char() {
-        let a: vector_unsigned_char =
-            transmute(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
+    fn test_vec_mulo_unsigned_char() {
+        let a =
+            vector_unsigned_char::from(u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7));
         let d = u16x8::new(1 * 1, 3 * 3, 5 * 5, 7 * 7, 1 * 1, 3 * 3, 5 * 5, 7 * 7);
 
-        assert_eq!(d, transmute(vec_mulo(a, a)));
+        assert_eq!(d, u16x8::from(unsafe { vec_mulo(a, a) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_mulo_signed_char() {
-        let a: vector_signed_char = transmute(i8x16::new(
+    fn test_vec_mulo_signed_char() {
+        let a = vector_signed_char::from(i8x16::new(
             0, 1, -2, 3, -4, 5, -6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
         ));
         let d = i16x8::new(1 * 1, 3 * 3, 5 * 5, 7 * 7, 1 * 1, 3 * 3, 5 * 5, 7 * 7);
 
-        assert_eq!(d, transmute(vec_mulo(a, a)));
+        assert_eq!(d, i16x8::from(unsafe { vec_mulo(a, a) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_mulo_unsigned_short() {
-        let a: vector_unsigned_short = transmute(u16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
+    fn test_vec_mulo_unsigned_short() {
+        let a = vector_unsigned_short::from(u16x8::new(0, 1, 2, 3, 4, 5, 6, 7));
         let d = u32x4::new(1 * 1, 3 * 3, 5 * 5, 7 * 7);
 
-        assert_eq!(d, transmute(vec_mulo(a, a)));
+        assert_eq!(d, u32x4::from(unsafe { vec_mulo(a, a) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_mulo_signed_short() {
-        let a: vector_signed_short = transmute(i16x8::new(0, 1, -2, 3, -4, 5, -6, 7));
+    fn test_vec_mulo_signed_short() {
+        let a = vector_signed_short::from(i16x8::new(0, 1, -2, 3, -4, 5, -6, 7));
         let d = i32x4::new(1 * 1, 3 * 3, 5 * 5, 7 * 7);
 
-        assert_eq!(d, transmute(vec_mulo(a, a)));
+        assert_eq!(d, i32x4::from(unsafe { vec_mulo(a, a) }));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn vec_add_i32x4_i32x4() {
+    fn vec_add_i32x4_i32x4() {
         let x = i32x4::new(1, 2, 3, 4);
         let y = i32x4::new(4, 3, 2, 1);
-        let x: vector_signed_int = transmute(x);
-        let y: vector_signed_int = transmute(y);
-        let z = vec_add(x, y);
-        assert_eq!(i32x4::splat(5), transmute(z));
+        let x = vector_signed_int::from(x);
+        let y = vector_signed_int::from(y);
+        let z = unsafe { vec_add(x, y) };
+        assert_eq!(i32x4::splat(5), i32x4::from(z));
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn vec_ctf_u32() {
-        let v: vector_unsigned_int = transmute(u32x4::new(u32::MIN, u32::MAX, u32::MAX, 42));
-        let v2 = vec_ctf::<1, _>(v);
-        let r2: vector_float = transmute(f32x4::new(0.0, 2147483600.0, 2147483600.0, 21.0));
-        let v4 = vec_ctf::<2, _>(v);
-        let r4: vector_float = transmute(f32x4::new(0.0, 1073741800.0, 1073741800.0, 10.5));
-        let v8 = vec_ctf::<3, _>(v);
-        let r8: vector_float = transmute(f32x4::new(0.0, 536870900.0, 536870900.0, 5.25));
+    fn vec_ctf_u32() {
+        let v = vector_unsigned_int::from(u32x4::new(u32::MIN, u32::MAX, u32::MAX, 42));
+        let v2 = unsafe { vec_ctf::<1, _>(v) };
+        let r2 = vector_float::from(f32x4::new(0.0, 2147483600.0, 2147483600.0, 21.0));
+        let v4 = unsafe { vec_ctf::<2, _>(v) };
+        let r4 = vector_float::from(f32x4::new(0.0, 1073741800.0, 1073741800.0, 10.5));
+        let v8 = unsafe { vec_ctf::<3, _>(v) };
+        let r8 = vector_float::from(f32x4::new(0.0, 536870900.0, 536870900.0, 5.25));
 
         let check = |a, b| {
-            let r = transmute(vec_cmple(vec_abs(vec_sub(a, b)), vec_splats(f32::EPSILON)));
-            let e = m32x4::new(true, true, true, true);
+            let r =
+                m32x4::from(unsafe { vec_cmple(vec_abs(vec_sub(a, b)), vec_splats(f32::EPSILON)) });
+            let e = m32x4::splat(true);
             assert_eq!(e, r);
         };
 
@@ -6589,26 +6674,32 @@ mod tests {
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_ctu() {
+    fn test_vec_ctu() {
         let v = u32x4::new(u32::MIN, u32::MAX, u32::MAX, 42);
-        let v2: u32x4 = transmute(vec_ctu::<1>(transmute(f32x4::new(
-            0.0,
-            2147483600.0,
-            2147483600.0,
-            21.0,
-        ))));
-        let v4: u32x4 = transmute(vec_ctu::<2>(transmute(f32x4::new(
-            0.0,
-            1073741800.0,
-            1073741800.0,
-            10.5,
-        ))));
-        let v8: u32x4 = transmute(vec_ctu::<3>(transmute(f32x4::new(
-            0.0,
-            536870900.0,
-            536870900.0,
-            5.25,
-        ))));
+        let v2 = u32x4::from(unsafe {
+            vec_ctu::<1>(vector_float::from(f32x4::new(
+                0.0,
+                2147483600.0,
+                2147483600.0,
+                21.0,
+            )))
+        });
+        let v4 = u32x4::from(unsafe {
+            vec_ctu::<2>(vector_float::from(f32x4::new(
+                0.0,
+                1073741800.0,
+                1073741800.0,
+                10.5,
+            )))
+        });
+        let v8 = u32x4::from(unsafe {
+            vec_ctu::<3>(vector_float::from(f32x4::new(
+                0.0,
+                536870900.0,
+                536870900.0,
+                5.25,
+            )))
+        });
 
         assert_eq!(v2, v);
         assert_eq!(v4, v);
@@ -6616,20 +6707,20 @@ mod tests {
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn vec_ctf_i32() {
-        let v: vector_signed_int = transmute(i32x4::new(i32::MIN, i32::MAX, i32::MAX - 42, 42));
-        let v2 = vec_ctf::<1, _>(v);
-        let r2: vector_float =
-            transmute(f32x4::new(-1073741800.0, 1073741800.0, 1073741800.0, 21.0));
-        let v4 = vec_ctf::<2, _>(v);
-        let r4: vector_float = transmute(f32x4::new(-536870900.0, 536870900.0, 536870900.0, 10.5));
-        let v8 = vec_ctf::<3, _>(v);
-        let r8: vector_float = transmute(f32x4::new(-268435460.0, 268435460.0, 268435460.0, 5.25));
+    fn vec_ctf_i32() {
+        let v = vector_signed_int::from(i32x4::new(i32::MIN, i32::MAX, i32::MAX - 42, 42));
+        let v2 = unsafe { vec_ctf::<1, _>(v) };
+        let r2 = vector_float::from(f32x4::new(-1073741800.0, 1073741800.0, 1073741800.0, 21.0));
+        let v4 = unsafe { vec_ctf::<2, _>(v) };
+        let r4 = vector_float::from(f32x4::new(-536870900.0, 536870900.0, 536870900.0, 10.5));
+        let v8 = unsafe { vec_ctf::<3, _>(v) };
+        let r8 = vector_float::from(f32x4::new(-268435460.0, 268435460.0, 268435460.0, 5.25));
 
         let check = |a, b| {
-            let r = transmute(vec_cmple(vec_abs(vec_sub(a, b)), vec_splats(f32::EPSILON)));
+            let r =
+                m32x4::from(unsafe { vec_cmple(vec_abs(vec_sub(a, b)), vec_splats(f32::EPSILON)) });
             println!("{:?} {:?}", a, b);
-            let e = m32x4::new(true, true, true, true);
+            let e = m32x4::splat(true);
             assert_eq!(e, r);
         };
 
@@ -6639,26 +6730,32 @@ mod tests {
     }
 
     #[simd_test(enable = "altivec")]
-    unsafe fn test_vec_cts() {
+    fn test_vec_cts() {
         let v = i32x4::new(i32::MIN, i32::MAX, i32::MAX, 42);
-        let v2: i32x4 = transmute(vec_cts::<1>(transmute(f32x4::new(
-            -1073741800.0,
-            1073741800.0,
-            1073741800.0,
-            21.0,
-        ))));
-        let v4: i32x4 = transmute(vec_cts::<2>(transmute(f32x4::new(
-            -536870900.0,
-            536870900.0,
-            536870900.0,
-            10.5,
-        ))));
-        let v8: i32x4 = transmute(vec_cts::<3>(transmute(f32x4::new(
-            -268435460.0,
-            268435460.0,
-            268435460.0,
-            5.25,
-        ))));
+        let v2 = i32x4::from(unsafe {
+            vec_cts::<1>(transmute(f32x4::new(
+                -1073741800.0,
+                1073741800.0,
+                1073741800.0,
+                21.0,
+            )))
+        });
+        let v4 = i32x4::from(unsafe {
+            vec_cts::<2>(transmute(f32x4::new(
+                -536870900.0,
+                536870900.0,
+                536870900.0,
+                10.5,
+            )))
+        });
+        let v8 = i32x4::from(unsafe {
+            vec_cts::<3>(transmute(f32x4::new(
+                -268435460.0,
+                268435460.0,
+                268435460.0,
+                5.25,
+            )))
+        });
 
         assert_eq!(v2, v);
         assert_eq!(v4, v);
diff --git a/crates/core_arch/src/powerpc/macros.rs b/crates/core_arch/src/powerpc/macros.rs
index 24d86f1018..f697d4d257 100644
--- a/crates/core_arch/src/powerpc/macros.rs
+++ b/crates/core_arch/src/powerpc/macros.rs
@@ -274,40 +274,6 @@ macro_rules! t_b {
     };
 }
 
-macro_rules! impl_from {
-    ($s: ident) => {
-        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
-        impl From<$s> for s_t_l!($s) {
-            #[inline]
-            fn from (v: $s) -> Self {
-                unsafe {
-                    transmute(v)
-                }
-            }
-        }
-    };
-    ($($s: ident),*) => {
-        $(
-            impl_from! { $s }
-        )*
-    };
-}
-
-macro_rules! impl_neg {
-    ($s: ident : $zero: expr) => {
-        #[unstable(feature = "stdarch_powerpc", issue = "111145")]
-        impl crate::ops::Neg for s_t_l!($s) {
-            type Output = s_t_l!($s);
-            #[inline]
-            fn neg(self) -> Self::Output {
-                unsafe { simd_neg(self) }
-            }
-        }
-    };
-}
-
-pub(crate) use impl_from;
-pub(crate) use impl_neg;
 pub(crate) use impl_vec_trait;
 pub(crate) use s_t_l;
 pub(crate) use t_b;
diff --git a/crates/core_arch/src/powerpc/vsx.rs b/crates/core_arch/src/powerpc/vsx.rs
index ca9fcaabe8..4a7b561a20 100644
--- a/crates/core_arch/src/powerpc/vsx.rs
+++ b/crates/core_arch/src/powerpc/vsx.rs
@@ -9,6 +9,7 @@
 #![allow(non_camel_case_types)]
 
 use crate::core_arch::powerpc::*;
+use crate::core_arch::simd::*;
 
 #[cfg(test)]
 use stdarch_test::assert_instr;
@@ -34,6 +35,22 @@ types! {
     // pub struct vector_unsigned___int128 = i128x1;
 }
 
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+impl From<m64x2> for vector_bool_long {
+    #[inline]
+    fn from(value: m64x2) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_powerpc", issue = "111145")]
+impl From<vector_bool_long> for m64x2 {
+    #[inline]
+    fn from(value: vector_bool_long) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
 #[allow(improper_ctypes)]
 unsafe extern "C" {
     #[link_name = "llvm.ppc.altivec.vperm"]
@@ -46,7 +63,6 @@ unsafe extern "C" {
 
 mod sealed {
     use super::*;
-    use crate::core_arch::simd::*;
 
     #[unstable(feature = "stdarch_powerpc", issue = "111145")]
     pub trait VectorPermDI {
@@ -221,14 +237,16 @@ mod tests {
     macro_rules! test_vec_xxpermdi {
         {$name:ident, $shorttype:ident, $longtype:ident, [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
             #[simd_test(enable = "vsx")]
-            unsafe fn $name() {
-                let a: $longtype = transmute($shorttype::new($($a),+, $($b),+));
-                let b = transmute($shorttype::new($($c),+, $($d),+));
-
-                assert_eq!($shorttype::new($($a),+, $($c),+), transmute(vec_xxpermdi::<_, 0>(a, b)));
-                assert_eq!($shorttype::new($($b),+, $($c),+), transmute(vec_xxpermdi::<_, 1>(a, b)));
-                assert_eq!($shorttype::new($($a),+, $($d),+), transmute(vec_xxpermdi::<_, 2>(a, b)));
-                assert_eq!($shorttype::new($($b),+, $($d),+), transmute(vec_xxpermdi::<_, 3>(a, b)));
+            fn $name() {
+                let a = $longtype::from($shorttype::from_array([$($a),+, $($b),+]));
+                let b = $longtype::from($shorttype::from_array([$($c),+, $($d),+]));
+
+                unsafe {
+                    assert_eq!($shorttype::from_array([$($a),+, $($c),+]), $shorttype::from(vec_xxpermdi::<_, 0>(a, b)));
+                    assert_eq!($shorttype::from_array([$($b),+, $($c),+]), $shorttype::from(vec_xxpermdi::<_, 1>(a, b)));
+                    assert_eq!($shorttype::from_array([$($a),+, $($d),+]), $shorttype::from(vec_xxpermdi::<_, 2>(a, b)));
+                    assert_eq!($shorttype::from_array([$($b),+, $($d),+]), $shorttype::from(vec_xxpermdi::<_, 3>(a, b)));
+                }
             }
         }
     }
diff --git a/crates/core_arch/src/riscv64/zk.rs b/crates/core_arch/src/riscv64/zk.rs
index a30653cbe0..3aa1ca39b3 100644
--- a/crates/core_arch/src/riscv64/zk.rs
+++ b/crates/core_arch/src/riscv64/zk.rs
@@ -1,6 +1,8 @@
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
+use crate::arch::asm;
+
 unsafe extern "unadjusted" {
     #[link_name = "llvm.riscv.aes64es"]
     fn _aes64es(rs1: i64, rs2: i64) -> i64;
@@ -14,12 +16,6 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.riscv.aes64dsm"]
     fn _aes64dsm(rs1: i64, rs2: i64) -> i64;
 
-    #[link_name = "llvm.riscv.aes64ks1i"]
-    fn _aes64ks1i(rs1: i64, rnum: i32) -> i64;
-
-    #[link_name = "llvm.riscv.aes64ks2"]
-    fn _aes64ks2(rs1: i64, rs2: i64) -> i64;
-
     #[link_name = "llvm.riscv.aes64im"]
     fn _aes64im(rs1: i64) -> i64;
 
@@ -133,15 +129,26 @@ pub fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
 /// # Note
 ///
 /// The `RNUM` parameter is expected to be a constant value inside the range of `0..=10`.
-#[target_feature(enable = "zkne", enable = "zknd")]
+#[target_feature(enable = "zkne_or_zknd")]
 #[rustc_legacy_const_generics(1)]
-#[cfg_attr(test, assert_instr(aes64ks1i, RNUM = 0))]
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
     static_assert!(RNUM <= 10);
-
-    unsafe { _aes64ks1i(rs1 as i64, RNUM as i32) as u64 }
+    unsafe {
+        let rd: u64;
+        asm!(
+            ".option push",
+            ".option arch, +zkne",
+            "aes64ks1i {}, {}, {}",
+            ".option pop",
+            lateout(reg) rd,
+            in(reg) rs1,
+            const RNUM,
+            options(pure, nomem, nostack, preserves_flags)
+        );
+        rd
+    }
 }
 
 /// This instruction implements part of the KeySchedule operation for the AES Block cipher.
@@ -155,12 +162,24 @@ pub fn aes64ks1i<const RNUM: u8>(rs1: u64) -> u64 {
 /// Version: v1.0.1
 ///
 /// Section: 3.11
-#[target_feature(enable = "zkne", enable = "zknd")]
-#[cfg_attr(test, assert_instr(aes64ks2))]
+#[target_feature(enable = "zkne_or_zknd")]
 #[inline]
 #[unstable(feature = "riscv_ext_intrinsics", issue = "114544")]
 pub fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
-    unsafe { _aes64ks2(rs1 as i64, rs2 as i64) as u64 }
+    unsafe {
+        let rd: u64;
+        asm!(
+            ".option push",
+            ".option arch, +zkne",
+            "aes64ks2 {}, {}, {}",
+            ".option pop",
+            lateout(reg) rd,
+            in(reg) rs1,
+            in(reg) rs2,
+            options(pure, nomem, nostack, preserves_flags)
+        );
+        rd
+    }
 }
 
 /// This instruction accelerates the inverse MixColumns step of the AES Block Cipher, and is used to aid creation of
diff --git a/crates/core_arch/src/s390x/macros.rs b/crates/core_arch/src/s390x/macros.rs
index 26afbaa45a..c47f242948 100644
--- a/crates/core_arch/src/s390x/macros.rs
+++ b/crates/core_arch/src/s390x/macros.rs
@@ -431,40 +431,6 @@ macro_rules! t_b {
     };
 }
 
-macro_rules! impl_from {
-    ($s: ident) => {
-        #[unstable(feature = "stdarch_s390x", issue = "135681")]
-        impl From<$s> for s_t_l!($s) {
-            #[inline]
-            fn from (v: $s) -> Self {
-                unsafe {
-                    transmute(v)
-                }
-            }
-        }
-    };
-    ($($s: ident),*) => {
-        $(
-            impl_from! { $s }
-        )*
-    };
-}
-
-macro_rules! impl_neg {
-    ($s: ident : $zero: expr) => {
-        #[unstable(feature = "stdarch_s390x", issue = "135681")]
-        impl crate::ops::Neg for s_t_l!($s) {
-            type Output = s_t_l!($s);
-            #[inline]
-            fn neg(self) -> Self::Output {
-                unsafe { simd_neg(self) }
-            }
-        }
-    };
-}
-
-pub(crate) use impl_from;
-pub(crate) use impl_neg;
 pub(crate) use impl_vec_trait;
 pub(crate) use l_t_t;
 pub(crate) use s_t_l;
diff --git a/crates/core_arch/src/s390x/mod.rs b/crates/core_arch/src/s390x/mod.rs
index 7d3b3f2d99..5b85020072 100644
--- a/crates/core_arch/src/s390x/mod.rs
+++ b/crates/core_arch/src/s390x/mod.rs
@@ -2,6 +2,11 @@
 
 pub(crate) mod macros;
 
+/// the float and vector registers overlap therefore we cannot use any vector
+/// extensions if softfloat is enabled.
+
+#[cfg(not(target_abi = "softfloat"))]
 mod vector;
+#[cfg(not(target_abi = "softfloat"))]
 #[unstable(feature = "stdarch_s390x", issue = "130869")]
 pub use self::vector::*;
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs
index 7208105fb8..fc5af1b14d 100644
--- a/crates/core_arch/src/s390x/vector.rs
+++ b/crates/core_arch/src/s390x/vector.rs
@@ -51,6 +51,54 @@ types! {
     pub struct vector_double(2 x f64);
 }
 
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+impl From<m8x16> for vector_bool_char {
+    #[inline]
+    fn from(value: m8x16) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+impl From<vector_bool_char> for m8x16 {
+    #[inline]
+    fn from(value: vector_bool_char) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+impl From<m16x8> for vector_bool_short {
+    #[inline]
+    fn from(value: m16x8) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+impl From<vector_bool_short> for m16x8 {
+    #[inline]
+    fn from(value: vector_bool_short) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+impl From<m32x4> for vector_bool_int {
+    #[inline]
+    fn from(value: m32x4) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+impl From<vector_bool_int> for m32x4 {
+    #[inline]
+    fn from(value: vector_bool_int) -> Self {
+        unsafe { transmute(value) }
+    }
+}
+
 #[repr(C, packed)]
 struct PackedTuple<T, U> {
     x: T,
@@ -281,16 +329,27 @@ unsafe extern "unadjusted" {
     #[link_name = "llvm.s390.vfenezbs"] fn vfenezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>;
     #[link_name = "llvm.s390.vfenezhs"] fn vfenezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>;
     #[link_name = "llvm.s390.vfenezfs"] fn vfenezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
-}
-
-impl_from! { i8x16, u8x16,  i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
 
-impl_neg! { i8x16 : 0 }
-impl_neg! { i16x8 : 0 }
-impl_neg! { i32x4 : 0 }
-impl_neg! { i64x2 : 0 }
-impl_neg! { f32x4 : 0f32 }
-impl_neg! { f64x2 : 0f64 }
+    #[link_name = "llvm.s390.vclfnhs"] fn vclfnhs(a: vector_signed_short, immarg: i32) -> vector_float;
+    #[link_name = "llvm.s390.vclfnls"] fn vclfnls(a: vector_signed_short, immarg: i32) -> vector_float;
+    #[link_name = "llvm.s390.vcfn"] fn vcfn(a: vector_signed_short, immarg: i32) -> vector_signed_short;
+    #[link_name = "llvm.s390.vcnf"] fn vcnf(a: vector_signed_short, immarg: i32) -> vector_signed_short;
+    #[link_name = "llvm.s390.vcrnfs"] fn vcrnfs(a: vector_float, b: vector_float, immarg: i32) -> vector_signed_short;
+
+    // These are the intrinsics we'd like to use (with mode 0). However, they require
+    // "vector-enhancements-1" and don't have a fallback, whereas `vec_min`/`vec_max` should be
+    // available with just "vector". Therefore, we cannot use them.
+    // #[link_name = "llvm.s390.vfmaxsb"] fn vfmaxsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
+    // #[link_name = "llvm.s390.vfmaxdb"] fn vfmaxdb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
+    // #[link_name = "llvm.s390.vfminsb"] fn vfminsb(a: vector_float, b: vector_float, mode: i32) -> vector_float;
+    // #[link_name = "llvm.s390.vfmindb"] fn vfmindb(a: vector_double, b: vector_double, mode: i32) -> vector_double;
+    // Instead, we use "portable" LLVM intrinsics -- even though those have the wrong semantics
+    // (https://github.com/rust-lang/stdarch/issues/2060), they usually do the right thing.
+    #[link_name = "llvm.minnum.v4f32"] fn minnum_v4f32(a: vector_float, b: vector_float) -> vector_float;
+    #[link_name = "llvm.minnum.v2f64"] fn minnum_v2f64(a: vector_double, b: vector_double) -> vector_double;
+    #[link_name = "llvm.maxnum.v4f32"] fn maxnum_v4f32(a: vector_float, b: vector_float) -> vector_float;
+    #[link_name = "llvm.maxnum.v2f64"] fn maxnum_v2f64(a: vector_double, b: vector_double) -> vector_double;
+}
 
 #[repr(simd)]
 struct ShuffleMask<const N: usize>([u32; N]);
@@ -439,6 +498,43 @@ enum FindImm {
 mod sealed {
     use super::*;
 
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorNeg {
+        unsafe fn vec_neg(self) -> Self;
+    }
+
+    macro_rules! impl_neg {
+        ($($v:ty)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorNeg for $v {
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_neg(self) -> Self {
+                        simd_neg(self)
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_neg! {
+        vector_signed_char
+        vector_unsigned_char
+
+        vector_signed_short
+        vector_unsigned_short
+
+        vector_signed_int
+        vector_unsigned_int
+
+        vector_signed_long_long
+        vector_unsigned_long_long
+
+        vector_float
+        vector_double
+    }
+
     #[unstable(feature = "stdarch_s390x", issue = "135681")]
     pub trait VectorAdd<Other> {
         type Result;
@@ -698,8 +794,8 @@ mod sealed {
         impl_max!(vec_vmxslg, vector_unsigned_long_long, vmxlg);
     }
 
-    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [simd_fmax, "vector-enhancements-1" vfmaxsb ] }
-    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [simd_fmax, "vector-enhancements-1" vfmaxdb] }
+    test_impl! { vec_vfmaxsb (a: vector_float, b: vector_float) -> vector_float [maxnum_v4f32, "vector-enhancements-1" vfmaxsb] }
+    test_impl! { vec_vfmaxdb (a: vector_double, b: vector_double) -> vector_double [maxnum_v2f64, "vector-enhancements-1" vfmaxdb] }
 
     impl_vec_trait!([VectorMax vec_max] vec_vfmaxsb (vector_float, vector_float) -> vector_float);
     impl_vec_trait!([VectorMax vec_max] vec_vfmaxdb (vector_double, vector_double) -> vector_double);
@@ -745,8 +841,8 @@ mod sealed {
         impl_min!(vec_vmnslg, vector_unsigned_long_long, vmnlg);
     }
 
-    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [simd_fmin, "vector-enhancements-1" vfminsb]  }
-    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [simd_fmin, "vector-enhancements-1" vfmindb]  }
+    test_impl! { vec_vfminsb (a: vector_float, b: vector_float) -> vector_float [minnum_v4f32, "vector-enhancements-1" vfminsb] }
+    test_impl! { vec_vfmindb (a: vector_double, b: vector_double) -> vector_double [minnum_v2f64, "vector-enhancements-1" vfmindb] }
 
     impl_vec_trait!([VectorMin vec_min] vec_vfminsb (vector_float, vector_float) -> vector_float);
     impl_vec_trait!([VectorMin vec_min] vec_vfmindb (vector_double, vector_double) -> vector_double);
@@ -761,7 +857,7 @@ mod sealed {
             #[inline]
             #[target_feature(enable = "vector")]
             unsafe fn $name(v: s_t_l!($ty)) -> s_t_l!($ty) {
-                v.vec_max(-v)
+                v.vec_max(simd_neg(v))
             }
 
             impl_vec_trait! { [VectorAbs vec_abs] $name (s_t_l!($ty)) }
@@ -896,7 +992,7 @@ mod sealed {
     #[cfg_attr(test, assert_instr(vrepb, IMM2 = 1))]
     unsafe fn vrepb<const IMM2: u32>(a: vector_signed_char) -> vector_signed_char {
         static_assert_uimm_bits!(IMM2, 4);
-        simd_shuffle(a, a, const { u32x16::from_array([IMM2; 16]) })
+        simd_shuffle!(a, a, [IMM2; 16])
     }
 
     #[inline]
@@ -904,7 +1000,7 @@ mod sealed {
     #[cfg_attr(test, assert_instr(vreph, IMM2 = 1))]
     unsafe fn vreph<const IMM2: u32>(a: vector_signed_short) -> vector_signed_short {
         static_assert_uimm_bits!(IMM2, 3);
-        simd_shuffle(a, a, const { u32x8::from_array([IMM2; 8]) })
+        simd_shuffle!(a, a, [IMM2; 8])
     }
 
     #[inline]
@@ -912,7 +1008,7 @@ mod sealed {
     #[cfg_attr(test, assert_instr(vrepf, IMM2 = 1))]
     unsafe fn vrepf<const IMM2: u32>(a: vector_signed_int) -> vector_signed_int {
         static_assert_uimm_bits!(IMM2, 2);
-        simd_shuffle(a, a, const { u32x4::from_array([IMM2; 4]) })
+        simd_shuffle!(a, a, [IMM2; 4])
     }
 
     #[inline]
@@ -920,7 +1016,7 @@ mod sealed {
     #[cfg_attr(test, assert_instr(vrepg, IMM2 = 1))]
     unsafe fn vrepg<const IMM2: u32>(a: vector_signed_long_long) -> vector_signed_long_long {
         static_assert_uimm_bits!(IMM2, 1);
-        simd_shuffle(a, a, const { u32x2::from_array([IMM2; 2]) })
+        simd_shuffle!(a, a, [IMM2; 2])
     }
 
     macro_rules! impl_vec_splat {
@@ -4055,6 +4151,14 @@ unsafe fn __lcbb<const BLOCK_BOUNDARY: u16>(ptr: *const u8) -> u32 {
     lcbb(ptr, const { validate_block_boundary(BLOCK_BOUNDARY) })
 }
 
+/// Vector Negate
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_neg<T: sealed::VectorNeg>(a: T) -> T {
+    a.vec_neg()
+}
+
 /// Vector Add
 #[inline]
 #[target_feature(enable = "vector")]
@@ -5307,11 +5411,13 @@ pub unsafe fn vec_search_string_until_zero_cc<T: sealed::VectorSearchString>(
 #[inline]
 #[target_feature(enable = "vector-enhancements-1")]
 #[unstable(feature = "stdarch_s390x", issue = "135681")]
-// FIXME: this emits `vflls` where `vldeb` is expected
-// #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vldeb))]
+// NOTE: `vflls` and `vldeb` are equivalent; our disassmbler prefers vflls.
+#[cfg_attr(
+    all(test, target_feature = "vector-enhancements-1"),
+    assert_instr(vflls)
+)]
 pub unsafe fn vec_doublee(a: vector_float) -> vector_double {
-    let even = simd_shuffle::<_, _, f32x2>(a, a, const { u32x2::from_array([0, 2]) });
-    simd_as(even)
+    simd_as::<f32x2, vector_double>(simd_shuffle!(a, a, [0, 2]))
 }
 
 /// Vector Convert from double to float (even elements)
@@ -5322,11 +5428,7 @@ pub unsafe fn vec_doublee(a: vector_float) -> vector_double {
 // #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vledb))]
 pub unsafe fn vec_floate(a: vector_double) -> vector_float {
     let truncated: f32x2 = simd_as(a);
-    simd_shuffle(
-        truncated,
-        truncated,
-        const { u32x4::from_array([0, 0, 1, 1]) },
-    )
+    simd_shuffle!(truncated, truncated, [0, 0, 1, 1])
 }
 
 /// Vector Convert from int to float
@@ -5877,6 +5979,74 @@ pub unsafe fn vec_promote<T: sealed::VectorPromote>(a: T::ElementType, b: i32) -
     T::vec_promote(a, b)
 }
 
+/// Converts the left-most half of `a` to a vector of single-precision numbers.
+/// The format of the source vector elements is specified by `B`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[cfg_attr(test, assert_instr(vclfnh, B = 0))]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_extend_to_fp32_hi<const B: i32>(a: vector_signed_short) -> vector_float {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(B, 4);
+
+    vclfnhs(a, B)
+}
+
+/// Converts the right-most half of `a` to a vector of single-precision numbers.
+/// The format of the source vector elements is specified by `B`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[cfg_attr(test, assert_instr(vclfnl, B = 0))]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_extend_to_fp32_lo<const B: i32>(a: vector_signed_short) -> vector_float {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(B, 4);
+
+    vclfnls(a, B)
+}
+
+/// Converts the elements of vector `a` to the 16-bit IEEE floating point format.
+/// The format of the source vector elements is specified by `B`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[cfg_attr(test, assert_instr(vcfn, B = 0))]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_convert_to_fp16<const B: i32>(a: vector_signed_short) -> vector_signed_short {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(B, 4);
+
+    vcfn(a, B)
+}
+
+/// Converts the elements of vector `a` to an internal floating point format.
+/// The format of the target vector elements is specified by `B`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[cfg_attr(test, assert_instr(vcnf, B = 0))]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_convert_from_fp16<const B: i32>(a: vector_signed_short) -> vector_signed_short {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(B, 4);
+
+    vcnf(a, B)
+}
+
+/// Converts the elements of single-precision vectors `a` and `b` to an internal floating point
+/// format with 16-bit sized elements. The format of the target vector elements is specified by `C`.
+#[inline]
+#[target_feature(enable = "nnp-assist")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vcrnf, C = 0))]
+pub unsafe fn vec_round_from_fp32<const C: i32>(
+    a: vector_float,
+    b: vector_float,
+) -> vector_signed_short {
+    // On processors implementing the IBM z16 architecture, only the value 0 is supported.
+    static_assert_uimm_bits!(C, 4);
+
+    vcrnfs(a, b, C)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -5943,27 +6113,16 @@ mod tests {
     }
 
     macro_rules! test_vec_1 {
-        { $name: ident, $fn:ident, f32x4, [$($a:expr),+], ~[$($d:expr),+] } => {
-            #[simd_test(enable = "vector")]
-            unsafe fn $name() {
-                let a: vector_float = transmute(f32x4::new($($a),+));
-
-                let d: vector_float = transmute(f32x4::new($($d),+));
-                let r = transmute(vec_cmple(vec_abs(vec_sub($fn(a), d)), vec_splats(f32::EPSILON)));
-                let e = m32x4::new(true, true, true, true);
-                assert_eq!(e, r);
-            }
-        };
         { $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($d:expr),+] } => {
             test_vec_1! { $name, $fn, $ty -> $ty, [$($a),+], [$($d),+] }
         };
         { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($d:expr),+] } => {
             #[simd_test(enable = "vector")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty) = transmute($ty::new($($a),+));
+            fn $name() {
+                let a: s_t_l!($ty) = $ty::new($($a),+).into();
 
                 let d = $ty_out::new($($d),+);
-                let r : $ty_out = transmute($fn(a));
+                let r = $ty_out::from(unsafe { $fn(a) });
                 assert_eq!(d, r);
             }
         }
@@ -5978,35 +6137,23 @@ mod tests {
          };
         { $name: ident, $fn:ident, $ty1: ident, $ty2: ident -> $ty_out: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => {
             #[simd_test(enable = "vector")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty1) = transmute($ty1::new($($a),+));
-                let b: s_t_l!($ty2) = transmute($ty2::new($($b),+));
+            fn $name() {
+                let a: s_t_l!($ty1) = $ty1::new($($a),+).into();
+                let b: s_t_l!($ty2) = $ty2::new($($b),+).into();
 
                 let d = $ty_out::new($($d),+);
-                let r : $ty_out = transmute($fn(a, b));
+                let r = $ty_out::from(unsafe { $fn(a, b) });
                 assert_eq!(d, r);
             }
          };
-         { $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($b:expr),+], $d:expr } => {
-            #[simd_test(enable = "vector")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty) = transmute($ty::new($($a),+));
-                let b: s_t_l!($ty) = transmute($ty::new($($b),+));
-
-                let r : $ty_out = transmute($fn(a, b));
-                assert_eq!($d, r);
-            }
-         }
    }
 
     #[simd_test(enable = "vector")]
-    unsafe fn vec_add_i32x4_i32x4() {
-        let x = i32x4::new(1, 2, 3, 4);
-        let y = i32x4::new(4, 3, 2, 1);
-        let x: vector_signed_int = transmute(x);
-        let y: vector_signed_int = transmute(y);
-        let z = vec_add(x, y);
-        assert_eq!(i32x4::splat(5), transmute(z));
+    fn vec_add_i32x4_i32x4() {
+        let x = vector_signed_int::from(i32x4::new(1, 2, 3, 4));
+        let y = vector_signed_int::from(i32x4::new(4, 3, 2, 1));
+        let z = unsafe { vec_add(x, y) };
+        assert_eq!(i32x4::splat(5), i32x4::from(z));
     }
 
     macro_rules! test_vec_sub {
@@ -6124,11 +6271,11 @@ mod tests {
     macro_rules! test_vec_abs {
         { $name: ident, $ty: ident, $a: expr, $d: expr } => {
             #[simd_test(enable = "vector")]
-            unsafe fn $name() {
-                let a: s_t_l!($ty) = vec_splats($a);
-                let a: s_t_l!($ty) = vec_abs(a);
+            fn $name() {
+                let a: s_t_l!($ty) = unsafe { vec_splats($a) };
+                let a: s_t_l!($ty) = unsafe { vec_abs(a) };
                 let d = $ty::splat($d);
-                assert_eq!(d, transmute(a));
+                assert_eq!(d, $ty::from(a));
             }
         }
     }
@@ -6278,7 +6425,7 @@ mod tests {
     [0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 16],
     [4, 2, 1, 8] }
 
-    test_vec_2! { test_vec_sral_pos, vec_sral, u32x4, u8x16 -> i32x4,
+    test_vec_2! { test_vec_sral_pos, vec_sral, u32x4, u8x16 -> u32x4,
     [0b1000, 0b1000, 0b1000, 0b1000],
     [0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 16],
     [4, 2, 1, 8] }
@@ -6315,13 +6462,13 @@ mod tests {
          $shorttype:ident, $longtype:ident,
          [$($a:expr),+], [$($b:expr),+], [$($c:expr),+], [$($d:expr),+]} => {
             #[simd_test(enable = "vector")]
-            unsafe fn $name() {
-                let a: $longtype = transmute($shorttype::new($($a),+));
-                let b: $longtype = transmute($shorttype::new($($b),+));
-                let c: vector_unsigned_char = transmute(u8x16::new($($c),+));
-                let d = $shorttype::new($($d),+);
+            fn $name() {
+                let a = $longtype::from($shorttype::from_array([$($a),+]));
+                let b = $longtype::from($shorttype::from_array([$($b),+]));
+                let c = vector_unsigned_char::from(u8x16::from_array([$($c),+]));
+                let d = $shorttype::from_array([$($d),+]);
 
-                let r: $shorttype = transmute(vec_perm(a, b, c));
+                let r = $shorttype::from(unsafe { vec_perm(a, b, c) });
                 assert_eq!(d, r);
             }
         }
@@ -6404,46 +6551,46 @@ mod tests {
     [core::f32::consts::PI, 1.0, 25.0, 2.0],
     [core::f32::consts::PI.sqrt(), 1.0, 5.0, core::f32::consts::SQRT_2] }
 
-    test_vec_2! { test_vec_find_any_eq, vec_find_any_eq, i32x4, i32x4 -> u32x4,
+    test_vec_2! { test_vec_find_any_eq, vec_find_any_eq, i32x4, i32x4 -> i32x4,
         [1, -2, 3, -4],
         [-5, 3, -7, 8],
-        [0, 0, 0xFFFFFFFF, 0]
+        [0, 0, !0, 0]
     }
 
-    test_vec_2! { test_vec_find_any_ne, vec_find_any_ne, i32x4, i32x4 -> u32x4,
+    test_vec_2! { test_vec_find_any_ne, vec_find_any_ne, i32x4, i32x4 -> i32x4,
         [1, -2, 3, -4],
         [-5, 3, -7, 8],
-        [0xFFFFFFFF, 0xFFFFFFFF, 0, 0xFFFFFFFF]
+        [!0, !0, 0, !0]
     }
 
-    test_vec_2! { test_vec_find_any_eq_idx_1, vec_find_any_eq_idx, i32x4, i32x4 -> u32x4,
+    test_vec_2! { test_vec_find_any_eq_idx_1, vec_find_any_eq_idx, i32x4, i32x4 -> i32x4,
         [1, 2, 3, 4],
         [5, 3, 7, 8],
         [0, 8, 0, 0]
     }
-    test_vec_2! { test_vec_find_any_eq_idx_2, vec_find_any_eq_idx, i32x4, i32x4 -> u32x4,
+    test_vec_2! { test_vec_find_any_eq_idx_2, vec_find_any_eq_idx, i32x4, i32x4 -> i32x4,
         [1, 2, 3, 4],
         [5, 6, 7, 8],
         [0, 16, 0, 0]
     }
 
-    test_vec_2! { test_vec_find_any_ne_idx_1, vec_find_any_ne_idx, i32x4, i32x4 -> u32x4,
+    test_vec_2! { test_vec_find_any_ne_idx_1, vec_find_any_ne_idx, i32x4, i32x4 -> i32x4,
         [1, 2, 3, 4],
         [1, 5, 3, 4],
         [0, 4, 0, 0]
     }
-    test_vec_2! { test_vec_find_any_ne_idx_2, vec_find_any_ne_idx, i32x4, i32x4 -> u32x4,
+    test_vec_2! { test_vec_find_any_ne_idx_2, vec_find_any_ne_idx, i32x4, i32x4 -> i32x4,
         [1, 2, 3, 4],
         [1, 2, 3, 4],
         [0, 16, 0, 0]
     }
 
-    test_vec_2! { test_vec_find_any_eq_or_0_idx_1, vec_find_any_eq_or_0_idx, i32x4, i32x4 -> u32x4,
+    test_vec_2! { test_vec_find_any_eq_or_0_idx_1, vec_find_any_eq_or_0_idx, i32x4, i32x4 -> i32x4,
         [1, 2, 0, 4],
         [5, 6, 7, 8],
         [0, 8, 0, 0]
     }
-    test_vec_2! { test_vec_find_any_ne_or_0_idx_1, vec_find_any_ne_or_0_idx, i32x4, i32x4 -> u32x4,
+    test_vec_2! { test_vec_find_any_ne_or_0_idx_1, vec_find_any_ne_or_0_idx, i32x4, i32x4 -> i32x4,
         [1, 2, 0, 4],
         [1, 2, 3, 4],
         [0, 8, 0, 0]
@@ -7344,6 +7491,30 @@ mod tests {
         [0, !0, !0, !0]
     }
 
+    test_vec_2! { test_vec_max_f32, vec_max, f32x4, f32x4 -> f32x4,
+        [1.0,   f32::NAN, f32::INFINITY, 2.0],
+        [-10.0, -10.0,    5.0,           f32::NAN],
+        [1.0,   -10.0,    f32::INFINITY, 2.0]
+    }
+
+    test_vec_2! { test_vec_min_f32, vec_min, f32x4, f32x4 -> f32x4,
+        [1.0,   f32::NAN, f32::INFINITY, 2.0],
+        [-10.0, -10.0,    5.0,           f32::NAN],
+        [-10.0, -10.0,    5.0,           2.0]
+    }
+
+    test_vec_2! { test_vec_max_f64, vec_max, f64x2, f64x2 -> f64x2,
+        [f64::NAN, 2.0],
+        [-10.0,    f64::NAN],
+        [-10.0,    2.0]
+    }
+
+    test_vec_2! { test_vec_min_f64, vec_min, f64x2, f64x2 -> f64x2,
+        [f64::NAN, 2.0],
+        [-10.0,    f64::NAN],
+        [-10.0,    2.0]
+    }
+
     #[simd_test(enable = "vector")]
     fn test_vec_meadd() {
         let a = vector_unsigned_short([1, 0, 2, 0, 3, 0, 4, 0]);
diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs
index 25834943f0..2c6829b465 100644
--- a/crates/core_arch/src/simd.rs
+++ b/crates/core_arch/src/simd.rs
@@ -2,1008 +2,409 @@
 
 #![allow(non_camel_case_types)]
 
-macro_rules! simd_ty {
-    ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => {
-        #[repr(simd)]
-        #[derive(Copy, Clone)]
-        pub(crate) struct $id([$elem_type; $len]);
-
-        #[allow(clippy::use_self)]
-        impl $id {
-            /// A value of this type where all elements are zeroed out.
-            pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };
-
-            #[inline(always)]
-            pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
-                $id([$($param_name),*])
-            }
-            #[inline(always)]
-            pub(crate) const fn from_array(elements: [$elem_type; $len]) -> Self {
-                $id(elements)
-            }
-            // FIXME: Workaround rust@60637
-            #[inline(always)]
-            pub(crate) fn splat(value: $elem_type) -> Self {
-                #[derive(Copy, Clone)]
-                #[repr(simd)]
-                struct JustOne([$elem_type; 1]);
-                let one = JustOne([value]);
-                // SAFETY: 0 is always in-bounds because we're shuffling
-                // a simd type with exactly one element.
-                unsafe { simd_shuffle!(one, one, [0; $len]) }
-            }
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const unsafe fn simd_imax<T: Copy>(a: T, b: T) -> T {
+    let mask: T = crate::intrinsics::simd::simd_gt(a, b);
+    crate::intrinsics::simd::simd_select(mask, a, b)
+}
 
-            /// Extract the element at position `index`.
-            /// `index` is not a constant so this is not efficient!
-            /// Use for testing only.
-            // FIXME: Workaround rust@60637
-            #[inline(always)]
-            pub(crate) fn extract(&self, index: usize) -> $elem_type {
-                self.as_array()[index]
-            }
+#[inline(always)]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const unsafe fn simd_imin<T: Copy>(a: T, b: T) -> T {
+    let mask: T = crate::intrinsics::simd::simd_lt(a, b);
+    crate::intrinsics::simd::simd_select(mask, a, b)
+}
 
-            #[inline]
-            pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
-                let simd_ptr: *const Self = self;
-                let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
-                // SAFETY: We can always read the prefix of a simd type as an array.
-                // There might be more padding afterwards for some widths, but
-                // that's not a problem for reading less than that.
-                unsafe { &*array_ptr }
-            }
-        }
+/// SAFETY: All bits patterns must be valid
+pub(crate) unsafe trait SimdElement:
+    Copy + const PartialEq + crate::fmt::Debug
+{
+    // SAFETY: all bits patterns of types implementing this trait must be valid
+    const ZERO: Self = unsafe { crate::mem::zeroed() };
+}
 
-        impl core::cmp::PartialEq for $id {
-            #[inline]
-            fn eq(&self, other: &Self) -> bool {
-                self.as_array() == other.as_array()
-            }
-        }
+unsafe impl SimdElement for u8 {}
+unsafe impl SimdElement for u16 {}
+unsafe impl SimdElement for u32 {}
+unsafe impl SimdElement for u64 {}
 
-        impl core::fmt::Debug for $id {
-            #[inline]
-            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-                debug_simd_finish(f, stringify!($id), self.as_array())
-            }
-        }
+unsafe impl SimdElement for i8 {}
+unsafe impl SimdElement for i16 {}
+unsafe impl SimdElement for i32 {}
+unsafe impl SimdElement for i64 {}
+
+unsafe impl SimdElement for f16 {}
+unsafe impl SimdElement for f32 {}
+unsafe impl SimdElement for f64 {}
+
+#[repr(simd)]
+#[derive(Copy)]
+pub(crate) struct Simd<T: SimdElement, const N: usize>([T; N]);
+
+impl<T: SimdElement, const N: usize> Simd<T, N> {
+    /// A value of this type where all elements are zeroed out.
+    pub(crate) const ZERO: Self = Self::splat(T::ZERO);
+
+    #[inline(always)]
+    pub(crate) const fn from_array(elements: [T; N]) -> Self {
+        Self(elements)
+    }
+
+    #[inline]
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    pub(crate) const fn splat(value: T) -> Self {
+        unsafe { crate::intrinsics::simd::simd_splat(value) }
+    }
+
+    /// Extract the element at position `index`. Note that `index` is not a constant so this
+    /// operation is not efficient on most platforms. Use for testing only.
+    #[inline]
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    pub(crate) const fn extract_dyn(&self, index: usize) -> T {
+        assert!(index < N);
+        // SAFETY: self is a vector, T its element type.
+        unsafe { crate::intrinsics::simd::simd_extract_dyn(*self, index as u32) }
+    }
+
+    #[inline]
+    pub(crate) const fn as_array(&self) -> &[T; N] {
+        let simd_ptr: *const Self = self;
+        let array_ptr: *const [T; N] = simd_ptr.cast();
+        // SAFETY: We can always read the prefix of a simd type as an array.
+        // There might be more padding afterwards for some widths, but
+        // that's not a problem for reading less than that.
+        unsafe { &*array_ptr }
     }
 }
 
-macro_rules! simd_m_ty {
-    ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => {
-        #[repr(simd)]
-        #[derive(Copy, Clone)]
-        pub(crate) struct $id([$elem_type; $len]);
-
-        #[allow(clippy::use_self)]
-        impl $id {
-            #[inline(always)]
-            const fn bool_to_internal(x: bool) -> $elem_type {
-                [0 as $elem_type, !(0 as $elem_type)][x as usize]
-            }
+// `#[derive(Clone)]` causes ICE "Projecting into SIMD type core_arch::simd::Simd is banned by MCP#838"
+impl<T: SimdElement, const N: usize> Clone for Simd<T, N> {
+    #[inline]
+    fn clone(&self) -> Self {
+        *self
+    }
+}
 
-            #[inline(always)]
-            pub(crate) const fn new($($param_name: bool),*) -> Self {
-                $id([$(Self::bool_to_internal($param_name)),*])
-            }
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+impl<T: SimdElement, const N: usize> const crate::cmp::PartialEq for Simd<T, N> {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        self.as_array() == other.as_array()
+    }
+}
 
-            // FIXME: Workaround rust@60637
-            #[inline(always)]
-            pub(crate) fn splat(value: bool) -> Self {
-                #[derive(Copy, Clone)]
-                #[repr(simd)]
-                struct JustOne([$elem_type; 1]);
-                let one = JustOne([Self::bool_to_internal(value)]);
-                // SAFETY: 0 is always in-bounds because we're shuffling
-                // a simd type with exactly one element.
-                unsafe { simd_shuffle!(one, one, [0; $len]) }
-            }
+impl<T: SimdElement, const N: usize> crate::fmt::Debug for Simd<T, N> {
+    #[inline]
+    fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+        debug_simd_finish(f, "Simd", self.as_array())
+    }
+}
 
-            #[inline]
-            pub(crate) fn as_array(&self) -> &[$elem_type; $len] {
-                let simd_ptr: *const Self = self;
-                let array_ptr: *const [$elem_type; $len] = simd_ptr.cast();
-                // SAFETY: We can always read the prefix of a simd type as an array.
-                // There might be more padding afterwards for some widths, but
-                // that's not a problem for reading less than that.
-                unsafe { &*array_ptr }
-            }
-        }
+impl<T: SimdElement> Simd<T, 1> {
+    #[inline]
+    pub(crate) const fn new(x0: T) -> Self {
+        Self([x0])
+    }
+}
 
-        impl core::cmp::PartialEq for $id {
-            #[inline]
-            fn eq(&self, other: &Self) -> bool {
-                self.as_array() == other.as_array()
-            }
-        }
+impl<T: SimdElement> Simd<T, 2> {
+    #[inline]
+    pub(crate) const fn new(x0: T, x1: T) -> Self {
+        Self([x0, x1])
+    }
+}
+
+impl<T: SimdElement> Simd<T, 4> {
+    #[inline]
+    pub(crate) const fn new(x0: T, x1: T, x2: T, x3: T) -> Self {
+        Self([x0, x1, x2, x3])
+    }
+}
+
+impl<T: SimdElement> Simd<T, 8> {
+    #[inline]
+    pub(crate) const fn new(x0: T, x1: T, x2: T, x3: T, x4: T, x5: T, x6: T, x7: T) -> Self {
+        Self([x0, x1, x2, x3, x4, x5, x6, x7])
+    }
+}
+
+impl<T: SimdElement> Simd<T, 16> {
+    #[inline]
+    pub(crate) const fn new(
+        x0: T,
+        x1: T,
+        x2: T,
+        x3: T,
+        x4: T,
+        x5: T,
+        x6: T,
+        x7: T,
+        x8: T,
+        x9: T,
+        x10: T,
+        x11: T,
+        x12: T,
+        x13: T,
+        x14: T,
+        x15: T,
+    ) -> Self {
+        Self([
+            x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
+        ])
+    }
+}
+
+impl<T: SimdElement> Simd<T, 32> {
+    #[inline]
+    pub(crate) const fn new(
+        x0: T,
+        x1: T,
+        x2: T,
+        x3: T,
+        x4: T,
+        x5: T,
+        x6: T,
+        x7: T,
+        x8: T,
+        x9: T,
+        x10: T,
+        x11: T,
+        x12: T,
+        x13: T,
+        x14: T,
+        x15: T,
+        x16: T,
+        x17: T,
+        x18: T,
+        x19: T,
+        x20: T,
+        x21: T,
+        x22: T,
+        x23: T,
+        x24: T,
+        x25: T,
+        x26: T,
+        x27: T,
+        x28: T,
+        x29: T,
+        x30: T,
+        x31: T,
+    ) -> Self {
+        Self([
+            x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18,
+            x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31,
+        ])
+    }
+}
+
+impl<const N: usize> Simd<f16, N> {
+    #[inline]
+    pub(crate) const fn to_bits(self) -> Simd<u16, N> {
+        assert!(size_of::<Self>() == size_of::<Simd<u16, N>>());
+        unsafe { crate::mem::transmute_copy(&self) }
+    }
+
+    #[inline]
+    pub(crate) const fn from_bits(bits: Simd<u16, N>) -> Self {
+        assert!(size_of::<Self>() == size_of::<Simd<u16, N>>());
+        unsafe { crate::mem::transmute_copy(&bits) }
+    }
+}
+
+impl<const N: usize> Simd<f32, N> {
+    #[inline]
+    pub(crate) const fn to_bits(self) -> Simd<u32, N> {
+        assert!(size_of::<Self>() == size_of::<Simd<u32, N>>());
+        unsafe { crate::mem::transmute_copy(&self) }
+    }
 
-        impl core::fmt::Debug for $id {
-            #[inline]
-            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-                debug_simd_finish(f, stringify!($id), self.as_array())
+    #[inline]
+    pub(crate) const fn from_bits(bits: Simd<u32, N>) -> Self {
+        assert!(size_of::<Self>() == size_of::<Simd<u32, N>>());
+        unsafe { crate::mem::transmute_copy(&bits) }
+    }
+}
+
+impl<const N: usize> Simd<f64, N> {
+    #[inline]
+    pub(crate) const fn to_bits(self) -> Simd<u64, N> {
+        assert!(size_of::<Self>() == size_of::<Simd<u64, N>>());
+        unsafe { crate::mem::transmute_copy(&self) }
+    }
+
+    #[inline]
+    pub(crate) const fn from_bits(bits: Simd<u64, N>) -> Self {
+        assert!(size_of::<Self>() == size_of::<Simd<u64, N>>());
+        unsafe { crate::mem::transmute_copy(&bits) }
+    }
+}
+
+#[repr(simd)]
+#[derive(Copy)]
+pub(crate) struct SimdM<T: SimdElement, const N: usize>([T; N]);
+
+impl<T: SimdElement, const N: usize> SimdM<T, N> {
+    #[inline(always)]
+    const fn bool_to_internal(x: bool) -> T {
+        // SAFETY: `T` implements `SimdElement`, so all bit patterns are valid.
+        let ones = const {
+            // Ideally, this would be `transmute([0xFFu8; size_of::<T>()])`, but
+            // `size_of::<T>()` is not allowed to use a generic parameter there.
+            let mut r = crate::mem::MaybeUninit::<T>::uninit();
+            let mut i = 0;
+            while i < crate::mem::size_of::<T>() {
+                r.as_bytes_mut()[i] = crate::mem::MaybeUninit::new(0xFF);
+                i += 1;
             }
+            unsafe { r.assume_init() }
+        };
+        [T::ZERO, ones][x as usize]
+    }
+
+    #[inline]
+    pub(crate) const fn from_array(elements: [bool; N]) -> Self {
+        let mut internal = [T::ZERO; N];
+        let mut i = 0;
+        while i < N {
+            internal[i] = Self::bool_to_internal(elements[i]);
+            i += 1;
         }
+        Self(internal)
+    }
+
+    #[inline]
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    pub(crate) const fn splat(value: bool) -> Self {
+        unsafe { crate::intrinsics::simd::simd_splat(Self::bool_to_internal(value)) }
+    }
+
+    #[inline]
+    pub(crate) const fn as_array(&self) -> &[T; N] {
+        let simd_ptr: *const Self = self;
+        let array_ptr: *const [T; N] = simd_ptr.cast();
+        // SAFETY: We can always read the prefix of a simd type as an array.
+        // There might be more padding afterwards for some widths, but
+        // that's not a problem for reading less than that.
+        unsafe { &*array_ptr }
+    }
+}
+
+// `#[derive(Clone)]` causes ICE "Projecting into SIMD type core_arch::simd::SimdM is banned by MCP#838"
+impl<T: SimdElement, const N: usize> Clone for SimdM<T, N> {
+    #[inline]
+    fn clone(&self) -> Self {
+        *self
+    }
+}
+
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+impl<T: SimdElement, const N: usize> const crate::cmp::PartialEq for SimdM<T, N> {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        self.as_array() == other.as_array()
+    }
+}
+
+impl<T: SimdElement, const N: usize> crate::fmt::Debug for SimdM<T, N> {
+    #[inline]
+    fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result {
+        debug_simd_finish(f, "SimdM", self.as_array())
     }
 }
 
 // 16-bit wide types:
 
-simd_ty!(u8x2[u8;2]: x0, x1);
-simd_ty!(i8x2[i8;2]: x0, x1);
+pub(crate) type u8x2 = Simd<u8, 2>;
+pub(crate) type i8x2 = Simd<i8, 2>;
 
 // 32-bit wide types:
 
-simd_ty!(u8x4[u8;4]: x0, x1, x2, x3);
-simd_ty!(u16x2[u16;2]: x0, x1);
+pub(crate) type u8x4 = Simd<u8, 4>;
+pub(crate) type u16x2 = Simd<u16, 2>;
 
-simd_ty!(i8x4[i8;4]: x0, x1, x2, x3);
-simd_ty!(i16x2[i16;2]: x0, x1);
+pub(crate) type i8x4 = Simd<i8, 4>;
+pub(crate) type i16x2 = Simd<i16, 2>;
 
 // 64-bit wide types:
 
-simd_ty!(
-    u8x8[u8;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-simd_ty!(u16x4[u16;4]: x0, x1, x2, x3);
-simd_ty!(u32x2[u32;2]: x0, x1);
-simd_ty!(u64x1[u64;1]: x1);
-
-simd_ty!(
-    i8x8[i8;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-simd_ty!(i16x4[i16;4]: x0, x1, x2, x3);
-simd_ty!(i32x2[i32;2]: x0, x1);
-simd_ty!(i64x1[i64;1]: x1);
-
-simd_ty!(f32x2[f32;2]: x0, x1);
-simd_ty!(f64x1[f64;1]: x1);
+pub(crate) type u8x8 = Simd<u8, 8>;
+pub(crate) type u16x4 = Simd<u16, 4>;
+pub(crate) type u32x2 = Simd<u32, 2>;
+pub(crate) type u64x1 = Simd<u64, 1>;
+
+pub(crate) type i8x8 = Simd<i8, 8>;
+pub(crate) type i16x4 = Simd<i16, 4>;
+pub(crate) type i32x2 = Simd<i32, 2>;
+pub(crate) type i64x1 = Simd<i64, 1>;
+
+pub(crate) type f16x4 = Simd<f16, 4>;
+pub(crate) type f32x2 = Simd<f32, 2>;
+pub(crate) type f64x1 = Simd<f64, 1>;
 
 // 128-bit wide types:
 
-simd_ty!(
-    u8x16[u8;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-simd_ty!(
-    u16x8[u16;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-simd_ty!(u32x4[u32;4]: x0, x1, x2, x3);
-simd_ty!(u64x2[u64;2]: x0, x1);
-
-simd_ty!(
-    i8x16[i8;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-simd_ty!(
-    i16x8[i16;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-simd_ty!(i32x4[i32;4]: x0, x1, x2, x3);
-simd_ty!(i64x2[i64;2]: x0, x1);
-
-simd_ty!(f16x4[f16;4]: x0, x1, x2, x3);
-
-simd_ty!(
-    f16x8[f16;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-simd_ty!(f32x4[f32;4]: x0, x1, x2, x3);
-simd_ty!(f64x2[f64;2]: x0, x1);
-
-simd_m_ty!(
-    m8x16[i8;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-simd_m_ty!(
-    m16x8[i16;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-simd_m_ty!(m32x4[i32;4]: x0, x1, x2, x3);
-simd_m_ty!(m64x2[i64;2]: x0, x1);
+pub(crate) type u8x16 = Simd<u8, 16>;
+pub(crate) type u16x8 = Simd<u16, 8>;
+pub(crate) type u32x4 = Simd<u32, 4>;
+pub(crate) type u64x2 = Simd<u64, 2>;
+
+pub(crate) type i8x16 = Simd<i8, 16>;
+pub(crate) type i16x8 = Simd<i16, 8>;
+pub(crate) type i32x4 = Simd<i32, 4>;
+pub(crate) type i64x2 = Simd<i64, 2>;
+
+pub(crate) type f16x8 = Simd<f16, 8>;
+pub(crate) type f32x4 = Simd<f32, 4>;
+pub(crate) type f64x2 = Simd<f64, 2>;
+
+pub(crate) type m8x16 = SimdM<i8, 16>;
+pub(crate) type m16x8 = SimdM<i16, 8>;
+pub(crate) type m32x4 = SimdM<i32, 4>;
+pub(crate) type m64x2 = SimdM<i64, 2>;
 
 // 256-bit wide types:
 
-simd_ty!(
-    u8x32[u8;32]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31
-);
-simd_ty!(
-    u16x16[u16;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-simd_ty!(
-    u32x8[u32;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-simd_ty!(u64x4[u64;4]: x0, x1, x2, x3);
-
-simd_ty!(
-    i8x32[i8;32]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31
-);
-simd_ty!(
-    i16x16[i16;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-simd_ty!(
-    i32x8[i32;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-simd_ty!(i64x4[i64;4]: x0, x1, x2, x3);
-
-simd_ty!(
-    f16x16[f16;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-simd_ty!(
-    f32x8[f32;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-simd_ty!(f64x4[f64;4]: x0, x1, x2, x3);
-
-simd_m_ty!(
-    m8x32[i8;32]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31
-);
-simd_m_ty!(
-    m16x16[i16;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-simd_m_ty!(
-    m32x8[i32;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
+pub(crate) type u8x32 = Simd<u8, 32>;
+pub(crate) type u16x16 = Simd<u16, 16>;
+pub(crate) type u32x8 = Simd<u32, 8>;
+pub(crate) type u64x4 = Simd<u64, 4>;
+
+pub(crate) type i8x32 = Simd<i8, 32>;
+pub(crate) type i16x16 = Simd<i16, 16>;
+pub(crate) type i32x8 = Simd<i32, 8>;
+pub(crate) type i64x4 = Simd<i64, 4>;
+
+pub(crate) type f16x16 = Simd<f16, 16>;
+pub(crate) type f32x8 = Simd<f32, 8>;
+pub(crate) type f64x4 = Simd<f64, 4>;
+
+pub(crate) type m8x32 = SimdM<i8, 32>;
+pub(crate) type m16x16 = SimdM<i16, 16>;
+pub(crate) type m32x8 = SimdM<i32, 8>;
 
 // 512-bit wide types:
 
-simd_ty!(
-    i8x64[i8;64]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31,
-    x32,
-    x33,
-    x34,
-    x35,
-    x36,
-    x37,
-    x38,
-    x39,
-    x40,
-    x41,
-    x42,
-    x43,
-    x44,
-    x45,
-    x46,
-    x47,
-    x48,
-    x49,
-    x50,
-    x51,
-    x52,
-    x53,
-    x54,
-    x55,
-    x56,
-    x57,
-    x58,
-    x59,
-    x60,
-    x61,
-    x62,
-    x63
-);
-
-simd_ty!(
-    u8x64[u8;64]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31,
-    x32,
-    x33,
-    x34,
-    x35,
-    x36,
-    x37,
-    x38,
-    x39,
-    x40,
-    x41,
-    x42,
-    x43,
-    x44,
-    x45,
-    x46,
-    x47,
-    x48,
-    x49,
-    x50,
-    x51,
-    x52,
-    x53,
-    x54,
-    x55,
-    x56,
-    x57,
-    x58,
-    x59,
-    x60,
-    x61,
-    x62,
-    x63
-);
-
-simd_ty!(
-    i16x32[i16;32]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31
-);
-
-simd_ty!(
-    u16x32[u16;32]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31
-);
-
-simd_ty!(
-    i32x16[i32;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-
-simd_ty!(
-    u32x16[u32;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-
-simd_ty!(
-    f16x32[f16;32]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31
-);
-simd_ty!(
-    f32x16[f32;16]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15
-);
-
-simd_ty!(
-    i64x8[i64;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-
-simd_ty!(
-    u64x8[u64;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
-
-simd_ty!(
-    f64x8[f64;8]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7
-);
+pub(crate) type u8x64 = Simd<u8, 64>;
+pub(crate) type u16x32 = Simd<u16, 32>;
+pub(crate) type u32x16 = Simd<u32, 16>;
+pub(crate) type u64x8 = Simd<u64, 8>;
+
+pub(crate) type i8x64 = Simd<i8, 64>;
+pub(crate) type i16x32 = Simd<i16, 32>;
+pub(crate) type i32x16 = Simd<i32, 16>;
+pub(crate) type i64x8 = Simd<i64, 8>;
+
+pub(crate) type f16x32 = Simd<f16, 32>;
+pub(crate) type f32x16 = Simd<f32, 16>;
+pub(crate) type f64x8 = Simd<f64, 8>;
 
 // 1024-bit wide types:
-simd_ty!(
-    u16x64[u16;64]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31,
-    x32,
-    x33,
-    x34,
-    x35,
-    x36,
-    x37,
-    x38,
-    x39,
-    x40,
-    x41,
-    x42,
-    x43,
-    x44,
-    x45,
-    x46,
-    x47,
-    x48,
-    x49,
-    x50,
-    x51,
-    x52,
-    x53,
-    x54,
-    x55,
-    x56,
-    x57,
-    x58,
-    x59,
-    x60,
-    x61,
-    x62,
-    x63
-);
-simd_ty!(
-    i32x32[i32;32]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31
-);
-simd_ty!(
-    u32x32[u32;32]:
-    x0,
-    x1,
-    x2,
-    x3,
-    x4,
-    x5,
-    x6,
-    x7,
-    x8,
-    x9,
-    x10,
-    x11,
-    x12,
-    x13,
-    x14,
-    x15,
-    x16,
-    x17,
-    x18,
-    x19,
-    x20,
-    x21,
-    x22,
-    x23,
-    x24,
-    x25,
-    x26,
-    x27,
-    x28,
-    x29,
-    x30,
-    x31
-);
+
+pub(crate) type u16x64 = Simd<u16, 64>;
+pub(crate) type u32x32 = Simd<u32, 32>;
+
+pub(crate) type i32x32 = Simd<i32, 32>;
 
 /// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they
 /// were before moving to array-based simd.
diff --git a/crates/core_arch/src/test.rs b/crates/core_arch/src/test.rs
new file mode 100644
index 0000000000..976d4ac1b0
--- /dev/null
+++ b/crates/core_arch/src/test.rs
@@ -0,0 +1,25 @@
+use crate::fmt::Debug;
+
+#[track_caller]
+#[allow(unused)]
+pub(crate) fn assert_eq_rt<T: PartialEq + Debug>(a: &T, b: &T) {
+    std::assert_eq!(a, b)
+}
+
+#[allow(unused)]
+macro_rules! assert_eq_const {
+    ($a:expr, $b:expr $(,)?) => {{
+        #[inline(always)]
+        #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+        const fn assert_eq_ct<T: [const] PartialEq>(a: &T, b: &T) {
+            assert!(a == b, concat!("`", stringify!($a), "` != `", stringify!($b), "`"));
+        }
+
+        $crate::intrinsics::const_eval_select((&$a, &$b), assert_eq_ct, $crate::core_arch::test::assert_eq_rt);
+    }};
+    ($a:expr, $b:expr, $($t:tt)+) => {
+        ::std::assert_eq!($a, $b, $($t)+)
+    };
+}
+
+pub(crate) use assert_eq_const;
diff --git a/crates/core_arch/src/wasm32/mod.rs b/crates/core_arch/src/wasm32/mod.rs
index 82674a0d0b..57c9157bed 100644
--- a/crates/core_arch/src/wasm32/mod.rs
+++ b/crates/core_arch/src/wasm32/mod.rs
@@ -173,32 +173,3 @@ pub fn f64_nearest(a: f64) -> f64 {
 pub fn f64_sqrt(a: f64) -> f64 {
     crate::intrinsics::sqrtf64(a)
 }
-
-unsafe extern "C-unwind" {
-    #[link_name = "llvm.wasm.throw"]
-    fn wasm_throw(tag: i32, ptr: *mut u8) -> !;
-}
-
-/// Generates the [`throw`] instruction from the [exception-handling proposal] for WASM.
-///
-/// This function is unlikely to be stabilized until codegen backends have better support.
-///
-/// [`throw`]: https://webassembly.github.io/exception-handling/core/syntax/instructions.html#syntax-instr-control
-/// [exception-handling proposal]: https://github.com/WebAssembly/exception-handling
-#[cfg_attr(test, assert_instr(throw, TAG = 0, ptr = core::ptr::null_mut()))]
-#[inline]
-#[unstable(feature = "wasm_exception_handling_intrinsics", issue = "122465")]
-// FIXME: Since this instruction unwinds, `core` built with `-C panic=unwind`
-//        cannot be linked with `-C panic=abort` programs. But that's not
-//        entirely supported anyway, because runtimes without EH support won't
-//        be able to handle `try` blocks in `-C panic=unwind` crates either.
-//        We ship `-C panic=abort` `core`, so this doesn't affect users
-//        directly. Resolving this will likely require patching out both `try`
-//        and `throw` instructions, at which point we can look into whitelisting
-//        this function in the compiler to allow linking.
-//        See https://github.com/rust-lang/rust/issues/118168.
-#[allow(ffi_unwind_calls)]
-pub unsafe fn throw<const TAG: i32>(ptr: *mut u8) -> ! {
-    static_assert!(TAG == 0); // LLVM only supports tag 0 == C++ right now.
-    wasm_throw(TAG, ptr)
-}
diff --git a/crates/core_arch/src/wasm32/relaxed_simd.rs b/crates/core_arch/src/wasm32/relaxed_simd.rs
index a9b7e9c04d..8a9d46984d 100644
--- a/crates/core_arch/src/wasm32/relaxed_simd.rs
+++ b/crates/core_arch/src/wasm32/relaxed_simd.rs
@@ -248,8 +248,10 @@ pub fn i64x2_relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 {
 #[stable(feature = "stdarch_wasm_relaxed_simd", since = "1.82.0")]
 pub use i64x2_relaxed_laneselect as u64x2_relaxed_laneselect;
 
-/// A relaxed version of `f32x4_min` which is either `f32x4_min` or
-/// `f32x4_pmin`.
+/// A relaxed version of `f32x4_min` which has implementation-specific behavior
+/// when its operands are NaN or signed zeroes. For more information, see [the
+/// WebAssembly
+/// specification](https://webassembly.github.io/spec/core/exec/numerics.html#op-frelaxed-min).
 #[inline]
 #[cfg_attr(test, assert_instr(f32x4.relaxed_min))]
 #[target_feature(enable = "relaxed-simd")]
@@ -259,8 +261,10 @@ pub fn f32x4_relaxed_min(a: v128, b: v128) -> v128 {
     unsafe { llvm_f32x4_relaxed_min(a.as_f32x4(), b.as_f32x4()).v128() }
 }
 
-/// A relaxed version of `f32x4_max` which is either `f32x4_max` or
-/// `f32x4_pmax`.
+/// A relaxed version of `f32x4_max` which has implementation-specific behavior
+/// when its operands are NaN or signed zeroes. For more information, see [the
+/// WebAssembly
+/// specification](https://webassembly.github.io/spec/core/exec/numerics.html#op-frelaxed-max).
 #[inline]
 #[cfg_attr(test, assert_instr(f32x4.relaxed_max))]
 #[target_feature(enable = "relaxed-simd")]
@@ -270,8 +274,10 @@ pub fn f32x4_relaxed_max(a: v128, b: v128) -> v128 {
     unsafe { llvm_f32x4_relaxed_max(a.as_f32x4(), b.as_f32x4()).v128() }
 }
 
-/// A relaxed version of `f64x2_min` which is either `f64x2_min` or
-/// `f64x2_pmin`.
+/// A relaxed version of `f64x2_min` which has implementation-specific behavior
+/// when its operands are NaN or signed zeroes. For more information, see [the
+/// WebAssembly
+/// specification](https://webassembly.github.io/spec/core/exec/numerics.html#op-frelaxed-min).
 #[inline]
 #[cfg_attr(test, assert_instr(f64x2.relaxed_min))]
 #[target_feature(enable = "relaxed-simd")]
@@ -281,8 +287,10 @@ pub fn f64x2_relaxed_min(a: v128, b: v128) -> v128 {
     unsafe { llvm_f64x2_relaxed_min(a.as_f64x2(), b.as_f64x2()).v128() }
 }
 
-/// A relaxed version of `f64x2_max` which is either `f64x2_max` or
-/// `f64x2_pmax`.
+/// A relaxed version of `f64x2_max` which has implementation-specific behavior
+/// when its operands are NaN or signed zeroes. For more information, see [the
+/// WebAssembly
+/// specification](https://webassembly.github.io/spec/core/exec/numerics.html#op-frelaxed-max).
 #[inline]
 #[cfg_attr(test, assert_instr(f64x2.relaxed_max))]
 #[target_feature(enable = "relaxed-simd")]
diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs
index c864d6a516..e1a3754965 100644
--- a/crates/core_arch/src/wasm32/simd128.rs
+++ b/crates/core_arch/src/wasm32/simd128.rs
@@ -86,10 +86,6 @@ unsafe extern "unadjusted" {
     fn llvm_i8x16_all_true(x: simd::i8x16) -> i32;
     #[link_name = "llvm.wasm.bitmask.v16i8"]
     fn llvm_bitmask_i8x16(a: simd::i8x16) -> i32;
-    #[link_name = "llvm.wasm.narrow.signed.v16i8.v8i16"]
-    fn llvm_narrow_i8x16_s(a: simd::i16x8, b: simd::i16x8) -> simd::i8x16;
-    #[link_name = "llvm.wasm.narrow.unsigned.v16i8.v8i16"]
-    fn llvm_narrow_i8x16_u(a: simd::i16x8, b: simd::i16x8) -> simd::i8x16;
     #[link_name = "llvm.wasm.avgr.unsigned.v16i8"]
     fn llvm_avgr_u_i8x16(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16;
 
@@ -103,10 +99,6 @@ unsafe extern "unadjusted" {
     fn llvm_i16x8_all_true(x: simd::i16x8) -> i32;
     #[link_name = "llvm.wasm.bitmask.v8i16"]
     fn llvm_bitmask_i16x8(a: simd::i16x8) -> i32;
-    #[link_name = "llvm.wasm.narrow.signed.v8i16.v4i32"]
-    fn llvm_narrow_i16x8_s(a: simd::i32x4, b: simd::i32x4) -> simd::i16x8;
-    #[link_name = "llvm.wasm.narrow.unsigned.v8i16.v4i32"]
-    fn llvm_narrow_i16x8_u(a: simd::i32x4, b: simd::i32x4) -> simd::i16x8;
     #[link_name = "llvm.wasm.avgr.unsigned.v8i16"]
     fn llvm_avgr_u_i16x8(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8;
 
@@ -2281,7 +2273,23 @@ pub use i8x16_bitmask as u8x16_bitmask;
 #[doc(alias("i8x16.narrow_i16x8_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i8x16_narrow_i16x8(a: v128, b: v128) -> v128 {
-    unsafe { llvm_narrow_i8x16_s(a.as_i16x8(), b.as_i16x8()).v128() }
+    unsafe {
+        let v: simd::i16x16 = simd_shuffle!(
+            a.as_i16x8(),
+            b.as_i16x8(),
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+        );
+
+        let max = simd_splat(i16::from(i8::MAX));
+        let min = simd_splat(i16::from(i8::MIN));
+
+        let v = simd_select(simd_gt::<_, simd::i16x16>(v, max), max, v);
+        let v = simd_select(simd_lt::<_, simd::i16x16>(v, min), min, v);
+
+        let v: simd::i8x16 = simd_cast(v);
+
+        v.v128()
+    }
 }
 
 /// Converts two input vectors into a smaller lane vector by narrowing each
@@ -2295,7 +2303,23 @@ pub fn i8x16_narrow_i16x8(a: v128, b: v128) -> v128 {
 #[doc(alias("i8x16.narrow_i16x8_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u8x16_narrow_i16x8(a: v128, b: v128) -> v128 {
-    unsafe { llvm_narrow_i8x16_u(a.as_i16x8(), b.as_i16x8()).v128() }
+    unsafe {
+        let v: simd::i16x16 = simd_shuffle!(
+            a.as_i16x8(),
+            b.as_i16x8(),
+            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+        );
+
+        let max = simd_splat(i16::from(u8::MAX));
+        let min = simd_splat(i16::from(u8::MIN));
+
+        let v = simd_select(simd_gt::<_, simd::i16x16>(v, max), max, v);
+        let v = simd_select(simd_lt::<_, simd::i16x16>(v, min), min, v);
+
+        let v: simd::u8x16 = simd_cast(v);
+
+        v.v128()
+    }
 }
 
 /// Shifts each lane to the left by the specified number of bits.
@@ -2593,7 +2617,19 @@ pub use i16x8_bitmask as u16x8_bitmask;
 #[doc(alias("i16x8.narrow_i32x4_s"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn i16x8_narrow_i32x4(a: v128, b: v128) -> v128 {
-    unsafe { llvm_narrow_i16x8_s(a.as_i32x4(), b.as_i32x4()).v128() }
+    unsafe {
+        let v: simd::i32x8 = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+
+        let max = simd_splat(i32::from(i16::MAX));
+        let min = simd_splat(i32::from(i16::MIN));
+
+        let v = simd_select(simd_gt::<_, simd::i32x8>(v, max), max, v);
+        let v = simd_select(simd_lt::<_, simd::i32x8>(v, min), min, v);
+
+        let v: simd::i16x8 = simd_cast(v);
+
+        v.v128()
+    }
 }
 
 /// Converts two input vectors into a smaller lane vector by narrowing each
@@ -2607,7 +2643,19 @@ pub fn i16x8_narrow_i32x4(a: v128, b: v128) -> v128 {
 #[doc(alias("i16x8.narrow_i32x4_u"))]
 #[stable(feature = "wasm_simd", since = "1.54.0")]
 pub fn u16x8_narrow_i32x4(a: v128, b: v128) -> v128 {
-    unsafe { llvm_narrow_i16x8_u(a.as_i32x4(), b.as_i32x4()).v128() }
+    unsafe {
+        let v: simd::i32x8 = simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]);
+
+        let max = simd_splat(i32::from(u16::MAX));
+        let min = simd_splat(i32::from(u16::MIN));
+
+        let v = simd_select(simd_gt::<_, simd::i32x8>(v, max), max, v);
+        let v = simd_select(simd_lt::<_, simd::i32x8>(v, min), min, v);
+
+        let v: simd::u16x8 = simd_cast(v);
+
+        v.v128()
+    }
 }
 
 /// Converts low half of the smaller lane vector to a larger lane
diff --git a/crates/core_arch/src/x86/abm.rs b/crates/core_arch/src/x86/abm.rs
index e6d5517600..078c0c5980 100644
--- a/crates/core_arch/src/x86/abm.rs
+++ b/crates/core_arch/src/x86/abm.rs
@@ -12,8 +12,8 @@
 //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
 //! available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
 //! [wikipedia_bmi]:
 //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
 
@@ -29,7 +29,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "lzcnt")]
 #[cfg_attr(test, assert_instr(lzcnt))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _lzcnt_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _lzcnt_u32(x: u32) -> u32 {
     x.leading_zeros()
 }
 
@@ -40,23 +41,25 @@ pub fn _lzcnt_u32(x: u32) -> u32 {
 #[target_feature(enable = "popcnt")]
 #[cfg_attr(test, assert_instr(popcnt))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _popcnt32(x: i32) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _popcnt32(x: i32) -> i32 {
     x.count_ones() as i32
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "lzcnt")]
-    unsafe fn test_lzcnt_u32() {
+    const fn test_lzcnt_u32() {
         assert_eq!(_lzcnt_u32(0b0101_1010), 25);
     }
 
     #[simd_test(enable = "popcnt")]
-    unsafe fn test_popcnt32() {
+    const fn test_popcnt32() {
         assert_eq!(_popcnt32(0b0101_1010), 4);
     }
 }
diff --git a/crates/core_arch/src/x86/aes.rs b/crates/core_arch/src/x86/aes.rs
index 7db743b2cc..d07ab4dc2a 100644
--- a/crates/core_arch/src/x86/aes.rs
+++ b/crates/core_arch/src/x86/aes.rs
@@ -5,7 +5,7 @@
 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 
 use crate::core_arch::x86::__m128i;
 
@@ -112,7 +112,7 @@ mod tests {
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "aes")]
-    unsafe fn test_mm_aesdec_si128() {
+    fn test_mm_aesdec_si128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
         let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
         let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
@@ -122,7 +122,7 @@ mod tests {
     }
 
     #[simd_test(enable = "aes")]
-    unsafe fn test_mm_aesdeclast_si128() {
+    fn test_mm_aesdeclast_si128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
         let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
         let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
@@ -132,7 +132,7 @@ mod tests {
     }
 
     #[simd_test(enable = "aes")]
-    unsafe fn test_mm_aesenc_si128() {
+    fn test_mm_aesenc_si128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
         let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
         let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
@@ -142,7 +142,7 @@ mod tests {
     }
 
     #[simd_test(enable = "aes")]
-    unsafe fn test_mm_aesenclast_si128() {
+    fn test_mm_aesenclast_si128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
         let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
         let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
@@ -152,7 +152,7 @@ mod tests {
     }
 
     #[simd_test(enable = "aes")]
-    unsafe fn test_mm_aesimc_si128() {
+    fn test_mm_aesimc_si128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc714195.aspx.
         let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
         let e = _mm_set_epi64x(0xc66c82284ee40aa0, 0x6633441122770055);
@@ -161,7 +161,7 @@ mod tests {
     }
 
     #[simd_test(enable = "aes")]
-    unsafe fn test_mm_aeskeygenassist_si128() {
+    fn test_mm_aeskeygenassist_si128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc714138.aspx.
         let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
         let e = _mm_set_epi64x(0x857c266b7c266e85, 0xeac4eea9c4eeacea);
diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs
index c2c2febf18..ef434205b5 100644
--- a/crates/core_arch/src/x86/avx.rs
+++ b/crates/core_arch/src/x86/avx.rs
@@ -9,8 +9,8 @@
 //!
 //! [Wikipedia][wiki] provides a quick overview of the instructions available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
 //! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
 
 use crate::{
@@ -30,7 +30,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe { simd_add(a, b) }
 }
 
@@ -42,7 +43,8 @@ pub fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
     unsafe { simd_add(a, b) }
 }
 
@@ -55,7 +57,8 @@ pub fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
 // See https://github.com/rust-lang/stdarch/issues/71
 #[cfg_attr(test, assert_instr(vandp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let a: u64x4 = transmute(a);
         let b: u64x4 = transmute(b);
@@ -71,7 +74,8 @@ pub fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
     unsafe {
         let a: u32x8 = transmute(a);
         let b: u32x8 = transmute(b);
@@ -88,7 +92,8 @@ pub fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
 // See <https://github.com/rust-lang/stdarch/issues/71>.
 #[cfg_attr(test, assert_instr(vorp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let a: u64x4 = transmute(a);
         let b: u64x4 = transmute(b);
@@ -104,7 +109,8 @@ pub fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
     unsafe {
         let a: u32x8 = transmute(a);
         let b: u32x8 = transmute(b);
@@ -121,7 +127,8 @@ pub fn _mm256_or_ps(a: __m256, b: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
     static_assert_uimm_bits!(MASK, 8);
     unsafe {
         simd_shuffle!(
@@ -146,7 +153,8 @@ pub fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(MASK, 8);
     unsafe {
         simd_shuffle!(
@@ -174,7 +182,8 @@ pub fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vandnp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let a: u64x4 = transmute(a);
         let b: u64x4 = transmute(b);
@@ -191,7 +200,8 @@ pub fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256 {
     unsafe {
         let a: u32x8 = transmute(a);
         let b: u32x8 = transmute(b);
@@ -255,7 +265,8 @@ pub fn _mm256_min_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmulpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe { simd_mul(a, b) }
 }
 
@@ -267,7 +278,8 @@ pub fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmulps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
     unsafe { simd_mul(a, b) }
 }
 
@@ -279,7 +291,8 @@ pub fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddsubpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let a = a.as_f64x4();
         let b = b.as_f64x4();
@@ -297,7 +310,8 @@ pub fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vaddsubps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
     unsafe {
         let a = a.as_f32x8();
         let b = b.as_f32x8();
@@ -315,7 +329,8 @@ pub fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vsubpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe { simd_sub(a, b) }
 }
 
@@ -327,7 +342,8 @@ pub fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vsubps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
     unsafe { simd_sub(a, b) }
 }
 
@@ -339,7 +355,8 @@ pub fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vdivps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
     unsafe { simd_div(a, b) }
 }
 
@@ -351,7 +368,8 @@ pub fn _mm256_div_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vdivpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe { simd_div(a, b) }
 }
 
@@ -386,7 +404,8 @@ pub fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_ceil_pd(a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_ceil_pd(a: __m256d) -> __m256d {
     unsafe { simd_ceil(a) }
 }
 
@@ -398,7 +417,8 @@ pub fn _mm256_ceil_pd(a: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_floor_pd(a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_floor_pd(a: __m256d) -> __m256d {
     unsafe { simd_floor(a) }
 }
 
@@ -433,7 +453,8 @@ pub fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_ceil_ps(a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_ceil_ps(a: __m256) -> __m256 {
     unsafe { simd_ceil(a) }
 }
 
@@ -445,7 +466,8 @@ pub fn _mm256_ceil_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vroundps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_floor_ps(a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_floor_ps(a: __m256) -> __m256 {
     unsafe { simd_floor(a) }
 }
 
@@ -485,7 +507,8 @@ pub fn _mm256_sqrt_pd(a: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
     static_assert_uimm_bits!(IMM4, 4);
     unsafe {
         simd_shuffle!(
@@ -510,7 +533,8 @@ pub fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         simd_shuffle!(
@@ -538,7 +562,8 @@ pub fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendvpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe {
         let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
         transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
@@ -553,7 +578,8 @@ pub fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vblendvps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe {
         let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
         transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
@@ -586,7 +612,8 @@ pub fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhaddpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let even = simd_shuffle!(a, b, [0, 4, 2, 6]);
         let odd = simd_shuffle!(a, b, [1, 5, 3, 7]);
@@ -605,7 +632,8 @@ pub fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhaddps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
     unsafe {
         let even = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
         let odd = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
@@ -623,7 +651,8 @@ pub fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhsubpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let even = simd_shuffle!(a, b, [0, 4, 2, 6]);
         let odd = simd_shuffle!(a, b, [1, 5, 3, 7]);
@@ -642,7 +671,8 @@ pub fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vhsubps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
     unsafe {
         let even = simd_shuffle!(a, b, [0, 2, 8, 10, 4, 6, 12, 14]);
         let odd = simd_shuffle!(a, b, [1, 3, 9, 11, 5, 7, 13, 15]);
@@ -658,7 +688,8 @@ pub fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let a: u64x4 = transmute(a);
         let b: u64x4 = transmute(b);
@@ -674,7 +705,8 @@ pub fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256 {
     unsafe {
         let a: u32x8 = transmute(a);
         let b: u32x8 = transmute(b);
@@ -881,7 +913,8 @@ pub fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
     unsafe { simd_cast(a.as_i32x4()) }
 }
 
@@ -893,7 +926,8 @@ pub fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
     unsafe { simd_cast(a.as_i32x8()) }
 }
 
@@ -905,7 +939,8 @@ pub fn _mm256_cvtepi32_ps(a: __m256i) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtpd2ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtpd_ps(a: __m256d) -> __m128 {
     unsafe { simd_cast(a) }
 }
 
@@ -929,7 +964,8 @@ pub fn _mm256_cvtps_epi32(a: __m256) -> __m256i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcvtps2pd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtps_pd(a: __m128) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtps_pd(a: __m128) -> __m256d {
     unsafe { simd_cast(a) }
 }
 
@@ -940,7 +976,8 @@ pub fn _mm256_cvtps_pd(a: __m128) -> __m256d {
 #[target_feature(enable = "avx")]
 //#[cfg_attr(test, assert_instr(movsd))] FIXME
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtsd_f64(a: __m256d) -> f64 {
     unsafe { simd_extract!(a, 0) }
 }
 
@@ -989,7 +1026,8 @@ pub fn _mm256_cvttps_epi32(a: __m256) -> __m256i {
 #[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe {
         simd_shuffle!(
@@ -1009,7 +1047,8 @@ pub fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128 {
 #[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [[0, 1], [2, 3]][IMM1 as usize]) }
 }
@@ -1022,7 +1061,8 @@ pub fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
 #[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe {
         let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
@@ -1038,7 +1078,8 @@ pub fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
     static_assert_uimm_bits!(INDEX, 3);
     unsafe { simd_extract!(a.as_i32x8(), INDEX as u32) }
 }
@@ -1049,12 +1090,16 @@ pub fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
     unsafe { simd_extract!(a.as_i32x8(), 0) }
 }
 
 /// Zeroes the contents of all XMM or YMM registers.
 ///
+/// This operation is purely a performance hint for the CPU and has no effect on the Abstract
+/// Machine state.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
 #[inline]
 #[target_feature(enable = "avx")]
@@ -1067,6 +1112,9 @@ pub fn _mm256_zeroall() {
 /// Zeroes the upper 128 bits of all YMM registers;
 /// the lower 128-bits of the registers are unmodified.
 ///
+/// This operation is purely a performance hint for the CPU and has no effect on the Abstract
+/// Machine state.
+///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroupper)
 #[inline]
 #[target_feature(enable = "avx")]
@@ -1109,7 +1157,8 @@ pub fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128 {
 #[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         simd_shuffle!(
@@ -1138,7 +1187,8 @@ pub fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         simd_shuffle!(
@@ -1187,7 +1237,8 @@ pub fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d {
 #[cfg_attr(test, assert_instr(vshufpd, IMM4 = 0x1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
     static_assert_uimm_bits!(IMM4, 4);
     unsafe {
         simd_shuffle!(
@@ -1212,7 +1263,8 @@ pub fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
 #[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
     static_assert_uimm_bits!(IMM2, 2);
     unsafe {
         simd_shuffle!(
@@ -1232,7 +1284,8 @@ pub fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x5))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_castsi256_ps(_mm256_permute2f128_si256::<IMM8>(
         _mm256_castps_si256(a),
@@ -1249,7 +1302,8 @@ pub fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
 #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_castsi256_pd(_mm256_permute2f128_si256::<IMM8>(
         _mm256_castpd_si256(a),
@@ -1266,7 +1320,8 @@ pub fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256
 #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 0x31))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     const fn idx(imm8: i32, pos: u32) -> u32 {
         let part = if pos < 2 {
@@ -1308,7 +1363,8 @@ pub fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::trivially_copy_pass_by_ref)]
-pub fn _mm256_broadcast_ss(f: &f32) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_ss(f: &f32) -> __m256 {
     _mm256_set1_ps(*f)
 }
 
@@ -1321,7 +1377,8 @@ pub fn _mm256_broadcast_ss(f: &f32) -> __m256 {
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::trivially_copy_pass_by_ref)]
-pub fn _mm_broadcast_ss(f: &f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcast_ss(f: &f32) -> __m128 {
     _mm_set1_ps(*f)
 }
 
@@ -1334,7 +1391,8 @@ pub fn _mm_broadcast_ss(f: &f32) -> __m128 {
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::trivially_copy_pass_by_ref)]
-pub fn _mm256_broadcast_sd(f: &f64) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_sd(f: &f64) -> __m256d {
     _mm256_set1_pd(*f)
 }
 
@@ -1346,7 +1404,8 @@ pub fn _mm256_broadcast_sd(f: &f64) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vbroadcastf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
     unsafe { simd_shuffle!(*a, _mm_setzero_ps(), [0, 1, 2, 3, 0, 1, 2, 3]) }
 }
 
@@ -1358,7 +1417,8 @@ pub fn _mm256_broadcast_ps(a: &__m128) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vbroadcastf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
     unsafe { simd_shuffle!(*a, _mm_setzero_pd(), [0, 1, 0, 1]) }
 }
 
@@ -1372,7 +1432,8 @@ pub fn _mm256_broadcast_pd(a: &__m128d) -> __m256d {
 #[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe {
         simd_shuffle!(
@@ -1393,7 +1454,8 @@ pub fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256 {
 #[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe {
         simd_shuffle!(
@@ -1413,7 +1475,8 @@ pub fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d
 #[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe {
         let dst: i64x4 = simd_shuffle!(
@@ -1434,7 +1497,8 @@ pub fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m25
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
     static_assert_uimm_bits!(INDEX, 5);
     unsafe { transmute(simd_insert!(a.as_i8x32(), INDEX as u32, i)) }
 }
@@ -1448,7 +1512,8 @@ pub fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i {
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
     static_assert_uimm_bits!(INDEX, 4);
     unsafe { transmute(simd_insert!(a.as_i16x16(), INDEX as u32, i)) }
 }
@@ -1462,7 +1527,8 @@ pub fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i {
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
     static_assert_uimm_bits!(INDEX, 3);
     unsafe { transmute(simd_insert!(a.as_i32x8(), INDEX as u32, i)) }
 }
@@ -1481,7 +1547,8 @@ pub fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
     *(mem_addr as *const __m256d)
 }
 
@@ -1499,7 +1566,8 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
     *(mem_addr as *mut __m256d) = a;
 }
 
@@ -1517,7 +1585,8 @@ pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
     *(mem_addr as *const __m256)
 }
 
@@ -1535,7 +1604,8 @@ pub unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256 {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
     *(mem_addr as *mut __m256) = a;
 }
 
@@ -1548,7 +1618,8 @@ pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
     let mut dst = _mm256_undefined_pd();
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
@@ -1567,7 +1638,8 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
     mem_addr.cast::<__m256d>().write_unaligned(a);
 }
 
@@ -1580,7 +1652,8 @@ pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
     let mut dst = _mm256_undefined_ps();
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
@@ -1599,7 +1672,8 @@ pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
     mem_addr.cast::<__m256>().write_unaligned(a);
 }
 
@@ -1615,7 +1689,8 @@ pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
     assert_instr(vmovaps)
 )] // FIXME vmovdqa expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
     *mem_addr
 }
 
@@ -1631,7 +1706,8 @@ pub unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i {
     assert_instr(vmovaps)
 )] // FIXME vmovdqa expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
     *mem_addr = a;
 }
 
@@ -1643,7 +1719,8 @@ pub unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
     let mut dst = _mm256_undefined_si256();
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
@@ -1661,7 +1738,8 @@ pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
     mem_addr.write_unaligned(a);
 }
 
@@ -1674,8 +1752,10 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
-    maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
+    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_pd())
 }
 
 /// Stores packed double-precision (64-bit) floating-point elements from `a`
@@ -1686,8 +1766,10 @@ pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
-    maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
+    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
 }
 
 /// Loads packed double-precision (64-bit) floating-point elements from memory
@@ -1699,8 +1781,10 @@ pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d)
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
-    maskloadpd(mem_addr as *const i8, mask.as_i64x2())
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
+    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_pd())
 }
 
 /// Stores packed double-precision (64-bit) floating-point elements from `a`
@@ -1711,8 +1795,10 @@ pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
-    maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
+    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
 }
 
 /// Loads packed single-precision (32-bit) floating-point elements from memory
@@ -1724,8 +1810,10 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
-    maskloadps256(mem_addr as *const i8, mask.as_i32x8())
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
+    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_ps())
 }
 
 /// Stores packed single-precision (32-bit) floating-point elements from `a`
@@ -1736,8 +1824,10 @@ pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
-    maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
+    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
 }
 
 /// Loads packed single-precision (32-bit) floating-point elements from memory
@@ -1749,8 +1839,10 @@ pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256)
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
-    maskloadps(mem_addr as *const i8, mask.as_i32x4())
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
+    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_ps())
 }
 
 /// Stores packed single-precision (32-bit) floating-point elements from `a`
@@ -1761,8 +1853,10 @@ pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmaskmovps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
-    maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
+    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
 }
 
 /// Duplicate odd-indexed single-precision (32-bit) floating-point elements
@@ -1773,7 +1867,8 @@ pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_movehdup_ps(a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movehdup_ps(a: __m256) -> __m256 {
     unsafe { simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7]) }
 }
 
@@ -1785,7 +1880,8 @@ pub fn _mm256_movehdup_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_moveldup_ps(a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_moveldup_ps(a: __m256) -> __m256 {
     unsafe { simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]) }
 }
 
@@ -1797,7 +1893,8 @@ pub fn _mm256_moveldup_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovddup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_movedup_pd(a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movedup_pd(a: __m256d) -> __m256d {
     unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) }
 }
 
@@ -1935,7 +2032,8 @@ pub fn _mm256_rsqrt_ps(a: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe { simd_shuffle!(a, b, [1, 5, 3, 7]) }
 }
 
@@ -1947,7 +2045,8 @@ pub fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
     unsafe { simd_shuffle!(a, b, [2, 10, 3, 11, 6, 14, 7, 15]) }
 }
 
@@ -1959,7 +2058,8 @@ pub fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
     unsafe { simd_shuffle!(a, b, [0, 4, 2, 6]) }
 }
 
@@ -1971,7 +2071,8 @@ pub fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
     unsafe { simd_shuffle!(a, b, [0, 8, 1, 9, 4, 12, 5, 13]) }
 }
 
@@ -1985,7 +2086,8 @@ pub fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
     unsafe {
         let r = simd_and(a.as_i64x4(), b.as_i64x4());
         (0i64 == simd_reduce_or(r)) as i32
@@ -2002,7 +2104,8 @@ pub fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32 {
     unsafe {
         let r = simd_and(simd_xor(a.as_i64x4(), i64x4::splat(!0)), b.as_i64x4());
         (0i64 == simd_reduce_or(r)) as i32
@@ -2089,7 +2192,8 @@ pub fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
     unsafe {
         let r: i64x2 = simd_lt(transmute(_mm_and_pd(a, b)), i64x2::ZERO);
         (0i64 == simd_reduce_or(r)) as i32
@@ -2109,7 +2213,8 @@ pub fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32 {
     unsafe {
         let r: i64x2 = simd_lt(transmute(_mm_andnot_pd(a, b)), i64x2::ZERO);
         (0i64 == simd_reduce_or(r)) as i32
@@ -2199,7 +2304,8 @@ pub fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
     unsafe {
         let r: i32x4 = simd_lt(transmute(_mm_and_ps(a, b)), i32x4::ZERO);
         (0i32 == simd_reduce_or(r)) as i32
@@ -2219,7 +2325,8 @@ pub fn _mm_testz_ps(a: __m128, b: __m128) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vtestps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testc_ps(a: __m128, b: __m128) -> i32 {
     unsafe {
         let r: i32x4 = simd_lt(transmute(_mm_andnot_ps(a, b)), i32x4::ZERO);
         (0i32 == simd_reduce_or(r)) as i32
@@ -2253,12 +2360,13 @@ pub fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovmskpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_movemask_pd(a: __m256d) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movemask_pd(a: __m256d) -> i32 {
     // Propagate the highest bit to the rest, because simd_bitmask
     // requires all-1 or all-0.
     unsafe {
         let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
-        simd_bitmask::<i64x4, u8>(mask).into()
+        simd_bitmask::<i64x4, u8>(mask) as i32
     }
 }
 
@@ -2271,12 +2379,13 @@ pub fn _mm256_movemask_pd(a: __m256d) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vmovmskps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_movemask_ps(a: __m256) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movemask_ps(a: __m256) -> i32 {
     // Propagate the highest bit to the rest, because simd_bitmask
     // requires all-1 or all-0.
     unsafe {
         let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
-        simd_bitmask::<i32x8, u8>(mask).into()
+        simd_bitmask::<i32x8, u8>(mask) as i32
     }
 }
 
@@ -2287,7 +2396,8 @@ pub fn _mm256_movemask_ps(a: __m256) -> i32 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setzero_pd() -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setzero_pd() -> __m256d {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -2298,7 +2408,8 @@ pub fn _mm256_setzero_pd() -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setzero_ps() -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setzero_ps() -> __m256 {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -2309,7 +2420,8 @@ pub fn _mm256_setzero_ps() -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vxor))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setzero_si256() -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setzero_si256() -> __m256i {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -2320,9 +2432,9 @@ pub fn _mm256_setzero_si256() -> __m256i {
 #[inline]
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
-#[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
     _mm256_setr_pd(d, c, b, a)
 }
 
@@ -2334,7 +2446,17 @@ pub fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_ps(
+    a: f32,
+    b: f32,
+    c: f32,
+    d: f32,
+    e: f32,
+    f: f32,
+    g: f32,
+    h: f32,
+) -> __m256 {
     _mm256_setr_ps(h, g, f, e, d, c, b, a)
 }
 
@@ -2345,7 +2467,8 @@ pub fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h:
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set_epi8(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_epi8(
     e00: i8,
     e01: i8,
     e02: i8,
@@ -2395,7 +2518,8 @@ pub fn _mm256_set_epi8(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set_epi16(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_epi16(
     e00: i16,
     e01: i16,
     e02: i16,
@@ -2429,7 +2553,8 @@ pub fn _mm256_set_epi16(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set_epi32(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_epi32(
     e0: i32,
     e1: i32,
     e2: i32,
@@ -2449,7 +2574,8 @@ pub fn _mm256_set_epi32(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
     _mm256_setr_epi64x(d, c, b, a)
 }
 
@@ -2461,7 +2587,8 @@ pub fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
     __m256d([a, b, c, d])
 }
 
@@ -2473,7 +2600,17 @@ pub fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_ps(
+    a: f32,
+    b: f32,
+    c: f32,
+    d: f32,
+    e: f32,
+    f: f32,
+    g: f32,
+    h: f32,
+) -> __m256 {
     __m256([a, b, c, d, e, f, g, h])
 }
 
@@ -2485,7 +2622,8 @@ pub fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h:
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setr_epi8(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_epi8(
     e00: i8,
     e01: i8,
     e02: i8,
@@ -2538,7 +2676,8 @@ pub fn _mm256_setr_epi8(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setr_epi16(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_epi16(
     e00: i16,
     e01: i16,
     e02: i16,
@@ -2575,7 +2714,8 @@ pub fn _mm256_setr_epi16(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setr_epi32(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_epi32(
     e0: i32,
     e1: i32,
     e2: i32,
@@ -2596,7 +2736,8 @@ pub fn _mm256_setr_epi32(
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
     unsafe { transmute(i64x4::new(a, b, c, d)) }
 }
 
@@ -2608,8 +2749,9 @@ pub fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set1_pd(a: f64) -> __m256d {
-    _mm256_setr_pd(a, a, a, a)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set1_pd(a: f64) -> __m256d {
+    f64x4::splat(a).as_m256d()
 }
 
 /// Broadcasts single-precision (32-bit) floating-point value `a` to all
@@ -2620,8 +2762,9 @@ pub fn _mm256_set1_pd(a: f64) -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set1_ps(a: f32) -> __m256 {
-    _mm256_setr_ps(a, a, a, a, a, a, a, a)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set1_ps(a: f32) -> __m256 {
+    f32x8::splat(a).as_m256()
 }
 
 /// Broadcasts 8-bit integer `a` to all elements of returned vector.
@@ -2632,14 +2775,9 @@ pub fn _mm256_set1_ps(a: f32) -> __m256 {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set1_epi8(a: i8) -> __m256i {
-    #[rustfmt::skip]
-    _mm256_setr_epi8(
-        a, a, a, a, a, a, a, a,
-        a, a, a, a, a, a, a, a,
-        a, a, a, a, a, a, a, a,
-        a, a, a, a, a, a, a, a,
-    )
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set1_epi8(a: i8) -> __m256i {
+    i8x32::splat(a).as_m256i()
 }
 
 /// Broadcasts 16-bit integer `a` to all elements of returned vector.
@@ -2652,8 +2790,9 @@ pub fn _mm256_set1_epi8(a: i8) -> __m256i {
 #[cfg_attr(test, assert_instr(vinsertf128))]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set1_epi16(a: i16) -> __m256i {
-    _mm256_setr_epi16(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set1_epi16(a: i16) -> __m256i {
+    i16x16::splat(a).as_m256i()
 }
 
 /// Broadcasts 32-bit integer `a` to all elements of returned vector.
@@ -2664,8 +2803,9 @@ pub fn _mm256_set1_epi16(a: i16) -> __m256i {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set1_epi32(a: i32) -> __m256i {
-    _mm256_setr_epi32(a, a, a, a, a, a, a, a)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set1_epi32(a: i32) -> __m256i {
+    i32x8::splat(a).as_m256i()
 }
 
 /// Broadcasts 64-bit integer `a` to all elements of returned vector.
@@ -2678,8 +2818,9 @@ pub fn _mm256_set1_epi32(a: i32) -> __m256i {
 #[cfg_attr(all(test, target_arch = "x86"), assert_instr(vbroadcastsd))]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set1_epi64x(a: i64) -> __m256i {
-    _mm256_setr_epi64x(a, a, a, a)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set1_epi64x(a: i64) -> __m256i {
+    i64x4::splat(a).as_m256i()
 }
 
 /// Cast vector of type __m256d to type __m256.
@@ -2690,7 +2831,8 @@ pub fn _mm256_set1_epi64x(a: i64) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castpd_ps(a: __m256d) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castpd_ps(a: __m256d) -> __m256 {
     unsafe { transmute(a) }
 }
 
@@ -2702,7 +2844,8 @@ pub fn _mm256_castpd_ps(a: __m256d) -> __m256 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castps_pd(a: __m256) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castps_pd(a: __m256) -> __m256d {
     unsafe { transmute(a) }
 }
 
@@ -2714,7 +2857,8 @@ pub fn _mm256_castps_pd(a: __m256) -> __m256d {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castps_si256(a: __m256) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castps_si256(a: __m256) -> __m256i {
     unsafe { transmute(a) }
 }
 
@@ -2726,7 +2870,8 @@ pub fn _mm256_castps_si256(a: __m256) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
     unsafe { transmute(a) }
 }
 
@@ -2738,7 +2883,8 @@ pub fn _mm256_castsi256_ps(a: __m256i) -> __m256 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castpd_si256(a: __m256d) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castpd_si256(a: __m256d) -> __m256i {
     unsafe { transmute(a) }
 }
 
@@ -2750,7 +2896,8 @@ pub fn _mm256_castpd_si256(a: __m256d) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
     unsafe { transmute(a) }
 }
 
@@ -2762,7 +2909,8 @@ pub fn _mm256_castsi256_pd(a: __m256i) -> __m256d {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castps256_ps128(a: __m256) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castps256_ps128(a: __m256) -> __m128 {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
 }
 
@@ -2774,7 +2922,8 @@ pub fn _mm256_castps256_ps128(a: __m256) -> __m128 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
     unsafe { simd_shuffle!(a, a, [0, 1]) }
 }
 
@@ -2786,7 +2935,8 @@ pub fn _mm256_castpd256_pd128(a: __m256d) -> __m128d {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
     unsafe {
         let a = a.as_i64x4();
         let dst: i64x2 = simd_shuffle!(a, a, [0, 1]);
@@ -2795,7 +2945,11 @@ pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
 }
 
 /// Casts vector of type __m128 to type __m256;
-/// the upper 128 bits of the result are undefined.
+/// the upper 128 bits of the result are indeterminate.
+///
+/// In the Intel documentation, the upper bits are declared to be "undefined".
+/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
+/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castps128_ps256)
 #[inline]
@@ -2803,12 +2957,17 @@ pub fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castps128_ps256(a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castps128_ps256(a: __m128) -> __m256 {
     unsafe { simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) }
 }
 
 /// Casts vector of type __m128d to type __m256d;
-/// the upper 128 bits of the result are undefined.
+/// the upper 128 bits of the result are indeterminate.
+///
+/// In the Intel documentation, the upper bits are declared to be "undefined".
+/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
+/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castpd128_pd256)
 #[inline]
@@ -2816,12 +2975,17 @@ pub fn _mm256_castps128_ps256(a: __m128) -> __m256 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
     unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) }
 }
 
 /// Casts vector of type __m128i to type __m256i;
-/// the upper 128 bits of the result are undefined.
+/// the upper 128 bits of the result are indeterminate.
+///
+/// In the Intel documentation, the upper bits are declared to be "undefined".
+/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
+/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_castsi128_si256)
 #[inline]
@@ -2829,7 +2993,8 @@ pub fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_i64x2();
         let undefined = i64x2::ZERO;
@@ -2848,7 +3013,8 @@ pub fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
     unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 
@@ -2862,7 +3028,8 @@ pub fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
     unsafe {
         let b = i64x2::ZERO;
         let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
@@ -2881,7 +3048,8 @@ pub fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
     unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0, 1, 2, 3]) }
 }
 
@@ -2895,7 +3063,8 @@ pub fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_undefined_ps() -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_undefined_ps() -> __m256 {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -2909,7 +3078,8 @@ pub fn _mm256_undefined_ps() -> __m256 {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_undefined_pd() -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_undefined_pd() -> __m256d {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -2923,7 +3093,8 @@ pub fn _mm256_undefined_pd() -> __m256d {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_undefined_si256() -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_undefined_si256() -> __m256i {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -2934,7 +3105,8 @@ pub fn _mm256_undefined_si256() -> __m256i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
     unsafe { simd_shuffle!(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 
@@ -2945,7 +3117,8 @@ pub fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
     unsafe {
         let hi: __m128 = transmute(hi);
         let lo: __m128 = transmute(lo);
@@ -2960,7 +3133,8 @@ pub fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
     unsafe {
         let hi: __m128 = transmute(hi);
         let lo: __m128 = transmute(lo);
@@ -2975,7 +3149,8 @@ pub fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
     _mm256_set_m128(hi, lo)
 }
 
@@ -2986,7 +3161,8 @@ pub fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256 {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
     _mm256_set_m128d(hi, lo)
 }
 
@@ -2997,7 +3173,8 @@ pub fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d {
 #[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vinsertf128))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
     _mm256_set_m128i(hi, lo)
 }
 
@@ -3011,7 +3188,8 @@ pub fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
     let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
     _mm256_insertf128_ps::<1>(a, _mm_loadu_ps(hiaddr))
 }
@@ -3026,7 +3204,8 @@ pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m2
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
     let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
     _mm256_insertf128_pd::<1>(a, _mm_loadu_pd(hiaddr))
 }
@@ -3040,7 +3219,8 @@ pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
     let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
     _mm256_insertf128_si256::<1>(a, _mm_loadu_si128(hiaddr))
 }
@@ -3055,7 +3235,8 @@ pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
     let lo = _mm256_castps256_ps128(a);
     _mm_storeu_ps(loaddr, lo);
     let hi = _mm256_extractf128_ps::<1>(a);
@@ -3072,7 +3253,8 @@ pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256)
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
     let lo = _mm256_castpd256_pd128(a);
     _mm_storeu_pd(loaddr, lo);
     let hi = _mm256_extractf128_pd::<1>(a);
@@ -3088,7 +3270,8 @@ pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {
     let lo = _mm256_castsi256_si128(a);
     _mm_storeu_si128(loaddr, lo);
     let hi = _mm256_extractf128_si256::<1>(a);
@@ -3102,7 +3285,8 @@ pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a
 #[target_feature(enable = "avx")]
 //#[cfg_attr(test, assert_instr(movss))] FIXME
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtss_f32(a: __m256) -> f32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtss_f32(a: __m256) -> f32 {
     unsafe { simd_extract!(a, 0) }
 }
 
@@ -3147,22 +3331,6 @@ unsafe extern "C" {
     fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
     #[link_name = "llvm.x86.avx.vpermilvar.pd"]
     fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
-    #[link_name = "llvm.x86.avx.maskload.pd.256"]
-    fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
-    #[link_name = "llvm.x86.avx.maskstore.pd.256"]
-    fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
-    #[link_name = "llvm.x86.avx.maskload.pd"]
-    fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
-    #[link_name = "llvm.x86.avx.maskstore.pd"]
-    fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
-    #[link_name = "llvm.x86.avx.maskload.ps.256"]
-    fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
-    #[link_name = "llvm.x86.avx.maskstore.ps.256"]
-    fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
-    #[link_name = "llvm.x86.avx.maskload.ps"]
-    fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
-    #[link_name = "llvm.x86.avx.maskstore.ps"]
-    fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
     #[link_name = "llvm.x86.avx.ldu.dq.256"]
     fn vlddqu(mem_addr: *const i8) -> i8x32;
     #[link_name = "llvm.x86.avx.rcp.ps.256"]
@@ -3199,6 +3367,8 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
+    use crate::core_arch::simd::*;
     use crate::hint::black_box;
     use crate::ptr;
     use stdarch_test::simd_test;
@@ -3206,7 +3376,7 @@ mod tests {
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_add_pd() {
+    const fn test_mm256_add_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_add_pd(a, b);
@@ -3215,7 +3385,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_add_ps() {
+    const fn test_mm256_add_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm256_add_ps(a, b);
@@ -3224,7 +3394,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_and_pd() {
+    const fn test_mm256_and_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set1_pd(0.6);
         let r = _mm256_and_pd(a, b);
@@ -3233,7 +3403,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_and_ps() {
+    const fn test_mm256_and_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set1_ps(0.6);
         let r = _mm256_and_ps(a, b);
@@ -3242,7 +3412,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_or_pd() {
+    const fn test_mm256_or_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set1_pd(0.6);
         let r = _mm256_or_pd(a, b);
@@ -3251,7 +3421,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_or_ps() {
+    const fn test_mm256_or_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set1_ps(0.6);
         let r = _mm256_or_ps(a, b);
@@ -3260,7 +3430,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_shuffle_pd() {
+    const fn test_mm256_shuffle_pd() {
         let a = _mm256_setr_pd(1., 4., 5., 8.);
         let b = _mm256_setr_pd(2., 3., 6., 7.);
         let r = _mm256_shuffle_pd::<0b11_11_11_11>(a, b);
@@ -3269,7 +3439,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_shuffle_ps() {
+    const fn test_mm256_shuffle_ps() {
         let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_shuffle_ps::<0b00_00_11_11>(a, b);
@@ -3278,7 +3448,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_andnot_pd() {
+    const fn test_mm256_andnot_pd() {
         let a = _mm256_set1_pd(0.);
         let b = _mm256_set1_pd(0.6);
         let r = _mm256_andnot_pd(a, b);
@@ -3286,7 +3456,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_andnot_ps() {
+    const fn test_mm256_andnot_ps() {
         let a = _mm256_set1_ps(0.);
         let b = _mm256_set1_ps(0.6);
         let r = _mm256_andnot_ps(a, b);
@@ -3294,7 +3464,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_max_pd() {
+    fn test_mm256_max_pd() {
         let a = _mm256_setr_pd(1., 4., 5., 8.);
         let b = _mm256_setr_pd(2., 3., 6., 7.);
         let r = _mm256_max_pd(a, b);
@@ -3304,23 +3474,22 @@ mod tests {
         // > value in the second operand (source operand) is returned.
         let w = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
         let x = _mm256_max_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
-        let wu: [u64; 4] = transmute(w);
-        let xu: [u64; 4] = transmute(x);
-        assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
-        assert_eq!(xu, [0u64; 4]);
+        let wu = _mm256_castpd_si256(w).as_u64x4();
+        let xu = _mm256_castpd_si256(x).as_u64x4();
+        assert_eq!(wu, u64x4::splat(0x8000_0000_0000_0000u64));
+        assert_eq!(xu, u64x4::splat(0u64));
         // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
         // > second operand (source operand), either a NaN or a valid
         // > floating-point value, is written to the result.
         let y = _mm256_max_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
         let z = _mm256_max_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
-        let yf: [f64; 4] = transmute(y);
-        let zf: [f64; 4] = transmute(z);
-        assert_eq!(yf, [0.0; 4]);
+        assert_eq_m256d(y, _mm256_set1_pd(0.0));
+        let zf = *z.as_f64x4().as_array();
         assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_max_ps() {
+    fn test_mm256_max_ps() {
         let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_max_ps(a, b);
@@ -3330,23 +3499,22 @@ mod tests {
         // > value in the second operand (source operand) is returned.
         let w = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
         let x = _mm256_max_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
-        let wu: [u32; 8] = transmute(w);
-        let xu: [u32; 8] = transmute(x);
-        assert_eq!(wu, [0x8000_0000u32; 8]);
-        assert_eq!(xu, [0u32; 8]);
+        let wu = _mm256_castps_si256(w).as_u32x8();
+        let xu = _mm256_castps_si256(x).as_u32x8();
+        assert_eq!(wu, u32x8::splat(0x8000_0000u32));
+        assert_eq!(xu, u32x8::splat(0u32));
         // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
         // > second operand (source operand), either a NaN or a valid
         // > floating-point value, is written to the result.
         let y = _mm256_max_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
         let z = _mm256_max_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
-        let yf: [f32; 8] = transmute(y);
-        let zf: [f32; 8] = transmute(z);
-        assert_eq!(yf, [0.0; 8]);
+        assert_eq_m256(y, _mm256_set1_ps(0.0));
+        let zf = *z.as_f32x8().as_array();
         assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_min_pd() {
+    fn test_mm256_min_pd() {
         let a = _mm256_setr_pd(1., 4., 5., 8.);
         let b = _mm256_setr_pd(2., 3., 6., 7.);
         let r = _mm256_min_pd(a, b);
@@ -3356,23 +3524,22 @@ mod tests {
         // > value in the second operand (source operand) is returned.
         let w = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(-0.0));
         let x = _mm256_min_pd(_mm256_set1_pd(-0.0), _mm256_set1_pd(0.0));
-        let wu: [u64; 4] = transmute(w);
-        let xu: [u64; 4] = transmute(x);
-        assert_eq!(wu, [0x8000_0000_0000_0000u64; 4]);
-        assert_eq!(xu, [0u64; 4]);
+        let wu = _mm256_castpd_si256(w).as_u64x4();
+        let xu = _mm256_castpd_si256(x).as_u64x4();
+        assert_eq!(wu, u64x4::splat(0x8000_0000_0000_0000u64));
+        assert_eq!(xu, u64x4::splat(0u64));
         // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
         // > second operand (source operand), either a NaN or a valid
         // > floating-point value, is written to the result.
         let y = _mm256_min_pd(_mm256_set1_pd(f64::NAN), _mm256_set1_pd(0.0));
         let z = _mm256_min_pd(_mm256_set1_pd(0.0), _mm256_set1_pd(f64::NAN));
-        let yf: [f64; 4] = transmute(y);
-        let zf: [f64; 4] = transmute(z);
-        assert_eq!(yf, [0.0; 4]);
+        assert_eq_m256d(y, _mm256_set1_pd(0.0));
+        let zf = *z.as_f64x4().as_array();
         assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_min_ps() {
+    fn test_mm256_min_ps() {
         let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_min_ps(a, b);
@@ -3382,23 +3549,22 @@ mod tests {
         // > value in the second operand (source operand) is returned.
         let w = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(-0.0));
         let x = _mm256_min_ps(_mm256_set1_ps(-0.0), _mm256_set1_ps(0.0));
-        let wu: [u32; 8] = transmute(w);
-        let xu: [u32; 8] = transmute(x);
-        assert_eq!(wu, [0x8000_0000u32; 8]);
-        assert_eq!(xu, [0u32; 8]);
+        let wu = _mm256_castps_si256(w).as_u32x8();
+        let xu = _mm256_castps_si256(x).as_u32x8();
+        assert_eq!(wu, u32x8::splat(0x8000_0000u32));
+        assert_eq!(xu, u32x8::splat(0u32));
         // > If only one value is a NaN (SNaN or QNaN) for this instruction, the
         // > second operand (source operand), either a NaN or a valid
         // > floating-point value, is written to the result.
         let y = _mm256_min_ps(_mm256_set1_ps(f32::NAN), _mm256_set1_ps(0.0));
         let z = _mm256_min_ps(_mm256_set1_ps(0.0), _mm256_set1_ps(f32::NAN));
-        let yf: [f32; 8] = transmute(y);
-        let zf: [f32; 8] = transmute(z);
-        assert_eq!(yf, [0.0; 8]);
+        assert_eq_m256(y, _mm256_set1_ps(0.0));
+        let zf = *z.as_f32x8().as_array();
         assert!(zf.iter().all(|f| f.is_nan()), "{:?}", zf);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_mul_pd() {
+    const fn test_mm256_mul_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_mul_pd(a, b);
@@ -3407,7 +3573,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_mul_ps() {
+    const fn test_mm256_mul_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm256_mul_ps(a, b);
@@ -3416,7 +3582,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_addsub_pd() {
+    const fn test_mm256_addsub_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_addsub_pd(a, b);
@@ -3425,7 +3591,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_addsub_ps() {
+    const fn test_mm256_addsub_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
         let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
         let r = _mm256_addsub_ps(a, b);
@@ -3434,7 +3600,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_sub_pd() {
+    const fn test_mm256_sub_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_sub_pd(a, b);
@@ -3443,7 +3609,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_sub_ps() {
+    const fn test_mm256_sub_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., -1., -2., -3., -4.);
         let b = _mm256_setr_ps(5., 6., 7., 8., 3., 2., 1., 0.);
         let r = _mm256_sub_ps(a, b);
@@ -3452,7 +3618,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_round_pd() {
+    fn test_mm256_round_pd() {
         let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
         let result_closest = _mm256_round_pd::<0b0000>(a);
         let result_down = _mm256_round_pd::<0b0001>(a);
@@ -3466,7 +3632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_floor_pd() {
+    const fn test_mm256_floor_pd() {
         let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
         let result_down = _mm256_floor_pd(a);
         let expected_down = _mm256_setr_pd(1., 2., 3., -2.);
@@ -3474,7 +3640,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_ceil_pd() {
+    const fn test_mm256_ceil_pd() {
         let a = _mm256_setr_pd(1.55, 2.2, 3.99, -1.2);
         let result_up = _mm256_ceil_pd(a);
         let expected_up = _mm256_setr_pd(2., 3., 4., -1.);
@@ -3482,7 +3648,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_round_ps() {
+    fn test_mm256_round_ps() {
         let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
         let result_closest = _mm256_round_ps::<0b0000>(a);
         let result_down = _mm256_round_ps::<0b0001>(a);
@@ -3496,7 +3662,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_floor_ps() {
+    const fn test_mm256_floor_ps() {
         let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
         let result_down = _mm256_floor_ps(a);
         let expected_down = _mm256_setr_ps(1., 2., 3., -2., 1., 2., 3., -2.);
@@ -3504,7 +3670,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_ceil_ps() {
+    const fn test_mm256_ceil_ps() {
         let a = _mm256_setr_ps(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
         let result_up = _mm256_ceil_ps(a);
         let expected_up = _mm256_setr_ps(2., 3., 4., -1., 2., 3., 4., -1.);
@@ -3512,7 +3678,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_sqrt_pd() {
+    fn test_mm256_sqrt_pd() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let r = _mm256_sqrt_pd(a);
         let e = _mm256_setr_pd(2., 3., 4., 5.);
@@ -3520,7 +3686,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_sqrt_ps() {
+    fn test_mm256_sqrt_ps() {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let r = _mm256_sqrt_ps(a);
         let e = _mm256_setr_ps(2., 3., 4., 5., 2., 3., 4., 5.);
@@ -3528,7 +3694,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_div_ps() {
+    const fn test_mm256_div_ps() {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let r = _mm256_div_ps(a, b);
@@ -3537,7 +3703,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_div_pd() {
+    const fn test_mm256_div_pd() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let b = _mm256_setr_pd(4., 3., 2., 5.);
         let r = _mm256_div_pd(a, b);
@@ -3546,7 +3712,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_blend_pd() {
+    const fn test_mm256_blend_pd() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let b = _mm256_setr_pd(4., 3., 2., 5.);
         let r = _mm256_blend_pd::<0x0>(a, b);
@@ -3558,7 +3724,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_blend_ps() {
+    const fn test_mm256_blend_ps() {
         let a = _mm256_setr_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_setr_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_blend_ps::<0x0>(a, b);
@@ -3570,7 +3736,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_blendv_pd() {
+    const fn test_mm256_blendv_pd() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let b = _mm256_setr_pd(4., 3., 2., 5.);
         let c = _mm256_setr_pd(0., 0., !0 as f64, !0 as f64);
@@ -3580,7 +3746,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_blendv_ps() {
+    const fn test_mm256_blendv_ps() {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         #[rustfmt::skip]
@@ -3593,7 +3759,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_dp_ps() {
+    fn test_mm256_dp_ps() {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let r = _mm256_dp_ps::<0xFF>(a, b);
@@ -3602,7 +3768,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_hadd_pd() {
+    const fn test_mm256_hadd_pd() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let b = _mm256_setr_pd(4., 3., 2., 5.);
         let r = _mm256_hadd_pd(a, b);
@@ -3617,7 +3783,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_hadd_ps() {
+    const fn test_mm256_hadd_ps() {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let r = _mm256_hadd_ps(a, b);
@@ -3632,7 +3798,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_hsub_pd() {
+    const fn test_mm256_hsub_pd() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let b = _mm256_setr_pd(4., 3., 2., 5.);
         let r = _mm256_hsub_pd(a, b);
@@ -3647,7 +3813,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_hsub_ps() {
+    const fn test_mm256_hsub_ps() {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let b = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let r = _mm256_hsub_ps(a, b);
@@ -3662,7 +3828,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_xor_pd() {
+    const fn test_mm256_xor_pd() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let b = _mm256_set1_pd(0.);
         let r = _mm256_xor_pd(a, b);
@@ -3670,7 +3836,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_xor_ps() {
+    const fn test_mm256_xor_ps() {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let b = _mm256_set1_ps(0.);
         let r = _mm256_xor_ps(a, b);
@@ -3678,7 +3844,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_cmp_pd() {
+    fn test_mm_cmp_pd() {
         let a = _mm_setr_pd(4., 9.);
         let b = _mm_setr_pd(4., 3.);
         let r = _mm_cmp_pd::<_CMP_GE_OS>(a, b);
@@ -3687,7 +3853,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cmp_pd() {
+    fn test_mm256_cmp_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_cmp_pd::<_CMP_GE_OS>(a, b);
@@ -3696,7 +3862,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_cmp_ps() {
+    fn test_mm_cmp_ps() {
         let a = _mm_setr_ps(4., 3., 2., 5.);
         let b = _mm_setr_ps(4., 9., 16., 25.);
         let r = _mm_cmp_ps::<_CMP_GE_OS>(a, b);
@@ -3707,7 +3873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cmp_ps() {
+    fn test_mm256_cmp_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
         let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
         let r = _mm256_cmp_ps::<_CMP_GE_OS>(a, b);
@@ -3716,7 +3882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_cmp_sd() {
+    fn test_mm_cmp_sd() {
         let a = _mm_setr_pd(4., 9.);
         let b = _mm_setr_pd(4., 3.);
         let r = _mm_cmp_sd::<_CMP_GE_OS>(a, b);
@@ -3725,7 +3891,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_cmp_ss() {
+    fn test_mm_cmp_ss() {
         let a = _mm_setr_ps(4., 3., 2., 5.);
         let b = _mm_setr_ps(4., 9., 16., 25.);
         let r = _mm_cmp_ss::<_CMP_GE_OS>(a, b);
@@ -3736,7 +3902,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvtepi32_pd() {
+    const fn test_mm256_cvtepi32_pd() {
         let a = _mm_setr_epi32(4, 9, 16, 25);
         let r = _mm256_cvtepi32_pd(a);
         let e = _mm256_setr_pd(4., 9., 16., 25.);
@@ -3744,7 +3910,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvtepi32_ps() {
+    const fn test_mm256_cvtepi32_ps() {
         let a = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
         let r = _mm256_cvtepi32_ps(a);
         let e = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
@@ -3752,7 +3918,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvtpd_ps() {
+    const fn test_mm256_cvtpd_ps() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let r = _mm256_cvtpd_ps(a);
         let e = _mm_setr_ps(4., 9., 16., 25.);
@@ -3760,7 +3926,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvtps_epi32() {
+    fn test_mm256_cvtps_epi32() {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let r = _mm256_cvtps_epi32(a);
         let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
@@ -3768,7 +3934,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvtps_pd() {
+    const fn test_mm256_cvtps_pd() {
         let a = _mm_setr_ps(4., 9., 16., 25.);
         let r = _mm256_cvtps_pd(a);
         let e = _mm256_setr_pd(4., 9., 16., 25.);
@@ -3776,14 +3942,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvtsd_f64() {
+    const fn test_mm256_cvtsd_f64() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let r = _mm256_cvtsd_f64(a);
         assert_eq!(r, 1.);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvttpd_epi32() {
+    fn test_mm256_cvttpd_epi32() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let r = _mm256_cvttpd_epi32(a);
         let e = _mm_setr_epi32(4, 9, 16, 25);
@@ -3791,7 +3957,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvtpd_epi32() {
+    fn test_mm256_cvtpd_epi32() {
         let a = _mm256_setr_pd(4., 9., 16., 25.);
         let r = _mm256_cvtpd_epi32(a);
         let e = _mm_setr_epi32(4, 9, 16, 25);
@@ -3799,7 +3965,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvttps_epi32() {
+    fn test_mm256_cvttps_epi32() {
         let a = _mm256_setr_ps(4., 9., 16., 25., 4., 9., 16., 25.);
         let r = _mm256_cvttps_epi32(a);
         let e = _mm256_setr_epi32(4, 9, 16, 25, 4, 9, 16, 25);
@@ -3807,7 +3973,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_extractf128_ps() {
+    const fn test_mm256_extractf128_ps() {
         let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let r = _mm256_extractf128_ps::<0>(a);
         let e = _mm_setr_ps(4., 3., 2., 5.);
@@ -3815,7 +3981,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_extractf128_pd() {
+    const fn test_mm256_extractf128_pd() {
         let a = _mm256_setr_pd(4., 3., 2., 5.);
         let r = _mm256_extractf128_pd::<0>(a);
         let e = _mm_setr_pd(4., 3.);
@@ -3823,7 +3989,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_extractf128_si256() {
+    const fn test_mm256_extractf128_si256() {
         let a = _mm256_setr_epi64x(4, 3, 2, 5);
         let r = _mm256_extractf128_si256::<0>(a);
         let e = _mm_setr_epi64x(4, 3);
@@ -3831,7 +3997,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_extract_epi32() {
+    const fn test_mm256_extract_epi32() {
         let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
         let r1 = _mm256_extract_epi32::<0>(a);
         let r2 = _mm256_extract_epi32::<3>(a);
@@ -3840,26 +4006,24 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvtsi256_si32() {
+    const fn test_mm256_cvtsi256_si32() {
         let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_cvtsi256_si32(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri
-    unsafe fn test_mm256_zeroall() {
+    fn test_mm256_zeroall() {
         _mm256_zeroall();
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri
-    unsafe fn test_mm256_zeroupper() {
+    fn test_mm256_zeroupper() {
         _mm256_zeroupper();
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_permutevar_ps() {
+    fn test_mm256_permutevar_ps() {
         let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_permutevar_ps(a, b);
@@ -3868,7 +4032,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_permutevar_ps() {
+    fn test_mm_permutevar_ps() {
         let a = _mm_setr_ps(4., 3., 2., 5.);
         let b = _mm_setr_epi32(1, 2, 3, 4);
         let r = _mm_permutevar_ps(a, b);
@@ -3877,7 +4041,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_permute_ps() {
+    const fn test_mm256_permute_ps() {
         let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let r = _mm256_permute_ps::<0x1b>(a);
         let e = _mm256_setr_ps(5., 2., 3., 4., 50., 64., 9., 8.);
@@ -3885,7 +4049,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_permute_ps() {
+    const fn test_mm_permute_ps() {
         let a = _mm_setr_ps(4., 3., 2., 5.);
         let r = _mm_permute_ps::<0x1b>(a);
         let e = _mm_setr_ps(5., 2., 3., 4.);
@@ -3893,7 +4057,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_permutevar_pd() {
+    fn test_mm256_permutevar_pd() {
         let a = _mm256_setr_pd(4., 3., 2., 5.);
         let b = _mm256_setr_epi64x(1, 2, 3, 4);
         let r = _mm256_permutevar_pd(a, b);
@@ -3902,7 +4066,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_permutevar_pd() {
+    fn test_mm_permutevar_pd() {
         let a = _mm_setr_pd(4., 3.);
         let b = _mm_setr_epi64x(3, 0);
         let r = _mm_permutevar_pd(a, b);
@@ -3911,7 +4075,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_permute_pd() {
+    const fn test_mm256_permute_pd() {
         let a = _mm256_setr_pd(4., 3., 2., 5.);
         let r = _mm256_permute_pd::<5>(a);
         let e = _mm256_setr_pd(3., 4., 5., 2.);
@@ -3919,7 +4083,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_permute_pd() {
+    const fn test_mm_permute_pd() {
         let a = _mm_setr_pd(4., 3.);
         let r = _mm_permute_pd::<1>(a);
         let e = _mm_setr_pd(3., 4.);
@@ -3927,55 +4091,70 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_permute2f128_ps() {
-        let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
-        let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
-        let r = _mm256_permute2f128_ps::<0x13>(a, b);
-        let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
+    const fn test_mm256_permute2f128_ps() {
+        let a = _mm256_setr_ps(11., 12., 13., 14., 15., 16., 17., 18.);
+        let b = _mm256_setr_ps(21., 22., 23., 24., 25., 26., 27., 28.);
+        let r = _mm256_permute2f128_ps::<0b0001_0011>(a, b);
+        let e = _mm256_setr_ps(25., 26., 27., 28., 15., 16., 17., 18.);
         assert_eq_m256(r, e);
+
+        // Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
+        let r = _mm256_permute2f128_ps::<0b1001_1011>(a, b);
+        let z = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
+        assert_eq_m256(r, z);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_permute2f128_pd() {
+    const fn test_mm256_permute2f128_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
-        let r = _mm256_permute2f128_pd::<0x31>(a, b);
+        let r = _mm256_permute2f128_pd::<0b0011_0001>(a, b);
         let e = _mm256_setr_pd(3., 4., 7., 8.);
         assert_eq_m256d(r, e);
+
+        // Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
+        let r = _mm256_permute2f128_pd::<0b1011_1001>(a, b);
+        let e = _mm256_setr_pd(0.0, 0.0, 0.0, 0.0);
+        assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_permute2f128_si256() {
-        let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
-        let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
-        let r = _mm256_permute2f128_si256::<0x20>(a, b);
-        let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+    const fn test_mm256_permute2f128_si256() {
+        let a = _mm256_setr_epi32(11, 12, 13, 14, 15, 16, 17, 18);
+        let b = _mm256_setr_epi32(21, 22, 23, 24, 25, 26, 27, 28);
+        let r = _mm256_permute2f128_si256::<0b0010_0000>(a, b);
+        let e = _mm256_setr_epi32(11, 12, 13, 14, 21, 22, 23, 24);
+        assert_eq_m256i(r, e);
+
+        // Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
+        let r = _mm256_permute2f128_si256::<0b1010_1000>(a, b);
+        let e = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_broadcast_ss() {
+    const fn test_mm256_broadcast_ss() {
         let r = _mm256_broadcast_ss(&3.);
         let e = _mm256_set1_ps(3.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_broadcast_ss() {
+    const fn test_mm_broadcast_ss() {
         let r = _mm_broadcast_ss(&3.);
         let e = _mm_set1_ps(3.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_broadcast_sd() {
+    const fn test_mm256_broadcast_sd() {
         let r = _mm256_broadcast_sd(&3.);
         let e = _mm256_set1_pd(3.);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_broadcast_ps() {
+    const fn test_mm256_broadcast_ps() {
         let a = _mm_setr_ps(4., 3., 2., 5.);
         let r = _mm256_broadcast_ps(&a);
         let e = _mm256_setr_ps(4., 3., 2., 5., 4., 3., 2., 5.);
@@ -3983,7 +4162,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_broadcast_pd() {
+    const fn test_mm256_broadcast_pd() {
         let a = _mm_setr_pd(4., 3.);
         let r = _mm256_broadcast_pd(&a);
         let e = _mm256_setr_pd(4., 3., 4., 3.);
@@ -3991,7 +4170,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_insertf128_ps() {
+    const fn test_mm256_insertf128_ps() {
         let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let b = _mm_setr_ps(4., 9., 16., 25.);
         let r = _mm256_insertf128_ps::<0>(a, b);
@@ -4000,7 +4179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_insertf128_pd() {
+    const fn test_mm256_insertf128_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm_setr_pd(5., 6.);
         let r = _mm256_insertf128_pd::<0>(a, b);
@@ -4009,7 +4188,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_insertf128_si256() {
+    const fn test_mm256_insertf128_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let b = _mm_setr_epi64x(5, 6);
         let r = _mm256_insertf128_si256::<0>(a, b);
@@ -4018,7 +4197,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_insert_epi8() {
+    const fn test_mm256_insert_epi8() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -4038,7 +4217,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_insert_epi16() {
+    const fn test_mm256_insert_epi16() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi16(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4054,7 +4233,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_insert_epi32() {
+    const fn test_mm256_insert_epi32() {
         let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_insert_epi32::<7>(a, 0);
         let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
@@ -4062,189 +4241,209 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_load_pd() {
+    const fn test_mm256_load_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let p = ptr::addr_of!(a) as *const f64;
-        let r = _mm256_load_pd(p);
+        let r = unsafe { _mm256_load_pd(p) };
         let e = _mm256_setr_pd(1., 2., 3., 4.);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_store_pd() {
+    const fn test_mm256_store_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let mut r = _mm256_undefined_pd();
-        _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
+        unsafe {
+            _mm256_store_pd(ptr::addr_of_mut!(r) as *mut f64, a);
+        }
         assert_eq_m256d(r, a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_load_ps() {
+    const fn test_mm256_load_ps() {
         let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let p = ptr::addr_of!(a) as *const f32;
-        let r = _mm256_load_ps(p);
+        let r = unsafe { _mm256_load_ps(p) };
         let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_store_ps() {
+    const fn test_mm256_store_ps() {
         let a = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         let mut r = _mm256_undefined_ps();
-        _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
+        unsafe {
+            _mm256_store_ps(ptr::addr_of_mut!(r) as *mut f32, a);
+        }
         assert_eq_m256(r, a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_loadu_pd() {
+    const fn test_mm256_loadu_pd() {
         let a = &[1.0f64, 2., 3., 4.];
         let p = a.as_ptr();
-        let r = _mm256_loadu_pd(black_box(p));
+        let r = unsafe { _mm256_loadu_pd(black_box(p)) };
         let e = _mm256_setr_pd(1., 2., 3., 4.);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_storeu_pd() {
+    const fn test_mm256_storeu_pd() {
         let a = _mm256_set1_pd(9.);
         let mut r = _mm256_undefined_pd();
-        _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
+        unsafe {
+            _mm256_storeu_pd(ptr::addr_of_mut!(r) as *mut f64, a);
+        }
         assert_eq_m256d(r, a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_loadu_ps() {
+    const fn test_mm256_loadu_ps() {
         let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
         let p = a.as_ptr();
-        let r = _mm256_loadu_ps(black_box(p));
+        let r = unsafe { _mm256_loadu_ps(black_box(p)) };
         let e = _mm256_setr_ps(4., 3., 2., 5., 8., 9., 64., 50.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_storeu_ps() {
+    const fn test_mm256_storeu_ps() {
         let a = _mm256_set1_ps(9.);
         let mut r = _mm256_undefined_ps();
-        _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
+        unsafe {
+            _mm256_storeu_ps(ptr::addr_of_mut!(r) as *mut f32, a);
+        }
         assert_eq_m256(r, a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_load_si256() {
+    const fn test_mm256_load_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let p = ptr::addr_of!(a);
-        let r = _mm256_load_si256(p);
+        let r = unsafe { _mm256_load_si256(p) };
         let e = _mm256_setr_epi64x(1, 2, 3, 4);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_store_si256() {
+    const fn test_mm256_store_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let mut r = _mm256_undefined_si256();
-        _mm256_store_si256(ptr::addr_of_mut!(r), a);
+        unsafe {
+            _mm256_store_si256(ptr::addr_of_mut!(r), a);
+        }
         assert_eq_m256i(r, a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_loadu_si256() {
+    const fn test_mm256_loadu_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let p = ptr::addr_of!(a);
-        let r = _mm256_loadu_si256(black_box(p));
+        let r = unsafe { _mm256_loadu_si256(black_box(p)) };
         let e = _mm256_setr_epi64x(1, 2, 3, 4);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_storeu_si256() {
+    const fn test_mm256_storeu_si256() {
         let a = _mm256_set1_epi8(9);
         let mut r = _mm256_undefined_si256();
-        _mm256_storeu_si256(ptr::addr_of_mut!(r), a);
+        unsafe {
+            _mm256_storeu_si256(ptr::addr_of_mut!(r), a);
+        }
         assert_eq_m256i(r, a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_maskload_pd() {
+    const fn test_mm256_maskload_pd() {
         let a = &[1.0f64, 2., 3., 4.];
         let p = a.as_ptr();
         let mask = _mm256_setr_epi64x(0, !0, 0, !0);
-        let r = _mm256_maskload_pd(black_box(p), mask);
+        let r = unsafe { _mm256_maskload_pd(black_box(p), mask) };
         let e = _mm256_setr_pd(0., 2., 0., 4.);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_maskstore_pd() {
+    const fn test_mm256_maskstore_pd() {
         let mut r = _mm256_set1_pd(0.);
         let mask = _mm256_setr_epi64x(0, !0, 0, !0);
         let a = _mm256_setr_pd(1., 2., 3., 4.);
-        _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
+        unsafe {
+            _mm256_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
+        }
         let e = _mm256_setr_pd(0., 2., 0., 4.);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_maskload_pd() {
+    const fn test_mm_maskload_pd() {
         let a = &[1.0f64, 2.];
         let p = a.as_ptr();
         let mask = _mm_setr_epi64x(0, !0);
-        let r = _mm_maskload_pd(black_box(p), mask);
+        let r = unsafe { _mm_maskload_pd(black_box(p), mask) };
         let e = _mm_setr_pd(0., 2.);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_maskstore_pd() {
+    const fn test_mm_maskstore_pd() {
         let mut r = _mm_set1_pd(0.);
         let mask = _mm_setr_epi64x(0, !0);
         let a = _mm_setr_pd(1., 2.);
-        _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
+        unsafe {
+            _mm_maskstore_pd(ptr::addr_of_mut!(r) as *mut f64, mask, a);
+        }
         let e = _mm_setr_pd(0., 2.);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_maskload_ps() {
+    const fn test_mm256_maskload_ps() {
         let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
         let p = a.as_ptr();
         let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
-        let r = _mm256_maskload_ps(black_box(p), mask);
+        let r = unsafe { _mm256_maskload_ps(black_box(p), mask) };
         let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_maskstore_ps() {
+    const fn test_mm256_maskstore_ps() {
         let mut r = _mm256_set1_ps(0.);
         let mask = _mm256_setr_epi32(0, !0, 0, !0, 0, !0, 0, !0);
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
-        _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
+        unsafe {
+            _mm256_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
+        }
         let e = _mm256_setr_ps(0., 2., 0., 4., 0., 6., 0., 8.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_maskload_ps() {
+    const fn test_mm_maskload_ps() {
         let a = &[1.0f32, 2., 3., 4.];
         let p = a.as_ptr();
         let mask = _mm_setr_epi32(0, !0, 0, !0);
-        let r = _mm_maskload_ps(black_box(p), mask);
+        let r = unsafe { _mm_maskload_ps(black_box(p), mask) };
         let e = _mm_setr_ps(0., 2., 0., 4.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_maskstore_ps() {
+    const fn test_mm_maskstore_ps() {
         let mut r = _mm_set1_ps(0.);
         let mask = _mm_setr_epi32(0, !0, 0, !0);
         let a = _mm_setr_ps(1., 2., 3., 4.);
-        _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
+        unsafe {
+            _mm_maskstore_ps(ptr::addr_of_mut!(r) as *mut f32, mask, a);
+        }
         let e = _mm_setr_ps(0., 2., 0., 4.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_movehdup_ps() {
+    const fn test_mm256_movehdup_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_movehdup_ps(a);
         let e = _mm256_setr_ps(2., 2., 4., 4., 6., 6., 8., 8.);
@@ -4252,7 +4451,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_moveldup_ps() {
+    const fn test_mm256_moveldup_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_moveldup_ps(a);
         let e = _mm256_setr_ps(1., 1., 3., 3., 5., 5., 7., 7.);
@@ -4260,7 +4459,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_movedup_pd() {
+    const fn test_mm256_movedup_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let r = _mm256_movedup_pd(a);
         let e = _mm256_setr_pd(1., 1., 3., 3.);
@@ -4268,7 +4467,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_lddqu_si256() {
+    fn test_mm256_lddqu_si256() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -4277,7 +4476,7 @@ mod tests {
             25, 26, 27, 28, 29, 30, 31, 32,
         );
         let p = ptr::addr_of!(a);
-        let r = _mm256_lddqu_si256(black_box(p));
+        let r = unsafe { _mm256_lddqu_si256(black_box(p)) };
         #[rustfmt::skip]
         let e = _mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -4289,18 +4488,20 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
-    unsafe fn test_mm256_stream_si256() {
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
+    fn test_mm256_stream_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let mut r = _mm256_undefined_si256();
-        _mm256_stream_si256(ptr::addr_of_mut!(r), a);
+        unsafe {
+            _mm256_stream_si256(ptr::addr_of_mut!(r), a);
+        }
         _mm_sfence();
         assert_eq_m256i(r, a);
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
-    unsafe fn test_mm256_stream_pd() {
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
+    fn test_mm256_stream_pd() {
         #[repr(align(32))]
         struct Memory {
             pub data: [f64; 4],
@@ -4308,7 +4509,9 @@ mod tests {
         let a = _mm256_set1_pd(7.0);
         let mut mem = Memory { data: [-1.0; 4] };
 
-        _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
+        unsafe {
+            _mm256_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
+        }
         _mm_sfence();
         for i in 0..4 {
             assert_eq!(mem.data[i], get_m256d(a, i));
@@ -4316,8 +4519,8 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    #[cfg_attr(miri, ignore)] // Non-temporal store, which is not supported by Miri
-    unsafe fn test_mm256_stream_ps() {
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
+    fn test_mm256_stream_ps() {
         #[repr(align(32))]
         struct Memory {
             pub data: [f32; 8],
@@ -4325,7 +4528,9 @@ mod tests {
         let a = _mm256_set1_ps(7.0);
         let mut mem = Memory { data: [-1.0; 8] };
 
-        _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
+        unsafe {
+            _mm256_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
+        }
         _mm_sfence();
         for i in 0..8 {
             assert_eq!(mem.data[i], get_m256(a, i));
@@ -4333,7 +4538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_rcp_ps() {
+    fn test_mm256_rcp_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_rcp_ps(a);
         #[rustfmt::skip]
@@ -4348,7 +4553,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_rsqrt_ps() {
+    fn test_mm256_rsqrt_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_rsqrt_ps(a);
         #[rustfmt::skip]
@@ -4363,7 +4568,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_unpackhi_pd() {
+    const fn test_mm256_unpackhi_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_unpackhi_pd(a, b);
@@ -4372,7 +4577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_unpackhi_ps() {
+    const fn test_mm256_unpackhi_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm256_unpackhi_ps(a, b);
@@ -4381,7 +4586,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_unpacklo_pd() {
+    const fn test_mm256_unpacklo_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_unpacklo_pd(a, b);
@@ -4390,7 +4595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_unpacklo_ps() {
+    const fn test_mm256_unpacklo_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm256_unpacklo_ps(a, b);
@@ -4399,7 +4604,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_testz_si256() {
+    const fn test_mm256_testz_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let b = _mm256_setr_epi64x(5, 6, 7, 8);
         let r = _mm256_testz_si256(a, b);
@@ -4410,7 +4615,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_testc_si256() {
+    const fn test_mm256_testc_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let b = _mm256_setr_epi64x(5, 6, 7, 8);
         let r = _mm256_testc_si256(a, b);
@@ -4421,7 +4626,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_testnzc_si256() {
+    fn test_mm256_testnzc_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let b = _mm256_setr_epi64x(5, 6, 7, 8);
         let r = _mm256_testnzc_si256(a, b);
@@ -4433,7 +4638,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_testz_pd() {
+    fn test_mm256_testz_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_testz_pd(a, b);
@@ -4444,7 +4649,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_testc_pd() {
+    fn test_mm256_testc_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_testc_pd(a, b);
@@ -4456,7 +4661,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_testnzc_pd() {
+    fn test_mm256_testnzc_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 6., 7., 8.);
         let r = _mm256_testnzc_pd(a, b);
@@ -4468,7 +4673,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_testz_pd() {
+    const fn test_mm_testz_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 6.);
         let r = _mm_testz_pd(a, b);
@@ -4479,7 +4684,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_testc_pd() {
+    const fn test_mm_testc_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 6.);
         let r = _mm_testc_pd(a, b);
@@ -4491,7 +4696,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_testnzc_pd() {
+    fn test_mm_testnzc_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 6.);
         let r = _mm_testnzc_pd(a, b);
@@ -4503,7 +4708,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_testz_ps() {
+    fn test_mm256_testz_ps() {
         let a = _mm256_set1_ps(1.);
         let r = _mm256_testz_ps(a, a);
         assert_eq!(r, 1);
@@ -4513,7 +4718,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_testc_ps() {
+    fn test_mm256_testc_ps() {
         let a = _mm256_set1_ps(1.);
         let r = _mm256_testc_ps(a, a);
         assert_eq!(r, 1);
@@ -4523,7 +4728,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_testnzc_ps() {
+    fn test_mm256_testnzc_ps() {
         let a = _mm256_set1_ps(1.);
         let r = _mm256_testnzc_ps(a, a);
         assert_eq!(r, 0);
@@ -4534,7 +4739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_testz_ps() {
+    const fn test_mm_testz_ps() {
         let a = _mm_set1_ps(1.);
         let r = _mm_testz_ps(a, a);
         assert_eq!(r, 1);
@@ -4544,7 +4749,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_testc_ps() {
+    const fn test_mm_testc_ps() {
         let a = _mm_set1_ps(1.);
         let r = _mm_testc_ps(a, a);
         assert_eq!(r, 1);
@@ -4554,7 +4759,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm_testnzc_ps() {
+    fn test_mm_testnzc_ps() {
         let a = _mm_set1_ps(1.);
         let r = _mm_testnzc_ps(a, a);
         assert_eq!(r, 0);
@@ -4565,51 +4770,51 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_movemask_pd() {
+    const fn test_mm256_movemask_pd() {
         let a = _mm256_setr_pd(1., -2., 3., -4.);
         let r = _mm256_movemask_pd(a);
         assert_eq!(r, 0xA);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_movemask_ps() {
+    const fn test_mm256_movemask_ps() {
         let a = _mm256_setr_ps(1., -2., 3., -4., 1., -2., 3., -4.);
         let r = _mm256_movemask_ps(a);
         assert_eq!(r, 0xAA);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setzero_pd() {
+    const fn test_mm256_setzero_pd() {
         let r = _mm256_setzero_pd();
         assert_eq_m256d(r, _mm256_set1_pd(0.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setzero_ps() {
+    const fn test_mm256_setzero_ps() {
         let r = _mm256_setzero_ps();
         assert_eq_m256(r, _mm256_set1_ps(0.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setzero_si256() {
+    const fn test_mm256_setzero_si256() {
         let r = _mm256_setzero_si256();
         assert_eq_m256i(r, _mm256_set1_epi8(0));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set_pd() {
+    const fn test_mm256_set_pd() {
         let r = _mm256_set_pd(1., 2., 3., 4.);
         assert_eq_m256d(r, _mm256_setr_pd(4., 3., 2., 1.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set_ps() {
+    const fn test_mm256_set_ps() {
         let r = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         assert_eq_m256(r, _mm256_setr_ps(8., 7., 6., 5., 4., 3., 2., 1.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set_epi8() {
+    const fn test_mm256_set_epi8() {
         #[rustfmt::skip]
         let r = _mm256_set_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -4628,7 +4833,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set_epi16() {
+    const fn test_mm256_set_epi16() {
         #[rustfmt::skip]
         let r = _mm256_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -4643,31 +4848,31 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set_epi32() {
+    const fn test_mm256_set_epi32() {
         let r = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         assert_eq_m256i(r, _mm256_setr_epi32(8, 7, 6, 5, 4, 3, 2, 1));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set_epi64x() {
+    const fn test_mm256_set_epi64x() {
         let r = _mm256_set_epi64x(1, 2, 3, 4);
         assert_eq_m256i(r, _mm256_setr_epi64x(4, 3, 2, 1));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setr_pd() {
+    const fn test_mm256_setr_pd() {
         let r = _mm256_setr_pd(1., 2., 3., 4.);
         assert_eq_m256d(r, _mm256_setr_pd(1., 2., 3., 4.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setr_ps() {
+    const fn test_mm256_setr_ps() {
         let r = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         assert_eq_m256(r, _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setr_epi8() {
+    const fn test_mm256_setr_epi8() {
         #[rustfmt::skip]
         let r = _mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -4687,7 +4892,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setr_epi16() {
+    const fn test_mm256_setr_epi16() {
         #[rustfmt::skip]
         let r = _mm256_setr_epi16(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -4702,55 +4907,55 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setr_epi32() {
+    const fn test_mm256_setr_epi32() {
         let r = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         assert_eq_m256i(r, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setr_epi64x() {
+    const fn test_mm256_setr_epi64x() {
         let r = _mm256_setr_epi64x(1, 2, 3, 4);
         assert_eq_m256i(r, _mm256_setr_epi64x(1, 2, 3, 4));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set1_pd() {
+    const fn test_mm256_set1_pd() {
         let r = _mm256_set1_pd(1.);
         assert_eq_m256d(r, _mm256_set1_pd(1.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set1_ps() {
+    const fn test_mm256_set1_ps() {
         let r = _mm256_set1_ps(1.);
         assert_eq_m256(r, _mm256_set1_ps(1.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set1_epi8() {
+    const fn test_mm256_set1_epi8() {
         let r = _mm256_set1_epi8(1);
         assert_eq_m256i(r, _mm256_set1_epi8(1));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set1_epi16() {
+    const fn test_mm256_set1_epi16() {
         let r = _mm256_set1_epi16(1);
         assert_eq_m256i(r, _mm256_set1_epi16(1));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set1_epi32() {
+    const fn test_mm256_set1_epi32() {
         let r = _mm256_set1_epi32(1);
         assert_eq_m256i(r, _mm256_set1_epi32(1));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set1_epi64x() {
+    const fn test_mm256_set1_epi64x() {
         let r = _mm256_set1_epi64x(1);
         assert_eq_m256i(r, _mm256_set1_epi64x(1));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castpd_ps() {
+    const fn test_mm256_castpd_ps() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let r = _mm256_castpd_ps(a);
         let e = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
@@ -4758,7 +4963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castps_pd() {
+    const fn test_mm256_castps_pd() {
         let a = _mm256_setr_ps(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
         let r = _mm256_castps_pd(a);
         let e = _mm256_setr_pd(1., 2., 3., 4.);
@@ -4766,7 +4971,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castps_si256() {
+    const fn test_mm256_castps_si256() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_castps_si256(a);
         #[rustfmt::skip]
@@ -4780,7 +4985,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castsi256_ps() {
+    const fn test_mm256_castsi256_ps() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             0, 0, -128, 63, 0, 0, 0, 64,
@@ -4794,63 +4999,63 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castpd_si256() {
+    const fn test_mm256_castpd_si256() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let r = _mm256_castpd_si256(a);
-        assert_eq_m256d(transmute(r), a);
+        assert_eq_m256d(unsafe { transmute(r) }, a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castsi256_pd() {
+    const fn test_mm256_castsi256_pd() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let r = _mm256_castsi256_pd(a);
-        assert_eq_m256d(r, transmute(a));
+        assert_eq_m256d(r, unsafe { transmute(a) });
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castps256_ps128() {
+    const fn test_mm256_castps256_ps128() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_castps256_ps128(a);
         assert_eq_m128(r, _mm_setr_ps(1., 2., 3., 4.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castpd256_pd128() {
+    const fn test_mm256_castpd256_pd128() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let r = _mm256_castpd256_pd128(a);
         assert_eq_m128d(r, _mm_setr_pd(1., 2.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castsi256_si128() {
+    const fn test_mm256_castsi256_si128() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let r = _mm256_castsi256_si128(a);
         assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castps128_ps256() {
+    const fn test_mm256_castps128_ps256() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let r = _mm256_castps128_ps256(a);
         assert_eq_m128(_mm256_castps256_ps128(r), a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castpd128_pd256() {
+    const fn test_mm256_castpd128_pd256() {
         let a = _mm_setr_pd(1., 2.);
         let r = _mm256_castpd128_pd256(a);
         assert_eq_m128d(_mm256_castpd256_pd128(r), a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_castsi128_si256() {
+    const fn test_mm256_castsi128_si256() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let r = _mm256_castsi128_si256(a);
         assert_eq_m128i(_mm256_castsi256_si128(r), a);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_zextps128_ps256() {
+    const fn test_mm256_zextps128_ps256() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let r = _mm256_zextps128_ps256(a);
         let e = _mm256_setr_ps(1., 2., 3., 4., 0., 0., 0., 0.);
@@ -4858,7 +5063,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_zextsi128_si256() {
+    const fn test_mm256_zextsi128_si256() {
         let a = _mm_setr_epi64x(1, 2);
         let r = _mm256_zextsi128_si256(a);
         let e = _mm256_setr_epi64x(1, 2, 0, 0);
@@ -4866,7 +5071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_zextpd128_pd256() {
+    const fn test_mm256_zextpd128_pd256() {
         let a = _mm_setr_pd(1., 2.);
         let r = _mm256_zextpd128_pd256(a);
         let e = _mm256_setr_pd(1., 2., 0., 0.);
@@ -4874,7 +5079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set_m128() {
+    const fn test_mm256_set_m128() {
         let hi = _mm_setr_ps(5., 6., 7., 8.);
         let lo = _mm_setr_ps(1., 2., 3., 4.);
         let r = _mm256_set_m128(hi, lo);
@@ -4883,7 +5088,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set_m128d() {
+    const fn test_mm256_set_m128d() {
         let hi = _mm_setr_pd(3., 4.);
         let lo = _mm_setr_pd(1., 2.);
         let r = _mm256_set_m128d(hi, lo);
@@ -4892,7 +5097,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_set_m128i() {
+    const fn test_mm256_set_m128i() {
         #[rustfmt::skip]
         let hi = _mm_setr_epi8(
             17, 18, 19, 20,
@@ -4919,7 +5124,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setr_m128() {
+    const fn test_mm256_setr_m128() {
         let lo = _mm_setr_ps(1., 2., 3., 4.);
         let hi = _mm_setr_ps(5., 6., 7., 8.);
         let r = _mm256_setr_m128(lo, hi);
@@ -4928,7 +5133,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setr_m128d() {
+    const fn test_mm256_setr_m128d() {
         let lo = _mm_setr_pd(1., 2.);
         let hi = _mm_setr_pd(3., 4.);
         let r = _mm256_setr_m128d(lo, hi);
@@ -4937,7 +5142,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_setr_m128i() {
+    const fn test_mm256_setr_m128i() {
         #[rustfmt::skip]
         let lo = _mm_setr_epi8(
             1, 2, 3, 4,
@@ -4962,29 +5167,29 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_loadu2_m128() {
+    const fn test_mm256_loadu2_m128() {
         let hi = &[5., 6., 7., 8.];
         let hiaddr = hi.as_ptr();
         let lo = &[1., 2., 3., 4.];
         let loaddr = lo.as_ptr();
-        let r = _mm256_loadu2_m128(hiaddr, loaddr);
+        let r = unsafe { _mm256_loadu2_m128(hiaddr, loaddr) };
         let e = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_loadu2_m128d() {
+    const fn test_mm256_loadu2_m128d() {
         let hi = &[3., 4.];
         let hiaddr = hi.as_ptr();
         let lo = &[1., 2.];
         let loaddr = lo.as_ptr();
-        let r = _mm256_loadu2_m128d(hiaddr, loaddr);
+        let r = unsafe { _mm256_loadu2_m128d(hiaddr, loaddr) };
         let e = _mm256_setr_pd(1., 2., 3., 4.);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_loadu2_m128i() {
+    const fn test_mm256_loadu2_m128i() {
         #[rustfmt::skip]
         let hi = _mm_setr_epi8(
             17, 18, 19, 20, 21, 22, 23, 24,
@@ -4995,7 +5200,9 @@ mod tests {
             1, 2, 3, 4, 5, 6, 7, 8,
             9, 10, 11, 12, 13, 14, 15, 16,
         );
-        let r = _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _);
+        let r = unsafe {
+            _mm256_loadu2_m128i(ptr::addr_of!(hi) as *const _, ptr::addr_of!(lo) as *const _)
+        };
         #[rustfmt::skip]
         let e = _mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -5007,35 +5214,39 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_storeu2_m128() {
+    const fn test_mm256_storeu2_m128() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let mut hi = _mm_undefined_ps();
         let mut lo = _mm_undefined_ps();
-        _mm256_storeu2_m128(
-            ptr::addr_of_mut!(hi) as *mut f32,
-            ptr::addr_of_mut!(lo) as *mut f32,
-            a,
-        );
+        unsafe {
+            _mm256_storeu2_m128(
+                ptr::addr_of_mut!(hi) as *mut f32,
+                ptr::addr_of_mut!(lo) as *mut f32,
+                a,
+            );
+        }
         assert_eq_m128(hi, _mm_setr_ps(5., 6., 7., 8.));
         assert_eq_m128(lo, _mm_setr_ps(1., 2., 3., 4.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_storeu2_m128d() {
+    const fn test_mm256_storeu2_m128d() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let mut hi = _mm_undefined_pd();
         let mut lo = _mm_undefined_pd();
-        _mm256_storeu2_m128d(
-            ptr::addr_of_mut!(hi) as *mut f64,
-            ptr::addr_of_mut!(lo) as *mut f64,
-            a,
-        );
+        unsafe {
+            _mm256_storeu2_m128d(
+                ptr::addr_of_mut!(hi) as *mut f64,
+                ptr::addr_of_mut!(lo) as *mut f64,
+                a,
+            );
+        }
         assert_eq_m128d(hi, _mm_setr_pd(3., 4.));
         assert_eq_m128d(lo, _mm_setr_pd(1., 2.));
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_storeu2_m128i() {
+    const fn test_mm256_storeu2_m128i() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -5045,7 +5256,9 @@ mod tests {
         );
         let mut hi = _mm_undefined_si128();
         let mut lo = _mm_undefined_si128();
-        _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
+        unsafe {
+            _mm256_storeu2_m128i(ptr::addr_of_mut!(hi), ptr::addr_of_mut!(lo), a);
+        }
         #[rustfmt::skip]
         let e_hi = _mm_setr_epi8(
             17, 18, 19, 20, 21, 22, 23, 24,
@@ -5062,7 +5275,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_cvtss_f32() {
+    const fn test_mm256_cvtss_f32() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_cvtss_f32(a);
         assert_eq!(r, 1.);
diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs
index 91c10638e0..b49ad9522a 100644
--- a/crates/core_arch/src/x86/avx2.rs
+++ b/crates/core_arch/src/x86/avx2.rs
@@ -13,8 +13,8 @@
 //! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
 //! overview of the instructions available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
 //! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
 //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
 
@@ -31,7 +31,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpabsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_abs_epi32(a: __m256i) -> __m256i {
     unsafe {
         let a = a.as_i32x8();
         let r = simd_select::<m32x8, _>(simd_lt(a, i32x8::ZERO), simd_neg(a), a);
@@ -46,7 +47,8 @@ pub fn _mm256_abs_epi32(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpabsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_abs_epi16(a: __m256i) -> __m256i {
     unsafe {
         let a = a.as_i16x16();
         let r = simd_select::<m16x16, _>(simd_lt(a, i16x16::ZERO), simd_neg(a), a);
@@ -61,7 +63,8 @@ pub fn _mm256_abs_epi16(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpabsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_abs_epi8(a: __m256i) -> __m256i {
     unsafe {
         let a = a.as_i8x32();
         let r = simd_select::<m8x32, _>(simd_lt(a, i8x32::ZERO), simd_neg(a), a);
@@ -76,7 +79,8 @@ pub fn _mm256_abs_epi8(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_add(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -87,7 +91,8 @@ pub fn _mm256_add_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_add(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -98,7 +103,8 @@ pub fn _mm256_add_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_add(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -109,7 +115,8 @@ pub fn _mm256_add_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_add(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -120,7 +127,8 @@ pub fn _mm256_add_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_saturating_add(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -131,7 +139,8 @@ pub fn _mm256_adds_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_saturating_add(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -142,7 +151,8 @@ pub fn _mm256_adds_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddusb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_saturating_add(a.as_u8x32(), b.as_u8x32())) }
 }
 
@@ -153,7 +163,8 @@ pub fn _mm256_adds_epu8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpaddusw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_saturating_add(a.as_u16x16(), b.as_u16x16())) }
 }
 
@@ -166,7 +177,8 @@ pub fn _mm256_adds_epu16(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 7))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
 
     // If palignr is shifting the pair of vectors more than the size of two
@@ -247,7 +259,8 @@ pub fn _mm256_alignr_epi8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_and(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -259,7 +272,8 @@ pub fn _mm256_and_si256(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let all_ones = _mm256_set1_epi8(-1);
         transmute(simd_and(
@@ -276,7 +290,8 @@ pub fn _mm256_andnot_si256(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpavgw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let a = simd_cast::<_, u32x16>(a.as_u16x16());
         let b = simd_cast::<_, u32x16>(b.as_u16x16());
@@ -292,7 +307,8 @@ pub fn _mm256_avg_epu16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpavgb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let a = simd_cast::<_, u16x32>(a.as_u8x32());
         let b = simd_cast::<_, u16x32>(b.as_u8x32());
@@ -309,7 +325,8 @@ pub fn _mm256_avg_epu8(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vblendps, IMM4 = 9))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM4, 4);
     unsafe {
         let a = a.as_i32x4();
@@ -336,7 +353,8 @@ pub fn _mm_blend_epi32<const IMM4: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vblendps, IMM8 = 9))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         let a = a.as_i32x8();
@@ -367,7 +385,8 @@ pub fn _mm256_blend_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpblendw, IMM8 = 9))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         let a = a.as_i16x16();
@@ -406,7 +425,8 @@ pub fn _mm256_blend_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpblendvb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
     unsafe {
         let mask: i8x32 = simd_lt(mask.as_i8x32(), i8x32::ZERO);
         transmute(simd_select(mask, b.as_i8x32(), a.as_i8x32()))
@@ -421,7 +441,8 @@ pub fn _mm256_blendv_epi8(a: __m256i, b: __m256i, mask: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
     unsafe {
         let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 16]);
         transmute::<i8x16, _>(ret)
@@ -436,7 +457,8 @@ pub fn _mm_broadcastb_epi8(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
     unsafe {
         let ret = simd_shuffle!(a.as_i8x16(), i8x16::ZERO, [0_u32; 32]);
         transmute::<i8x32, _>(ret)
@@ -453,7 +475,8 @@ pub fn _mm256_broadcastb_epi8(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
     unsafe {
         let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 4]);
         transmute::<i32x4, _>(ret)
@@ -470,7 +493,8 @@ pub fn _mm_broadcastd_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
     unsafe {
         let ret = simd_shuffle!(a.as_i32x4(), i32x4::ZERO, [0_u32; 8]);
         transmute::<i32x8, _>(ret)
@@ -487,7 +511,8 @@ pub fn _mm256_broadcastd_epi32(a: __m128i) -> __m256i {
 // See https://github.com/rust-lang/stdarch/issues/791
 #[cfg_attr(test, assert_instr(vmovddup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
     unsafe {
         let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 2]);
         transmute::<i64x2, _>(ret)
@@ -502,7 +527,8 @@ pub fn _mm_broadcastq_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
     unsafe {
         let ret = simd_shuffle!(a.as_i64x2(), a.as_i64x2(), [0_u32; 4]);
         transmute::<i64x4, _>(ret)
@@ -517,7 +543,8 @@ pub fn _mm256_broadcastq_epi64(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vmovddup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
     unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 2]) }
 }
 
@@ -529,7 +556,8 @@ pub fn _mm_broadcastsd_pd(a: __m128d) -> __m128d {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
     unsafe { simd_shuffle!(a, _mm_setzero_pd(), [0_u32; 4]) }
 }
 
@@ -540,7 +568,8 @@ pub fn _mm256_broadcastsd_pd(a: __m128d) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[stable(feature = "simd_x86_updates", since = "1.82.0")]
-pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
     unsafe {
         let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
         transmute::<i64x4, _>(ret)
@@ -556,7 +585,8 @@ pub fn _mm_broadcastsi128_si256(a: __m128i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
     unsafe {
         let ret = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 1, 0, 1]);
         transmute::<i64x4, _>(ret)
@@ -571,7 +601,8 @@ pub fn _mm256_broadcastsi128_si256(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcastss_ps(a: __m128) -> __m128 {
     unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 4]) }
 }
 
@@ -583,7 +614,8 @@ pub fn _mm_broadcastss_ps(a: __m128) -> __m128 {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
     unsafe { simd_shuffle!(a, _mm_setzero_ps(), [0_u32; 8]) }
 }
 
@@ -595,7 +627,8 @@ pub fn _mm256_broadcastss_ps(a: __m128) -> __m256 {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
     unsafe {
         let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 8]);
         transmute::<i16x8, _>(ret)
@@ -610,7 +643,8 @@ pub fn _mm_broadcastw_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
     unsafe {
         let ret = simd_shuffle!(a.as_i16x8(), i16x8::ZERO, [0_u32; 16]);
         transmute::<i16x16, _>(ret)
@@ -624,7 +658,8 @@ pub fn _mm256_broadcastw_epi16(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpeqq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute::<i64x4, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -635,7 +670,8 @@ pub fn _mm256_cmpeq_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpeqd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -646,7 +682,8 @@ pub fn _mm256_cmpeq_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpeqw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -657,7 +694,8 @@ pub fn _mm256_cmpeq_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpeqb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -668,7 +706,8 @@ pub fn _mm256_cmpeq_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpgtq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute::<i64x4, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -679,7 +718,8 @@ pub fn _mm256_cmpgt_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpgtd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -690,7 +730,8 @@ pub fn _mm256_cmpgt_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpgtw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -701,7 +742,8 @@ pub fn _mm256_cmpgt_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpcmpgtb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -712,7 +754,8 @@ pub fn _mm256_cmpgt_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
     unsafe { transmute::<i32x8, _>(simd_cast(a.as_i16x8())) }
 }
 
@@ -723,7 +766,8 @@ pub fn _mm256_cvtepi16_epi32(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxwq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_i16x8();
         let v64: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -738,7 +782,8 @@ pub fn _mm256_cvtepi16_epi64(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxdq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
     unsafe { transmute::<i64x4, _>(simd_cast(a.as_i32x4())) }
 }
 
@@ -749,7 +794,8 @@ pub fn _mm256_cvtepi32_epi64(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
     unsafe { transmute::<i16x16, _>(simd_cast(a.as_i8x16())) }
 }
 
@@ -760,7 +806,8 @@ pub fn _mm256_cvtepi8_epi16(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxbd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_i8x16();
         let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -775,7 +822,8 @@ pub fn _mm256_cvtepi8_epi32(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovsxbq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_i8x16();
         let v32: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -791,7 +839,8 @@ pub fn _mm256_cvtepi8_epi64(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
     unsafe { transmute::<i32x8, _>(simd_cast(a.as_u16x8())) }
 }
 
@@ -803,7 +852,8 @@ pub fn _mm256_cvtepu16_epi32(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxwq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_u16x8();
         let v64: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -818,7 +868,8 @@ pub fn _mm256_cvtepu16_epi64(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxdq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
     unsafe { transmute::<i64x4, _>(simd_cast(a.as_u32x4())) }
 }
 
@@ -829,7 +880,8 @@ pub fn _mm256_cvtepu32_epi64(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
     unsafe { transmute::<i16x16, _>(simd_cast(a.as_u8x16())) }
 }
 
@@ -841,7 +893,8 @@ pub fn _mm256_cvtepu8_epi16(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxbd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_u8x16();
         let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -857,7 +910,8 @@ pub fn _mm256_cvtepu8_epi32(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovzxbq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_u8x16();
         let v32: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -873,7 +927,8 @@ pub fn _mm256_cvtepu8_epi64(a: __m128i) -> __m256i {
 #[cfg_attr(test, assert_instr(vextractf128, IMM1 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe {
         let a = a.as_i64x4();
@@ -890,7 +945,8 @@ pub fn _mm256_extracti128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphaddw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
     let a = a.as_i16x16();
     let b = b.as_i16x16();
     unsafe {
@@ -915,7 +971,8 @@ pub fn _mm256_hadd_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphaddd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
     let a = a.as_i32x8();
     let b = b.as_i32x8();
     unsafe {
@@ -934,7 +991,21 @@ pub fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vphaddsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(phaddsw(a.as_i16x16(), b.as_i16x16())) }
+    let a = a.as_i16x16();
+    let b = b.as_i16x16();
+    unsafe {
+        let even: i16x16 = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
+        );
+        let odd: i16x16 = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
+        );
+        simd_saturating_add(even, odd).as_m256i()
+    }
 }
 
 /// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
@@ -944,7 +1015,8 @@ pub fn _mm256_hadds_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphsubw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
     let a = a.as_i16x16();
     let b = b.as_i16x16();
     unsafe {
@@ -969,7 +1041,8 @@ pub fn _mm256_hsub_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vphsubd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
     let a = a.as_i32x8();
     let b = b.as_i32x8();
     unsafe {
@@ -988,7 +1061,21 @@ pub fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vphsubsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_hsubs_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(phsubsw(a.as_i16x16(), b.as_i16x16())) }
+    let a = a.as_i16x16();
+    let b = b.as_i16x16();
+    unsafe {
+        let even: i16x16 = simd_shuffle!(
+            a,
+            b,
+            [0, 2, 4, 6, 16, 18, 20, 22, 8, 10, 12, 14, 24, 26, 28, 30]
+        );
+        let odd: i16x16 = simd_shuffle!(
+            a,
+            b,
+            [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]
+        );
+        simd_saturating_sub(even, odd).as_m256i()
+    }
 }
 
 /// Returns values from `slice` at offsets determined by `offsets * scale`,
@@ -1734,7 +1821,8 @@ pub unsafe fn _mm256_mask_i64gather_pd<const SCALE: i32>(
 #[cfg_attr(test, assert_instr(vinsertf128, IMM1 = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe {
         let a = a.as_i64x4();
@@ -1754,12 +1842,19 @@ pub fn _mm256_inserti128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m25
 #[cfg_attr(test, assert_instr(vpmaddwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let r: i32x16 = simd_mul(simd_cast(a.as_i16x16()), simd_cast(b.as_i16x16()));
-        let even: i32x8 = simd_shuffle!(r, r, [0, 2, 4, 6, 8, 10, 12, 14]);
-        let odd: i32x8 = simd_shuffle!(r, r, [1, 3, 5, 7, 9, 11, 13, 15]);
-        simd_add(even, odd).as_m256i()
-    }
+    // It's a trick used in the Adler-32 algorithm to perform a widening addition.
+    //
+    // ```rust
+    // #[target_feature(enable = "avx2")]
+    // unsafe fn widening_add(mad: __m256i) -> __m256i {
+    //     _mm256_madd_epi16(mad, _mm256_set1_epi16(1))
+    // }
+    // ```
+    //
+    // If we implement this using generic vector intrinsics, the optimizer
+    // will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
+    // For this reason, we use x86 intrinsics.
+    unsafe { transmute(pmaddwd(a.as_i16x16(), b.as_i16x16())) }
 }
 
 /// Vertically multiplies each unsigned 8-bit integer from `a` with the
@@ -1773,7 +1868,7 @@ pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpmaddubsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
+    unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
 }
 
 /// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
@@ -1785,8 +1880,10 @@ pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
-    transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
+    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
 }
 
 /// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
@@ -1798,8 +1895,10 @@ pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
-    transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
+    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
 }
 
 /// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
@@ -1811,8 +1910,10 @@ pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m2
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
-    transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
+    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
 }
 
 /// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
@@ -1824,8 +1925,10 @@ pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
-    transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
+    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
 }
 
 /// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
@@ -1837,8 +1940,10 @@ pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m2
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
-    maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
+    let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
 }
 
 /// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
@@ -1850,8 +1955,10 @@ pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i)
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
-    maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
+    let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
 }
 
 /// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
@@ -1863,8 +1970,10 @@ pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m25
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
-    maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
+    let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
 }
 
 /// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
@@ -1876,8 +1985,10 @@ pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i)
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaskmovq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
-    maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
+    let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
 }
 
 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -1888,12 +1999,9 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_i16x16();
-        let b = b.as_i16x16();
-        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imax(a.as_i16x16(), b.as_i16x16()).as_m256i() }
 }
 
 /// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -1904,12 +2012,9 @@ pub fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_i32x8();
-        let b = b.as_i32x8();
-        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imax(a.as_i32x8(), b.as_i32x8()).as_m256i() }
 }
 
 /// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -1920,12 +2025,9 @@ pub fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_i8x32();
-        let b = b.as_i8x32();
-        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imax(a.as_i8x32(), b.as_i8x32()).as_m256i() }
 }
 
 /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -1936,12 +2038,9 @@ pub fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxuw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_u16x16();
-        let b = b.as_u16x16();
-        transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imax(a.as_u16x16(), b.as_u16x16()).as_m256i() }
 }
 
 /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -1952,12 +2051,9 @@ pub fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxud))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_u32x8();
-        let b = b.as_u32x8();
-        transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imax(a.as_u32x8(), b.as_u32x8()).as_m256i() }
 }
 
 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -1968,12 +2064,9 @@ pub fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmaxub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_u8x32();
-        let b = b.as_u8x32();
-        transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imax(a.as_u8x32(), b.as_u8x32()).as_m256i() }
 }
 
 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -1984,12 +2077,9 @@ pub fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_i16x16();
-        let b = b.as_i16x16();
-        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imin(a.as_i16x16(), b.as_i16x16()).as_m256i() }
 }
 
 /// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -2000,12 +2090,9 @@ pub fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_i32x8();
-        let b = b.as_i32x8();
-        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imin(a.as_i32x8(), b.as_i32x8()).as_m256i() }
 }
 
 /// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -2016,12 +2103,9 @@ pub fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_i8x32();
-        let b = b.as_i8x32();
-        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imin(a.as_i8x32(), b.as_i8x32()).as_m256i() }
 }
 
 /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -2032,12 +2116,9 @@ pub fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminuw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_u16x16();
-        let b = b.as_u16x16();
-        transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imin(a.as_u16x16(), b.as_u16x16()).as_m256i() }
 }
 
 /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -2048,12 +2129,9 @@ pub fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminud))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_u32x8();
-        let b = b.as_u32x8();
-        transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imin(a.as_u32x8(), b.as_u32x8()).as_m256i() }
 }
 
 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -2064,12 +2142,9 @@ pub fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpminub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_u8x32();
-        let b = b.as_u8x32();
-        transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imin(a.as_u8x32(), b.as_u8x32()).as_m256i() }
 }
 
 /// Creates mask from the most significant bit of each 8-bit element in `a`,
@@ -2080,7 +2155,8 @@ pub fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmovmskb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_movemask_epi8(a: __m256i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movemask_epi8(a: __m256i) -> i32 {
     unsafe {
         let z = i8x32::ZERO;
         let m: i8x32 = simd_lt(a.as_i8x32(), z);
@@ -2117,7 +2193,8 @@ pub fn _mm256_mpsadbw_epu8<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let a = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(a.as_i64x4()));
         let b = simd_cast::<_, i64x4>(simd_cast::<_, i32x4>(b.as_i64x4()));
@@ -2135,11 +2212,12 @@ pub fn _mm256_mul_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmuludq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let a = a.as_u64x4();
         let b = b.as_u64x4();
-        let mask = u64x4::splat(u32::MAX.into());
+        let mask = u64x4::splat(u32::MAX as u64);
         transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
     }
 }
@@ -2153,7 +2231,8 @@ pub fn _mm256_mul_epu32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmulhw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let a = simd_cast::<_, i32x16>(a.as_i16x16());
         let b = simd_cast::<_, i32x16>(b.as_i16x16());
@@ -2171,7 +2250,8 @@ pub fn _mm256_mulhi_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmulhuw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let a = simd_cast::<_, u32x16>(a.as_u16x16());
         let b = simd_cast::<_, u32x16>(b.as_u16x16());
@@ -2189,7 +2269,8 @@ pub fn _mm256_mulhi_epu16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmullw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_mul(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -2202,7 +2283,8 @@ pub fn _mm256_mullo_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpmulld))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mullo_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_mul(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -2228,11 +2310,12 @@ pub fn _mm256_mulhrs_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
 }
 
-/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
+/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using signed saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16)
@@ -2240,11 +2323,33 @@ pub fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let max = simd_splat(i8::MAX as i16);
+        let min = simd_splat(i8::MIN as i16);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min)
+            .as_m256i()
+            .as_i8x32();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x16(), max), min)
+            .as_m256i()
+            .as_i8x32();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 32] = [
+            00, 02, 04, 06, 08, 10, 12, 14, // a-lo i16 to i8 conversions
+            32, 34, 36, 38, 40, 42, 44, 46, // b-lo
+            16, 18, 20, 22, 24, 26, 28, 30, // a-hi
+            48, 50, 52, 54, 56, 58, 60, 62, // b-hi
+        ];
+        let result: i8x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m256i()
+    }
 }
 
-/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using signed saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32)
@@ -2252,11 +2357,33 @@ pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let max = simd_splat(i16::MAX as i32);
+        let min = simd_splat(i16::MIN as i32);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min)
+            .as_m256i()
+            .as_i16x16();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x8(), max), min)
+            .as_m256i()
+            .as_i16x16();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 16] = [
+            00, 02, 04, 06, // a-lo i32 to i16 conversions
+            16, 18, 20, 22, // b-lo
+            08, 10, 12, 14, // a-hi
+            24, 26, 28, 30, // b-hi
+        ];
+        let result: i16x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m256i()
+    }
 }
 
-/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
+/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using unsigned saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16)
@@ -2264,11 +2391,33 @@ pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let max = simd_splat(u8::MAX as i16);
+        let min = simd_splat(u8::MIN as i16);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min)
+            .as_m256i()
+            .as_i8x32();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x16(), max), min)
+            .as_m256i()
+            .as_i8x32();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 32] = [
+            00, 02, 04, 06, 08, 10, 12, 14, // a-lo i16 to u8 conversions
+            32, 34, 36, 38, 40, 42, 44, 46, // b-lo
+            16, 18, 20, 22, 24, 26, 28, 30, // a-hi
+            48, 50, 52, 54, 56, 58, 60, 62, // b-hi
+        ];
+        let result: i8x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m256i()
+    }
 }
 
-/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using unsigned saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32)
@@ -2276,8 +2425,30 @@ pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i {
+    unsafe {
+        let max = simd_splat(u16::MAX as i32);
+        let min = simd_splat(u16::MIN as i32);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min)
+            .as_m256i()
+            .as_i16x16();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x8(), max), min)
+            .as_m256i()
+            .as_i16x16();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 16] = [
+            00, 02, 04, 06, // a-lo i32 to u16 conversions
+            16, 18, 20, 22, // b-lo
+            08, 10, 12, 14, // a-hi
+            24, 26, 28, 30, // b-hi
+        ];
+        let result: i16x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m256i()
+    }
 }
 
 /// Permutes packed 32-bit integers from `a` according to the content of `b`.
@@ -2302,7 +2473,8 @@ pub fn _mm256_permutevar8x32_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpermpd, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         let zero = i64x4::ZERO;
@@ -2328,7 +2500,8 @@ pub fn _mm256_permute4x64_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vperm2f128, IMM8 = 9))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_permute2f128_si256::<IMM8>(a, b)
 }
@@ -2342,7 +2515,8 @@ pub fn _mm256_permute2x128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m
 #[cfg_attr(test, assert_instr(vpermpd, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permute4x64_pd<const IMM8: i32>(a: __m256d) -> __m256d {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         simd_shuffle!(
@@ -2457,7 +2631,8 @@ pub fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vshufps, MASK = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(MASK, 8);
     unsafe {
         let r: i32x8 = simd_shuffle!(
@@ -2488,7 +2663,8 @@ pub fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         let a = a.as_i16x16();
@@ -2528,7 +2704,8 @@ pub fn _mm256_shufflehi_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shufflelo_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         let a = a.as_i16x16();
@@ -2642,7 +2819,8 @@ pub fn _mm256_sll_epi64(a: __m256i, count: __m128i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 16 {
@@ -2662,7 +2840,8 @@ pub fn _mm256_slli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpslld, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 32 {
@@ -2682,7 +2861,8 @@ pub fn _mm256_slli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 64 {
@@ -2701,7 +2881,8 @@ pub fn _mm256_slli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_bslli_epi128::<IMM8>(a)
 }
@@ -2714,7 +2895,8 @@ pub fn _mm256_slli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     const fn mask(shift: i32, i: u32) -> u32 {
         let shift = shift as u32 & 0xff;
@@ -2777,8 +2959,14 @@ pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let count = count.as_u32x4();
+        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
+        let count = simd_select(no_overflow, count, u32x4::ZERO);
+        simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` left by the amount
@@ -2790,8 +2978,14 @@ pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let count = count.as_u32x8();
+        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
+        let count = simd_select(no_overflow, count, u32x8::ZERO);
+        simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` left by the amount
@@ -2803,8 +2997,14 @@ pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let count = count.as_u64x2();
+        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
+        let count = simd_select(no_overflow, count, u64x2::ZERO);
+        simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` left by the amount
@@ -2816,8 +3016,14 @@ pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let count = count.as_u64x4();
+        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
+        let count = simd_select(no_overflow, count, u64x4::ZERO);
+        simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
+    }
 }
 
 /// Shifts packed 16-bit integers in `a` right by `count` while
@@ -2853,7 +3059,8 @@ pub fn _mm256_sra_epi32(a: __m256i, count: __m128i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe { transmute(simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16))) }
 }
@@ -2867,7 +3074,8 @@ pub fn _mm256_srai_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe { transmute(simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31)))) }
 }
@@ -2880,8 +3088,14 @@ pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsravd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let count = count.as_u32x4();
+        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
+        let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
+        simd_shr(a.as_i32x4(), count).as_m128i()
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` right by the amount specified by the
@@ -2892,8 +3106,14 @@ pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsravd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let count = count.as_u32x8();
+        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
+        let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
+        simd_shr(a.as_i32x8(), count).as_m256i()
+    }
 }
 
 /// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
@@ -2904,7 +3124,8 @@ pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_bsrli_epi128::<IMM8>(a)
 }
@@ -2917,7 +3138,8 @@ pub fn _mm256_srli_si256<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_bsrli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     const fn mask(shift: i32, i: u32) -> u32 {
         let shift = shift as u32 & 0xff;
@@ -3016,7 +3238,8 @@ pub fn _mm256_srl_epi64(a: __m256i, count: __m128i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 16 {
@@ -3036,7 +3259,8 @@ pub fn _mm256_srli_epi16<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 32 {
@@ -3056,7 +3280,8 @@ pub fn _mm256_srli_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 64 {
@@ -3075,8 +3300,14 @@ pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let count = count.as_u32x4();
+        let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
+        let count = simd_select(no_overflow, count, u32x4::ZERO);
+        simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
+    }
 }
 
 /// Shifts packed 32-bit integers in `a` right by the amount specified by
@@ -3087,8 +3318,14 @@ pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let count = count.as_u32x8();
+        let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
+        let count = simd_select(no_overflow, count, u32x8::ZERO);
+        simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` right by the amount specified by
@@ -3099,8 +3336,14 @@ pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let count = count.as_u64x2();
+        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
+        let count = simd_select(no_overflow, count, u64x2::ZERO);
+        simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
+    }
 }
 
 /// Shifts packed 64-bit integers in `a` right by the amount specified by
@@ -3111,8 +3354,14 @@ pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let count = count.as_u64x4();
+        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
+        let count = simd_select(no_overflow, count, u64x4::ZERO);
+        simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
+    }
 }
 
 /// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
@@ -3142,7 +3391,8 @@ pub unsafe fn _mm256_stream_load_si256(mem_addr: *const __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_sub(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -3153,7 +3403,8 @@ pub fn _mm256_sub_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_sub(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -3164,7 +3415,8 @@ pub fn _mm256_sub_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_sub(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -3175,7 +3427,8 @@ pub fn _mm256_sub_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_sub(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -3187,7 +3440,8 @@ pub fn _mm256_sub_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_saturating_sub(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -3199,7 +3453,8 @@ pub fn _mm256_subs_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_saturating_sub(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -3211,7 +3466,8 @@ pub fn _mm256_subs_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubusw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_saturating_sub(a.as_u16x16(), b.as_u16x16())) }
 }
 
@@ -3223,7 +3479,8 @@ pub fn _mm256_subs_epu16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpsubusb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_saturating_sub(a.as_u8x32(), b.as_u8x32())) }
 }
 
@@ -3270,7 +3527,8 @@ pub fn _mm256_subs_epu8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         #[rustfmt::skip]
         let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
@@ -3325,7 +3583,8 @@ pub fn _mm256_unpackhi_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         #[rustfmt::skip]
         let r: i8x32 = simd_shuffle!(a.as_i8x32(), b.as_i8x32(), [
@@ -3376,7 +3635,8 @@ pub fn _mm256_unpacklo_epi8(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let r: i16x16 = simd_shuffle!(
             a.as_i16x16(),
@@ -3426,7 +3686,8 @@ pub fn _mm256_unpackhi_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let r: i16x16 = simd_shuffle!(
             a.as_i16x16(),
@@ -3469,7 +3730,8 @@ pub fn _mm256_unpacklo_epi16(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [2, 10, 3, 11, 6, 14, 7, 15]);
         transmute(r)
@@ -3508,7 +3770,8 @@ pub fn _mm256_unpackhi_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let r: i32x8 = simd_shuffle!(a.as_i32x8(), b.as_i32x8(), [0, 8, 1, 9, 4, 12, 5, 13]);
         transmute(r)
@@ -3547,7 +3810,8 @@ pub fn _mm256_unpacklo_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [1, 5, 3, 7]);
         transmute(r)
@@ -3586,7 +3850,8 @@ pub fn _mm256_unpackhi_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let r: i64x4 = simd_shuffle!(a.as_i64x4(), b.as_i64x4(), [0, 4, 2, 6]);
         transmute(r)
@@ -3601,7 +3866,8 @@ pub fn _mm256_unpacklo_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx2")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -3616,7 +3882,8 @@ pub fn _mm256_xor_si256(a: __m256i, b: __m256i) -> __m256i {
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
     static_assert_uimm_bits!(INDEX, 5);
     unsafe { simd_extract!(a.as_u8x32(), INDEX as u32, u8) as i32 }
 }
@@ -3632,47 +3899,22 @@ pub fn _mm256_extract_epi8<const INDEX: i32>(a: __m256i) -> i32 {
 // This intrinsic has no corresponding instruction.
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
     static_assert_uimm_bits!(INDEX, 4);
     unsafe { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 }
 }
 
 #[allow(improper_ctypes)]
 unsafe extern "C" {
-    #[link_name = "llvm.x86.avx2.phadd.sw"]
-    fn phaddsw(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx2.phsub.sw"]
-    fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
+    #[link_name = "llvm.x86.avx2.pmadd.wd"]
+    fn pmaddwd(a: i16x16, b: i16x16) -> i32x8;
     #[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
-    fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
-    #[link_name = "llvm.x86.avx2.maskload.d"]
-    fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.avx2.maskload.d.256"]
-    fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
-    #[link_name = "llvm.x86.avx2.maskload.q"]
-    fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
-    #[link_name = "llvm.x86.avx2.maskload.q.256"]
-    fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
-    #[link_name = "llvm.x86.avx2.maskstore.d"]
-    fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
-    #[link_name = "llvm.x86.avx2.maskstore.d.256"]
-    fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
-    #[link_name = "llvm.x86.avx2.maskstore.q"]
-    fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
-    #[link_name = "llvm.x86.avx2.maskstore.q.256"]
-    fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
+    fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
     #[link_name = "llvm.x86.avx2.mpsadbw"]
     fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
     #[link_name = "llvm.x86.avx2.pmul.hr.sw"]
     fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx2.packsswb"]
-    fn packsswb(a: i16x16, b: i16x16) -> i8x32;
-    #[link_name = "llvm.x86.avx2.packssdw"]
-    fn packssdw(a: i32x8, b: i32x8) -> i16x16;
-    #[link_name = "llvm.x86.avx2.packuswb"]
-    fn packuswb(a: i16x16, b: i16x16) -> u8x32;
-    #[link_name = "llvm.x86.avx2.packusdw"]
-    fn packusdw(a: i32x8, b: i32x8) -> u16x16;
     #[link_name = "llvm.x86.avx2.psad.bw"]
     fn psadbw(a: u8x32, b: u8x32) -> u64x4;
     #[link_name = "llvm.x86.avx2.psign.b"]
@@ -3687,36 +3929,16 @@ unsafe extern "C" {
     fn pslld(a: i32x8, count: i32x4) -> i32x8;
     #[link_name = "llvm.x86.avx2.psll.q"]
     fn psllq(a: i64x4, count: i64x2) -> i64x4;
-    #[link_name = "llvm.x86.avx2.psllv.d"]
-    fn psllvd(a: i32x4, count: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.avx2.psllv.d.256"]
-    fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
-    #[link_name = "llvm.x86.avx2.psllv.q"]
-    fn psllvq(a: i64x2, count: i64x2) -> i64x2;
-    #[link_name = "llvm.x86.avx2.psllv.q.256"]
-    fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
     #[link_name = "llvm.x86.avx2.psra.w"]
     fn psraw(a: i16x16, count: i16x8) -> i16x16;
     #[link_name = "llvm.x86.avx2.psra.d"]
     fn psrad(a: i32x8, count: i32x4) -> i32x8;
-    #[link_name = "llvm.x86.avx2.psrav.d"]
-    fn psravd(a: i32x4, count: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.avx2.psrav.d.256"]
-    fn psravd256(a: i32x8, count: i32x8) -> i32x8;
     #[link_name = "llvm.x86.avx2.psrl.w"]
     fn psrlw(a: i16x16, count: i16x8) -> i16x16;
     #[link_name = "llvm.x86.avx2.psrl.d"]
     fn psrld(a: i32x8, count: i32x4) -> i32x8;
     #[link_name = "llvm.x86.avx2.psrl.q"]
     fn psrlq(a: i64x4, count: i64x2) -> i64x4;
-    #[link_name = "llvm.x86.avx2.psrlv.d"]
-    fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
-    #[link_name = "llvm.x86.avx2.psrlv.d.256"]
-    fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
-    #[link_name = "llvm.x86.avx2.psrlv.q"]
-    fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
-    #[link_name = "llvm.x86.avx2.psrlv.q.256"]
-    fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
     #[link_name = "llvm.x86.avx2.pshuf.b"]
     fn pshufb(a: u8x32, b: u8x32) -> u8x32;
     #[link_name = "llvm.x86.avx2.permd"]
@@ -3797,13 +4019,14 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
 
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_abs_epi32() {
+    const fn test_mm256_abs_epi32() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -3819,7 +4042,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_abs_epi16() {
+    const fn test_mm256_abs_epi16() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi16(
             0,  1, -1, 2, -2, 3, -3, 4,
@@ -3835,7 +4058,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_abs_epi8() {
+    const fn test_mm256_abs_epi8() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             0, 1, -1, 2, -2, 3, -3, 4,
@@ -3855,7 +4078,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_add_epi64() {
+    const fn test_mm256_add_epi64() {
         let a = _mm256_setr_epi64x(-10, 0, 100, 1_000_000_000);
         let b = _mm256_setr_epi64x(-1, 0, 1, 2);
         let r = _mm256_add_epi64(a, b);
@@ -3864,7 +4087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_add_epi32() {
+    const fn test_mm256_add_epi32() {
         let a = _mm256_setr_epi32(-1, 0, 1, 2, 3, 4, 5, 6);
         let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_add_epi32(a, b);
@@ -3873,7 +4096,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_add_epi16() {
+    const fn test_mm256_add_epi16() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi16(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -3894,7 +4117,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_add_epi8() {
+    const fn test_mm256_add_epi8() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -3921,7 +4144,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epi8() {
+    const fn test_mm256_adds_epi8() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -3948,7 +4171,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epi8_saturate_positive() {
+    fn test_mm256_adds_epi8_saturate_positive() {
         let a = _mm256_set1_epi8(0x7F);
         let b = _mm256_set1_epi8(1);
         let r = _mm256_adds_epi8(a, b);
@@ -3956,7 +4179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epi8_saturate_negative() {
+    fn test_mm256_adds_epi8_saturate_negative() {
         let a = _mm256_set1_epi8(-0x80);
         let b = _mm256_set1_epi8(-1);
         let r = _mm256_adds_epi8(a, b);
@@ -3964,7 +4187,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epi16() {
+    const fn test_mm256_adds_epi16() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi16(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -3986,7 +4209,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epi16_saturate_positive() {
+    fn test_mm256_adds_epi16_saturate_positive() {
         let a = _mm256_set1_epi16(0x7FFF);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_adds_epi16(a, b);
@@ -3994,7 +4217,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epi16_saturate_negative() {
+    fn test_mm256_adds_epi16_saturate_negative() {
         let a = _mm256_set1_epi16(-0x8000);
         let b = _mm256_set1_epi16(-1);
         let r = _mm256_adds_epi16(a, b);
@@ -4002,7 +4225,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epu8() {
+    const fn test_mm256_adds_epu8() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4029,7 +4252,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epu8_saturate() {
+    fn test_mm256_adds_epu8_saturate() {
         let a = _mm256_set1_epi8(!0);
         let b = _mm256_set1_epi8(1);
         let r = _mm256_adds_epu8(a, b);
@@ -4037,7 +4260,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epu16() {
+    const fn test_mm256_adds_epu16() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi16(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4059,7 +4282,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_adds_epu16_saturate() {
+    fn test_mm256_adds_epu16_saturate() {
         let a = _mm256_set1_epi16(!0);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_adds_epu16(a, b);
@@ -4067,7 +4290,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_and_si256() {
+    const fn test_mm256_and_si256() {
         let a = _mm256_set1_epi8(5);
         let b = _mm256_set1_epi8(3);
         let got = _mm256_and_si256(a, b);
@@ -4075,7 +4298,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_andnot_si256() {
+    const fn test_mm256_andnot_si256() {
         let a = _mm256_set1_epi8(5);
         let b = _mm256_set1_epi8(3);
         let got = _mm256_andnot_si256(a, b);
@@ -4083,21 +4306,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_avg_epu8() {
+    const fn test_mm256_avg_epu8() {
         let (a, b) = (_mm256_set1_epi8(3), _mm256_set1_epi8(9));
         let r = _mm256_avg_epu8(a, b);
         assert_eq_m256i(r, _mm256_set1_epi8(6));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_avg_epu16() {
+    const fn test_mm256_avg_epu16() {
         let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
         let r = _mm256_avg_epu16(a, b);
         assert_eq_m256i(r, _mm256_set1_epi16(6));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_blend_epi32() {
+    const fn test_mm_blend_epi32() {
         let (a, b) = (_mm_set1_epi32(3), _mm_set1_epi32(9));
         let e = _mm_setr_epi32(9, 3, 3, 3);
         let r = _mm_blend_epi32::<0x01>(a, b);
@@ -4108,7 +4331,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_blend_epi32() {
+    const fn test_mm256_blend_epi32() {
         let (a, b) = (_mm256_set1_epi32(3), _mm256_set1_epi32(9));
         let e = _mm256_setr_epi32(9, 3, 3, 3, 3, 3, 3, 3);
         let r = _mm256_blend_epi32::<0x01>(a, b);
@@ -4124,7 +4347,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_blend_epi16() {
+    const fn test_mm256_blend_epi16() {
         let (a, b) = (_mm256_set1_epi16(3), _mm256_set1_epi16(9));
         let e = _mm256_setr_epi16(9, 3, 3, 3, 3, 3, 3, 3, 9, 3, 3, 3, 3, 3, 3, 3);
         let r = _mm256_blend_epi16::<0x01>(a, b);
@@ -4135,7 +4358,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_blendv_epi8() {
+    const fn test_mm256_blendv_epi8() {
         let (a, b) = (_mm256_set1_epi8(4), _mm256_set1_epi8(2));
         let mask = _mm256_insert_epi8::<2>(_mm256_set1_epi8(0), -1);
         let e = _mm256_insert_epi8::<2>(_mm256_set1_epi8(4), 2);
@@ -4144,63 +4367,63 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_broadcastb_epi8() {
+    const fn test_mm_broadcastb_epi8() {
         let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
         let res = _mm_broadcastb_epi8(a);
         assert_eq_m128i(res, _mm_set1_epi8(0x2a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_broadcastb_epi8() {
+    const fn test_mm256_broadcastb_epi8() {
         let a = _mm_insert_epi8::<0>(_mm_set1_epi8(0x00), 0x2a);
         let res = _mm256_broadcastb_epi8(a);
         assert_eq_m256i(res, _mm256_set1_epi8(0x2a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_broadcastd_epi32() {
+    const fn test_mm_broadcastd_epi32() {
         let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
         let res = _mm_broadcastd_epi32(a);
         assert_eq_m128i(res, _mm_set1_epi32(0x2a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_broadcastd_epi32() {
+    const fn test_mm256_broadcastd_epi32() {
         let a = _mm_setr_epi32(0x2a, 0x8000000, 0, 0);
         let res = _mm256_broadcastd_epi32(a);
         assert_eq_m256i(res, _mm256_set1_epi32(0x2a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_broadcastq_epi64() {
+    const fn test_mm_broadcastq_epi64() {
         let a = _mm_setr_epi64x(0x1ffffffff, 0);
         let res = _mm_broadcastq_epi64(a);
         assert_eq_m128i(res, _mm_set1_epi64x(0x1ffffffff));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_broadcastq_epi64() {
+    const fn test_mm256_broadcastq_epi64() {
         let a = _mm_setr_epi64x(0x1ffffffff, 0);
         let res = _mm256_broadcastq_epi64(a);
         assert_eq_m256i(res, _mm256_set1_epi64x(0x1ffffffff));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_broadcastsd_pd() {
+    const fn test_mm_broadcastsd_pd() {
         let a = _mm_setr_pd(6.88, 3.44);
         let res = _mm_broadcastsd_pd(a);
         assert_eq_m128d(res, _mm_set1_pd(6.88));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_broadcastsd_pd() {
+    const fn test_mm256_broadcastsd_pd() {
         let a = _mm_setr_pd(6.88, 3.44);
         let res = _mm256_broadcastsd_pd(a);
         assert_eq_m256d(res, _mm256_set1_pd(6.88f64));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_broadcastsi128_si256() {
+    const fn test_mm_broadcastsi128_si256() {
         let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
         let res = _mm_broadcastsi128_si256(a);
         let retval = _mm256_setr_epi64x(
@@ -4213,7 +4436,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_broadcastsi128_si256() {
+    const fn test_mm256_broadcastsi128_si256() {
         let a = _mm_setr_epi64x(0x0987654321012334, 0x5678909876543210);
         let res = _mm256_broadcastsi128_si256(a);
         let retval = _mm256_setr_epi64x(
@@ -4226,35 +4449,35 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_broadcastss_ps() {
+    const fn test_mm_broadcastss_ps() {
         let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
         let res = _mm_broadcastss_ps(a);
         assert_eq_m128(res, _mm_set1_ps(6.88));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_broadcastss_ps() {
+    const fn test_mm256_broadcastss_ps() {
         let a = _mm_setr_ps(6.88, 3.44, 0.0, 0.0);
         let res = _mm256_broadcastss_ps(a);
         assert_eq_m256(res, _mm256_set1_ps(6.88));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_broadcastw_epi16() {
+    const fn test_mm_broadcastw_epi16() {
         let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
         let res = _mm_broadcastw_epi16(a);
         assert_eq_m128i(res, _mm_set1_epi16(0x22b));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_broadcastw_epi16() {
+    const fn test_mm256_broadcastw_epi16() {
         let a = _mm_insert_epi16::<0>(_mm_set1_epi16(0x2a), 0x22b);
         let res = _mm256_broadcastw_epi16(a);
         assert_eq_m256i(res, _mm256_set1_epi16(0x22b));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cmpeq_epi8() {
+    const fn test_mm256_cmpeq_epi8() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4274,7 +4497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cmpeq_epi16() {
+    const fn test_mm256_cmpeq_epi16() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi16(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4290,7 +4513,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cmpeq_epi32() {
+    const fn test_mm256_cmpeq_epi32() {
         let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm256_setr_epi32(7, 6, 2, 4, 3, 2, 1, 0);
         let r = _mm256_cmpeq_epi32(a, b);
@@ -4300,7 +4523,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cmpeq_epi64() {
+    const fn test_mm256_cmpeq_epi64() {
         let a = _mm256_setr_epi64x(0, 1, 2, 3);
         let b = _mm256_setr_epi64x(3, 2, 2, 0);
         let r = _mm256_cmpeq_epi64(a, b);
@@ -4308,7 +4531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cmpgt_epi8() {
+    const fn test_mm256_cmpgt_epi8() {
         let a = _mm256_insert_epi8::<0>(_mm256_set1_epi8(0), 5);
         let b = _mm256_set1_epi8(0);
         let r = _mm256_cmpgt_epi8(a, b);
@@ -4316,7 +4539,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cmpgt_epi16() {
+    const fn test_mm256_cmpgt_epi16() {
         let a = _mm256_insert_epi16::<0>(_mm256_set1_epi16(0), 5);
         let b = _mm256_set1_epi16(0);
         let r = _mm256_cmpgt_epi16(a, b);
@@ -4324,7 +4547,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cmpgt_epi32() {
+    const fn test_mm256_cmpgt_epi32() {
         let a = _mm256_insert_epi32::<0>(_mm256_set1_epi32(0), 5);
         let b = _mm256_set1_epi32(0);
         let r = _mm256_cmpgt_epi32(a, b);
@@ -4332,7 +4555,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cmpgt_epi64() {
+    const fn test_mm256_cmpgt_epi64() {
         let a = _mm256_insert_epi64::<0>(_mm256_set1_epi64x(0), 5);
         let b = _mm256_set1_epi64x(0);
         let r = _mm256_cmpgt_epi64(a, b);
@@ -4340,7 +4563,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepi8_epi16() {
+    const fn test_mm256_cvtepi8_epi16() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 0, -1, 1, -2, 2, -3, 3,
@@ -4355,7 +4578,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepi8_epi32() {
+    const fn test_mm256_cvtepi8_epi32() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 0, -1, 1, -2, 2, -3, 3,
@@ -4366,7 +4589,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepi8_epi64() {
+    const fn test_mm256_cvtepi8_epi64() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 0, -1, 1, -2, 2, -3, 3,
@@ -4377,49 +4600,49 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepi16_epi32() {
+    const fn test_mm256_cvtepi16_epi32() {
         let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
         let r = _mm256_setr_epi32(0, 0, -1, 1, -2, 2, -3, 3);
         assert_eq_m256i(r, _mm256_cvtepi16_epi32(a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepi16_epi64() {
+    const fn test_mm256_cvtepi16_epi64() {
         let a = _mm_setr_epi16(0, 0, -1, 1, -2, 2, -3, 3);
         let r = _mm256_setr_epi64x(0, 0, -1, 1);
         assert_eq_m256i(r, _mm256_cvtepi16_epi64(a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepi32_epi64() {
+    const fn test_mm256_cvtepi32_epi64() {
         let a = _mm_setr_epi32(0, 0, -1, 1);
         let r = _mm256_setr_epi64x(0, 0, -1, 1);
         assert_eq_m256i(r, _mm256_cvtepi32_epi64(a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepu16_epi32() {
+    const fn test_mm256_cvtepu16_epi32() {
         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         assert_eq_m256i(r, _mm256_cvtepu16_epi32(a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepu16_epi64() {
+    const fn test_mm256_cvtepu16_epi64() {
         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_setr_epi64x(0, 1, 2, 3);
         assert_eq_m256i(r, _mm256_cvtepu16_epi64(a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepu32_epi64() {
+    const fn test_mm256_cvtepu32_epi64() {
         let a = _mm_setr_epi32(0, 1, 2, 3);
         let r = _mm256_setr_epi64x(0, 1, 2, 3);
         assert_eq_m256i(r, _mm256_cvtepu32_epi64(a));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepu8_epi16() {
+    const fn test_mm256_cvtepu8_epi16() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4434,7 +4657,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepu8_epi32() {
+    const fn test_mm256_cvtepu8_epi32() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4445,7 +4668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_cvtepu8_epi64() {
+    const fn test_mm256_cvtepu8_epi64() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4456,7 +4679,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_extracti128_si256() {
+    const fn test_mm256_extracti128_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let r = _mm256_extracti128_si256::<1>(a);
         let e = _mm_setr_epi64x(3, 4);
@@ -4464,7 +4687,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_hadd_epi16() {
+    const fn test_mm256_hadd_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_hadd_epi16(a, b);
@@ -4473,7 +4696,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_hadd_epi32() {
+    const fn test_mm256_hadd_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_hadd_epi32(a, b);
@@ -4482,7 +4705,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_hadds_epi16() {
+    fn test_mm256_hadds_epi16() {
         let a = _mm256_set1_epi16(2);
         let a = _mm256_insert_epi16::<0>(a, 0x7fff);
         let a = _mm256_insert_epi16::<1>(a, 1);
@@ -4497,7 +4720,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_hsub_epi16() {
+    const fn test_mm256_hsub_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_hsub_epi16(a, b);
@@ -4506,7 +4729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_hsub_epi32() {
+    const fn test_mm256_hsub_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_hsub_epi32(a, b);
@@ -4515,7 +4738,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_hsubs_epi16() {
+    fn test_mm256_hsubs_epi16() {
         let a = _mm256_set1_epi16(2);
         let a = _mm256_insert_epi16::<0>(a, 0x7fff);
         let a = _mm256_insert_epi16::<1>(a, -1);
@@ -4526,7 +4749,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_madd_epi16() {
+    fn test_mm256_madd_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_madd_epi16(a, b);
@@ -4534,8 +4757,28 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
+    #[target_feature(enable = "avx2")]
+    #[cfg_attr(test, assert_instr(vpmaddwd))]
+    unsafe fn test_mm256_madd_epi16_mul_one(v: __m256i) -> __m256i {
+        // This is a trick used in the adler32 algorithm to get a widening addition. The
+        // multiplication by 1 is trivial, but must not be optimized out because then the vpmaddwd
+        // instruction is no longer selected. The assert_instr verifies that this is the case.
+        let one_v = _mm256_set1_epi16(1);
+        _mm256_madd_epi16(v, one_v)
+    }
+
+    #[target_feature(enable = "avx2")]
+    #[cfg_attr(test, assert_instr(vpmaddwd))]
+    unsafe fn test_mm256_madd_epi16_shl(v: __m256i) -> __m256i {
+        // This is a trick used in the base64 algorithm to get a widening addition. Instead of a
+        // multiplication, a vector shl is used. In LLVM 22 that breaks the pattern recognition
+        // for the automatic optimization to vpmaddwd.
+        let shift_value = _mm256_set1_epi32(12i32);
+        _mm256_madd_epi16(v, shift_value)
+    }
+
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_inserti128_si256() {
+    const fn test_mm256_inserti128_si256() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let b = _mm_setr_epi64x(7, 8);
         let r = _mm256_inserti128_si256::<1>(a, b);
@@ -4544,7 +4787,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_maddubs_epi16() {
+    fn test_mm256_maddubs_epi16() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
         let r = _mm256_maddubs_epi16(a, b);
@@ -4553,87 +4796,95 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_maskload_epi32() {
+    const fn test_mm_maskload_epi32() {
         let nums = [1, 2, 3, 4];
         let a = &nums as *const i32;
         let mask = _mm_setr_epi32(-1, 0, 0, -1);
-        let r = _mm_maskload_epi32(a, mask);
+        let r = unsafe { _mm_maskload_epi32(a, mask) };
         let e = _mm_setr_epi32(1, 0, 0, 4);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_maskload_epi32() {
+    const fn test_mm256_maskload_epi32() {
         let nums = [1, 2, 3, 4, 5, 6, 7, 8];
         let a = &nums as *const i32;
         let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
-        let r = _mm256_maskload_epi32(a, mask);
+        let r = unsafe { _mm256_maskload_epi32(a, mask) };
         let e = _mm256_setr_epi32(1, 0, 0, 4, 0, 6, 7, 0);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_maskload_epi64() {
+    const fn test_mm_maskload_epi64() {
         let nums = [1_i64, 2_i64];
         let a = &nums as *const i64;
         let mask = _mm_setr_epi64x(0, -1);
-        let r = _mm_maskload_epi64(a, mask);
+        let r = unsafe { _mm_maskload_epi64(a, mask) };
         let e = _mm_setr_epi64x(0, 2);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_maskload_epi64() {
+    const fn test_mm256_maskload_epi64() {
         let nums = [1_i64, 2_i64, 3_i64, 4_i64];
         let a = &nums as *const i64;
         let mask = _mm256_setr_epi64x(0, -1, -1, 0);
-        let r = _mm256_maskload_epi64(a, mask);
+        let r = unsafe { _mm256_maskload_epi64(a, mask) };
         let e = _mm256_setr_epi64x(0, 2, 3, 0);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_maskstore_epi32() {
+    const fn test_mm_maskstore_epi32() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let mut arr = [-1, -1, -1, -1];
         let mask = _mm_setr_epi32(-1, 0, 0, -1);
-        _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
+        unsafe {
+            _mm_maskstore_epi32(arr.as_mut_ptr(), mask, a);
+        }
         let e = [1, -1, -1, 4];
         assert_eq!(arr, e);
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_maskstore_epi32() {
+    const fn test_mm256_maskstore_epi32() {
         let a = _mm256_setr_epi32(1, 0x6d726f, 3, 42, 0x777161, 6, 7, 8);
         let mut arr = [-1, -1, -1, 0x776173, -1, 0x68657265, -1, -1];
         let mask = _mm256_setr_epi32(-1, 0, 0, -1, 0, -1, -1, 0);
-        _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
+        unsafe {
+            _mm256_maskstore_epi32(arr.as_mut_ptr(), mask, a);
+        }
         let e = [1, -1, -1, 42, -1, 6, 7, -1];
         assert_eq!(arr, e);
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_maskstore_epi64() {
+    const fn test_mm_maskstore_epi64() {
         let a = _mm_setr_epi64x(1_i64, 2_i64);
         let mut arr = [-1_i64, -1_i64];
         let mask = _mm_setr_epi64x(0, -1);
-        _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
+        unsafe {
+            _mm_maskstore_epi64(arr.as_mut_ptr(), mask, a);
+        }
         let e = [-1, 2];
         assert_eq!(arr, e);
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_maskstore_epi64() {
+    const fn test_mm256_maskstore_epi64() {
         let a = _mm256_setr_epi64x(1_i64, 2_i64, 3_i64, 4_i64);
         let mut arr = [-1_i64, -1_i64, -1_i64, -1_i64];
         let mask = _mm256_setr_epi64x(0, -1, -1, 0);
-        _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
+        unsafe {
+            _mm256_maskstore_epi64(arr.as_mut_ptr(), mask, a);
+        }
         let e = [-1, 2, 3, -1];
         assert_eq!(arr, e);
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_max_epi16() {
+    const fn test_mm256_max_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_max_epi16(a, b);
@@ -4641,7 +4892,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_max_epi32() {
+    const fn test_mm256_max_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_max_epi32(a, b);
@@ -4649,7 +4900,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_max_epi8() {
+    const fn test_mm256_max_epi8() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
         let r = _mm256_max_epi8(a, b);
@@ -4657,7 +4908,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_max_epu16() {
+    const fn test_mm256_max_epu16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_max_epu16(a, b);
@@ -4665,7 +4916,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_max_epu32() {
+    const fn test_mm256_max_epu32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_max_epu32(a, b);
@@ -4673,7 +4924,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_max_epu8() {
+    const fn test_mm256_max_epu8() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
         let r = _mm256_max_epu8(a, b);
@@ -4681,7 +4932,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_min_epi16() {
+    const fn test_mm256_min_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_min_epi16(a, b);
@@ -4689,7 +4940,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_min_epi32() {
+    const fn test_mm256_min_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_min_epi32(a, b);
@@ -4697,7 +4948,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_min_epi8() {
+    const fn test_mm256_min_epi8() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
         let r = _mm256_min_epi8(a, b);
@@ -4705,7 +4956,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_min_epu16() {
+    const fn test_mm256_min_epu16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_min_epu16(a, b);
@@ -4713,7 +4964,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_min_epu32() {
+    const fn test_mm256_min_epu32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_min_epu32(a, b);
@@ -4721,7 +4972,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_min_epu8() {
+    const fn test_mm256_min_epu8() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
         let r = _mm256_min_epu8(a, b);
@@ -4729,7 +4980,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_movemask_epi8() {
+    const fn test_mm256_movemask_epi8() {
         let a = _mm256_set1_epi8(-1);
         let r = _mm256_movemask_epi8(a);
         let e = -1;
@@ -4737,7 +4988,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mpsadbw_epu8() {
+    fn test_mm256_mpsadbw_epu8() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
         let r = _mm256_mpsadbw_epu8::<0>(a, b);
@@ -4746,7 +4997,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mul_epi32() {
+    const fn test_mm256_mul_epi32() {
         let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
         let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_mul_epi32(a, b);
@@ -4755,7 +5006,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mul_epu32() {
+    const fn test_mm256_mul_epu32() {
         let a = _mm256_setr_epi32(0, 0, 0, 0, 2, 2, 2, 2);
         let b = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_mul_epu32(a, b);
@@ -4764,7 +5015,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mulhi_epi16() {
+    const fn test_mm256_mulhi_epi16() {
         let a = _mm256_set1_epi16(6535);
         let b = _mm256_set1_epi16(6535);
         let r = _mm256_mulhi_epi16(a, b);
@@ -4773,7 +5024,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mulhi_epu16() {
+    const fn test_mm256_mulhi_epu16() {
         let a = _mm256_set1_epi16(6535);
         let b = _mm256_set1_epi16(6535);
         let r = _mm256_mulhi_epu16(a, b);
@@ -4782,7 +5033,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mullo_epi16() {
+    const fn test_mm256_mullo_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_mullo_epi16(a, b);
@@ -4791,7 +5042,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mullo_epi32() {
+    const fn test_mm256_mullo_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_mullo_epi32(a, b);
@@ -4800,7 +5051,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mulhrs_epi16() {
+    fn test_mm256_mulhrs_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_mullo_epi16(a, b);
@@ -4809,7 +5060,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_or_si256() {
+    const fn test_mm256_or_si256() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(0);
         let r = _mm256_or_si256(a, b);
@@ -4817,7 +5068,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_packs_epi16() {
+    const fn test_mm256_packs_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_packs_epi16(a, b);
@@ -4833,7 +5084,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_packs_epi32() {
+    const fn test_mm256_packs_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_packs_epi32(a, b);
@@ -4843,7 +5094,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_packus_epi16() {
+    const fn test_mm256_packus_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(4);
         let r = _mm256_packus_epi16(a, b);
@@ -4859,7 +5110,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_packus_epi32() {
+    const fn test_mm256_packus_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(4);
         let r = _mm256_packus_epi32(a, b);
@@ -4869,7 +5120,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sad_epu8() {
+    fn test_mm256_sad_epu8() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
         let r = _mm256_sad_epu8(a, b);
@@ -4878,7 +5129,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_shufflehi_epi16() {
+    const fn test_mm256_shufflehi_epi16() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi16(
             0, 1, 2, 3, 11, 22, 33, 44,
@@ -4894,7 +5145,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_shufflelo_epi16() {
+    const fn test_mm256_shufflelo_epi16() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi16(
             11, 22, 33, 44, 0, 1, 2, 3,
@@ -4910,7 +5161,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sign_epi16() {
+    fn test_mm256_sign_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(-1);
         let r = _mm256_sign_epi16(a, b);
@@ -4919,7 +5170,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sign_epi32() {
+    fn test_mm256_sign_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(-1);
         let r = _mm256_sign_epi32(a, b);
@@ -4928,7 +5179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sign_epi8() {
+    fn test_mm256_sign_epi8() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(-1);
         let r = _mm256_sign_epi8(a, b);
@@ -4937,7 +5188,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sll_epi16() {
+    fn test_mm256_sll_epi16() {
         let a = _mm256_set1_epi16(0xFF);
         let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
         let r = _mm256_sll_epi16(a, b);
@@ -4945,7 +5196,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sll_epi32() {
+    fn test_mm256_sll_epi32() {
         let a = _mm256_set1_epi32(0xFFFF);
         let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
         let r = _mm256_sll_epi32(a, b);
@@ -4953,7 +5204,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sll_epi64() {
+    fn test_mm256_sll_epi64() {
         let a = _mm256_set1_epi64x(0xFFFFFFFF);
         let b = _mm_insert_epi64::<0>(_mm_set1_epi64x(0), 4);
         let r = _mm256_sll_epi64(a, b);
@@ -4961,7 +5212,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_slli_epi16() {
+    const fn test_mm256_slli_epi16() {
         assert_eq_m256i(
             _mm256_slli_epi16::<4>(_mm256_set1_epi16(0xFF)),
             _mm256_set1_epi16(0xFF0),
@@ -4969,7 +5220,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_slli_epi32() {
+    const fn test_mm256_slli_epi32() {
         assert_eq_m256i(
             _mm256_slli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
             _mm256_set1_epi32(0xFFFF0),
@@ -4977,7 +5228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_slli_epi64() {
+    const fn test_mm256_slli_epi64() {
         assert_eq_m256i(
             _mm256_slli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
             _mm256_set1_epi64x(0xFFFFFFFF0),
@@ -4985,14 +5236,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_slli_si256() {
+    const fn test_mm256_slli_si256() {
         let a = _mm256_set1_epi64x(0xFFFFFFFF);
         let r = _mm256_slli_si256::<3>(a);
         assert_eq_m256i(r, _mm256_set1_epi64x(0xFFFFFFFF000000));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_sllv_epi32() {
+    const fn test_mm_sllv_epi32() {
         let a = _mm_set1_epi32(2);
         let b = _mm_set1_epi32(1);
         let r = _mm_sllv_epi32(a, b);
@@ -5001,7 +5252,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sllv_epi32() {
+    const fn test_mm256_sllv_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_sllv_epi32(a, b);
@@ -5010,7 +5261,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_sllv_epi64() {
+    const fn test_mm_sllv_epi64() {
         let a = _mm_set1_epi64x(2);
         let b = _mm_set1_epi64x(1);
         let r = _mm_sllv_epi64(a, b);
@@ -5019,7 +5270,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sllv_epi64() {
+    const fn test_mm256_sllv_epi64() {
         let a = _mm256_set1_epi64x(2);
         let b = _mm256_set1_epi64x(1);
         let r = _mm256_sllv_epi64(a, b);
@@ -5028,7 +5279,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sra_epi16() {
+    fn test_mm256_sra_epi16() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm256_sra_epi16(a, b);
@@ -5036,7 +5287,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sra_epi32() {
+    fn test_mm256_sra_epi32() {
         let a = _mm256_set1_epi32(-1);
         let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 1);
         let r = _mm256_sra_epi32(a, b);
@@ -5044,7 +5295,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srai_epi16() {
+    const fn test_mm256_srai_epi16() {
         assert_eq_m256i(
             _mm256_srai_epi16::<1>(_mm256_set1_epi16(-1)),
             _mm256_set1_epi16(-1),
@@ -5052,7 +5303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srai_epi32() {
+    const fn test_mm256_srai_epi32() {
         assert_eq_m256i(
             _mm256_srai_epi32::<1>(_mm256_set1_epi32(-1)),
             _mm256_set1_epi32(-1),
@@ -5060,7 +5311,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_srav_epi32() {
+    const fn test_mm_srav_epi32() {
         let a = _mm_set1_epi32(4);
         let count = _mm_set1_epi32(1);
         let r = _mm_srav_epi32(a, count);
@@ -5069,7 +5320,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srav_epi32() {
+    const fn test_mm256_srav_epi32() {
         let a = _mm256_set1_epi32(4);
         let count = _mm256_set1_epi32(1);
         let r = _mm256_srav_epi32(a, count);
@@ -5078,7 +5329,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srli_si256() {
+    const fn test_mm256_srli_si256() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -5098,7 +5349,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srl_epi16() {
+    fn test_mm256_srl_epi16() {
         let a = _mm256_set1_epi16(0xFF);
         let b = _mm_insert_epi16::<0>(_mm_set1_epi16(0), 4);
         let r = _mm256_srl_epi16(a, b);
@@ -5106,7 +5357,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srl_epi32() {
+    fn test_mm256_srl_epi32() {
         let a = _mm256_set1_epi32(0xFFFF);
         let b = _mm_insert_epi32::<0>(_mm_set1_epi32(0), 4);
         let r = _mm256_srl_epi32(a, b);
@@ -5114,7 +5365,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srl_epi64() {
+    fn test_mm256_srl_epi64() {
         let a = _mm256_set1_epi64x(0xFFFFFFFF);
         let b = _mm_setr_epi64x(4, 0);
         let r = _mm256_srl_epi64(a, b);
@@ -5122,7 +5373,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srli_epi16() {
+    const fn test_mm256_srli_epi16() {
         assert_eq_m256i(
             _mm256_srli_epi16::<4>(_mm256_set1_epi16(0xFF)),
             _mm256_set1_epi16(0xF),
@@ -5130,7 +5381,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srli_epi32() {
+    const fn test_mm256_srli_epi32() {
         assert_eq_m256i(
             _mm256_srli_epi32::<4>(_mm256_set1_epi32(0xFFFF)),
             _mm256_set1_epi32(0xFFF),
@@ -5138,7 +5389,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srli_epi64() {
+    const fn test_mm256_srli_epi64() {
         assert_eq_m256i(
             _mm256_srli_epi64::<4>(_mm256_set1_epi64x(0xFFFFFFFF)),
             _mm256_set1_epi64x(0xFFFFFFF),
@@ -5146,7 +5397,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_srlv_epi32() {
+    const fn test_mm_srlv_epi32() {
         let a = _mm_set1_epi32(2);
         let count = _mm_set1_epi32(1);
         let r = _mm_srlv_epi32(a, count);
@@ -5155,7 +5406,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srlv_epi32() {
+    const fn test_mm256_srlv_epi32() {
         let a = _mm256_set1_epi32(2);
         let count = _mm256_set1_epi32(1);
         let r = _mm256_srlv_epi32(a, count);
@@ -5164,7 +5415,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_srlv_epi64() {
+    const fn test_mm_srlv_epi64() {
         let a = _mm_set1_epi64x(2);
         let count = _mm_set1_epi64x(1);
         let r = _mm_srlv_epi64(a, count);
@@ -5173,7 +5424,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_srlv_epi64() {
+    const fn test_mm256_srlv_epi64() {
         let a = _mm256_set1_epi64x(2);
         let count = _mm256_set1_epi64x(1);
         let r = _mm256_srlv_epi64(a, count);
@@ -5182,14 +5433,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_stream_load_si256() {
+    fn test_mm256_stream_load_si256() {
         let a = _mm256_set_epi64x(5, 6, 7, 8);
-        let r = _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _);
+        let r = unsafe { _mm256_stream_load_si256(core::ptr::addr_of!(a) as *const _) };
         assert_eq_m256i(a, r);
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sub_epi16() {
+    const fn test_mm256_sub_epi16() {
         let a = _mm256_set1_epi16(4);
         let b = _mm256_set1_epi16(2);
         let r = _mm256_sub_epi16(a, b);
@@ -5197,7 +5448,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sub_epi32() {
+    const fn test_mm256_sub_epi32() {
         let a = _mm256_set1_epi32(4);
         let b = _mm256_set1_epi32(2);
         let r = _mm256_sub_epi32(a, b);
@@ -5205,7 +5456,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sub_epi64() {
+    const fn test_mm256_sub_epi64() {
         let a = _mm256_set1_epi64x(4);
         let b = _mm256_set1_epi64x(2);
         let r = _mm256_sub_epi64(a, b);
@@ -5213,7 +5464,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_sub_epi8() {
+    const fn test_mm256_sub_epi8() {
         let a = _mm256_set1_epi8(4);
         let b = _mm256_set1_epi8(2);
         let r = _mm256_sub_epi8(a, b);
@@ -5221,7 +5472,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_subs_epi16() {
+    const fn test_mm256_subs_epi16() {
         let a = _mm256_set1_epi16(4);
         let b = _mm256_set1_epi16(2);
         let r = _mm256_subs_epi16(a, b);
@@ -5229,7 +5480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_subs_epi8() {
+    const fn test_mm256_subs_epi8() {
         let a = _mm256_set1_epi8(4);
         let b = _mm256_set1_epi8(2);
         let r = _mm256_subs_epi8(a, b);
@@ -5237,7 +5488,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_subs_epu16() {
+    const fn test_mm256_subs_epu16() {
         let a = _mm256_set1_epi16(4);
         let b = _mm256_set1_epi16(2);
         let r = _mm256_subs_epu16(a, b);
@@ -5245,7 +5496,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_subs_epu8() {
+    const fn test_mm256_subs_epu8() {
         let a = _mm256_set1_epi8(4);
         let b = _mm256_set1_epi8(2);
         let r = _mm256_subs_epu8(a, b);
@@ -5253,7 +5504,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_xor_si256() {
+    const fn test_mm256_xor_si256() {
         let a = _mm256_set1_epi8(5);
         let b = _mm256_set1_epi8(3);
         let r = _mm256_xor_si256(a, b);
@@ -5261,7 +5512,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_alignr_epi8() {
+    const fn test_mm256_alignr_epi8() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -5317,7 +5568,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_shuffle_epi8() {
+    fn test_mm256_shuffle_epi8() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -5344,7 +5595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_permutevar8x32_epi32() {
+    fn test_mm256_permutevar8x32_epi32() {
         let a = _mm256_setr_epi32(100, 200, 300, 400, 500, 600, 700, 800);
         let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
         let expected = _mm256_setr_epi32(600, 100, 600, 200, 800, 700, 400, 500);
@@ -5353,7 +5604,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_permute4x64_epi64() {
+    const fn test_mm256_permute4x64_epi64() {
         let a = _mm256_setr_epi64x(100, 200, 300, 400);
         let expected = _mm256_setr_epi64x(400, 100, 200, 100);
         let r = _mm256_permute4x64_epi64::<0b00010011>(a);
@@ -5361,7 +5612,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_permute2x128_si256() {
+    const fn test_mm256_permute2x128_si256() {
         let a = _mm256_setr_epi64x(100, 200, 500, 600);
         let b = _mm256_setr_epi64x(300, 400, 700, 800);
         let r = _mm256_permute2x128_si256::<0b00_01_00_11>(a, b);
@@ -5370,7 +5621,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_permute4x64_pd() {
+    const fn test_mm256_permute4x64_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let r = _mm256_permute4x64_pd::<0b00_01_00_11>(a);
         let e = _mm256_setr_pd(4., 1., 2., 1.);
@@ -5378,7 +5629,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_permutevar8x32_ps() {
+    fn test_mm256_permutevar8x32_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_setr_epi32(5, 0, 5, 1, 7, 6, 3, 4);
         let r = _mm256_permutevar8x32_ps(a, b);
@@ -5387,88 +5638,98 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_i32gather_epi32() {
+    fn test_mm_i32gather_epi32() {
         let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
         // A multiplier of 4 is word-addressing
-        let r = _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
+        let r = unsafe { _mm_i32gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
         assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_mask_i32gather_epi32() {
+    fn test_mm_mask_i32gather_epi32() {
         let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
         // A multiplier of 4 is word-addressing
-        let r = _mm_mask_i32gather_epi32::<4>(
-            _mm_set1_epi32(256),
-            arr.as_ptr(),
-            _mm_setr_epi32(0, 16, 64, 96),
-            _mm_setr_epi32(-1, -1, -1, 0),
-        );
+        let r = unsafe {
+            _mm_mask_i32gather_epi32::<4>(
+                _mm_set1_epi32(256),
+                arr.as_ptr(),
+                _mm_setr_epi32(0, 16, 64, 96),
+                _mm_setr_epi32(-1, -1, -1, 0),
+            )
+        };
         assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_i32gather_epi32() {
+    fn test_mm256_i32gather_epi32() {
         let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
         // A multiplier of 4 is word-addressing
-        let r =
-            _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
+        let r = unsafe {
+            _mm256_i32gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
+        };
         assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mask_i32gather_epi32() {
+    fn test_mm256_mask_i32gather_epi32() {
         let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
         // A multiplier of 4 is word-addressing
-        let r = _mm256_mask_i32gather_epi32::<4>(
-            _mm256_set1_epi32(256),
-            arr.as_ptr(),
-            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
-            _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
-        );
+        let r = unsafe {
+            _mm256_mask_i32gather_epi32::<4>(
+                _mm256_set1_epi32(256),
+                arr.as_ptr(),
+                _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
+                _mm256_setr_epi32(-1, -1, -1, 0, 0, 0, 0, 0),
+            )
+        };
         assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 64, 256, 256, 256, 256, 256));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_i32gather_ps() {
+    fn test_mm_i32gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing for f32s
-        let r = _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
+        let r = unsafe { _mm_i32gather_ps::<4>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
         assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_mask_i32gather_ps() {
+    fn test_mm_mask_i32gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing for f32s
-        let r = _mm_mask_i32gather_ps::<4>(
-            _mm_set1_ps(256.0),
-            arr.as_ptr(),
-            _mm_setr_epi32(0, 16, 64, 96),
-            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
-        );
+        let r = unsafe {
+            _mm_mask_i32gather_ps::<4>(
+                _mm_set1_ps(256.0),
+                arr.as_ptr(),
+                _mm_setr_epi32(0, 16, 64, 96),
+                _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
+            )
+        };
         assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_i32gather_ps() {
+    fn test_mm256_i32gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing for f32s
-        let r =
-            _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4));
+        let r = unsafe {
+            _mm256_i32gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi32(0, 16, 32, 48, 1, 2, 3, 4))
+        };
         assert_eq_m256(r, _mm256_setr_ps(0.0, 16.0, 32.0, 48.0, 1.0, 2.0, 3.0, 4.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mask_i32gather_ps() {
+    fn test_mm256_mask_i32gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing for f32s
-        let r = _mm256_mask_i32gather_ps::<4>(
-            _mm256_set1_ps(256.0),
-            arr.as_ptr(),
-            _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
-            _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
-        );
+        let r = unsafe {
+            _mm256_mask_i32gather_ps::<4>(
+                _mm256_set1_ps(256.0),
+                arr.as_ptr(),
+                _mm256_setr_epi32(0, 16, 64, 96, 0, 0, 0, 0),
+                _mm256_setr_ps(-1.0, -1.0, -1.0, 0.0, 0.0, 0.0, 0.0, 0.0),
+            )
+        };
         assert_eq_m256(
             r,
             _mm256_setr_ps(0.0, 16.0, 64.0, 256.0, 256.0, 256.0, 256.0, 256.0),
@@ -5476,259 +5737,287 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_i32gather_epi64() {
+    fn test_mm_i32gather_epi64() {
         let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
         // A multiplier of 8 is word-addressing for i64s
-        let r = _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
+        let r = unsafe { _mm_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
         assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_mask_i32gather_epi64() {
+    fn test_mm_mask_i32gather_epi64() {
         let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
         // A multiplier of 8 is word-addressing for i64s
-        let r = _mm_mask_i32gather_epi64::<8>(
-            _mm_set1_epi64x(256),
-            arr.as_ptr(),
-            _mm_setr_epi32(16, 16, 16, 16),
-            _mm_setr_epi64x(-1, 0),
-        );
+        let r = unsafe {
+            _mm_mask_i32gather_epi64::<8>(
+                _mm_set1_epi64x(256),
+                arr.as_ptr(),
+                _mm_setr_epi32(16, 16, 16, 16),
+                _mm_setr_epi64x(-1, 0),
+            )
+        };
         assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_i32gather_epi64() {
+    fn test_mm256_i32gather_epi64() {
         let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
         // A multiplier of 8 is word-addressing for i64s
-        let r = _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
+        let r = unsafe { _mm256_i32gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
         assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mask_i32gather_epi64() {
+    fn test_mm256_mask_i32gather_epi64() {
         let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
         // A multiplier of 8 is word-addressing for i64s
-        let r = _mm256_mask_i32gather_epi64::<8>(
-            _mm256_set1_epi64x(256),
-            arr.as_ptr(),
-            _mm_setr_epi32(0, 16, 64, 96),
-            _mm256_setr_epi64x(-1, -1, -1, 0),
-        );
+        let r = unsafe {
+            _mm256_mask_i32gather_epi64::<8>(
+                _mm256_set1_epi64x(256),
+                arr.as_ptr(),
+                _mm_setr_epi32(0, 16, 64, 96),
+                _mm256_setr_epi64x(-1, -1, -1, 0),
+            )
+        };
         assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_i32gather_pd() {
+    fn test_mm_i32gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing for f64s
-        let r = _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0));
+        let r = unsafe { _mm_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 0, 0)) };
         assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_mask_i32gather_pd() {
+    fn test_mm_mask_i32gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing for f64s
-        let r = _mm_mask_i32gather_pd::<8>(
-            _mm_set1_pd(256.0),
-            arr.as_ptr(),
-            _mm_setr_epi32(16, 16, 16, 16),
-            _mm_setr_pd(-1.0, 0.0),
-        );
+        let r = unsafe {
+            _mm_mask_i32gather_pd::<8>(
+                _mm_set1_pd(256.0),
+                arr.as_ptr(),
+                _mm_setr_epi32(16, 16, 16, 16),
+                _mm_setr_pd(-1.0, 0.0),
+            )
+        };
         assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_i32gather_pd() {
+    fn test_mm256_i32gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing for f64s
-        let r = _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48));
+        let r = unsafe { _mm256_i32gather_pd::<8>(arr.as_ptr(), _mm_setr_epi32(0, 16, 32, 48)) };
         assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mask_i32gather_pd() {
+    fn test_mm256_mask_i32gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing for f64s
-        let r = _mm256_mask_i32gather_pd::<8>(
-            _mm256_set1_pd(256.0),
-            arr.as_ptr(),
-            _mm_setr_epi32(0, 16, 64, 96),
-            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
-        );
+        let r = unsafe {
+            _mm256_mask_i32gather_pd::<8>(
+                _mm256_set1_pd(256.0),
+                arr.as_ptr(),
+                _mm_setr_epi32(0, 16, 64, 96),
+                _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
+            )
+        };
         assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_i64gather_epi32() {
+    fn test_mm_i64gather_epi32() {
         let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
         // A multiplier of 4 is word-addressing
-        let r = _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
+        let r = unsafe { _mm_i64gather_epi32::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
         assert_eq_m128i(r, _mm_setr_epi32(0, 16, 0, 0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_mask_i64gather_epi32() {
+    fn test_mm_mask_i64gather_epi32() {
         let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
         // A multiplier of 4 is word-addressing
-        let r = _mm_mask_i64gather_epi32::<4>(
-            _mm_set1_epi32(256),
-            arr.as_ptr(),
-            _mm_setr_epi64x(0, 16),
-            _mm_setr_epi32(-1, 0, -1, 0),
-        );
+        let r = unsafe {
+            _mm_mask_i64gather_epi32::<4>(
+                _mm_set1_epi32(256),
+                arr.as_ptr(),
+                _mm_setr_epi64x(0, 16),
+                _mm_setr_epi32(-1, 0, -1, 0),
+            )
+        };
         assert_eq_m128i(r, _mm_setr_epi32(0, 256, 0, 0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_i64gather_epi32() {
+    fn test_mm256_i64gather_epi32() {
         let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
         // A multiplier of 4 is word-addressing
-        let r = _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
+        let r =
+            unsafe { _mm256_i64gather_epi32::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
         assert_eq_m128i(r, _mm_setr_epi32(0, 16, 32, 48));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mask_i64gather_epi32() {
+    fn test_mm256_mask_i64gather_epi32() {
         let arr: [i32; 128] = core::array::from_fn(|i| i as i32);
         // A multiplier of 4 is word-addressing
-        let r = _mm256_mask_i64gather_epi32::<4>(
-            _mm_set1_epi32(256),
-            arr.as_ptr(),
-            _mm256_setr_epi64x(0, 16, 64, 96),
-            _mm_setr_epi32(-1, -1, -1, 0),
-        );
+        let r = unsafe {
+            _mm256_mask_i64gather_epi32::<4>(
+                _mm_set1_epi32(256),
+                arr.as_ptr(),
+                _mm256_setr_epi64x(0, 16, 64, 96),
+                _mm_setr_epi32(-1, -1, -1, 0),
+            )
+        };
         assert_eq_m128i(r, _mm_setr_epi32(0, 16, 64, 256));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_i64gather_ps() {
+    fn test_mm_i64gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing for f32s
-        let r = _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
+        let r = unsafe { _mm_i64gather_ps::<4>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
         assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 0.0, 0.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_mask_i64gather_ps() {
+    fn test_mm_mask_i64gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing for f32s
-        let r = _mm_mask_i64gather_ps::<4>(
-            _mm_set1_ps(256.0),
-            arr.as_ptr(),
-            _mm_setr_epi64x(0, 16),
-            _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
-        );
+        let r = unsafe {
+            _mm_mask_i64gather_ps::<4>(
+                _mm_set1_ps(256.0),
+                arr.as_ptr(),
+                _mm_setr_epi64x(0, 16),
+                _mm_setr_ps(-1.0, 0.0, -1.0, 0.0),
+            )
+        };
         assert_eq_m128(r, _mm_setr_ps(0.0, 256.0, 0.0, 0.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_i64gather_ps() {
+    fn test_mm256_i64gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing for f32s
-        let r = _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
+        let r =
+            unsafe { _mm256_i64gather_ps::<4>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
         assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 32.0, 48.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mask_i64gather_ps() {
+    fn test_mm256_mask_i64gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing for f32s
-        let r = _mm256_mask_i64gather_ps::<4>(
-            _mm_set1_ps(256.0),
-            arr.as_ptr(),
-            _mm256_setr_epi64x(0, 16, 64, 96),
-            _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
-        );
+        let r = unsafe {
+            _mm256_mask_i64gather_ps::<4>(
+                _mm_set1_ps(256.0),
+                arr.as_ptr(),
+                _mm256_setr_epi64x(0, 16, 64, 96),
+                _mm_setr_ps(-1.0, -1.0, -1.0, 0.0),
+            )
+        };
         assert_eq_m128(r, _mm_setr_ps(0.0, 16.0, 64.0, 256.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_i64gather_epi64() {
+    fn test_mm_i64gather_epi64() {
         let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
         // A multiplier of 8 is word-addressing for i64s
-        let r = _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
+        let r = unsafe { _mm_i64gather_epi64::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
         assert_eq_m128i(r, _mm_setr_epi64x(0, 16));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_mask_i64gather_epi64() {
+    fn test_mm_mask_i64gather_epi64() {
         let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
         // A multiplier of 8 is word-addressing for i64s
-        let r = _mm_mask_i64gather_epi64::<8>(
-            _mm_set1_epi64x(256),
-            arr.as_ptr(),
-            _mm_setr_epi64x(16, 16),
-            _mm_setr_epi64x(-1, 0),
-        );
+        let r = unsafe {
+            _mm_mask_i64gather_epi64::<8>(
+                _mm_set1_epi64x(256),
+                arr.as_ptr(),
+                _mm_setr_epi64x(16, 16),
+                _mm_setr_epi64x(-1, 0),
+            )
+        };
         assert_eq_m128i(r, _mm_setr_epi64x(16, 256));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_i64gather_epi64() {
+    fn test_mm256_i64gather_epi64() {
         let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
         // A multiplier of 8 is word-addressing for i64s
-        let r = _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
+        let r =
+            unsafe { _mm256_i64gather_epi64::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
         assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 32, 48));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mask_i64gather_epi64() {
+    fn test_mm256_mask_i64gather_epi64() {
         let arr: [i64; 128] = core::array::from_fn(|i| i as i64);
         // A multiplier of 8 is word-addressing for i64s
-        let r = _mm256_mask_i64gather_epi64::<8>(
-            _mm256_set1_epi64x(256),
-            arr.as_ptr(),
-            _mm256_setr_epi64x(0, 16, 64, 96),
-            _mm256_setr_epi64x(-1, -1, -1, 0),
-        );
+        let r = unsafe {
+            _mm256_mask_i64gather_epi64::<8>(
+                _mm256_set1_epi64x(256),
+                arr.as_ptr(),
+                _mm256_setr_epi64x(0, 16, 64, 96),
+                _mm256_setr_epi64x(-1, -1, -1, 0),
+            )
+        };
         assert_eq_m256i(r, _mm256_setr_epi64x(0, 16, 64, 256));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_i64gather_pd() {
+    fn test_mm_i64gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing for f64s
-        let r = _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16));
+        let r = unsafe { _mm_i64gather_pd::<8>(arr.as_ptr(), _mm_setr_epi64x(0, 16)) };
         assert_eq_m128d(r, _mm_setr_pd(0.0, 16.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm_mask_i64gather_pd() {
+    fn test_mm_mask_i64gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing for f64s
-        let r = _mm_mask_i64gather_pd::<8>(
-            _mm_set1_pd(256.0),
-            arr.as_ptr(),
-            _mm_setr_epi64x(16, 16),
-            _mm_setr_pd(-1.0, 0.0),
-        );
+        let r = unsafe {
+            _mm_mask_i64gather_pd::<8>(
+                _mm_set1_pd(256.0),
+                arr.as_ptr(),
+                _mm_setr_epi64x(16, 16),
+                _mm_setr_pd(-1.0, 0.0),
+            )
+        };
         assert_eq_m128d(r, _mm_setr_pd(16.0, 256.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_i64gather_pd() {
+    fn test_mm256_i64gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing for f64s
-        let r = _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48));
+        let r =
+            unsafe { _mm256_i64gather_pd::<8>(arr.as_ptr(), _mm256_setr_epi64x(0, 16, 32, 48)) };
         assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 32.0, 48.0));
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_mask_i64gather_pd() {
+    fn test_mm256_mask_i64gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing for f64s
-        let r = _mm256_mask_i64gather_pd::<8>(
-            _mm256_set1_pd(256.0),
-            arr.as_ptr(),
-            _mm256_setr_epi64x(0, 16, 64, 96),
-            _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
-        );
+        let r = unsafe {
+            _mm256_mask_i64gather_pd::<8>(
+                _mm256_set1_pd(256.0),
+                arr.as_ptr(),
+                _mm256_setr_epi64x(0, 16, 64, 96),
+                _mm256_setr_pd(-1.0, -1.0, -1.0, 0.0),
+            )
+        };
         assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
     }
 
-    #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_extract_epi8() {
+    #[simd_test(enable = "avx2")]
+    const fn test_mm256_extract_epi8() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi8(
             -1, 1, 2, 3, 4, 5, 6, 7,
@@ -5743,7 +6032,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx2")]
-    unsafe fn test_mm256_extract_epi16() {
+    const fn test_mm256_extract_epi16() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi16(
             -1, 1, 2, 3, 4, 5, 6, 7,
diff --git a/crates/core_arch/src/x86/avx512bf16.rs b/crates/core_arch/src/x86/avx512bf16.rs
index 85afd91fba..8d944f5ba8 100644
--- a/crates/core_arch/src/x86/avx512bf16.rs
+++ b/crates/core_arch/src/x86/avx512bf16.rs
@@ -2,7 +2,6 @@
 //!
 //! [AVX512BF16 intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769&avx512techs=AVX512_BF16
 
-use crate::arch::asm;
 use crate::core_arch::{simd::*, x86::*};
 use crate::intrinsics::simd::*;
 
@@ -17,6 +16,8 @@ unsafe extern "C" {
     fn cvtne2ps2bf16_256(a: f32x8, b: f32x8) -> i16x16;
     #[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.512"]
     fn cvtne2ps2bf16_512(a: f32x16, b: f32x16) -> i16x32;
+    #[link_name = "llvm.x86.avx512bf16.mask.cvtneps2bf16.128"]
+    fn cvtneps2bf16_128(a: f32x4, src: i16x8, k: __mmask8) -> i16x8;
     #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.256"]
     fn cvtneps2bf16_256(a: f32x8) -> i16x8;
     #[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
@@ -519,16 +520,7 @@ pub fn _mm_cvtsbh_ss(a: bf16) -> f32 {
 #[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
-    unsafe {
-        let mut dst: __m128bh;
-        asm!(
-            "vcvtneps2bf16 {dst}, {src}",
-            dst = lateout(xmm_reg) dst,
-            src = in(xmm_reg) a,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    _mm_mask_cvtneps_pbh(__m128bh::splat(0), !0, a)
 }
 
 /// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@@ -541,17 +533,7 @@ pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
 #[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
-    unsafe {
-        let mut dst = src;
-        asm!(
-            "vcvtneps2bf16 {dst}{{{k}}},{src}",
-            dst = inlateout(xmm_reg) dst,
-            src = in(xmm_reg) a,
-            k = in(kreg) k,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    unsafe { cvtneps2bf16_128(a.as_f32x4(), src.as_i16x8(), k).as_m128bh() }
 }
 
 /// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@@ -564,17 +546,7 @@ pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
 #[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
-    unsafe {
-        let mut dst: __m128bh;
-        asm!(
-            "vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
-            dst = lateout(xmm_reg) dst,
-            src = in(xmm_reg) a,
-            k = in(kreg) k,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    _mm_mask_cvtneps_pbh(__m128bh::splat(0), k, a)
 }
 
 /// Converts a single-precision (32-bit) floating-point element in a to a BF16 (16-bit) floating-point
@@ -593,7 +565,7 @@ pub fn _mm_cvtness_sbh(a: f32) -> bf16 {
 
 #[cfg(test)]
 mod tests {
-    use crate::core_arch::simd::u16x4;
+    use crate::core_arch::simd::{f32x4, f32x8, f32x16, u16x4, u16x8, u16x16, u16x32};
     use crate::{
         core_arch::x86::*,
         mem::{transmute, transmute_copy},
@@ -601,13 +573,13 @@ mod tests {
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_cvtne2ps_pbh() {
+    fn test_mm_cvtne2ps_pbh() {
         let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
         let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
-        let a: __m128 = transmute(a_array);
-        let b: __m128 = transmute(b_array);
+        let a = f32x4::from_array(a_array).as_m128();
+        let b = f32x4::from_array(b_array).as_m128();
         let c: __m128bh = _mm_cvtne2ps_pbh(a, b);
-        let result: [u16; 8] = transmute(c.as_u16x8());
+        let result = *c.as_u16x8().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 8] = [
             0b1_10000110_0110010,
@@ -623,7 +595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_mask_cvtne2ps_pbh() {
+    fn test_mm_mask_cvtne2ps_pbh() {
         let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
         let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
         #[rustfmt::skip]
@@ -637,12 +609,12 @@ mod tests {
             0b0_10000000_1110000,
             0b0_10000100_1001001,
         ];
-        let src: __m128bh = transmute(src_array);
-        let a: __m128 = transmute(a_array);
-        let b: __m128 = transmute(b_array);
+        let src = u16x8::from_array(src_array).as_m128bh();
+        let a = f32x4::from_array(a_array).as_m128();
+        let b = f32x4::from_array(b_array).as_m128();
         let k: __mmask8 = 0b1111_1111;
         let c: __m128bh = _mm_mask_cvtne2ps_pbh(src, k, a, b);
-        let result: [u16; 8] = transmute(c.as_u16x8());
+        let result = *c.as_u16x8().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 8] = [
             0b1_10000110_0110010,
@@ -657,20 +629,20 @@ mod tests {
         assert_eq!(result, expected_result);
         let k = 0b0000_0000;
         let c = _mm_mask_cvtne2ps_pbh(src, k, a, b);
-        let result: [u16; 8] = transmute(c.as_u16x8());
+        let result = *c.as_u16x8().as_array();
         let expected_result = src_array;
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtne2ps_pbh() {
+    fn test_mm_maskz_cvtne2ps_pbh() {
         let a_array = [178.125_f32, 10.5_f32, 3.75_f32, 50.25_f32];
         let b_array = [-178.125_f32, -10.5_f32, -3.75_f32, -50.25_f32];
-        let a: __m128 = transmute(a_array);
-        let b: __m128 = transmute(b_array);
+        let a = f32x4::from_array(a_array).as_m128();
+        let b = f32x4::from_array(b_array).as_m128();
         let k: __mmask8 = 0b1111_1111;
         let c: __m128bh = _mm_maskz_cvtne2ps_pbh(k, a, b);
-        let result: [u16; 8] = transmute(c.as_u16x8());
+        let result = *c.as_u16x8().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 8] = [
             0b1_10000110_0110010,
@@ -685,7 +657,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k = 0b0011_1100;
         let c = _mm_maskz_cvtne2ps_pbh(k, a, b);
-        let result: [u16; 8] = transmute(c.as_u16x8());
+        let result = *c.as_u16x8().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 8] = [
             0,
@@ -701,7 +673,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_cvtne2ps_pbh() {
+    fn test_mm256_cvtne2ps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -723,10 +695,10 @@ mod tests {
             -1000.158_f32,
             -575.575_f32,
         ];
-        let a: __m256 = transmute(a_array);
-        let b: __m256 = transmute(b_array);
+        let a = f32x8::from_array(a_array).as_m256();
+        let b = f32x8::from_array(b_array).as_m256();
         let c: __m256bh = _mm256_cvtne2ps_pbh(a, b);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 16] = [
             0b1_10000110_0110010,
@@ -750,7 +722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtne2ps_pbh() {
+    fn test_mm256_mask_cvtne2ps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -790,12 +762,12 @@ mod tests {
             0b0_10000000_1110000,
             0b0_10000100_1001001,
         ];
-        let src: __m256bh = transmute(src_array);
-        let a: __m256 = transmute(a_array);
-        let b: __m256 = transmute(b_array);
+        let src = u16x16::from_array(src_array).as_m256bh();
+        let a = f32x8::from_array(a_array).as_m256();
+        let b = f32x8::from_array(b_array).as_m256();
         let k: __mmask16 = 0xffff;
         let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 16] = [
             0b1_10000110_0110010,
@@ -818,13 +790,13 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask16 = 0;
         let c: __m256bh = _mm256_mask_cvtne2ps_pbh(src, k, a, b);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         let expected_result = src_array;
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtne2ps_pbh() {
+    fn test_mm256_maskz_cvtne2ps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -846,11 +818,11 @@ mod tests {
             -1000.158_f32,
             -575.575_f32,
         ];
-        let a: __m256 = transmute(a_array);
-        let b: __m256 = transmute(b_array);
+        let a = f32x8::from_array(a_array).as_m256();
+        let b = f32x8::from_array(b_array).as_m256();
         let k: __mmask16 = 0xffff;
         let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 16] = [
             0b1_10000110_0110010,
@@ -873,7 +845,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask16 = 0b0110_1100_0011_0110;
         let c: __m256bh = _mm256_maskz_cvtne2ps_pbh(k, a, b);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 16] = [
             0,
@@ -897,7 +869,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512f")]
-    unsafe fn test_mm512_cvtne2ps_pbh() {
+    fn test_mm512_cvtne2ps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -935,10 +907,10 @@ mod tests {
             -1000.158_f32,
             -575.575_f32,
         ];
-        let a: __m512 = transmute(a_array);
-        let b: __m512 = transmute(b_array);
+        let a = f32x16::from_array(a_array).as_m512();
+        let b = f32x16::from_array(b_array).as_m512();
         let c: __m512bh = _mm512_cvtne2ps_pbh(a, b);
-        let result: [u16; 32] = transmute(c.as_u16x32());
+        let result = *c.as_u16x32().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 32] = [
             0b1_10000110_0110010,
@@ -978,7 +950,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512f")]
-    unsafe fn test_mm512_mask_cvtne2ps_pbh() {
+    fn test_mm512_mask_cvtne2ps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -1050,12 +1022,12 @@ mod tests {
             0b0_10000000_1110000,
             0b0_10000100_1001001,
         ];
-        let src: __m512bh = transmute(src_array);
-        let a: __m512 = transmute(a_array);
-        let b: __m512 = transmute(b_array);
+        let src = u16x32::from_array(src_array).as_m512bh();
+        let a = f32x16::from_array(a_array).as_m512();
+        let b = f32x16::from_array(b_array).as_m512();
         let k: __mmask32 = 0xffffffff;
         let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
-        let result: [u16; 32] = transmute(c.as_u16x32());
+        let result = *c.as_u16x32().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 32] = [
             0b1_10000110_0110010,
@@ -1094,13 +1066,13 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask32 = 0;
         let c: __m512bh = _mm512_mask_cvtne2ps_pbh(src, k, a, b);
-        let result: [u16; 32] = transmute(c.as_u16x32());
+        let result = *c.as_u16x32().as_array();
         let expected_result = src_array;
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512f")]
-    unsafe fn test_mm512_maskz_cvtne2ps_pbh() {
+    fn test_mm512_maskz_cvtne2ps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -1138,11 +1110,11 @@ mod tests {
             -1000.158_f32,
             -575.575_f32,
         ];
-        let a: __m512 = transmute(a_array);
-        let b: __m512 = transmute(b_array);
+        let a = f32x16::from_array(a_array).as_m512();
+        let b = f32x16::from_array(b_array).as_m512();
         let k: __mmask32 = 0xffffffff;
         let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
-        let result: [u16; 32] = transmute(c.as_u16x32());
+        let result = *c.as_u16x32().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 32] = [
             0b1_10000110_0110010,
@@ -1181,7 +1153,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask32 = 0b1100_1010_1001_0110_1010_0011_0101_0110;
         let c: __m512bh = _mm512_maskz_cvtne2ps_pbh(k, a, b);
-        let result: [u16; 32] = transmute(c.as_u16x32());
+        let result = *c.as_u16x32().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 32] = [
             0,
@@ -1221,7 +1193,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_cvtneps_pbh() {
+    fn test_mm256_cvtneps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -1233,9 +1205,9 @@ mod tests {
             1000.158_f32,
             575.575_f32,
         ];
-        let a: __m256 = transmute(a_array);
+        let a = f32x8::from_array(a_array).as_m256();
         let c: __m128bh = _mm256_cvtneps_pbh(a);
-        let result: [u16; 8] = transmute(c.as_u16x8());
+        let result = *c.as_u16x8().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 8] = [
             0b0_10000110_0110010,
@@ -1251,7 +1223,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtneps_pbh() {
+    fn test_mm256_mask_cvtneps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -1273,11 +1245,11 @@ mod tests {
             0b1_10001000_1111010,
             0b1_10001000_0010000,
         ];
-        let src: __m128bh = transmute(src_array);
-        let a: __m256 = transmute(a_array);
+        let src = u16x8::from_array(src_array).as_m128bh();
+        let a = f32x8::from_array(a_array).as_m256();
         let k: __mmask8 = 0xff;
         let b = _mm256_mask_cvtneps_pbh(src, k, a);
-        let result: [u16; 8] = transmute(b.as_u16x8());
+        let result = *b.as_u16x8().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 8] = [
             0b0_10000110_0110010,
@@ -1292,13 +1264,13 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0x0;
         let b: __m128bh = _mm256_mask_cvtneps_pbh(src, k, a);
-        let result: [u16; 8] = transmute(b.as_u16x8());
+        let result = *b.as_u16x8().as_array();
         let expected_result: [u16; 8] = src_array;
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtneps_pbh() {
+    fn test_mm256_maskz_cvtneps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -1310,10 +1282,10 @@ mod tests {
             1000.158_f32,
             575.575_f32,
         ];
-        let a: __m256 = transmute(a_array);
+        let a = f32x8::from_array(a_array).as_m256();
         let k: __mmask8 = 0xff;
         let b = _mm256_maskz_cvtneps_pbh(k, a);
-        let result: [u16; 8] = transmute(b.as_u16x8());
+        let result = *b.as_u16x8().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 8] = [
             0b0_10000110_0110010,
@@ -1328,14 +1300,14 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0x6;
         let b: __m128bh = _mm256_maskz_cvtneps_pbh(k, a);
-        let result: [u16; 8] = transmute(b.as_u16x8());
+        let result = *b.as_u16x8().as_array();
         let expected_result: [u16; 8] =
             [0, 0b0_10000010_0101000, 0b0_10000000_1110000, 0, 0, 0, 0, 0];
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512f")]
-    unsafe fn test_mm512_cvtneps_pbh() {
+    fn test_mm512_cvtneps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -1355,9 +1327,9 @@ mod tests {
             1000.158_f32,
             575.575_f32,
         ];
-        let a: __m512 = transmute(a_array);
+        let a = f32x16::from_array(a_array).as_m512();
         let c: __m256bh = _mm512_cvtneps_pbh(a);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 16] = [
             0b0_10000110_0110010,
@@ -1381,7 +1353,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512f")]
-    unsafe fn test_mm512_mask_cvtneps_pbh() {
+    fn test_mm512_mask_cvtneps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -1419,11 +1391,11 @@ mod tests {
             0b1_10001000_1111010,
             0b1_10001000_0010000,
         ];
-        let src: __m256bh = transmute(src_array);
-        let a: __m512 = transmute(a_array);
+        let src = u16x16::from_array(src_array).as_m256bh();
+        let a = f32x16::from_array(a_array).as_m512();
         let k: __mmask16 = 0xffff;
         let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 16] = [
             0b0_10000110_0110010,
@@ -1446,13 +1418,13 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask16 = 0;
         let c: __m256bh = _mm512_mask_cvtneps_pbh(src, k, a);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         let expected_result = src_array;
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512f")]
-    unsafe fn test_mm512_maskz_cvtneps_pbh() {
+    fn test_mm512_maskz_cvtneps_pbh() {
         #[rustfmt::skip]
         let a_array = [
             178.125_f32,
@@ -1472,10 +1444,10 @@ mod tests {
             1000.158_f32,
             575.575_f32,
         ];
-        let a: __m512 = transmute(a_array);
+        let a = f32x16::from_array(a_array).as_m512();
         let k: __mmask16 = 0xffff;
         let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 16] = [
             0b0_10000110_0110010,
@@ -1498,7 +1470,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask16 = 0x653a;
         let c: __m256bh = _mm512_maskz_cvtneps_pbh(k, a);
-        let result: [u16; 16] = transmute(c.as_u16x16());
+        let result = *c.as_u16x16().as_array();
         #[rustfmt::skip]
         let expected_result: [u16; 16] = [
             0,
@@ -1522,74 +1494,74 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_dpbf16_ps() {
+    fn test_mm_dpbf16_ps() {
         let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
         let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
-        let a1: __m128 = transmute(a_array);
-        let b1: __m128 = transmute(b_array);
-        let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
+        let a1 = f32x4::from_array(a_array).as_m128();
+        let b1 = f32x4::from_array(b_array).as_m128();
+        let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
         let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
         let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
         let c: __m128 = _mm_dpbf16_ps(src, a, b);
-        let result: [f32; 4] = transmute(c.as_f32x4());
+        let result = *c.as_f32x4().as_array();
         let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_mask_dpbf16_ps() {
+    fn test_mm_mask_dpbf16_ps() {
         let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
         let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
-        let a1: __m128 = transmute(a_array);
-        let b1: __m128 = transmute(b_array);
+        let a1 = f32x4::from_array(a_array).as_m128();
+        let b1 = f32x4::from_array(b_array).as_m128();
         let k: __mmask8 = 0xf3;
-        let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
+        let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
         let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
         let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
         let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
-        let result: [f32; 4] = transmute(c.as_f32x4());
+        let result = *c.as_f32x4().as_array();
         let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32];
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0xff;
         let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
-        let result: [f32; 4] = transmute(c.as_f32x4());
+        let result = *c.as_f32x4().as_array();
         let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0;
         let c: __m128 = _mm_mask_dpbf16_ps(src, k, a, b);
-        let result: [f32; 4] = transmute(c.as_f32x4());
+        let result = *c.as_f32x4().as_array();
         let expected_result: [f32; 4] = [1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32];
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_maskz_dpbf16_ps() {
+    fn test_mm_maskz_dpbf16_ps() {
         let a_array = [8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32];
         let b_array = [-1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32];
-        let a1: __m128 = transmute(a_array);
-        let b1: __m128 = transmute(b_array);
+        let a1 = f32x4::from_array(a_array).as_m128();
+        let b1 = f32x4::from_array(b_array).as_m128();
         let k: __mmask8 = 0xf3;
-        let src: __m128 = transmute([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]);
+        let src = f32x4::from_array([1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32]).as_m128();
         let a: __m128bh = _mm_cvtne2ps_pbh(a1, a1);
         let b: __m128bh = _mm_cvtne2ps_pbh(b1, b1);
         let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
-        let result: [f32; 4] = transmute(c.as_f32x4());
+        let result = *c.as_f32x4().as_array();
         let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, 0.0, 0.0];
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0xff;
         let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
-        let result: [f32; 4] = transmute(c.as_f32x4());
+        let result = *c.as_f32x4().as_array();
         let expected_result: [f32; 4] = [-18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32];
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0;
         let c: __m128 = _mm_maskz_dpbf16_ps(k, src, a, b);
-        let result: [f32; 4] = transmute(c.as_f32x4());
+        let result = *c.as_f32x4().as_array();
         let expected_result: [f32; 4] = [0.0, 0.0, 0.0, 0.0];
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_dpbf16_ps() {
+    fn test_mm256_dpbf16_ps() {
         #[rustfmt::skip]
         let a_array = [
             8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@@ -1597,16 +1569,16 @@ mod tests {
         let b_array = [
             -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
         ];
-        let a1: __m256 = transmute(a_array);
-        let b1: __m256 = transmute(b_array);
+        let a1 = f32x8::from_array(a_array).as_m256();
+        let b1 = f32x8::from_array(b_array).as_m256();
         #[rustfmt::skip]
-        let src: __m256 = transmute([
+        let src = f32x8::from_array([
             1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
-        ]);
+        ]).as_m256();
         let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
         let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
         let c: __m256 = _mm256_dpbf16_ps(src, a, b);
-        let result: [f32; 8] = transmute(c.as_f32x8());
+        let result = *c.as_f32x8().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 8] = [
             -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@@ -1615,7 +1587,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_mask_dpbf16_ps() {
+    fn test_mm256_mask_dpbf16_ps() {
         #[rustfmt::skip]
         let a_array = [
             8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@@ -1623,17 +1595,17 @@ mod tests {
         let b_array = [
             -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
         ];
-        let a1: __m256 = transmute(a_array);
-        let b1: __m256 = transmute(b_array);
+        let a1 = f32x8::from_array(a_array).as_m256();
+        let b1 = f32x8::from_array(b_array).as_m256();
         let k: __mmask8 = 0x33;
         #[rustfmt::skip]
-        let src: __m256 = transmute([
+        let src = f32x8::from_array([
             1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
-        ]);
+        ]).as_m256();
         let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
         let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
         let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
-        let result: [f32; 8] = transmute(c.as_f32x8());
+        let result = *c.as_f32x8().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 8] = [
             -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
@@ -1641,7 +1613,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0xff;
         let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
-        let result: [f32; 8] = transmute(c.as_f32x8());
+        let result = *c.as_f32x8().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 8] = [
             -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@@ -1649,7 +1621,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0;
         let c: __m256 = _mm256_mask_dpbf16_ps(src, k, a, b);
-        let result: [f32; 8] = transmute(c.as_f32x8());
+        let result = *c.as_f32x8().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 8] = [
             1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
@@ -1658,7 +1630,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_maskz_dpbf16_ps() {
+    fn test_mm256_maskz_dpbf16_ps() {
         #[rustfmt::skip]
         let a_array = [
             8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@@ -1666,17 +1638,17 @@ mod tests {
         let b_array = [
             -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
         ];
-        let a1: __m256 = transmute(a_array);
-        let b1: __m256 = transmute(b_array);
+        let a1 = f32x8::from_array(a_array).as_m256();
+        let b1 = f32x8::from_array(b_array).as_m256();
         let k: __mmask8 = 0x33;
         #[rustfmt::skip]
-        let src: __m256 = transmute([
+        let src = f32x8::from_array([
             1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
-        ]);
+        ]).as_m256();
         let a: __m256bh = _mm256_cvtne2ps_pbh(a1, a1);
         let b: __m256bh = _mm256_cvtne2ps_pbh(b1, b1);
         let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
-        let result: [f32; 8] = transmute(c.as_f32x8());
+        let result = *c.as_f32x8().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 8] = [
             -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0,
@@ -1684,7 +1656,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0xff;
         let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
-        let result: [f32; 8] = transmute(c.as_f32x8());
+        let result = *c.as_f32x8().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 8] = [
             -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@@ -1692,13 +1664,13 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask8 = 0;
         let c: __m256 = _mm256_maskz_dpbf16_ps(k, src, a, b);
-        let result: [f32; 8] = transmute(c.as_f32x8());
+        let result = *c.as_f32x8().as_array();
         let expected_result: [f32; 8] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0];
         assert_eq!(result, expected_result);
     }
 
     #[simd_test(enable = "avx512bf16,avx512f")]
-    unsafe fn test_mm512_dpbf16_ps() {
+    fn test_mm512_dpbf16_ps() {
         #[rustfmt::skip]
         let a_array = [
             8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@@ -1708,16 +1680,17 @@ mod tests {
             -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
             -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
         ];
-        let a1: __m512 = transmute(a_array);
-        let b1: __m512 = transmute(b_array);
-        let src: __m512 = transmute([
+        let a1 = f32x16::from_array(a_array).as_m512();
+        let b1 = f32x16::from_array(b_array).as_m512();
+        let src = f32x16::from_array([
             1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
             2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
-        ]);
+        ])
+        .as_m512();
         let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
         let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
         let c: __m512 = _mm512_dpbf16_ps(src, a, b);
-        let result: [f32; 16] = transmute(c.as_f32x16());
+        let result = *c.as_f32x16().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 16] = [
             -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@@ -1727,7 +1700,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512f")]
-    unsafe fn test_mm512_mask_dpbf16_ps() {
+    fn test_mm512_mask_dpbf16_ps() {
         #[rustfmt::skip]
         let a_array = [
             8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@@ -1737,18 +1710,18 @@ mod tests {
             -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
             -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
         ];
-        let a1: __m512 = transmute(a_array);
-        let b1: __m512 = transmute(b_array);
+        let a1 = f32x16::from_array(a_array).as_m512();
+        let b1 = f32x16::from_array(b_array).as_m512();
         let k: __mmask16 = 0x3333;
         #[rustfmt::skip]
-        let src: __m512 = transmute([
+        let src = f32x16::from_array([
             1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
             2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
-        ]);
+        ]).as_m512();
         let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
         let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
         let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
-        let result: [f32; 16] = transmute(c.as_f32x16());
+        let result = *c.as_f32x16().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 16] = [
             -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32, -18.0_f32, -52.0_f32, 3.0_f32, 4.0_f32,
@@ -1757,7 +1730,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask16 = 0xffff;
         let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
-        let result: [f32; 16] = transmute(c.as_f32x16());
+        let result = *c.as_f32x16().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 16] = [
             -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@@ -1766,7 +1739,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask16 = 0;
         let c: __m512 = _mm512_mask_dpbf16_ps(src, k, a, b);
-        let result: [f32; 16] = transmute(c.as_f32x16());
+        let result = *c.as_f32x16().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 16] = [
             1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
@@ -1776,7 +1749,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512f")]
-    unsafe fn test_mm512_maskz_dpbf16_ps() {
+    fn test_mm512_maskz_dpbf16_ps() {
         #[rustfmt::skip]
         let a_array = [
             8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32, 8.5_f32, 10.5_f32, 3.75_f32, 50.25_f32,
@@ -1786,18 +1759,18 @@ mod tests {
             -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
             -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32, -1.0_f32,
         ];
-        let a1: __m512 = transmute(a_array);
-        let b1: __m512 = transmute(b_array);
+        let a1 = f32x16::from_array(a_array).as_m512();
+        let b1 = f32x16::from_array(b_array).as_m512();
         let k: __mmask16 = 0x3333;
         #[rustfmt::skip]
-        let src: __m512 = transmute([
+        let src = f32x16::from_array([
             1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32,
             2.0_f32, 3.0_f32, 4.0_f32, 1.0_f32, 2.0_f32, 3.0_f32, 4.0_f32,
-        ]);
+        ]).as_m512();
         let a: __m512bh = _mm512_cvtne2ps_pbh(a1, a1);
         let b: __m512bh = _mm512_cvtne2ps_pbh(b1, b1);
         let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
-        let result: [f32; 16] = transmute(c.as_f32x16());
+        let result = *c.as_f32x16().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 16] = [
             -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32, 0.0, 0.0, -18.0_f32, -52.0_f32,
@@ -1806,7 +1779,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask16 = 0xffff;
         let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
-        let result: [f32; 16] = transmute(c.as_f32x16());
+        let result = *c.as_f32x16().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 16] = [
             -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32, -18.0_f32, -52.0_f32, -16.0_f32, -50.0_f32,
@@ -1815,7 +1788,7 @@ mod tests {
         assert_eq!(result, expected_result);
         let k: __mmask16 = 0;
         let c: __m512 = _mm512_maskz_dpbf16_ps(k, src, a, b);
-        let result: [f32; 16] = transmute(c.as_f32x16());
+        let result = *c.as_f32x16().as_array();
         #[rustfmt::skip]
         let expected_result: [f32; 16] = [
             0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
@@ -1833,7 +1806,7 @@ mod tests {
     const BF16_EIGHT: u16 = 0b0_10000010_0000000;
 
     #[simd_test(enable = "avx512bf16")]
-    unsafe fn test_mm512_cvtpbh_ps() {
+    fn test_mm512_cvtpbh_ps() {
         let a = __m256bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
@@ -1846,7 +1819,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16")]
-    unsafe fn test_mm512_mask_cvtpbh_ps() {
+    fn test_mm512_mask_cvtpbh_ps() {
         let a = __m256bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
@@ -1863,7 +1836,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16")]
-    unsafe fn test_mm512_maskz_cvtpbh_ps() {
+    fn test_mm512_maskz_cvtpbh_ps() {
         let a = __m256bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
@@ -1877,7 +1850,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_cvtpbh_ps() {
+    fn test_mm256_cvtpbh_ps() {
         let a = __m128bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
         ]);
@@ -1887,7 +1860,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtpbh_ps() {
+    fn test_mm256_mask_cvtpbh_ps() {
         let a = __m128bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
         ]);
@@ -1899,7 +1872,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtpbh_ps() {
+    fn test_mm256_maskz_cvtpbh_ps() {
         let a = __m128bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
         ]);
@@ -1910,7 +1883,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_cvtpbh_ps() {
+    fn test_mm_cvtpbh_ps() {
         let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
         let r = _mm_cvtpbh_ps(a);
         let e = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
@@ -1918,7 +1891,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_mask_cvtpbh_ps() {
+    fn test_mm_mask_cvtpbh_ps() {
         let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
         let src = _mm_setr_ps(9., 10., 11., 12.);
         let k = 0b1010;
@@ -1928,7 +1901,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtpbh_ps() {
+    fn test_mm_maskz_cvtpbh_ps() {
         let a = __m128bh([BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, 0, 0, 0, 0]);
         let k = 0b1010;
         let r = _mm_maskz_cvtpbh_ps(k, a);
@@ -1937,40 +1910,40 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bf16")]
-    unsafe fn test_mm_cvtsbh_ss() {
+    fn test_mm_cvtsbh_ss() {
         let r = _mm_cvtsbh_ss(bf16::from_bits(BF16_ONE));
         assert_eq!(r, 1.);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_cvtneps_pbh() {
+    fn test_mm_cvtneps_pbh() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-        let r: u16x4 = transmute_copy(&_mm_cvtneps_pbh(a));
+        let r: u16x4 = unsafe { transmute_copy(&_mm_cvtneps_pbh(a)) };
         let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_mask_cvtneps_pbh() {
+    fn test_mm_mask_cvtneps_pbh() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let src = __m128bh([5, 6, 7, 8, !0, !0, !0, !0]);
         let k = 0b1010;
-        let r: u16x4 = transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a));
+        let r: u16x4 = unsafe { transmute_copy(&_mm_mask_cvtneps_pbh(src, k, a)) };
         let e = u16x4::new(5, BF16_TWO, 7, BF16_FOUR);
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtneps_pbh() {
+    fn test_mm_maskz_cvtneps_pbh() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let k = 0b1010;
-        let r: u16x4 = transmute_copy(&_mm_maskz_cvtneps_pbh(k, a));
+        let r: u16x4 = unsafe { transmute_copy(&_mm_maskz_cvtneps_pbh(k, a)) };
         let e = u16x4::new(0, BF16_TWO, 0, BF16_FOUR);
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avx512bf16,avx512vl")]
-    unsafe fn test_mm_cvtness_sbh() {
+    fn test_mm_cvtness_sbh() {
         let r = _mm_cvtness_sbh(1.);
         assert_eq!(r.to_bits(), BF16_ONE);
     }
diff --git a/crates/core_arch/src/x86/avx512bitalg.rs b/crates/core_arch/src/x86/avx512bitalg.rs
index 1cbf0faea0..dd211854af 100644
--- a/crates/core_arch/src/x86/avx512bitalg.rs
+++ b/crates/core_arch/src/x86/avx512bitalg.rs
@@ -5,7 +5,7 @@
 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 
 use crate::core_arch::simd::i8x16;
 use crate::core_arch::simd::i8x32;
@@ -28,12 +28,12 @@ use stdarch_test::assert_instr;
 
 #[allow(improper_ctypes)]
 unsafe extern "C" {
-    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
-    fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
-    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
-    fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
-    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
-    fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
+    #[link_name = "llvm.x86.avx512.vpshufbitqmb.512"]
+    fn bitshuffle_512(data: i8x64, indices: i8x64) -> __mmask64;
+    #[link_name = "llvm.x86.avx512.vpshufbitqmb.256"]
+    fn bitshuffle_256(data: i8x32, indices: i8x32) -> __mmask32;
+    #[link_name = "llvm.x86.avx512.vpshufbitqmb.128"]
+    fn bitshuffle_128(data: i8x16, indices: i8x16) -> __mmask16;
 }
 
 /// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@@ -43,7 +43,8 @@ unsafe extern "C" {
 #[target_feature(enable = "avx512bitalg")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntw))]
-pub fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
     unsafe { transmute(simd_ctpop(a.as_i16x32())) }
 }
 
@@ -57,7 +58,8 @@ pub fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bitalg")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntw))]
-pub fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -77,7 +79,8 @@ pub fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bitalg")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntw))]
-pub fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -94,7 +97,8 @@ pub fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m51
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntw))]
-pub fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
     unsafe { transmute(simd_ctpop(a.as_i16x16())) }
 }
 
@@ -108,7 +112,8 @@ pub fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntw))]
-pub fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -128,7 +133,8 @@ pub fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntw))]
-pub fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -145,7 +151,8 @@ pub fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m25
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntw))]
-pub fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
     unsafe { transmute(simd_ctpop(a.as_i16x8())) }
 }
 
@@ -159,7 +166,8 @@ pub fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntw))]
-pub fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -179,7 +187,8 @@ pub fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntw))]
-pub fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -196,7 +205,8 @@ pub fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bitalg")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntb))]
-pub fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
     unsafe { transmute(simd_ctpop(a.as_i8x64())) }
 }
 
@@ -210,7 +220,8 @@ pub fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bitalg")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntb))]
-pub fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -230,7 +241,8 @@ pub fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bitalg")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntb))]
-pub fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -247,7 +259,8 @@ pub fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntb))]
-pub fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
     unsafe { transmute(simd_ctpop(a.as_i8x32())) }
 }
 
@@ -261,7 +274,8 @@ pub fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntb))]
-pub fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -281,7 +295,8 @@ pub fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntb))]
-pub fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -298,7 +313,8 @@ pub fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntb))]
-pub fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
     unsafe { transmute(simd_ctpop(a.as_i8x16())) }
 }
 
@@ -312,7 +328,8 @@ pub fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntb))]
-pub fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -332,7 +349,8 @@ pub fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bitalg,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntb))]
-pub fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -352,7 +370,7 @@ pub fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
-    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
+    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64()) }
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -368,7 +386,7 @@ pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
-    unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
+    _mm512_bitshuffle_epi64_mask(b, c) & k
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -381,7 +399,7 @@ pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
-    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
+    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32()) }
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -397,7 +415,7 @@ pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
-    unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
+    _mm256_bitshuffle_epi64_mask(b, c) & k
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -410,7 +428,7 @@ pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
-    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
+    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16()) }
 }
 
 /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -426,7 +444,7 @@ pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufbitqmb))]
 pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
-    unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
+    _mm_bitshuffle_epi64_mask(b, c) & k
 }
 
 #[cfg(test)]
@@ -436,12 +454,13 @@ mod tests {
     // __mXXXi happens to be defined in terms of signed integers.
     #![allow(overflowing_literals)]
 
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "avx512bitalg,avx512f")]
-    unsafe fn test_mm512_popcnt_epi16() {
+    const fn test_mm512_popcnt_epi16() {
         let test_data = _mm512_set_epi16(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
             0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
@@ -456,7 +475,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f")]
-    unsafe fn test_mm512_maskz_popcnt_epi16() {
+    const fn test_mm512_maskz_popcnt_epi16() {
         let test_data = _mm512_set_epi16(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
             0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
@@ -472,7 +491,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f")]
-    unsafe fn test_mm512_mask_popcnt_epi16() {
+    const fn test_mm512_mask_popcnt_epi16() {
         let test_data = _mm512_set_epi16(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
             0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
@@ -488,7 +507,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm256_popcnt_epi16() {
+    const fn test_mm256_popcnt_epi16() {
         let test_data = _mm256_set_epi16(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
             0x3F_FF, 0x7F_FF,
@@ -500,7 +519,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_popcnt_epi16() {
+    const fn test_mm256_maskz_popcnt_epi16() {
         let test_data = _mm256_set_epi16(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
             0x3F_FF, 0x7F_FF,
@@ -512,7 +531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_popcnt_epi16() {
+    const fn test_mm256_mask_popcnt_epi16() {
         let test_data = _mm256_set_epi16(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
             0x3F_FF, 0x7F_FF,
@@ -526,7 +545,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm_popcnt_epi16() {
+    const fn test_mm_popcnt_epi16() {
         let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
         let actual_result = _mm_popcnt_epi16(test_data);
         let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
@@ -534,7 +553,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_popcnt_epi16() {
+    const fn test_mm_maskz_popcnt_epi16() {
         let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
         let mask = 0xF0;
         let actual_result = _mm_maskz_popcnt_epi16(mask, test_data);
@@ -543,7 +562,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm_mask_popcnt_epi16() {
+    const fn test_mm_mask_popcnt_epi16() {
         let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
         let mask = 0xF0;
         let actual_result = _mm_mask_popcnt_epi16(test_data, mask, test_data);
@@ -552,7 +571,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f")]
-    unsafe fn test_mm512_popcnt_epi8() {
+    const fn test_mm512_popcnt_epi8() {
         let test_data = _mm512_set_epi8(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
             217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
@@ -569,7 +588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f")]
-    unsafe fn test_mm512_maskz_popcnt_epi8() {
+    const fn test_mm512_maskz_popcnt_epi8() {
         let test_data = _mm512_set_epi8(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
             217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
@@ -587,7 +606,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f")]
-    unsafe fn test_mm512_mask_popcnt_epi8() {
+    const fn test_mm512_mask_popcnt_epi8() {
         let test_data = _mm512_set_epi8(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
             217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
@@ -605,7 +624,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm256_popcnt_epi8() {
+    const fn test_mm256_popcnt_epi8() {
         let test_data = _mm256_set_epi8(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
             217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
@@ -619,7 +638,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_popcnt_epi8() {
+    const fn test_mm256_maskz_popcnt_epi8() {
         let test_data = _mm256_set_epi8(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
             145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
@@ -634,7 +653,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_popcnt_epi8() {
+    const fn test_mm256_mask_popcnt_epi8() {
         let test_data = _mm256_set_epi8(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
             145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
@@ -649,7 +668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm_popcnt_epi8() {
+    const fn test_mm_popcnt_epi8() {
         let test_data = _mm_set_epi8(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
         );
@@ -659,7 +678,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_popcnt_epi8() {
+    const fn test_mm_maskz_popcnt_epi8() {
         let test_data = _mm_set_epi8(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
         );
@@ -670,7 +689,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm_mask_popcnt_epi8() {
+    const fn test_mm_mask_popcnt_epi8() {
         let test_data = _mm_set_epi8(
             0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
         );
@@ -682,7 +701,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f")]
-    unsafe fn test_mm512_bitshuffle_epi64_mask() {
+    fn test_mm512_bitshuffle_epi64_mask() {
         let test_indices = _mm512_set_epi8(
             63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
             8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
@@ -712,7 +731,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f")]
-    unsafe fn test_mm512_mask_bitshuffle_epi64_mask() {
+    fn test_mm512_mask_bitshuffle_epi64_mask() {
         let test_indices = _mm512_set_epi8(
             63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
             8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
@@ -743,7 +762,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm256_bitshuffle_epi64_mask() {
+    fn test_mm256_bitshuffle_epi64_mask() {
         let test_indices = _mm256_set_epi8(
             63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
             8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
@@ -761,7 +780,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_bitshuffle_epi64_mask() {
+    fn test_mm256_mask_bitshuffle_epi64_mask() {
         let test_indices = _mm256_set_epi8(
             63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
             8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
@@ -780,7 +799,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm_bitshuffle_epi64_mask() {
+    fn test_mm_bitshuffle_epi64_mask() {
         let test_indices = _mm_set_epi8(
             63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
         );
@@ -792,7 +811,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
-    unsafe fn test_mm_mask_bitshuffle_epi64_mask() {
+    fn test_mm_mask_bitshuffle_epi64_mask() {
         let test_indices = _mm_set_epi8(
             63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
         );
diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs
index fadc0e2cc0..659d6c3be8 100644
--- a/crates/core_arch/src/x86/avx512bw.rs
+++ b/crates/core_arch/src/x86/avx512bw.rs
@@ -14,7 +14,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsw))]
-pub fn _mm512_abs_epi16(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_abs_epi16(a: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i16x32();
         let cmp: i16x32 = simd_gt(a, i16x32::ZERO);
@@ -29,7 +30,8 @@ pub fn _mm512_abs_epi16(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsw))]
-pub fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         let abs = _mm512_abs_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, abs, src.as_i16x32()))
@@ -43,7 +45,8 @@ pub fn _mm512_mask_abs_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsw))]
-pub fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         let abs = _mm512_abs_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, abs, i16x32::ZERO))
@@ -57,7 +60,8 @@ pub fn _mm512_maskz_abs_epi16(k: __mmask32, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsw))]
-pub fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         let abs = _mm256_abs_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, abs, src.as_i16x16()))
@@ -71,7 +75,8 @@ pub fn _mm256_mask_abs_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsw))]
-pub fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         let abs = _mm256_abs_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, abs, i16x16::ZERO))
@@ -85,7 +90,8 @@ pub fn _mm256_maskz_abs_epi16(k: __mmask16, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsw))]
-pub fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let abs = _mm_abs_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, abs, src.as_i16x8()))
@@ -99,7 +105,8 @@ pub fn _mm_mask_abs_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsw))]
-pub fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let abs = _mm_abs_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, abs, i16x8::ZERO))
@@ -113,7 +120,8 @@ pub fn _mm_maskz_abs_epi16(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsb))]
-pub fn _mm512_abs_epi8(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_abs_epi8(a: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i8x64();
         let cmp: i8x64 = simd_gt(a, i8x64::ZERO);
@@ -128,7 +136,8 @@ pub fn _mm512_abs_epi8(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsb))]
-pub fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
     unsafe {
         let abs = _mm512_abs_epi8(a).as_i8x64();
         transmute(simd_select_bitmask(k, abs, src.as_i8x64()))
@@ -142,7 +151,8 @@ pub fn _mm512_mask_abs_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsb))]
-pub fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
     unsafe {
         let abs = _mm512_abs_epi8(a).as_i8x64();
         transmute(simd_select_bitmask(k, abs, i8x64::ZERO))
@@ -156,7 +166,8 @@ pub fn _mm512_maskz_abs_epi8(k: __mmask64, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsb))]
-pub fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
     unsafe {
         let abs = _mm256_abs_epi8(a).as_i8x32();
         transmute(simd_select_bitmask(k, abs, src.as_i8x32()))
@@ -170,7 +181,8 @@ pub fn _mm256_mask_abs_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsb))]
-pub fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
     unsafe {
         let abs = _mm256_abs_epi8(a).as_i8x32();
         transmute(simd_select_bitmask(k, abs, i8x32::ZERO))
@@ -184,7 +196,8 @@ pub fn _mm256_maskz_abs_epi8(k: __mmask32, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsb))]
-pub fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
     unsafe {
         let abs = _mm_abs_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, abs, src.as_i8x16()))
@@ -198,7 +211,8 @@ pub fn _mm_mask_abs_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsb))]
-pub fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
     unsafe {
         let abs = _mm_abs_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, abs, i8x16::ZERO))
@@ -212,7 +226,8 @@ pub fn _mm_maskz_abs_epi8(k: __mmask16, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddw))]
-pub fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_add(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -223,7 +238,8 @@ pub fn _mm512_add_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddw))]
-pub fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_add_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, add, src.as_i16x32()))
@@ -237,7 +253,8 @@ pub fn _mm512_mask_add_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddw))]
-pub fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_add_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, add, i16x32::ZERO))
@@ -251,7 +268,8 @@ pub fn _mm512_maskz_add_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddw))]
-pub fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_add_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, add, src.as_i16x16()))
@@ -265,7 +283,8 @@ pub fn _mm256_mask_add_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddw))]
-pub fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_add_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, add, i16x16::ZERO))
@@ -279,7 +298,8 @@ pub fn _mm256_maskz_add_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddw))]
-pub fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_add_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, add, src.as_i16x8()))
@@ -293,7 +313,8 @@ pub fn _mm_mask_add_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddw))]
-pub fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_add_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, add, i16x8::ZERO))
@@ -307,7 +328,8 @@ pub fn _mm_maskz_add_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddb))]
-pub fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_add(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -318,7 +340,8 @@ pub fn _mm512_add_epi8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddb))]
-pub fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_add_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, add, src.as_i8x64()))
@@ -332,7 +355,8 @@ pub fn _mm512_mask_add_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddb))]
-pub fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_add_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, add, i8x64::ZERO))
@@ -346,7 +370,8 @@ pub fn _mm512_maskz_add_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddb))]
-pub fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_add_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, add, src.as_i8x32()))
@@ -360,7 +385,8 @@ pub fn _mm256_mask_add_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddb))]
-pub fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_add_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, add, i8x32::ZERO))
@@ -374,7 +400,8 @@ pub fn _mm256_maskz_add_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddb))]
-pub fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_add_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, add, src.as_i8x16()))
@@ -388,7 +415,8 @@ pub fn _mm_mask_add_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddb))]
-pub fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_add_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, add, i8x16::ZERO))
@@ -402,7 +430,8 @@ pub fn _mm_maskz_add_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusw))]
-pub fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_saturating_add(a.as_u16x32(), b.as_u16x32())) }
 }
 
@@ -413,7 +442,8 @@ pub fn _mm512_adds_epu16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusw))]
-pub fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_adds_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, add, src.as_u16x32()))
@@ -427,7 +457,8 @@ pub fn _mm512_mask_adds_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusw))]
-pub fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_adds_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, add, u16x32::ZERO))
@@ -441,7 +472,8 @@ pub fn _mm512_maskz_adds_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusw))]
-pub fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_adds_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, add, src.as_u16x16()))
@@ -455,7 +487,8 @@ pub fn _mm256_mask_adds_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusw))]
-pub fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_adds_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, add, u16x16::ZERO))
@@ -469,7 +502,8 @@ pub fn _mm256_maskz_adds_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusw))]
-pub fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_adds_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, add, src.as_u16x8()))
@@ -483,7 +517,8 @@ pub fn _mm_mask_adds_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusw))]
-pub fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_adds_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, add, u16x8::ZERO))
@@ -497,7 +532,8 @@ pub fn _mm_maskz_adds_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusb))]
-pub fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_saturating_add(a.as_u8x64(), b.as_u8x64())) }
 }
 
@@ -508,7 +544,8 @@ pub fn _mm512_adds_epu8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusb))]
-pub fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_adds_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, add, src.as_u8x64()))
@@ -522,7 +559,8 @@ pub fn _mm512_mask_adds_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusb))]
-pub fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_adds_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, add, u8x64::ZERO))
@@ -536,7 +574,8 @@ pub fn _mm512_maskz_adds_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusb))]
-pub fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_adds_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, add, src.as_u8x32()))
@@ -550,7 +589,8 @@ pub fn _mm256_mask_adds_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusb))]
-pub fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_adds_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, add, u8x32::ZERO))
@@ -564,7 +604,8 @@ pub fn _mm256_maskz_adds_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusb))]
-pub fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_adds_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, add, src.as_u8x16()))
@@ -578,7 +619,8 @@ pub fn _mm_mask_adds_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddusb))]
-pub fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_adds_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, add, u8x16::ZERO))
@@ -592,7 +634,8 @@ pub fn _mm_maskz_adds_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsw))]
-pub fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_saturating_add(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -603,7 +646,8 @@ pub fn _mm512_adds_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsw))]
-pub fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_adds_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, add, src.as_i16x32()))
@@ -617,7 +661,8 @@ pub fn _mm512_mask_adds_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsw))]
-pub fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_adds_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, add, i16x32::ZERO))
@@ -631,7 +676,8 @@ pub fn _mm512_maskz_adds_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsw))]
-pub fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_adds_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, add, src.as_i16x16()))
@@ -645,7 +691,8 @@ pub fn _mm256_mask_adds_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsw))]
-pub fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_adds_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, add, i16x16::ZERO))
@@ -659,7 +706,8 @@ pub fn _mm256_maskz_adds_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsw))]
-pub fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_adds_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, add, src.as_i16x8()))
@@ -673,7 +721,8 @@ pub fn _mm_mask_adds_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsw))]
-pub fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_adds_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, add, i16x8::ZERO))
@@ -687,7 +736,8 @@ pub fn _mm_maskz_adds_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsb))]
-pub fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_saturating_add(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -698,7 +748,8 @@ pub fn _mm512_adds_epi8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsb))]
-pub fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_adds_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, add, src.as_i8x64()))
@@ -712,7 +763,8 @@ pub fn _mm512_mask_adds_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsb))]
-pub fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_adds_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, add, i8x64::ZERO))
@@ -726,7 +778,8 @@ pub fn _mm512_maskz_adds_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsb))]
-pub fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_adds_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, add, src.as_i8x32()))
@@ -740,7 +793,8 @@ pub fn _mm256_mask_adds_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsb))]
-pub fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_adds_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, add, i8x32::ZERO))
@@ -754,7 +808,8 @@ pub fn _mm256_maskz_adds_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsb))]
-pub fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_adds_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, add, src.as_i8x16()))
@@ -768,7 +823,8 @@ pub fn _mm_mask_adds_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddsb))]
-pub fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_adds_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, add, i8x16::ZERO))
@@ -782,7 +838,8 @@ pub fn _mm_maskz_adds_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubw))]
-pub fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_sub(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -793,7 +850,8 @@ pub fn _mm512_sub_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubw))]
-pub fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_sub_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
@@ -807,7 +865,8 @@ pub fn _mm512_mask_sub_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubw))]
-pub fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_sub_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
@@ -821,7 +880,8 @@ pub fn _mm512_maskz_sub_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubw))]
-pub fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_sub_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
@@ -835,7 +895,8 @@ pub fn _mm256_mask_sub_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubw))]
-pub fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_sub_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
@@ -849,7 +910,8 @@ pub fn _mm256_maskz_sub_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubw))]
-pub fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_sub_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
@@ -863,7 +925,8 @@ pub fn _mm_mask_sub_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubw))]
-pub fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_sub_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
@@ -877,7 +940,8 @@ pub fn _mm_maskz_sub_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubb))]
-pub fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_sub(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -888,7 +952,8 @@ pub fn _mm512_sub_epi8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubb))]
-pub fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_sub_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
@@ -902,7 +967,8 @@ pub fn _mm512_mask_sub_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubb))]
-pub fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_sub_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
@@ -916,7 +982,8 @@ pub fn _mm512_maskz_sub_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubb))]
-pub fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_sub_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
@@ -930,7 +997,8 @@ pub fn _mm256_mask_sub_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubb))]
-pub fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_sub_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
@@ -944,7 +1012,8 @@ pub fn _mm256_maskz_sub_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubb))]
-pub fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_sub_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
@@ -958,7 +1027,8 @@ pub fn _mm_mask_sub_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubb))]
-pub fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_sub_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
@@ -972,7 +1042,8 @@ pub fn _mm_maskz_sub_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusw))]
-pub fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_saturating_sub(a.as_u16x32(), b.as_u16x32())) }
 }
 
@@ -983,7 +1054,8 @@ pub fn _mm512_subs_epu16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusw))]
-pub fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_subs_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, sub, src.as_u16x32()))
@@ -997,7 +1069,8 @@ pub fn _mm512_mask_subs_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusw))]
-pub fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_subs_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, sub, u16x32::ZERO))
@@ -1011,7 +1084,8 @@ pub fn _mm512_maskz_subs_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusw))]
-pub fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_subs_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, sub, src.as_u16x16()))
@@ -1025,7 +1099,8 @@ pub fn _mm256_mask_subs_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusw))]
-pub fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_subs_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, sub, u16x16::ZERO))
@@ -1039,7 +1114,8 @@ pub fn _mm256_maskz_subs_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusw))]
-pub fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_subs_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, sub, src.as_u16x8()))
@@ -1053,7 +1129,8 @@ pub fn _mm_mask_subs_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusw))]
-pub fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_subs_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, sub, u16x8::ZERO))
@@ -1067,7 +1144,8 @@ pub fn _mm_maskz_subs_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusb))]
-pub fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_saturating_sub(a.as_u8x64(), b.as_u8x64())) }
 }
 
@@ -1078,7 +1156,8 @@ pub fn _mm512_subs_epu8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusb))]
-pub fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_subs_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, sub, src.as_u8x64()))
@@ -1092,7 +1171,8 @@ pub fn _mm512_mask_subs_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusb))]
-pub fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_subs_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, sub, u8x64::ZERO))
@@ -1106,7 +1186,8 @@ pub fn _mm512_maskz_subs_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusb))]
-pub fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_subs_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, sub, src.as_u8x32()))
@@ -1120,7 +1201,8 @@ pub fn _mm256_mask_subs_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusb))]
-pub fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_subs_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, sub, u8x32::ZERO))
@@ -1134,7 +1216,8 @@ pub fn _mm256_maskz_subs_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusb))]
-pub fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_subs_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, sub, src.as_u8x16()))
@@ -1148,7 +1231,8 @@ pub fn _mm_mask_subs_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubusb))]
-pub fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_subs_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, sub, u8x16::ZERO))
@@ -1162,7 +1246,8 @@ pub fn _mm_maskz_subs_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsw))]
-pub fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_saturating_sub(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -1173,7 +1258,8 @@ pub fn _mm512_subs_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsw))]
-pub fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_subs_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, sub, src.as_i16x32()))
@@ -1187,7 +1273,8 @@ pub fn _mm512_mask_subs_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsw))]
-pub fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_subs_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, sub, i16x32::ZERO))
@@ -1201,7 +1288,8 @@ pub fn _mm512_maskz_subs_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsw))]
-pub fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_subs_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, sub, src.as_i16x16()))
@@ -1215,7 +1303,8 @@ pub fn _mm256_mask_subs_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsw))]
-pub fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_subs_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, sub, i16x16::ZERO))
@@ -1229,7 +1318,8 @@ pub fn _mm256_maskz_subs_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsw))]
-pub fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_subs_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, sub, src.as_i16x8()))
@@ -1243,7 +1333,8 @@ pub fn _mm_mask_subs_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsw))]
-pub fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_subs_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, sub, i16x8::ZERO))
@@ -1257,7 +1348,8 @@ pub fn _mm_maskz_subs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsb))]
-pub fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_saturating_sub(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -1268,7 +1360,8 @@ pub fn _mm512_subs_epi8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsb))]
-pub fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_subs_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, sub, src.as_i8x64()))
@@ -1282,7 +1375,8 @@ pub fn _mm512_mask_subs_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsb))]
-pub fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_subs_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, sub, i8x64::ZERO))
@@ -1296,7 +1390,8 @@ pub fn _mm512_maskz_subs_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsb))]
-pub fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_subs_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, sub, src.as_i8x32()))
@@ -1310,7 +1405,8 @@ pub fn _mm256_mask_subs_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsb))]
-pub fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_subs_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, sub, i8x32::ZERO))
@@ -1324,7 +1420,8 @@ pub fn _mm256_maskz_subs_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsb))]
-pub fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_subs_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, sub, src.as_i8x16()))
@@ -1338,7 +1435,8 @@ pub fn _mm_mask_subs_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubsb))]
-pub fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_subs_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, sub, i8x16::ZERO))
@@ -1352,7 +1450,8 @@ pub fn _mm_maskz_subs_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhuw))]
-pub fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = simd_cast::<_, u32x32>(a.as_u16x32());
         let b = simd_cast::<_, u32x32>(b.as_u16x32());
@@ -1368,7 +1467,13 @@ pub fn _mm512_mulhi_epu16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhuw))]
-pub fn _mm512_mask_mulhi_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mulhi_epu16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, mul, src.as_u16x32()))
@@ -1382,7 +1487,8 @@ pub fn _mm512_mask_mulhi_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhuw))]
-pub fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let mul = _mm512_mulhi_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, mul, u16x32::ZERO))
@@ -1396,7 +1502,13 @@ pub fn _mm512_maskz_mulhi_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhuw))]
-pub fn _mm256_mask_mulhi_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mulhi_epu16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, mul, src.as_u16x16()))
@@ -1410,7 +1522,8 @@ pub fn _mm256_mask_mulhi_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhuw))]
-pub fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let mul = _mm256_mulhi_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, mul, u16x16::ZERO))
@@ -1424,7 +1537,8 @@ pub fn _mm256_maskz_mulhi_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhuw))]
-pub fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mulhi_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, mul, src.as_u16x8()))
@@ -1438,7 +1552,8 @@ pub fn _mm_mask_mulhi_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhuw))]
-pub fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mulhi_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, mul, u16x8::ZERO))
@@ -1452,7 +1567,8 @@ pub fn _mm_maskz_mulhi_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhw))]
-pub fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = simd_cast::<_, i32x32>(a.as_i16x32());
         let b = simd_cast::<_, i32x32>(b.as_i16x32());
@@ -1468,7 +1584,13 @@ pub fn _mm512_mulhi_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhw))]
-pub fn _mm512_mask_mulhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mulhi_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
@@ -1482,7 +1604,8 @@ pub fn _mm512_mask_mulhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhw))]
-pub fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let mul = _mm512_mulhi_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
@@ -1496,7 +1619,13 @@ pub fn _mm512_maskz_mulhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhw))]
-pub fn _mm256_mask_mulhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mulhi_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
@@ -1510,7 +1639,8 @@ pub fn _mm256_mask_mulhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhw))]
-pub fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let mul = _mm256_mulhi_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
@@ -1524,7 +1654,8 @@ pub fn _mm256_maskz_mulhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhw))]
-pub fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mulhi_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
@@ -1538,7 +1669,8 @@ pub fn _mm_mask_mulhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulhw))]
-pub fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mulhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mulhi_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
@@ -1647,7 +1779,8 @@ pub fn _mm_maskz_mulhrs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmullw))]
-pub fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_mul(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -1658,7 +1791,13 @@ pub fn _mm512_mullo_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmullw))]
-pub fn _mm512_mask_mullo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mullo_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let mul = _mm512_mullo_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, mul, src.as_i16x32()))
@@ -1672,7 +1811,8 @@ pub fn _mm512_mask_mullo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmullw))]
-pub fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let mul = _mm512_mullo_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, mul, i16x32::ZERO))
@@ -1686,7 +1826,13 @@ pub fn _mm512_maskz_mullo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmullw))]
-pub fn _mm256_mask_mullo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mullo_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let mul = _mm256_mullo_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, mul, src.as_i16x16()))
@@ -1700,7 +1846,8 @@ pub fn _mm256_mask_mullo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmullw))]
-pub fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let mul = _mm256_mullo_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, mul, i16x16::ZERO))
@@ -1714,7 +1861,8 @@ pub fn _mm256_maskz_mullo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmullw))]
-pub fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mullo_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, mul, src.as_i16x8()))
@@ -1728,7 +1876,8 @@ pub fn _mm_mask_mullo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmullw))]
-pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mullo_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, mul, i16x8::ZERO))
@@ -1742,12 +1891,9 @@ pub fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuw))]
-pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_u16x32();
-        let b = b.as_u16x32();
-        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imax(a.as_u16x32(), b.as_u16x32()).as_m512i() }
 }
 
 /// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1757,7 +1903,8 @@ pub fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuw))]
-pub fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, max, src.as_u16x32()))
@@ -1771,7 +1918,8 @@ pub fn _mm512_mask_max_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuw))]
-pub fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, max, u16x32::ZERO))
@@ -1785,7 +1933,8 @@ pub fn _mm512_maskz_max_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuw))]
-pub fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, max, src.as_u16x16()))
@@ -1799,7 +1948,8 @@ pub fn _mm256_mask_max_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuw))]
-pub fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, max, u16x16::ZERO))
@@ -1813,7 +1963,8 @@ pub fn _mm256_maskz_max_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuw))]
-pub fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, max, src.as_u16x8()))
@@ -1827,7 +1978,8 @@ pub fn _mm_mask_max_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuw))]
-pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, max, u16x8::ZERO))
@@ -1841,12 +1993,9 @@ pub fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxub))]
-pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_u8x64();
-        let b = b.as_u8x64();
-        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imax(a.as_u8x64(), b.as_u8x64()).as_m512i() }
 }
 
 /// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1856,7 +2005,8 @@ pub fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxub))]
-pub fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, max, src.as_u8x64()))
@@ -1870,7 +2020,8 @@ pub fn _mm512_mask_max_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxub))]
-pub fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, max, u8x64::ZERO))
@@ -1884,7 +2035,8 @@ pub fn _mm512_maskz_max_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxub))]
-pub fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, max, src.as_u8x32()))
@@ -1898,7 +2050,8 @@ pub fn _mm256_mask_max_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxub))]
-pub fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, max, u8x32::ZERO))
@@ -1912,7 +2065,8 @@ pub fn _mm256_maskz_max_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxub))]
-pub fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, max, src.as_u8x16()))
@@ -1926,7 +2080,8 @@ pub fn _mm_mask_max_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxub))]
-pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, max, u8x16::ZERO))
@@ -1940,12 +2095,9 @@ pub fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsw))]
-pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_i16x32();
-        let b = b.as_i16x32();
-        transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imax(a.as_i16x32(), b.as_i16x32()).as_m512i() }
 }
 
 /// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1955,7 +2107,8 @@ pub fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsw))]
-pub fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, max, src.as_i16x32()))
@@ -1969,7 +2122,8 @@ pub fn _mm512_mask_max_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsw))]
-pub fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, max, i16x32::ZERO))
@@ -1983,7 +2137,8 @@ pub fn _mm512_maskz_max_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsw))]
-pub fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, max, src.as_i16x16()))
@@ -1997,7 +2152,8 @@ pub fn _mm256_mask_max_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsw))]
-pub fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, max, i16x16::ZERO))
@@ -2011,7 +2167,8 @@ pub fn _mm256_maskz_max_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsw))]
-pub fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, max, src.as_i16x8()))
@@ -2025,7 +2182,8 @@ pub fn _mm_mask_max_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsw))]
-pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, max, i16x8::ZERO))
@@ -2039,12 +2197,9 @@ pub fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsb))]
-pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_i8x64();
-        let b = b.as_i8x64();
-        transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imax(a.as_i8x64(), b.as_i8x64()).as_m512i() }
 }
 
 /// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2054,7 +2209,8 @@ pub fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsb))]
-pub fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, max, src.as_i8x64()))
@@ -2068,7 +2224,8 @@ pub fn _mm512_mask_max_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsb))]
-pub fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, max, i8x64::ZERO))
@@ -2082,7 +2239,8 @@ pub fn _mm512_maskz_max_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsb))]
-pub fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, max, src.as_i8x32()))
@@ -2096,7 +2254,8 @@ pub fn _mm256_mask_max_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsb))]
-pub fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, max, i8x32::ZERO))
@@ -2110,7 +2269,8 @@ pub fn _mm256_maskz_max_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsb))]
-pub fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, max, src.as_i8x16()))
@@ -2124,7 +2284,8 @@ pub fn _mm_mask_max_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsb))]
-pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, max, i8x16::ZERO))
@@ -2138,12 +2299,9 @@ pub fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuw))]
-pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_u16x32();
-        let b = b.as_u16x32();
-        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imin(a.as_u16x32(), b.as_u16x32()).as_m512i() }
 }
 
 /// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2153,7 +2311,8 @@ pub fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuw))]
-pub fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, min, src.as_u16x32()))
@@ -2167,7 +2326,8 @@ pub fn _mm512_mask_min_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuw))]
-pub fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, min, u16x32::ZERO))
@@ -2181,7 +2341,8 @@ pub fn _mm512_maskz_min_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuw))]
-pub fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, min, src.as_u16x16()))
@@ -2195,7 +2356,8 @@ pub fn _mm256_mask_min_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuw))]
-pub fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, min, u16x16::ZERO))
@@ -2209,7 +2371,8 @@ pub fn _mm256_maskz_min_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuw))]
-pub fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, min, src.as_u16x8()))
@@ -2223,7 +2386,8 @@ pub fn _mm_mask_min_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuw))]
-pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, min, u16x8::ZERO))
@@ -2237,12 +2401,9 @@ pub fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminub))]
-pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_u8x64();
-        let b = b.as_u8x64();
-        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imin(a.as_u8x64(), b.as_u8x64()).as_m512i() }
 }
 
 /// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2252,7 +2413,8 @@ pub fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminub))]
-pub fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, min, src.as_u8x64()))
@@ -2266,7 +2428,8 @@ pub fn _mm512_mask_min_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminub))]
-pub fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, min, u8x64::ZERO))
@@ -2280,7 +2443,8 @@ pub fn _mm512_maskz_min_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminub))]
-pub fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, min, src.as_u8x32()))
@@ -2294,7 +2458,8 @@ pub fn _mm256_mask_min_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminub))]
-pub fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, min, u8x32::ZERO))
@@ -2308,7 +2473,8 @@ pub fn _mm256_maskz_min_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminub))]
-pub fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, min, src.as_u8x16()))
@@ -2322,7 +2488,8 @@ pub fn _mm_mask_min_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminub))]
-pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, min, u8x16::ZERO))
@@ -2336,12 +2503,9 @@ pub fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsw))]
-pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_i16x32();
-        let b = b.as_i16x32();
-        transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imin(a.as_i16x32(), b.as_i16x32()).as_m512i() }
 }
 
 /// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2351,7 +2515,8 @@ pub fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsw))]
-pub fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, min, src.as_i16x32()))
@@ -2365,7 +2530,8 @@ pub fn _mm512_mask_min_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsw))]
-pub fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, min, i16x32::ZERO))
@@ -2379,7 +2545,8 @@ pub fn _mm512_maskz_min_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsw))]
-pub fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, min, src.as_i16x16()))
@@ -2393,7 +2560,8 @@ pub fn _mm256_mask_min_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsw))]
-pub fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, min, i16x16::ZERO))
@@ -2407,7 +2575,8 @@ pub fn _mm256_maskz_min_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsw))]
-pub fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, min, src.as_i16x8()))
@@ -2421,7 +2590,8 @@ pub fn _mm_mask_min_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsw))]
-pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, min, i16x8::ZERO))
@@ -2435,12 +2605,9 @@ pub fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsb))]
-pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_i8x64();
-        let b = b.as_i8x64();
-        transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imin(a.as_i8x64(), b.as_i8x64()).as_m512i() }
 }
 
 /// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2450,7 +2617,8 @@ pub fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsb))]
-pub fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, min, src.as_i8x64()))
@@ -2464,7 +2632,8 @@ pub fn _mm512_mask_min_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsb))]
-pub fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, min, i8x64::ZERO))
@@ -2478,7 +2647,8 @@ pub fn _mm512_maskz_min_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsb))]
-pub fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, min, src.as_i8x32()))
@@ -2492,7 +2662,8 @@ pub fn _mm256_mask_min_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsb))]
-pub fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, min, i8x32::ZERO))
@@ -2506,7 +2677,8 @@ pub fn _mm256_maskz_min_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsb))]
-pub fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, min, src.as_i8x16()))
@@ -2520,7 +2692,8 @@ pub fn _mm_mask_min_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsb))]
-pub fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, min, i8x16::ZERO))
@@ -2534,7 +2707,8 @@ pub fn _mm_maskz_min_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<u16x32, _>(simd_lt(a.as_u16x32(), b.as_u16x32())) }
 }
 
@@ -2545,7 +2719,8 @@ pub fn _mm512_cmplt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2556,7 +2731,8 @@ pub fn _mm512_mask_cmplt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<u16x16, _>(simd_lt(a.as_u16x16(), b.as_u16x16())) }
 }
 
@@ -2567,7 +2743,8 @@ pub fn _mm256_cmplt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2578,7 +2755,8 @@ pub fn _mm256_mask_cmplt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u16x8, _>(simd_lt(a.as_u16x8(), b.as_u16x8())) }
 }
 
@@ -2589,7 +2767,8 @@ pub fn _mm_cmplt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2600,7 +2779,8 @@ pub fn _mm_mask_cmplt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<u8x64, _>(simd_lt(a.as_u8x64(), b.as_u8x64())) }
 }
 
@@ -2611,7 +2791,8 @@ pub fn _mm512_cmplt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2622,7 +2803,8 @@ pub fn _mm512_mask_cmplt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<u8x32, _>(simd_lt(a.as_u8x32(), b.as_u8x32())) }
 }
 
@@ -2633,7 +2815,8 @@ pub fn _mm256_cmplt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2644,7 +2827,8 @@ pub fn _mm256_mask_cmplt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<u8x16, _>(simd_lt(a.as_u8x16(), b.as_u8x16())) }
 }
 
@@ -2655,7 +2839,8 @@ pub fn _mm_cmplt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2666,7 +2851,8 @@ pub fn _mm_mask_cmplt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<i16x32, _>(simd_lt(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -2677,7 +2863,8 @@ pub fn _mm512_cmplt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2688,7 +2875,8 @@ pub fn _mm512_mask_cmplt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<i16x16, _>(simd_lt(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -2699,7 +2887,8 @@ pub fn _mm256_cmplt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2710,7 +2899,8 @@ pub fn _mm256_mask_cmplt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -2721,7 +2911,8 @@ pub fn _mm_cmplt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2732,7 +2923,8 @@ pub fn _mm_mask_cmplt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<i8x64, _>(simd_lt(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -2743,7 +2935,8 @@ pub fn _mm512_cmplt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2754,7 +2947,8 @@ pub fn _mm512_mask_cmplt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<i8x32, _>(simd_lt(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -2765,7 +2959,8 @@ pub fn _mm256_cmplt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2776,7 +2971,8 @@ pub fn _mm256_mask_cmplt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -2787,7 +2983,8 @@ pub fn _mm_cmplt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -2798,7 +2995,8 @@ pub fn _mm_mask_cmplt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<u16x32, _>(simd_gt(a.as_u16x32(), b.as_u16x32())) }
 }
 
@@ -2809,7 +3007,8 @@ pub fn _mm512_cmpgt_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -2820,7 +3019,8 @@ pub fn _mm512_mask_cmpgt_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<u16x16, _>(simd_gt(a.as_u16x16(), b.as_u16x16())) }
 }
 
@@ -2831,7 +3031,8 @@ pub fn _mm256_cmpgt_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -2842,7 +3043,8 @@ pub fn _mm256_mask_cmpgt_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u16x8, _>(simd_gt(a.as_u16x8(), b.as_u16x8())) }
 }
 
@@ -2853,7 +3055,8 @@ pub fn _mm_cmpgt_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -2864,7 +3067,8 @@ pub fn _mm_mask_cmpgt_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<u8x64, _>(simd_gt(a.as_u8x64(), b.as_u8x64())) }
 }
 
@@ -2875,7 +3079,8 @@ pub fn _mm512_cmpgt_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -2886,7 +3091,8 @@ pub fn _mm512_mask_cmpgt_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<u8x32, _>(simd_gt(a.as_u8x32(), b.as_u8x32())) }
 }
 
@@ -2897,7 +3103,8 @@ pub fn _mm256_cmpgt_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -2908,7 +3115,8 @@ pub fn _mm256_mask_cmpgt_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<u8x16, _>(simd_gt(a.as_u8x16(), b.as_u8x16())) }
 }
 
@@ -2919,7 +3127,8 @@ pub fn _mm_cmpgt_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -2930,7 +3139,8 @@ pub fn _mm_mask_cmpgt_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<i16x32, _>(simd_gt(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -2941,7 +3151,8 @@ pub fn _mm512_cmpgt_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -2952,7 +3163,8 @@ pub fn _mm512_mask_cmpgt_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<i16x16, _>(simd_gt(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -2963,7 +3175,8 @@ pub fn _mm256_cmpgt_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -2974,7 +3187,8 @@ pub fn _mm256_mask_cmpgt_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -2985,7 +3199,8 @@ pub fn _mm_cmpgt_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -2996,7 +3211,8 @@ pub fn _mm_mask_cmpgt_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<i8x64, _>(simd_gt(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -3007,7 +3223,8 @@ pub fn _mm512_cmpgt_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -3018,7 +3235,8 @@ pub fn _mm512_mask_cmpgt_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<i8x32, _>(simd_gt(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -3029,7 +3247,8 @@ pub fn _mm256_cmpgt_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -3040,7 +3259,8 @@ pub fn _mm256_mask_cmpgt_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -3051,7 +3271,8 @@ pub fn _mm_cmpgt_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -3062,7 +3283,8 @@ pub fn _mm_mask_cmpgt_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<u16x32, _>(simd_le(a.as_u16x32(), b.as_u16x32())) }
 }
 
@@ -3073,7 +3295,8 @@ pub fn _mm512_cmple_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3084,7 +3307,8 @@ pub fn _mm512_mask_cmple_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<u16x16, _>(simd_le(a.as_u16x16(), b.as_u16x16())) }
 }
 
@@ -3095,7 +3319,8 @@ pub fn _mm256_cmple_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3106,7 +3331,8 @@ pub fn _mm256_mask_cmple_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u16x8, _>(simd_le(a.as_u16x8(), b.as_u16x8())) }
 }
 
@@ -3117,7 +3343,8 @@ pub fn _mm_cmple_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu16_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3128,7 +3355,8 @@ pub fn _mm_mask_cmple_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<u8x64, _>(simd_le(a.as_u8x64(), b.as_u8x64())) }
 }
 
@@ -3139,7 +3367,8 @@ pub fn _mm512_cmple_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3150,7 +3379,8 @@ pub fn _mm512_mask_cmple_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<u8x32, _>(simd_le(a.as_u8x32(), b.as_u8x32())) }
 }
 
@@ -3161,7 +3391,8 @@ pub fn _mm256_cmple_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3172,7 +3403,8 @@ pub fn _mm256_mask_cmple_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<u8x16, _>(simd_le(a.as_u8x16(), b.as_u8x16())) }
 }
 
@@ -3183,7 +3415,8 @@ pub fn _mm_cmple_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epu8_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3194,7 +3427,8 @@ pub fn _mm_mask_cmple_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<i16x32, _>(simd_le(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -3205,7 +3439,8 @@ pub fn _mm512_cmple_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3216,7 +3451,8 @@ pub fn _mm512_mask_cmple_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<i16x16, _>(simd_le(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -3227,7 +3463,8 @@ pub fn _mm256_cmple_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3238,7 +3475,8 @@ pub fn _mm256_mask_cmple_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i16x8, _>(simd_le(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -3249,7 +3487,8 @@ pub fn _mm_cmple_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi16_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3260,7 +3499,8 @@ pub fn _mm_mask_cmple_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<i8x64, _>(simd_le(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -3271,7 +3511,8 @@ pub fn _mm512_cmple_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3282,7 +3523,8 @@ pub fn _mm512_mask_cmple_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<i8x32, _>(simd_le(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -3293,7 +3535,8 @@ pub fn _mm256_cmple_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3304,7 +3547,8 @@ pub fn _mm256_mask_cmple_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<i8x16, _>(simd_le(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -3315,7 +3559,8 @@ pub fn _mm_cmple_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epi8_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -3326,7 +3571,8 @@ pub fn _mm_mask_cmple_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<u16x32, _>(simd_ge(a.as_u16x32(), b.as_u16x32())) }
 }
 
@@ -3337,7 +3583,8 @@ pub fn _mm512_cmpge_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3348,7 +3595,8 @@ pub fn _mm512_mask_cmpge_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<u16x16, _>(simd_ge(a.as_u16x16(), b.as_u16x16())) }
 }
 
@@ -3359,7 +3607,8 @@ pub fn _mm256_cmpge_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3370,7 +3619,8 @@ pub fn _mm256_mask_cmpge_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u16x8, _>(simd_ge(a.as_u16x8(), b.as_u16x8())) }
 }
 
@@ -3381,7 +3631,8 @@ pub fn _mm_cmpge_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3392,7 +3643,8 @@ pub fn _mm_mask_cmpge_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<u8x64, _>(simd_ge(a.as_u8x64(), b.as_u8x64())) }
 }
 
@@ -3403,7 +3655,8 @@ pub fn _mm512_cmpge_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3414,7 +3667,8 @@ pub fn _mm512_mask_cmpge_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<u8x32, _>(simd_ge(a.as_u8x32(), b.as_u8x32())) }
 }
 
@@ -3425,7 +3679,8 @@ pub fn _mm256_cmpge_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3436,7 +3691,8 @@ pub fn _mm256_mask_cmpge_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<u8x16, _>(simd_ge(a.as_u8x16(), b.as_u8x16())) }
 }
 
@@ -3447,7 +3703,8 @@ pub fn _mm_cmpge_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3458,7 +3715,8 @@ pub fn _mm_mask_cmpge_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<i16x32, _>(simd_ge(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -3469,7 +3727,8 @@ pub fn _mm512_cmpge_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3480,7 +3739,8 @@ pub fn _mm512_mask_cmpge_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<i16x16, _>(simd_ge(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -3491,7 +3751,8 @@ pub fn _mm256_cmpge_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3502,7 +3763,8 @@ pub fn _mm256_mask_cmpge_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i16x8, _>(simd_ge(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -3513,7 +3775,8 @@ pub fn _mm_cmpge_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3524,7 +3787,8 @@ pub fn _mm_mask_cmpge_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<i8x64, _>(simd_ge(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -3535,7 +3799,8 @@ pub fn _mm512_cmpge_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3546,7 +3811,8 @@ pub fn _mm512_mask_cmpge_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<i8x32, _>(simd_ge(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -3557,7 +3823,8 @@ pub fn _mm256_cmpge_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3568,7 +3835,8 @@ pub fn _mm256_mask_cmpge_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<i8x16, _>(simd_ge(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -3579,7 +3847,8 @@ pub fn _mm_cmpge_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -3590,7 +3859,8 @@ pub fn _mm_mask_cmpge_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<u16x32, _>(simd_eq(a.as_u16x32(), b.as_u16x32())) }
 }
 
@@ -3601,7 +3871,8 @@ pub fn _mm512_cmpeq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3612,7 +3883,8 @@ pub fn _mm512_mask_cmpeq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<u16x16, _>(simd_eq(a.as_u16x16(), b.as_u16x16())) }
 }
 
@@ -3623,7 +3895,8 @@ pub fn _mm256_cmpeq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3634,7 +3907,8 @@ pub fn _mm256_mask_cmpeq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u16x8, _>(simd_eq(a.as_u16x8(), b.as_u16x8())) }
 }
 
@@ -3645,7 +3919,8 @@ pub fn _mm_cmpeq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu16_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3656,7 +3931,8 @@ pub fn _mm_mask_cmpeq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<u8x64, _>(simd_eq(a.as_u8x64(), b.as_u8x64())) }
 }
 
@@ -3667,7 +3943,8 @@ pub fn _mm512_cmpeq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3678,7 +3955,8 @@ pub fn _mm512_mask_cmpeq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<u8x32, _>(simd_eq(a.as_u8x32(), b.as_u8x32())) }
 }
 
@@ -3689,7 +3967,8 @@ pub fn _mm256_cmpeq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3700,7 +3979,8 @@ pub fn _mm256_mask_cmpeq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<u8x16, _>(simd_eq(a.as_u8x16(), b.as_u8x16())) }
 }
 
@@ -3711,7 +3991,8 @@ pub fn _mm_cmpeq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epu8_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3722,7 +4003,8 @@ pub fn _mm_mask_cmpeq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<i16x32, _>(simd_eq(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -3733,7 +4015,8 @@ pub fn _mm512_cmpeq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3744,7 +4027,8 @@ pub fn _mm512_mask_cmpeq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<i16x16, _>(simd_eq(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -3755,7 +4039,8 @@ pub fn _mm256_cmpeq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3766,7 +4051,8 @@ pub fn _mm256_mask_cmpeq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -3777,7 +4063,8 @@ pub fn _mm_cmpeq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi16_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3788,7 +4075,8 @@ pub fn _mm_mask_cmpeq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<i8x64, _>(simd_eq(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -3799,7 +4087,8 @@ pub fn _mm512_cmpeq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3810,7 +4099,8 @@ pub fn _mm512_mask_cmpeq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<i8x32, _>(simd_eq(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -3821,7 +4111,8 @@ pub fn _mm256_cmpeq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3832,7 +4123,8 @@ pub fn _mm256_mask_cmpeq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -3843,7 +4135,8 @@ pub fn _mm_cmpeq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epi8_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -3854,7 +4147,8 @@ pub fn _mm_mask_cmpeq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<u16x32, _>(simd_ne(a.as_u16x32(), b.as_u16x32())) }
 }
 
@@ -3865,7 +4159,8 @@ pub fn _mm512_cmpneq_epu16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -3876,7 +4171,8 @@ pub fn _mm512_mask_cmpneq_epu16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> _
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<u16x16, _>(simd_ne(a.as_u16x16(), b.as_u16x16())) }
 }
 
@@ -3887,7 +4183,8 @@ pub fn _mm256_cmpneq_epu16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -3898,7 +4195,8 @@ pub fn _mm256_mask_cmpneq_epu16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> _
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u16x8, _>(simd_ne(a.as_u16x8(), b.as_u16x8())) }
 }
 
@@ -3909,7 +4207,8 @@ pub fn _mm_cmpneq_epu16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu16_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -3920,7 +4219,8 @@ pub fn _mm_mask_cmpneq_epu16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mma
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<u8x64, _>(simd_ne(a.as_u8x64(), b.as_u8x64())) }
 }
 
@@ -3931,7 +4231,8 @@ pub fn _mm512_cmpneq_epu8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -3942,7 +4243,8 @@ pub fn _mm512_mask_cmpneq_epu8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<u8x32, _>(simd_ne(a.as_u8x32(), b.as_u8x32())) }
 }
 
@@ -3953,7 +4255,8 @@ pub fn _mm256_cmpneq_epu8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -3964,7 +4267,8 @@ pub fn _mm256_mask_cmpneq_epu8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<u8x16, _>(simd_ne(a.as_u8x16(), b.as_u8x16())) }
 }
 
@@ -3975,7 +4279,8 @@ pub fn _mm_cmpneq_epu8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epu8_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -3986,7 +4291,8 @@ pub fn _mm_mask_cmpneq_epu8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mma
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe { simd_bitmask::<i16x32, _>(simd_ne(a.as_i16x32(), b.as_i16x32())) }
 }
 
@@ -3997,7 +4303,8 @@ pub fn _mm512_cmpneq_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     _mm512_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -4008,7 +4315,8 @@ pub fn _mm512_mask_cmpneq_epi16_mask(k1: __mmask32, a: __m512i, b: __m512i) -> _
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe { simd_bitmask::<i16x16, _>(simd_ne(a.as_i16x16(), b.as_i16x16())) }
 }
 
@@ -4019,7 +4327,8 @@ pub fn _mm256_cmpneq_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     _mm256_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -4030,7 +4339,8 @@ pub fn _mm256_mask_cmpneq_epi16_mask(k1: __mmask16, a: __m256i, b: __m256i) -> _
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i16x8, _>(simd_ne(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -4041,7 +4351,8 @@ pub fn _mm_cmpneq_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi16_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -4052,7 +4363,8 @@ pub fn _mm_mask_cmpneq_epi16_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mma
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe { simd_bitmask::<i8x64, _>(simd_ne(a.as_i8x64(), b.as_i8x64())) }
 }
 
@@ -4063,7 +4375,8 @@ pub fn _mm512_cmpneq_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     _mm512_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -4074,7 +4387,8 @@ pub fn _mm512_mask_cmpneq_epi8_mask(k1: __mmask64, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe { simd_bitmask::<i8x32, _>(simd_ne(a.as_i8x32(), b.as_i8x32())) }
 }
 
@@ -4085,7 +4399,8 @@ pub fn _mm256_cmpneq_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     _mm256_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -4096,7 +4411,8 @@ pub fn _mm256_mask_cmpneq_epi8_mask(k1: __mmask32, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe { simd_bitmask::<i8x16, _>(simd_ne(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -4107,7 +4423,8 @@ pub fn _mm_cmpneq_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))]
-pub fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     _mm_mask_cmp_epi8_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -4119,7 +4436,8 @@ pub fn _mm_mask_cmpneq_epi8_mask(k1: __mmask16, a: __m128i, b: __m128i) -> __mma
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_u16x32();
@@ -4146,7 +4464,8 @@ pub fn _mm512_cmp_epu16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
     k1: __mmask32,
     a: __m512i,
     b: __m512i,
@@ -4178,7 +4497,8 @@ pub fn _mm512_mask_cmp_epu16_mask<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_u16x16();
@@ -4205,7 +4525,8 @@ pub fn _mm256_cmp_epu16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
     k1: __mmask16,
     a: __m256i,
     b: __m256i,
@@ -4237,7 +4558,8 @@ pub fn _mm256_mask_cmp_epu16_mask<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_u16x8();
@@ -4264,7 +4586,12 @@ pub fn _mm_cmp_epu16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(
+    k1: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_u16x8();
@@ -4292,7 +4619,8 @@ pub fn _mm_mask_cmp_epu16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_u8x64();
@@ -4319,7 +4647,8 @@ pub fn _mm512_cmp_epu8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask6
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
     k1: __mmask64,
     a: __m512i,
     b: __m512i,
@@ -4351,7 +4680,8 @@ pub fn _mm512_mask_cmp_epu8_mask<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_u8x32();
@@ -4378,7 +4708,8 @@ pub fn _mm256_cmp_epu8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask3
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
     k1: __mmask32,
     a: __m256i,
     b: __m256i,
@@ -4410,7 +4741,8 @@ pub fn _mm256_mask_cmp_epu8_mask<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_u8x16();
@@ -4437,7 +4769,12 @@ pub fn _mm_cmp_epu8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(
+    k1: __mmask16,
+    a: __m128i,
+    b: __m128i,
+) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_u8x16();
@@ -4465,7 +4802,8 @@ pub fn _mm_mask_cmp_epu8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask32 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_i16x32();
@@ -4492,7 +4830,8 @@ pub fn _mm512_cmp_epi16_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
     k1: __mmask32,
     a: __m512i,
     b: __m512i,
@@ -4524,7 +4863,8 @@ pub fn _mm512_mask_cmp_epi16_mask<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_i16x16();
@@ -4551,7 +4891,8 @@ pub fn _mm256_cmp_epi16_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
     k1: __mmask16,
     a: __m256i,
     b: __m256i,
@@ -4583,7 +4924,8 @@ pub fn _mm256_mask_cmp_epi16_mask<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_i16x8();
@@ -4610,7 +4952,12 @@ pub fn _mm_cmp_epi16_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask8 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(
+    k1: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_i16x8();
@@ -4638,7 +4985,8 @@ pub fn _mm_mask_cmp_epi16_mask<const IMM8: i32>(k1: __mmask8, a: __m128i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask64 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_i8x64();
@@ -4665,7 +5013,8 @@ pub fn _mm512_cmp_epi8_mask<const IMM8: i32>(a: __m512i, b: __m512i) -> __mmask6
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
     k1: __mmask64,
     a: __m512i,
     b: __m512i,
@@ -4697,7 +5046,8 @@ pub fn _mm512_mask_cmp_epi8_mask<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask32 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_i8x32();
@@ -4724,7 +5074,8 @@ pub fn _mm256_cmp_epi8_mask<const IMM8: i32>(a: __m256i, b: __m256i) -> __mmask3
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
     k1: __mmask32,
     a: __m256i,
     b: __m256i,
@@ -4756,7 +5107,8 @@ pub fn _mm256_mask_cmp_epi8_mask<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_i8x16();
@@ -4783,7 +5135,12 @@ pub fn _mm_cmp_epi8_mask<const IMM8: i32>(a: __m128i, b: __m128i) -> __mmask16 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM8 = 0))]
-pub fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(
+    k1: __mmask16,
+    a: __m128i,
+    b: __m128i,
+) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 3);
         let a = a.as_i8x16();
@@ -4809,8 +5166,9 @@ pub fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
-    unsafe { simd_reduce_add_unordered(a.as_i16x16()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
+    unsafe { simd_reduce_add_ordered(a.as_i16x16(), 0) }
 }
 
 /// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4819,8 +5177,9 @@ pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
-    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
+    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO), 0) }
 }
 
 /// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
@@ -4829,8 +5188,9 @@ pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
-    unsafe { simd_reduce_add_unordered(a.as_i16x8()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
+    unsafe { simd_reduce_add_ordered(a.as_i16x8(), 0) }
 }
 
 /// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4839,8 +5199,9 @@ pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
-    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
+    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO), 0) }
 }
 
 /// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
@@ -4849,8 +5210,9 @@ pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
-    unsafe { simd_reduce_add_unordered(a.as_i8x32()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
+    unsafe { simd_reduce_add_ordered(a.as_i8x32(), 0) }
 }
 
 /// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4859,8 +5221,9 @@ pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
-    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
+    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO), 0) }
 }
 
 /// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
@@ -4869,8 +5232,9 @@ pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
-    unsafe { simd_reduce_add_unordered(a.as_i8x16()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
+    unsafe { simd_reduce_add_ordered(a.as_i8x16(), 0) }
 }
 
 /// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4879,8 +5243,9 @@ pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
-    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
+    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO), 0) }
 }
 
 /// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
@@ -4889,7 +5254,8 @@ pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
     unsafe { simd_reduce_and(a.as_i16x16()) }
 }
 
@@ -4899,7 +5265,8 @@ pub fn _mm256_reduce_and_epi16(a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
     unsafe {
         simd_reduce_and(simd_select_bitmask(
             k,
@@ -4915,7 +5282,8 @@ pub fn _mm256_mask_reduce_and_epi16(k: __mmask16, a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
     unsafe { simd_reduce_and(a.as_i16x8()) }
 }
 
@@ -4925,7 +5293,8 @@ pub fn _mm_reduce_and_epi16(a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
     unsafe {
         simd_reduce_and(simd_select_bitmask(
             k,
@@ -4941,7 +5310,8 @@ pub fn _mm_mask_reduce_and_epi16(k: __mmask8, a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
     unsafe { simd_reduce_and(a.as_i8x32()) }
 }
 
@@ -4951,7 +5321,8 @@ pub fn _mm256_reduce_and_epi8(a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
     unsafe {
         simd_reduce_and(simd_select_bitmask(
             k,
@@ -4967,7 +5338,8 @@ pub fn _mm256_mask_reduce_and_epi8(k: __mmask32, a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
     unsafe { simd_reduce_and(a.as_i8x16()) }
 }
 
@@ -4977,7 +5349,8 @@ pub fn _mm_reduce_and_epi8(a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
     unsafe {
         simd_reduce_and(simd_select_bitmask(
             k,
@@ -4993,7 +5366,8 @@ pub fn _mm_mask_reduce_and_epi8(k: __mmask16, a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
     unsafe { simd_reduce_max(a.as_i16x16()) }
 }
 
@@ -5003,7 +5377,8 @@ pub fn _mm256_reduce_max_epi16(a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(-32768))) }
 }
 
@@ -5013,7 +5388,8 @@ pub fn _mm256_mask_reduce_max_epi16(k: __mmask16, a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
     unsafe { simd_reduce_max(a.as_i16x8()) }
 }
 
@@ -5023,7 +5399,8 @@ pub fn _mm_reduce_max_epi16(a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(-32768))) }
 }
 
@@ -5033,7 +5410,8 @@ pub fn _mm_mask_reduce_max_epi16(k: __mmask8, a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
     unsafe { simd_reduce_max(a.as_i8x32()) }
 }
 
@@ -5043,7 +5421,8 @@ pub fn _mm256_reduce_max_epi8(a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(-128))) }
 }
 
@@ -5053,7 +5432,8 @@ pub fn _mm256_mask_reduce_max_epi8(k: __mmask32, a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
     unsafe { simd_reduce_max(a.as_i8x16()) }
 }
 
@@ -5063,7 +5443,8 @@ pub fn _mm_reduce_max_epi8(a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(-128))) }
 }
 
@@ -5073,7 +5454,8 @@ pub fn _mm_mask_reduce_max_epi8(k: __mmask16, a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
     unsafe { simd_reduce_max(a.as_u16x16()) }
 }
 
@@ -5083,7 +5465,8 @@ pub fn _mm256_reduce_max_epu16(a: __m256i) -> u16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x16(), u16x16::ZERO)) }
 }
 
@@ -5093,7 +5476,8 @@ pub fn _mm256_mask_reduce_max_epu16(k: __mmask16, a: __m256i) -> u16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
     unsafe { simd_reduce_max(a.as_u16x8()) }
 }
 
@@ -5103,7 +5487,8 @@ pub fn _mm_reduce_max_epu16(a: __m128i) -> u16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u16x8(), u16x8::ZERO)) }
 }
 
@@ -5113,7 +5498,8 @@ pub fn _mm_mask_reduce_max_epu16(k: __mmask8, a: __m128i) -> u16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
     unsafe { simd_reduce_max(a.as_u8x32()) }
 }
 
@@ -5123,7 +5509,8 @@ pub fn _mm256_reduce_max_epu8(a: __m256i) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x32(), u8x32::ZERO)) }
 }
 
@@ -5133,7 +5520,8 @@ pub fn _mm256_mask_reduce_max_epu8(k: __mmask32, a: __m256i) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
     unsafe { simd_reduce_max(a.as_u8x16()) }
 }
 
@@ -5143,7 +5531,8 @@ pub fn _mm_reduce_max_epu8(a: __m128i) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u8x16(), u8x16::ZERO)) }
 }
 
@@ -5153,7 +5542,8 @@ pub fn _mm_mask_reduce_max_epu8(k: __mmask16, a: __m128i) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
     unsafe { simd_reduce_min(a.as_i16x16()) }
 }
 
@@ -5163,7 +5553,8 @@ pub fn _mm256_reduce_min_epi16(a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(0x7fff))) }
 }
 
@@ -5173,7 +5564,8 @@ pub fn _mm256_mask_reduce_min_epi16(k: __mmask16, a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
     unsafe { simd_reduce_min(a.as_i16x8()) }
 }
 
@@ -5183,7 +5575,8 @@ pub fn _mm_reduce_min_epi16(a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(0x7fff))) }
 }
 
@@ -5193,7 +5586,8 @@ pub fn _mm_mask_reduce_min_epi16(k: __mmask8, a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
     unsafe { simd_reduce_min(a.as_i8x32()) }
 }
 
@@ -5203,7 +5597,8 @@ pub fn _mm256_reduce_min_epi8(a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(0x7f))) }
 }
 
@@ -5213,7 +5608,8 @@ pub fn _mm256_mask_reduce_min_epi8(k: __mmask32, a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
     unsafe { simd_reduce_min(a.as_i8x16()) }
 }
 
@@ -5223,7 +5619,8 @@ pub fn _mm_reduce_min_epi8(a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(0x7f))) }
 }
 
@@ -5233,7 +5630,8 @@ pub fn _mm_mask_reduce_min_epi8(k: __mmask16, a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
     unsafe { simd_reduce_min(a.as_u16x16()) }
 }
 
@@ -5243,7 +5641,8 @@ pub fn _mm256_reduce_min_epu16(a: __m256i) -> u16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x16(), u16x16::splat(0xffff))) }
 }
 
@@ -5253,7 +5652,8 @@ pub fn _mm256_mask_reduce_min_epu16(k: __mmask16, a: __m256i) -> u16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
     unsafe { simd_reduce_min(a.as_u16x8()) }
 }
 
@@ -5263,7 +5663,8 @@ pub fn _mm_reduce_min_epu16(a: __m128i) -> u16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u16x8(), u16x8::splat(0xffff))) }
 }
 
@@ -5273,7 +5674,8 @@ pub fn _mm_mask_reduce_min_epu16(k: __mmask8, a: __m128i) -> u16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
     unsafe { simd_reduce_min(a.as_u8x32()) }
 }
 
@@ -5283,7 +5685,8 @@ pub fn _mm256_reduce_min_epu8(a: __m256i) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x32(), u8x32::splat(0xff))) }
 }
 
@@ -5293,7 +5696,8 @@ pub fn _mm256_mask_reduce_min_epu8(k: __mmask32, a: __m256i) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
     unsafe { simd_reduce_min(a.as_u8x16()) }
 }
 
@@ -5303,7 +5707,8 @@ pub fn _mm_reduce_min_epu8(a: __m128i) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u8x16(), u8x16::splat(0xff))) }
 }
 
@@ -5313,8 +5718,9 @@ pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
-    unsafe { simd_reduce_mul_unordered(a.as_i16x16()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
+    unsafe { simd_reduce_mul_ordered(a.as_i16x16(), 1) }
 }
 
 /// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5323,8 +5729,9 @@ pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
-    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
+    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)), 1) }
 }
 
 /// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -5333,8 +5740,9 @@ pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
-    unsafe { simd_reduce_mul_unordered(a.as_i16x8()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
+    unsafe { simd_reduce_mul_ordered(a.as_i16x8(), 1) }
 }
 
 /// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5343,8 +5751,9 @@ pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
-    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
+    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)), 1) }
 }
 
 /// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -5353,8 +5762,9 @@ pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
-    unsafe { simd_reduce_mul_unordered(a.as_i8x32()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
+    unsafe { simd_reduce_mul_ordered(a.as_i8x32(), 1) }
 }
 
 /// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5363,8 +5773,9 @@ pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
-    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
+    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)), 1) }
 }
 
 /// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -5373,8 +5784,9 @@ pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
-    unsafe { simd_reduce_mul_unordered(a.as_i8x16()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
+    unsafe { simd_reduce_mul_ordered(a.as_i8x16(), 1) }
 }
 
 /// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5383,8 +5795,9 @@ pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
-    unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
+    unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)), 1) }
 }
 
 /// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
@@ -5393,7 +5806,8 @@ pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
     unsafe { simd_reduce_or(a.as_i16x16()) }
 }
 
@@ -5403,7 +5817,8 @@ pub fn _mm256_reduce_or_epi16(a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
     unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
 }
 
@@ -5413,7 +5828,8 @@ pub fn _mm256_mask_reduce_or_epi16(k: __mmask16, a: __m256i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
     unsafe { simd_reduce_or(a.as_i16x8()) }
 }
 
@@ -5423,7 +5839,8 @@ pub fn _mm_reduce_or_epi16(a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
     unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
 }
 
@@ -5433,7 +5850,8 @@ pub fn _mm_mask_reduce_or_epi16(k: __mmask8, a: __m128i) -> i16 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
     unsafe { simd_reduce_or(a.as_i8x32()) }
 }
 
@@ -5443,7 +5861,8 @@ pub fn _mm256_reduce_or_epi8(a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
     unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
 }
 
@@ -5453,7 +5872,8 @@ pub fn _mm256_mask_reduce_or_epi8(k: __mmask32, a: __m256i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
     unsafe { simd_reduce_or(a.as_i8x16()) }
 }
 
@@ -5463,7 +5883,8 @@ pub fn _mm_reduce_or_epi8(a: __m128i) -> i8 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
     unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
 }
 
@@ -5474,7 +5895,8 @@ pub fn _mm_mask_reduce_or_epi8(k: __mmask16, a: __m128i) -> i8 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
     ptr::read_unaligned(mem_addr as *const __m512i)
 }
 
@@ -5485,7 +5907,8 @@ pub unsafe fn _mm512_loadu_epi16(mem_addr: *const i16) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
     ptr::read_unaligned(mem_addr as *const __m256i)
 }
 
@@ -5496,7 +5919,8 @@ pub unsafe fn _mm256_loadu_epi16(mem_addr: *const i16) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
     ptr::read_unaligned(mem_addr as *const __m128i)
 }
 
@@ -5507,7 +5931,8 @@ pub unsafe fn _mm_loadu_epi16(mem_addr: *const i16) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
     ptr::read_unaligned(mem_addr as *const __m512i)
 }
 
@@ -5518,7 +5943,8 @@ pub unsafe fn _mm512_loadu_epi8(mem_addr: *const i8) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
     ptr::read_unaligned(mem_addr as *const __m256i)
 }
 
@@ -5529,7 +5955,8 @@ pub unsafe fn _mm256_loadu_epi8(mem_addr: *const i8) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
     ptr::read_unaligned(mem_addr as *const __m128i)
 }
 
@@ -5540,7 +5967,8 @@ pub unsafe fn _mm_loadu_epi8(mem_addr: *const i8) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
     ptr::write_unaligned(mem_addr as *mut __m512i, a);
 }
 
@@ -5551,7 +5979,8 @@ pub unsafe fn _mm512_storeu_epi16(mem_addr: *mut i16, a: __m512i) {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
     ptr::write_unaligned(mem_addr as *mut __m256i, a);
 }
 
@@ -5562,7 +5991,8 @@ pub unsafe fn _mm256_storeu_epi16(mem_addr: *mut i16, a: __m256i) {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu16
-pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
     ptr::write_unaligned(mem_addr as *mut __m128i, a);
 }
 
@@ -5573,7 +6003,8 @@ pub unsafe fn _mm_storeu_epi16(mem_addr: *mut i16, a: __m128i) {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
     ptr::write_unaligned(mem_addr as *mut __m512i, a);
 }
 
@@ -5584,7 +6015,8 @@ pub unsafe fn _mm512_storeu_epi8(mem_addr: *mut i8, a: __m512i) {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
     ptr::write_unaligned(mem_addr as *mut __m256i, a);
 }
 
@@ -5595,7 +6027,8 @@ pub unsafe fn _mm256_storeu_epi8(mem_addr: *mut i8, a: __m256i) {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu8
-pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
     ptr::write_unaligned(mem_addr as *mut __m128i, a);
 }
 
@@ -5608,8 +6041,14 @@ pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
 #[target_feature(enable = "avx512bw")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
-    transmute(loaddqu16_512(mem_addr, src.as_i16x32(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_loadu_epi16(
+    src: __m512i,
+    k: __mmask32,
+    mem_addr: *const i16,
+) -> __m512i {
+    let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
 }
 
 /// Load packed 16-bit integers from memory into dst using zeromask k
@@ -5621,7 +6060,8 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con
 #[target_feature(enable = "avx512bw")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __m512i {
     _mm512_mask_loadu_epi16(_mm512_setzero_si512(), k, mem_addr)
 }
 
@@ -5634,8 +6074,14 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
 #[target_feature(enable = "avx512bw")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
-    transmute(loaddqu8_512(mem_addr, src.as_i8x64(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_loadu_epi8(
+    src: __m512i,
+    k: __mmask64,
+    mem_addr: *const i8,
+) -> __m512i {
+    let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
 }
 
 /// Load packed 8-bit integers from memory into dst using zeromask k
@@ -5647,7 +6093,8 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons
 #[target_feature(enable = "avx512bw")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m512i {
     _mm512_mask_loadu_epi8(_mm512_setzero_si512(), k, mem_addr)
 }
 
@@ -5660,8 +6107,14 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
-    transmute(loaddqu16_256(mem_addr, src.as_i16x16(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_loadu_epi16(
+    src: __m256i,
+    k: __mmask16,
+    mem_addr: *const i16,
+) -> __m256i {
+    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
 }
 
 /// Load packed 16-bit integers from memory into dst using zeromask k
@@ -5673,7 +6126,8 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __m256i {
     _mm256_mask_loadu_epi16(_mm256_setzero_si256(), k, mem_addr)
 }
 
@@ -5686,8 +6140,14 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
-    transmute(loaddqu8_256(mem_addr, src.as_i8x32(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_loadu_epi8(
+    src: __m256i,
+    k: __mmask32,
+    mem_addr: *const i8,
+) -> __m256i {
+    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
 }
 
 /// Load packed 8-bit integers from memory into dst using zeromask k
@@ -5699,7 +6159,8 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m256i {
     _mm256_mask_loadu_epi8(_mm256_setzero_si256(), k, mem_addr)
 }
 
@@ -5712,8 +6173,14 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
-    transmute(loaddqu16_128(mem_addr, src.as_i16x8(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_loadu_epi16(
+    src: __m128i,
+    k: __mmask8,
+    mem_addr: *const i16,
+) -> __m128i {
+    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
 }
 
 /// Load packed 16-bit integers from memory into dst using zeromask k
@@ -5725,7 +6192,8 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128i {
     _mm_mask_loadu_epi16(_mm_setzero_si128(), k, mem_addr)
 }
 
@@ -5738,8 +6206,14 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
-    transmute(loaddqu8_128(mem_addr, src.as_i8x16(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_loadu_epi8(
+    src: __m128i,
+    k: __mmask16,
+    mem_addr: *const i8,
+) -> __m128i {
+    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
 }
 
 /// Load packed 8-bit integers from memory into dst using zeromask k
@@ -5751,7 +6225,8 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i {
     _mm_mask_loadu_epi8(_mm_setzero_si128(), k, mem_addr)
 }
 
@@ -5763,8 +6238,10 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
 #[target_feature(enable = "avx512bw")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
-    storedqu16_512(mem_addr, a.as_i16x32(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
+    let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
 }
 
 /// Store packed 8-bit integers from a into memory using writemask k.
@@ -5775,8 +6252,10 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
 #[target_feature(enable = "avx512bw")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
-    storedqu8_512(mem_addr, a.as_i8x64(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
+    let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
 }
 
 /// Store packed 16-bit integers from a into memory using writemask k.
@@ -5787,8 +6266,10 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
-    storedqu16_256(mem_addr, a.as_i16x16(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
+    let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
 }
 
 /// Store packed 8-bit integers from a into memory using writemask k.
@@ -5799,8 +6280,10 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
-    storedqu8_256(mem_addr, a.as_i8x32(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
+    let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
 }
 
 /// Store packed 16-bit integers from a into memory using writemask k.
@@ -5811,8 +6294,10 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
-    storedqu16_128(mem_addr, a.as_i16x8(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
+    let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
 }
 
 /// Store packed 8-bit integers from a into memory using writemask k.
@@ -5823,8 +6308,10 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
-    storedqu8_128(mem_addr, a.as_i8x16(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
+    let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
 }
 
 /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
@@ -5835,20 +6322,19 @@ pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaddwd))]
 pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let r: i32x32 = simd_mul(simd_cast(a.as_i16x32()), simd_cast(b.as_i16x32()));
-        let even: i32x16 = simd_shuffle!(
-            r,
-            r,
-            [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
-        );
-        let odd: i32x16 = simd_shuffle!(
-            r,
-            r,
-            [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
-        );
-        simd_add(even, odd).as_m512i()
-    }
+    // It's a trick used in the Adler-32 algorithm to perform a widening addition.
+    //
+    // ```rust
+    // #[target_feature(enable = "avx512bw")]
+    // unsafe fn widening_add(mad: __m512i) -> __m512i {
+    //     _mm512_madd_epi16(mad, _mm512_set1_epi16(1))
+    // }
+    // ```
+    //
+    // If we implement this using generic vector intrinsics, the optimizer
+    // will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
+    // For this reason, we use x86 intrinsics.
+    unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
 }
 
 /// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -5943,7 +6429,7 @@ pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaddubsw))]
 pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) }
+    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
 }
 
 /// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6037,8 +6523,34 @@ pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let max = simd_splat(i16::MAX as i32);
+        let min = simd_splat(i16::MIN as i32);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min)
+            .as_m512i()
+            .as_i16x32();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x16(), max), min)
+            .as_m512i()
+            .as_i16x32();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 32] = [
+            00, 02, 04, 06,
+            32, 34, 36, 38,
+            08, 10, 12, 14,
+            40, 42, 44, 46,
+            16, 18, 20, 22,
+            48, 50, 52, 54,
+            24, 26, 28, 30,
+            56, 58, 60, 62,
+        ];
+        let result: i16x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m512i()
+    }
 }
 
 /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6048,7 +6560,13 @@ pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_packs_epi32(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let pack = _mm512_packs_epi32(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
@@ -6062,7 +6580,8 @@ pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let pack = _mm512_packs_epi32(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
@@ -6076,7 +6595,13 @@ pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_packs_epi32(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let pack = _mm256_packs_epi32(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
@@ -6104,7 +6629,8 @@ pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packs_epi32(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
@@ -6118,7 +6644,8 @@ pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackssdw))]
-pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packs_epi32(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
@@ -6132,8 +6659,34 @@ pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let max = simd_splat(i8::MAX as i16);
+        let min = simd_splat(i8::MIN as i16);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min)
+            .as_m512i()
+            .as_i8x64();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x32(), max), min)
+            .as_m512i()
+            .as_i8x64();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 64] = [
+            000, 002, 004, 006, 008, 010, 012, 014,
+            064, 066, 068, 070, 072, 074, 076, 078,
+            016, 018, 020, 022, 024, 026, 028, 030,
+            080, 082, 084, 086, 088, 090, 092, 094,
+            032, 034, 036, 038, 040, 042, 044, 046,
+            096, 098, 100, 102, 104, 106, 108, 110,
+            048, 050, 052, 054, 056, 058, 060, 062,
+            112, 114, 116, 118, 120, 122, 124, 126,
+        ];
+        let result: i8x64 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m512i()
+    }
 }
 
 /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6143,7 +6696,13 @@ pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_packs_epi16(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let pack = _mm512_packs_epi16(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
@@ -6157,7 +6716,8 @@ pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let pack = _mm512_packs_epi16(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
@@ -6171,7 +6731,13 @@ pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_packs_epi16(
+    src: __m256i,
+    k: __mmask32,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let pack = _mm256_packs_epi16(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
@@ -6185,7 +6751,8 @@ pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let pack = _mm256_packs_epi16(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
@@ -6199,7 +6766,8 @@ pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packs_epi16(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
@@ -6213,7 +6781,8 @@ pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpacksswb))]
-pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packs_epi16(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
@@ -6227,8 +6796,34 @@ pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let max = simd_splat(u16::MAX as i32);
+        let min = simd_splat(u16::MIN as i32);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min)
+            .as_m512i()
+            .as_i16x32();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x16(), max), min)
+            .as_m512i()
+            .as_i16x32();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 32] = [
+            00, 02, 04, 06,
+            32, 34, 36, 38,
+            08, 10, 12, 14,
+            40, 42, 44, 46,
+            16, 18, 20, 22,
+            48, 50, 52, 54,
+            24, 26, 28, 30,
+            56, 58, 60, 62,
+        ];
+        let result: i16x32 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m512i()
+    }
 }
 
 /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6238,7 +6833,13 @@ pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_packus_epi32(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let pack = _mm512_packus_epi32(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, pack, src.as_i16x32()))
@@ -6252,7 +6853,8 @@ pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m51
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let pack = _mm512_packus_epi32(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, pack, i16x32::ZERO))
@@ -6266,7 +6868,13 @@ pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_packus_epi32(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let pack = _mm256_packus_epi32(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, pack, src.as_i16x16()))
@@ -6280,7 +6888,8 @@ pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m25
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let pack = _mm256_packus_epi32(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, pack, i16x16::ZERO))
@@ -6294,7 +6903,8 @@ pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packus_epi32(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, pack, src.as_i16x8()))
@@ -6308,7 +6918,8 @@ pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackusdw))]
-pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packus_epi32(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, pack, i16x8::ZERO))
@@ -6322,8 +6933,34 @@ pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
+    unsafe {
+        let max = simd_splat(u8::MAX as i16);
+        let min = simd_splat(u8::MIN as i16);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min)
+            .as_m512i()
+            .as_i8x64();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x32(), max), min)
+            .as_m512i()
+            .as_i8x64();
+
+        #[rustfmt::skip]
+        const IDXS: [u32; 64] = [
+            000, 002, 004, 006, 008, 010, 012, 014,
+            064, 066, 068, 070, 072, 074, 076, 078,
+            016, 018, 020, 022, 024, 026, 028, 030,
+            080, 082, 084, 086, 088, 090, 092, 094,
+            032, 034, 036, 038, 040, 042, 044, 046,
+            096, 098, 100, 102, 104, 106, 108, 110,
+            048, 050, 052, 054, 056, 058, 060, 062,
+            112, 114, 116, 118, 120, 122, 124, 126,
+        ];
+        let result: i8x64 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m512i()
+    }
 }
 
 /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6333,7 +6970,13 @@ pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_packus_epi16(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let pack = _mm512_packus_epi16(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, pack, src.as_i8x64()))
@@ -6347,7 +6990,8 @@ pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m51
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let pack = _mm512_packus_epi16(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, pack, i8x64::ZERO))
@@ -6361,7 +7005,13 @@ pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_packus_epi16(
+    src: __m256i,
+    k: __mmask32,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let pack = _mm256_packus_epi16(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, pack, src.as_i8x32()))
@@ -6375,7 +7025,8 @@ pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m25
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let pack = _mm256_packus_epi16(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, pack, i8x32::ZERO))
@@ -6389,7 +7040,8 @@ pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packus_epi16(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, pack, src.as_i8x16()))
@@ -6403,7 +7055,8 @@ pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpackuswb))]
-pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let pack = _mm_packus_epi16(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, pack, i8x16::ZERO))
@@ -6417,7 +7070,8 @@ pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgw))]
-pub fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = simd_cast::<_, u32x32>(a.as_u16x32());
         let b = simd_cast::<_, u32x32>(b.as_u16x32());
@@ -6433,7 +7087,8 @@ pub fn _mm512_avg_epu16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgw))]
-pub fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let avg = _mm512_avg_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, avg, src.as_u16x32()))
@@ -6447,7 +7102,8 @@ pub fn _mm512_mask_avg_epu16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgw))]
-pub fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let avg = _mm512_avg_epu16(a, b).as_u16x32();
         transmute(simd_select_bitmask(k, avg, u16x32::ZERO))
@@ -6461,7 +7117,8 @@ pub fn _mm512_maskz_avg_epu16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgw))]
-pub fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let avg = _mm256_avg_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, avg, src.as_u16x16()))
@@ -6475,7 +7132,8 @@ pub fn _mm256_mask_avg_epu16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgw))]
-pub fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let avg = _mm256_avg_epu16(a, b).as_u16x16();
         transmute(simd_select_bitmask(k, avg, u16x16::ZERO))
@@ -6489,7 +7147,8 @@ pub fn _mm256_maskz_avg_epu16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgw))]
-pub fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let avg = _mm_avg_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, avg, src.as_u16x8()))
@@ -6503,7 +7162,8 @@ pub fn _mm_mask_avg_epu16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgw))]
-pub fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let avg = _mm_avg_epu16(a, b).as_u16x8();
         transmute(simd_select_bitmask(k, avg, u16x8::ZERO))
@@ -6517,7 +7177,8 @@ pub fn _mm_maskz_avg_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgb))]
-pub fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = simd_cast::<_, u16x64>(a.as_u8x64());
         let b = simd_cast::<_, u16x64>(b.as_u8x64());
@@ -6533,7 +7194,8 @@ pub fn _mm512_avg_epu8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgb))]
-pub fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let avg = _mm512_avg_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, avg, src.as_u8x64()))
@@ -6547,7 +7209,8 @@ pub fn _mm512_mask_avg_epu8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgb))]
-pub fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let avg = _mm512_avg_epu8(a, b).as_u8x64();
         transmute(simd_select_bitmask(k, avg, u8x64::ZERO))
@@ -6561,7 +7224,8 @@ pub fn _mm512_maskz_avg_epu8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgb))]
-pub fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let avg = _mm256_avg_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, avg, src.as_u8x32()))
@@ -6575,7 +7239,8 @@ pub fn _mm256_mask_avg_epu8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgb))]
-pub fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let avg = _mm256_avg_epu8(a, b).as_u8x32();
         transmute(simd_select_bitmask(k, avg, u8x32::ZERO))
@@ -6589,7 +7254,8 @@ pub fn _mm256_maskz_avg_epu8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgb))]
-pub fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let avg = _mm_avg_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, avg, src.as_u8x16()))
@@ -6603,7 +7269,8 @@ pub fn _mm_mask_avg_epu8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpavgb))]
-pub fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_avg_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let avg = _mm_avg_epu8(a, b).as_u8x16();
         transmute(simd_select_bitmask(k, avg, u8x16::ZERO))
@@ -6713,7 +7380,8 @@ pub fn _mm_maskz_sll_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 16 {
@@ -6732,7 +7400,12 @@ pub fn _mm512_slli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_slli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_slli_epi16<const IMM8: u32>(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = if IMM8 >= 16 {
@@ -6752,7 +7425,8 @@ pub fn _mm512_mask_slli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 16 {
@@ -6772,7 +7446,12 @@ pub fn _mm512_maskz_slli_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_slli_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_slli_epi16<const IMM8: u32>(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = if IMM8 >= 16 {
@@ -6792,7 +7471,8 @@ pub fn _mm256_mask_slli_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 16 {
@@ -6812,7 +7492,12 @@ pub fn _mm256_maskz_slli_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_slli_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_slli_epi16<const IMM8: u32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = if IMM8 >= 16 {
@@ -6832,7 +7517,8 @@ pub fn _mm_mask_slli_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 16 {
@@ -6851,8 +7537,14 @@ pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvw))]
-pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
-    unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let count = count.as_u16x32();
+        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
+        let count = simd_select(no_overflow, count, u16x32::ZERO);
+        simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
+    }
 }
 
 /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6862,7 +7554,13 @@ pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvw))]
-pub fn _mm512_mask_sllv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sllv_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_sllv_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
@@ -6876,7 +7574,8 @@ pub fn _mm512_mask_sllv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvw))]
-pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_sllv_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
@@ -6890,8 +7589,14 @@ pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvw))]
-pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let count = count.as_u16x16();
+        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
+        let count = simd_select(no_overflow, count, u16x16::ZERO);
+        simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
+    }
 }
 
 /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6901,7 +7606,13 @@ pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvw))]
-pub fn _mm256_mask_sllv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sllv_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    count: __m256i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_sllv_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
@@ -6915,7 +7626,8 @@ pub fn _mm256_mask_sllv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvw))]
-pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_sllv_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
@@ -6929,8 +7641,14 @@ pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvw))]
-pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let count = count.as_u16x8();
+        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
+        let count = simd_select(no_overflow, count, u16x8::ZERO);
+        simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
+    }
 }
 
 /// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6940,7 +7658,8 @@ pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvw))]
-pub fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sllv_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
@@ -6954,7 +7673,8 @@ pub fn _mm_mask_sllv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvw))]
-pub fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sllv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sllv_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
@@ -7064,7 +7784,8 @@ pub fn _mm_maskz_srl_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 16 {
@@ -7083,7 +7804,12 @@ pub fn _mm512_srli_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_srli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srli_epi16<const IMM8: u32>(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = if IMM8 >= 16 {
@@ -7103,7 +7829,8 @@ pub fn _mm512_mask_srli_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         //imm8 should be u32, it seems the document to verify is incorrect
@@ -7124,7 +7851,12 @@ pub fn _mm512_maskz_srli_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_srli_epi16<const IMM8: i32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srli_epi16<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm256_srli_epi16::<IMM8>(a);
@@ -7140,7 +7872,8 @@ pub fn _mm256_mask_srli_epi16<const IMM8: i32>(src: __m256i, k: __mmask16, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm256_srli_epi16::<IMM8>(a);
@@ -7156,7 +7889,12 @@ pub fn _mm256_maskz_srli_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_srli_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srli_epi16<const IMM8: i32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm_srli_epi16::<IMM8>(a);
@@ -7172,7 +7910,8 @@ pub fn _mm_mask_srli_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm_srli_epi16::<IMM8>(a);
@@ -7187,8 +7926,14 @@ pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvw))]
-pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
-    unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let count = count.as_u16x32();
+        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
+        let count = simd_select(no_overflow, count, u16x32::ZERO);
+        simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
+    }
 }
 
 /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7198,7 +7943,13 @@ pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvw))]
-pub fn _mm512_mask_srlv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srlv_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_srlv_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
@@ -7212,7 +7963,8 @@ pub fn _mm512_mask_srlv_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvw))]
-pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_srlv_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
@@ -7226,8 +7978,14 @@ pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvw))]
-pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let count = count.as_u16x16();
+        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
+        let count = simd_select(no_overflow, count, u16x16::ZERO);
+        simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
+    }
 }
 
 /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7237,7 +7995,13 @@ pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvw))]
-pub fn _mm256_mask_srlv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srlv_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    count: __m256i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_srlv_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
@@ -7251,7 +8015,8 @@ pub fn _mm256_mask_srlv_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvw))]
-pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_srlv_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
@@ -7265,8 +8030,14 @@ pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvw))]
-pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let count = count.as_u16x8();
+        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
+        let count = simd_select(no_overflow, count, u16x8::ZERO);
+        simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
+    }
 }
 
 /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7276,7 +8047,8 @@ pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvw))]
-pub fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srlv_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
@@ -7290,7 +8062,8 @@ pub fn _mm_mask_srlv_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvw))]
-pub fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srlv_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srlv_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
@@ -7400,7 +8173,8 @@ pub fn _mm_maskz_sra_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         transmute(simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16)))
@@ -7415,7 +8189,12 @@ pub fn _mm512_srai_epi16<const IMM8: u32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_srai_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srai_epi16<const IMM8: u32>(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
@@ -7431,7 +8210,8 @@ pub fn _mm512_mask_srai_epi16<const IMM8: u32>(src: __m512i, k: __mmask32, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = simd_shr(a.as_i16x32(), i16x32::splat(IMM8.min(15) as i16));
@@ -7447,7 +8227,12 @@ pub fn _mm512_maskz_srai_epi16<const IMM8: u32>(k: __mmask32, a: __m512i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_srai_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srai_epi16<const IMM8: u32>(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
@@ -7463,7 +8248,8 @@ pub fn _mm256_mask_srai_epi16<const IMM8: u32>(src: __m256i, k: __mmask16, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = simd_shr(a.as_i16x16(), i16x16::splat(IMM8.min(15) as i16));
@@ -7479,7 +8265,12 @@ pub fn _mm256_maskz_srai_epi16<const IMM8: u32>(k: __mmask16, a: __m256i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_srai_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srai_epi16<const IMM8: u32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
@@ -7495,7 +8286,8 @@ pub fn _mm_mask_srai_epi16<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraw, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16));
@@ -7510,8 +8302,14 @@ pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
-pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
-    unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let count = count.as_u16x32();
+        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
+        let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
+        simd_shr(a.as_i16x32(), count).as_m512i()
+    }
 }
 
 /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7521,7 +8319,13 @@ pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
-pub fn _mm512_mask_srav_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srav_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_srav_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, src.as_i16x32()))
@@ -7535,7 +8339,8 @@ pub fn _mm512_mask_srav_epi16(src: __m512i, k: __mmask32, a: __m512i, count: __m
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
-pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_srav_epi16(a, count).as_i16x32();
         transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
@@ -7549,8 +8354,14 @@ pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
-pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let count = count.as_u16x16();
+        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
+        let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
+        simd_shr(a.as_i16x16(), count).as_m256i()
+    }
 }
 
 /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7560,7 +8371,13 @@ pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
-pub fn _mm256_mask_srav_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srav_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    count: __m256i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_srav_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, src.as_i16x16()))
@@ -7574,7 +8391,8 @@ pub fn _mm256_mask_srav_epi16(src: __m256i, k: __mmask16, a: __m256i, count: __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
-pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_srav_epi16(a, count).as_i16x16();
         transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
@@ -7588,8 +8406,14 @@ pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
-pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let count = count.as_u16x8();
+        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
+        let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
+        simd_shr(a.as_i16x8(), count).as_m128i()
+    }
 }
 
 /// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -7599,7 +8423,8 @@ pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
-pub fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srav_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, src.as_i16x8()))
@@ -7613,7 +8438,8 @@ pub fn _mm_mask_srav_epi16(src: __m128i, k: __mmask8, a: __m128i, count: __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravw))]
-pub fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srav_epi16(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srav_epi16(a, count).as_i16x8();
         transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
@@ -7943,7 +8769,8 @@ pub fn _mm_maskz_permutexvar_epi16(k: __mmask8, idx: __m128i, a: __m128i) -> __m
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
-pub fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i16x32(), a.as_i16x32())) }
 }
 
@@ -7954,7 +8781,8 @@ pub fn _mm512_mask_blend_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
-pub fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i16x16(), a.as_i16x16())) }
 }
 
@@ -7965,7 +8793,8 @@ pub fn _mm256_mask_blend_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu16))] //should be vpblendmw
-pub fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i16x8(), a.as_i16x8())) }
 }
 
@@ -7976,7 +8805,8 @@ pub fn _mm_mask_blend_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
-pub fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i8x64(), a.as_i8x64())) }
 }
 
@@ -7987,7 +8817,8 @@ pub fn _mm512_mask_blend_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
-pub fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i8x32(), a.as_i8x32())) }
 }
 
@@ -7998,7 +8829,8 @@ pub fn _mm256_mask_blend_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu8))] //should be vpblendmb
-pub fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i8x16(), a.as_i8x16())) }
 }
 
@@ -8009,7 +8841,8 @@ pub fn _mm_mask_blend_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
     unsafe {
         let a = _mm512_castsi128_si512(a).as_i16x32();
         let ret: i16x32 = simd_shuffle!(
@@ -8031,7 +8864,8 @@ pub fn _mm512_broadcastw_epi16(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, broadcast, src.as_i16x32()))
@@ -8045,7 +8879,8 @@ pub fn _mm512_mask_broadcastw_epi16(src: __m512i, k: __mmask32, a: __m128i) -> _
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcastw_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, broadcast, i16x32::ZERO))
@@ -8059,7 +8894,8 @@ pub fn _mm512_maskz_broadcastw_epi16(k: __mmask32, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, broadcast, src.as_i16x16()))
@@ -8073,7 +8909,8 @@ pub fn _mm256_mask_broadcastw_epi16(src: __m256i, k: __mmask16, a: __m128i) -> _
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcastw_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, broadcast, i16x16::ZERO))
@@ -8087,7 +8924,8 @@ pub fn _mm256_maskz_broadcastw_epi16(k: __mmask16, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, broadcast, src.as_i16x8()))
@@ -8101,7 +8939,8 @@ pub fn _mm_mask_broadcastw_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m12
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let broadcast = _mm_broadcastw_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, broadcast, i16x8::ZERO))
@@ -8115,7 +8954,8 @@ pub fn _mm_maskz_broadcastw_epi16(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
-pub fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
     unsafe {
         let a = _mm512_castsi128_si512(a).as_i8x64();
         let ret: i8x64 = simd_shuffle!(
@@ -8138,7 +8978,8 @@ pub fn _mm512_broadcastb_epi8(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
-pub fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
         transmute(simd_select_bitmask(k, broadcast, src.as_i8x64()))
@@ -8152,7 +8993,8 @@ pub fn _mm512_mask_broadcastb_epi8(src: __m512i, k: __mmask64, a: __m128i) -> __
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
-pub fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcastb_epi8(a).as_i8x64();
         transmute(simd_select_bitmask(k, broadcast, i8x64::ZERO))
@@ -8166,7 +9008,8 @@ pub fn _mm512_maskz_broadcastb_epi8(k: __mmask64, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
-pub fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
         transmute(simd_select_bitmask(k, broadcast, src.as_i8x32()))
@@ -8180,7 +9023,8 @@ pub fn _mm256_mask_broadcastb_epi8(src: __m256i, k: __mmask32, a: __m128i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
-pub fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcastb_epi8(a).as_i8x32();
         transmute(simd_select_bitmask(k, broadcast, i8x32::ZERO))
@@ -8194,7 +9038,8 @@ pub fn _mm256_maskz_broadcastb_epi8(k: __mmask32, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
-pub fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
     unsafe {
         let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, broadcast, src.as_i8x16()))
@@ -8208,7 +9053,8 @@ pub fn _mm_mask_broadcastb_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m12
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastb))]
-pub fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
     unsafe {
         let broadcast = _mm_broadcastb_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, broadcast, i8x16::ZERO))
@@ -8222,7 +9068,8 @@ pub fn _mm_maskz_broadcastb_epi8(k: __mmask16, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
-pub fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i16x32();
         let b = b.as_i16x32();
@@ -8252,7 +9099,13 @@ pub fn _mm512_unpackhi_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
-pub fn _mm512_mask_unpackhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpackhi_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i16x32()))
@@ -8266,7 +9119,8 @@ pub fn _mm512_mask_unpackhi_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
-pub fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let unpackhi = _mm512_unpackhi_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, unpackhi, i16x32::ZERO))
@@ -8280,7 +9134,13 @@ pub fn _mm512_maskz_unpackhi_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m5
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
-pub fn _mm256_mask_unpackhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpackhi_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i16x16()))
@@ -8294,7 +9154,8 @@ pub fn _mm256_mask_unpackhi_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
-pub fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let unpackhi = _mm256_unpackhi_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, unpackhi, i16x16::ZERO))
@@ -8308,7 +9169,8 @@ pub fn _mm256_maskz_unpackhi_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
-pub fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i16x8()))
@@ -8322,7 +9184,8 @@ pub fn _mm_mask_unpackhi_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhwd))]
-pub fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpackhi = _mm_unpackhi_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, unpackhi, i16x8::ZERO))
@@ -8336,7 +9199,8 @@ pub fn _mm_maskz_unpackhi_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
-pub fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i8x64();
         let b = b.as_i8x64();
@@ -8374,7 +9238,13 @@ pub fn _mm512_unpackhi_epi8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
-pub fn _mm512_mask_unpackhi_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpackhi_epi8(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i8x64()))
@@ -8388,7 +9258,8 @@ pub fn _mm512_mask_unpackhi_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m5
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
-pub fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let unpackhi = _mm512_unpackhi_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, unpackhi, i8x64::ZERO))
@@ -8402,7 +9273,13 @@ pub fn _mm512_maskz_unpackhi_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m51
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
-pub fn _mm256_mask_unpackhi_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpackhi_epi8(
+    src: __m256i,
+    k: __mmask32,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i8x32()))
@@ -8416,7 +9293,8 @@ pub fn _mm256_mask_unpackhi_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
-pub fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let unpackhi = _mm256_unpackhi_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, unpackhi, i8x32::ZERO))
@@ -8430,7 +9308,8 @@ pub fn _mm256_maskz_unpackhi_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m25
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
-pub fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i8x16()))
@@ -8444,7 +9323,8 @@ pub fn _mm_mask_unpackhi_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhbw))]
-pub fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpackhi = _mm_unpackhi_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, unpackhi, i8x16::ZERO))
@@ -8458,7 +9338,8 @@ pub fn _mm_maskz_unpackhi_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
-pub fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i16x32();
         let b = b.as_i16x32();
@@ -8488,7 +9369,13 @@ pub fn _mm512_unpacklo_epi16(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
-pub fn _mm512_mask_unpacklo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpacklo_epi16(
+    src: __m512i,
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i16x32()))
@@ -8502,7 +9389,8 @@ pub fn _mm512_mask_unpacklo_epi16(src: __m512i, k: __mmask32, a: __m512i, b: __m
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
-pub fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let unpacklo = _mm512_unpacklo_epi16(a, b).as_i16x32();
         transmute(simd_select_bitmask(k, unpacklo, i16x32::ZERO))
@@ -8516,7 +9404,13 @@ pub fn _mm512_maskz_unpacklo_epi16(k: __mmask32, a: __m512i, b: __m512i) -> __m5
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
-pub fn _mm256_mask_unpacklo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpacklo_epi16(
+    src: __m256i,
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i16x16()))
@@ -8530,7 +9424,8 @@ pub fn _mm256_mask_unpacklo_epi16(src: __m256i, k: __mmask16, a: __m256i, b: __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
-pub fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let unpacklo = _mm256_unpacklo_epi16(a, b).as_i16x16();
         transmute(simd_select_bitmask(k, unpacklo, i16x16::ZERO))
@@ -8544,7 +9439,8 @@ pub fn _mm256_maskz_unpacklo_epi16(k: __mmask16, a: __m256i, b: __m256i) -> __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
-pub fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i16x8()))
@@ -8558,7 +9454,8 @@ pub fn _mm_mask_unpacklo_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklwd))]
-pub fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpacklo = _mm_unpacklo_epi16(a, b).as_i16x8();
         transmute(simd_select_bitmask(k, unpacklo, i16x8::ZERO))
@@ -8572,7 +9469,8 @@ pub fn _mm_maskz_unpacklo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
-pub fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i8x64();
         let b = b.as_i8x64();
@@ -8610,7 +9508,13 @@ pub fn _mm512_unpacklo_epi8(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
-pub fn _mm512_mask_unpacklo_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpacklo_epi8(
+    src: __m512i,
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i8x64()))
@@ -8624,7 +9528,8 @@ pub fn _mm512_mask_unpacklo_epi8(src: __m512i, k: __mmask64, a: __m512i, b: __m5
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
-pub fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let unpacklo = _mm512_unpacklo_epi8(a, b).as_i8x64();
         transmute(simd_select_bitmask(k, unpacklo, i8x64::ZERO))
@@ -8638,7 +9543,13 @@ pub fn _mm512_maskz_unpacklo_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m51
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
-pub fn _mm256_mask_unpacklo_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpacklo_epi8(
+    src: __m256i,
+    k: __mmask32,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i8x32()))
@@ -8652,7 +9563,8 @@ pub fn _mm256_mask_unpacklo_epi8(src: __m256i, k: __mmask32, a: __m256i, b: __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
-pub fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let unpacklo = _mm256_unpacklo_epi8(a, b).as_i8x32();
         transmute(simd_select_bitmask(k, unpacklo, i8x32::ZERO))
@@ -8666,7 +9578,8 @@ pub fn _mm256_maskz_unpacklo_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m25
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
-pub fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i8x16()))
@@ -8680,7 +9593,8 @@ pub fn _mm_mask_unpacklo_epi8(src: __m128i, k: __mmask16, a: __m128i, b: __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklbw))]
-pub fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpacklo = _mm_unpacklo_epi8(a, b).as_i8x16();
         transmute(simd_select_bitmask(k, unpacklo, i8x16::ZERO))
@@ -8694,7 +9608,8 @@ pub fn _mm_maskz_unpacklo_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
-pub fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         let mov = a.as_i16x32();
         transmute(simd_select_bitmask(k, mov, src.as_i16x32()))
@@ -8708,7 +9623,8 @@ pub fn _mm512_mask_mov_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
-pub fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         let mov = a.as_i16x32();
         transmute(simd_select_bitmask(k, mov, i16x32::ZERO))
@@ -8722,7 +9638,8 @@ pub fn _mm512_maskz_mov_epi16(k: __mmask32, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
-pub fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         let mov = a.as_i16x16();
         transmute(simd_select_bitmask(k, mov, src.as_i16x16()))
@@ -8736,7 +9653,8 @@ pub fn _mm256_mask_mov_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
-pub fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         let mov = a.as_i16x16();
         transmute(simd_select_bitmask(k, mov, i16x16::ZERO))
@@ -8750,7 +9668,8 @@ pub fn _mm256_maskz_mov_epi16(k: __mmask16, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
-pub fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let mov = a.as_i16x8();
         transmute(simd_select_bitmask(k, mov, src.as_i16x8()))
@@ -8764,7 +9683,8 @@ pub fn _mm_mask_mov_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu16))]
-pub fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let mov = a.as_i16x8();
         transmute(simd_select_bitmask(k, mov, i16x8::ZERO))
@@ -8778,7 +9698,8 @@ pub fn _mm_maskz_mov_epi16(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
-pub fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
     unsafe {
         let mov = a.as_i8x64();
         transmute(simd_select_bitmask(k, mov, src.as_i8x64()))
@@ -8792,7 +9713,8 @@ pub fn _mm512_mask_mov_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
-pub fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
     unsafe {
         let mov = a.as_i8x64();
         transmute(simd_select_bitmask(k, mov, i8x64::ZERO))
@@ -8806,7 +9728,8 @@ pub fn _mm512_maskz_mov_epi8(k: __mmask64, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
-pub fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
     unsafe {
         let mov = a.as_i8x32();
         transmute(simd_select_bitmask(k, mov, src.as_i8x32()))
@@ -8820,7 +9743,8 @@ pub fn _mm256_mask_mov_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
-pub fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
     unsafe {
         let mov = a.as_i8x32();
         transmute(simd_select_bitmask(k, mov, i8x32::ZERO))
@@ -8834,7 +9758,8 @@ pub fn _mm256_maskz_mov_epi8(k: __mmask32, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
-pub fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
     unsafe {
         let mov = a.as_i8x16();
         transmute(simd_select_bitmask(k, mov, src.as_i8x16()))
@@ -8848,7 +9773,8 @@ pub fn _mm_mask_mov_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqu8))]
-pub fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
     unsafe {
         let mov = a.as_i8x16();
         transmute(simd_select_bitmask(k, mov, i8x16::ZERO))
@@ -8862,7 +9788,8 @@ pub fn _mm_maskz_mov_epi8(k: __mmask16, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
     unsafe {
         let r = _mm512_set1_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, r, src.as_i16x32()))
@@ -8876,7 +9803,8 @@ pub fn _mm512_mask_set1_epi16(src: __m512i, k: __mmask32, a: i16) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
     unsafe {
         let r = _mm512_set1_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, r, i16x32::ZERO))
@@ -8890,7 +9818,8 @@ pub fn _mm512_maskz_set1_epi16(k: __mmask32, a: i16) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
     unsafe {
         let r = _mm256_set1_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, r, src.as_i16x16()))
@@ -8904,7 +9833,8 @@ pub fn _mm256_mask_set1_epi16(src: __m256i, k: __mmask16, a: i16) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
     unsafe {
         let r = _mm256_set1_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, r, i16x16::ZERO))
@@ -8918,7 +9848,8 @@ pub fn _mm256_maskz_set1_epi16(k: __mmask16, a: i16) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
     unsafe {
         let r = _mm_set1_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, r, src.as_i16x8()))
@@ -8932,7 +9863,8 @@ pub fn _mm_mask_set1_epi16(src: __m128i, k: __mmask8, a: i16) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastw))]
-pub fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
     unsafe {
         let r = _mm_set1_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, r, i16x8::ZERO))
@@ -8946,7 +9878,8 @@ pub fn _mm_maskz_set1_epi16(k: __mmask8, a: i16) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))]
-pub fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
     unsafe {
         let r = _mm512_set1_epi8(a).as_i8x64();
         transmute(simd_select_bitmask(k, r, src.as_i8x64()))
@@ -8960,7 +9893,8 @@ pub fn _mm512_mask_set1_epi8(src: __m512i, k: __mmask64, a: i8) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))]
-pub fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
     unsafe {
         let r = _mm512_set1_epi8(a).as_i8x64();
         transmute(simd_select_bitmask(k, r, i8x64::ZERO))
@@ -8974,7 +9908,8 @@ pub fn _mm512_maskz_set1_epi8(k: __mmask64, a: i8) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))]
-pub fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
     unsafe {
         let r = _mm256_set1_epi8(a).as_i8x32();
         transmute(simd_select_bitmask(k, r, src.as_i8x32()))
@@ -8988,7 +9923,8 @@ pub fn _mm256_mask_set1_epi8(src: __m256i, k: __mmask32, a: i8) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))]
-pub fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
     unsafe {
         let r = _mm256_set1_epi8(a).as_i8x32();
         transmute(simd_select_bitmask(k, r, i8x32::ZERO))
@@ -9002,7 +9938,8 @@ pub fn _mm256_maskz_set1_epi8(k: __mmask32, a: i8) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))]
-pub fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
     unsafe {
         let r = _mm_set1_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, r, src.as_i8x16()))
@@ -9016,7 +9953,8 @@ pub fn _mm_mask_set1_epi8(src: __m128i, k: __mmask16, a: i8) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))]
-pub fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
     unsafe {
         let r = _mm_set1_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, r, i8x16::ZERO))
@@ -9031,7 +9969,8 @@ pub fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let a = a.as_i16x32();
@@ -9085,7 +10024,8 @@ pub fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
     src: __m512i,
     k: __mmask32,
     a: __m512i,
@@ -9105,7 +10045,8 @@ pub fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm512_shufflelo_epi16::<IMM8>(a);
@@ -9121,7 +10062,8 @@ pub fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
     src: __m256i,
     k: __mmask16,
     a: __m256i,
@@ -9141,7 +10083,8 @@ pub fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shuffle = _mm256_shufflelo_epi16::<IMM8>(a);
@@ -9157,7 +10100,12 @@ pub fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_shufflelo_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
@@ -9173,7 +10121,8 @@ pub fn _mm_mask_shufflelo_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: _
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshuflw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shuffle = _mm_shufflelo_epi16::<IMM8>(a);
@@ -9189,7 +10138,8 @@ pub fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let a = a.as_i16x32();
@@ -9243,7 +10193,8 @@ pub fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
     src: __m512i,
     k: __mmask32,
     a: __m512i,
@@ -9263,7 +10214,8 @@ pub fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm512_shufflehi_epi16::<IMM8>(a);
@@ -9279,7 +10231,8 @@ pub fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
     src: __m256i,
     k: __mmask16,
     a: __m256i,
@@ -9299,7 +10252,8 @@ pub fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shuffle = _mm256_shufflehi_epi16::<IMM8>(a);
@@ -9315,7 +10269,12 @@ pub fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_shufflehi_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
@@ -9331,7 +10290,8 @@ pub fn _mm_mask_shufflehi_epi16<const IMM8: i32>(src: __m128i, k: __mmask8, a: _
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufhw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shuffle = _mm_shufflehi_epi16::<IMM8>(a);
@@ -9441,7 +10401,8 @@ pub fn _mm_maskz_shuffle_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmw))]
-pub fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     let and = _mm512_and_si512(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_cmpneq_epi16_mask(and, zero)
@@ -9454,7 +10415,8 @@ pub fn _mm512_test_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmw))]
-pub fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     let and = _mm512_and_si512(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_mask_cmpneq_epi16_mask(k, and, zero)
@@ -9467,7 +10429,8 @@ pub fn _mm512_mask_test_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mm
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmw))]
-pub fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_cmpneq_epi16_mask(and, zero)
@@ -9480,7 +10443,8 @@ pub fn _mm256_test_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmw))]
-pub fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_mask_cmpneq_epi16_mask(k, and, zero)
@@ -9493,7 +10457,8 @@ pub fn _mm256_mask_test_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mm
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmw))]
-pub fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_cmpneq_epi16_mask(and, zero)
@@ -9506,7 +10471,8 @@ pub fn _mm_test_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmw))]
-pub fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_mask_cmpneq_epi16_mask(k, and, zero)
@@ -9519,7 +10485,8 @@ pub fn _mm_mask_test_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmb))]
-pub fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     let and = _mm512_and_si512(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_cmpneq_epi8_mask(and, zero)
@@ -9532,7 +10499,8 @@ pub fn _mm512_test_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmb))]
-pub fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     let and = _mm512_and_si512(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_mask_cmpneq_epi8_mask(k, and, zero)
@@ -9545,7 +10513,8 @@ pub fn _mm512_mask_test_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mma
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmb))]
-pub fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_cmpneq_epi8_mask(and, zero)
@@ -9558,7 +10527,8 @@ pub fn _mm256_test_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmb))]
-pub fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_mask_cmpneq_epi8_mask(k, and, zero)
@@ -9571,7 +10541,8 @@ pub fn _mm256_mask_test_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mma
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmb))]
-pub fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_cmpneq_epi8_mask(and, zero)
@@ -9584,7 +10555,8 @@ pub fn _mm_test_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmb))]
-pub fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_mask_cmpneq_epi8_mask(k, and, zero)
@@ -9597,7 +10569,8 @@ pub fn _mm_mask_test_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask1
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmw))]
-pub fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
     let and = _mm512_and_si512(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_cmpeq_epi16_mask(and, zero)
@@ -9610,7 +10583,8 @@ pub fn _mm512_testn_epi16_mask(a: __m512i, b: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmw))]
-pub fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __mmask32 {
     let and = _mm512_and_si512(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_mask_cmpeq_epi16_mask(k, and, zero)
@@ -9623,7 +10597,8 @@ pub fn _mm512_mask_testn_epi16_mask(k: __mmask32, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmw))]
-pub fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_cmpeq_epi16_mask(and, zero)
@@ -9636,7 +10611,8 @@ pub fn _mm256_testn_epi16_mask(a: __m256i, b: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmw))]
-pub fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __mmask16 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_mask_cmpeq_epi16_mask(k, and, zero)
@@ -9649,7 +10625,8 @@ pub fn _mm256_mask_testn_epi16_mask(k: __mmask16, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmw))]
-pub fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_cmpeq_epi16_mask(and, zero)
@@ -9662,7 +10639,8 @@ pub fn _mm_testn_epi16_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmw))]
-pub fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_mask_cmpeq_epi16_mask(k, and, zero)
@@ -9675,7 +10653,8 @@ pub fn _mm_mask_testn_epi16_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmb))]
-pub fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
     let and = _mm512_and_si512(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_cmpeq_epi8_mask(and, zero)
@@ -9688,7 +10667,8 @@ pub fn _mm512_testn_epi8_mask(a: __m512i, b: __m512i) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmb))]
-pub fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mmask64 {
     let and = _mm512_and_si512(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_mask_cmpeq_epi8_mask(k, and, zero)
@@ -9701,7 +10681,8 @@ pub fn _mm512_mask_testn_epi8_mask(k: __mmask64, a: __m512i, b: __m512i) -> __mm
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmb))]
-pub fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_cmpeq_epi8_mask(and, zero)
@@ -9714,7 +10695,8 @@ pub fn _mm256_testn_epi8_mask(a: __m256i, b: __m256i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmb))]
-pub fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mmask32 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_mask_cmpeq_epi8_mask(k, and, zero)
@@ -9727,7 +10709,8 @@ pub fn _mm256_mask_testn_epi8_mask(k: __mmask32, a: __m256i, b: __m256i) -> __mm
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmb))]
-pub fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_cmpeq_epi8_mask(and, zero)
@@ -9740,7 +10723,8 @@ pub fn _mm_testn_epi8_mask(a: __m128i, b: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmb))]
-pub fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask16 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_mask_cmpeq_epi8_mask(k, and, zero)
@@ -9753,7 +10737,8 @@ pub fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) -> __mmask
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(mov))] //should be kmovq
-pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
     ptr::write(mem_addr as *mut __mmask64, a);
 }
 
@@ -9764,7 +10749,8 @@ pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(mov))] //should be kmovd
-pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
     ptr::write(mem_addr as *mut __mmask32, a);
 }
 
@@ -9775,7 +10761,8 @@ pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(mov))] //should be kmovq
-pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
     ptr::read(mem_addr as *const __mmask64)
 }
 
@@ -9786,7 +10773,8 @@ pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(mov))] //should be kmovd
-pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
     ptr::read(mem_addr as *const __mmask32)
 }
 
@@ -9985,7 +10973,8 @@ pub fn _mm_maskz_dbsad_epu8<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovw2m))]
-pub fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
     let filter = _mm512_set1_epi16(1 << 15);
     let a = _mm512_and_si512(a, filter);
     _mm512_cmpeq_epi16_mask(a, filter)
@@ -9998,7 +10987,8 @@ pub fn _mm512_movepi16_mask(a: __m512i) -> __mmask32 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovw2m))]
-pub fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
     let filter = _mm256_set1_epi16(1 << 15);
     let a = _mm256_and_si256(a, filter);
     _mm256_cmpeq_epi16_mask(a, filter)
@@ -10011,7 +11001,8 @@ pub fn _mm256_movepi16_mask(a: __m256i) -> __mmask16 {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovw2m))]
-pub fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
     let filter = _mm_set1_epi16(1 << 15);
     let a = _mm_and_si128(a, filter);
     _mm_cmpeq_epi16_mask(a, filter)
@@ -10024,7 +11015,8 @@ pub fn _mm_movepi16_mask(a: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovb2m))]
-pub fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
     let filter = _mm512_set1_epi8(1 << 7);
     let a = _mm512_and_si512(a, filter);
     _mm512_cmpeq_epi8_mask(a, filter)
@@ -10036,9 +11028,11 @@ pub fn _mm512_movepi8_mask(a: __m512i) -> __mmask64 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
+#[cfg_attr(test, assert_instr(vpmovmskb))]
+// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
 // using vpmovb2m plus converting the mask register to a standard register.
-pub fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
     let filter = _mm256_set1_epi8(1 << 7);
     let a = _mm256_and_si256(a, filter);
     _mm256_cmpeq_epi8_mask(a, filter)
@@ -10050,9 +11044,11 @@ pub fn _mm256_movepi8_mask(a: __m256i) -> __mmask32 {
 #[inline]
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-#[cfg_attr(test, assert_instr(vpmovmskb))] // should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
+#[cfg_attr(test, assert_instr(vpmovmskb))]
+// should be vpmovb2m but compiled to vpmovmskb in the test shim because that takes less cycles than
 // using vpmovb2m plus converting the mask register to a standard register.
-pub fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
     let filter = _mm_set1_epi8(1 << 7);
     let a = _mm_and_si128(a, filter);
     _mm_cmpeq_epi8_mask(a, filter)
@@ -10065,7 +11061,8 @@ pub fn _mm_movepi8_mask(a: __m128i) -> __mmask16 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovm2w))]
-pub fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
     unsafe {
         let one = _mm512_set1_epi16(
             1 << 15
@@ -10097,7 +11094,8 @@ pub fn _mm512_movm_epi16(k: __mmask32) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovm2w))]
-pub fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
     unsafe {
         let one = _mm256_set1_epi16(
             1 << 15
@@ -10129,7 +11127,8 @@ pub fn _mm256_movm_epi16(k: __mmask16) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovm2w))]
-pub fn _mm_movm_epi16(k: __mmask8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movm_epi16(k: __mmask8) -> __m128i {
     unsafe {
         let one = _mm_set1_epi16(
             1 << 15
@@ -10161,7 +11160,8 @@ pub fn _mm_movm_epi16(k: __mmask8) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovm2b))]
-pub fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
     unsafe {
         let one =
             _mm512_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
@@ -10177,7 +11177,8 @@ pub fn _mm512_movm_epi8(k: __mmask64) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovm2b))]
-pub fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
     unsafe {
         let one =
             _mm256_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
@@ -10193,7 +11194,8 @@ pub fn _mm256_movm_epi8(k: __mmask32) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovm2b))]
-pub fn _mm_movm_epi8(k: __mmask16) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movm_epi8(k: __mmask16) -> __m128i {
     unsafe {
         let one =
             _mm_set1_epi8(1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0)
@@ -10208,7 +11210,8 @@ pub fn _mm_movm_epi8(k: __mmask16) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _cvtmask32_u32(a: __mmask32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _cvtmask32_u32(a: __mmask32) -> u32 {
     a
 }
 
@@ -10218,7 +11221,8 @@ pub fn _cvtmask32_u32(a: __mmask32) -> u32 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _cvtu32_mask32(a: u32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _cvtu32_mask32(a: u32) -> __mmask32 {
     a
 }
 
@@ -10228,8 +11232,9 @@ pub fn _cvtu32_mask32(a: u32) -> __mmask32 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
-    a + b
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+    a.wrapping_add(b)
 }
 
 /// Add 64-bit masks in a and b, and store the result in k.
@@ -10238,8 +11243,9 @@ pub fn _kadd_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
-    a + b
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+    a.wrapping_add(b)
 }
 
 /// Compute the bitwise AND of 32-bit masks a and b, and store the result in k.
@@ -10248,7 +11254,8 @@ pub fn _kadd_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     a & b
 }
 
@@ -10258,7 +11265,8 @@ pub fn _kand_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     a & b
 }
 
@@ -10268,7 +11276,8 @@ pub fn _kand_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _knot_mask32(a: __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _knot_mask32(a: __mmask32) -> __mmask32 {
     !a
 }
 
@@ -10278,7 +11287,8 @@ pub fn _knot_mask32(a: __mmask32) -> __mmask32 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _knot_mask64(a: __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _knot_mask64(a: __mmask64) -> __mmask64 {
     !a
 }
 
@@ -10288,7 +11298,8 @@ pub fn _knot_mask64(a: __mmask64) -> __mmask64 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     _knot_mask32(a) & b
 }
 
@@ -10298,7 +11309,8 @@ pub fn _kandn_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     _knot_mask64(a) & b
 }
 
@@ -10308,7 +11320,8 @@ pub fn _kandn_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     a | b
 }
 
@@ -10318,7 +11331,8 @@ pub fn _kor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     a | b
 }
 
@@ -10328,7 +11342,8 @@ pub fn _kor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     a ^ b
 }
 
@@ -10338,7 +11353,8 @@ pub fn _kxor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     a ^ b
 }
 
@@ -10348,7 +11364,8 @@ pub fn _kxor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
     _knot_mask32(a ^ b)
 }
 
@@ -10358,7 +11375,8 @@ pub fn _kxnor_mask32(a: __mmask32, b: __mmask32) -> __mmask32 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
     _knot_mask64(a ^ b)
 }
 
@@ -10369,7 +11387,8 @@ pub fn _kxnor_mask64(a: __mmask64, b: __mmask64) -> __mmask64 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8) -> u8 {
     let tmp = _kor_mask32(a, b);
     *all_ones = (tmp == 0xffffffff) as u8;
     (tmp == 0) as u8
@@ -10382,7 +11401,8 @@ pub unsafe fn _kortest_mask32_u8(a: __mmask32, b: __mmask32, all_ones: *mut u8)
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8) -> u8 {
     let tmp = _kor_mask64(a, b);
     *all_ones = (tmp == 0xffffffff_ffffffff) as u8;
     (tmp == 0) as u8
@@ -10395,7 +11415,8 @@ pub unsafe fn _kortest_mask64_u8(a: __mmask64, b: __mmask64, all_ones: *mut u8)
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
     (_kor_mask32(a, b) == 0xffffffff) as u8
 }
 
@@ -10406,7 +11427,8 @@ pub fn _kortestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
     (_kor_mask64(a, b) == 0xffffffff_ffffffff) as u8
 }
 
@@ -10417,7 +11439,8 @@ pub fn _kortestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
     (_kor_mask32(a, b) == 0) as u8
 }
 
@@ -10428,7 +11451,8 @@ pub fn _kortestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
     (_kor_mask64(a, b) == 0) as u8
 }
 
@@ -10439,7 +11463,8 @@ pub fn _kortestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
 #[target_feature(enable = "avx512bw")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
     a.unbounded_shl(COUNT)
 }
 
@@ -10450,7 +11475,8 @@ pub fn _kshiftli_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
     a.unbounded_shl(COUNT)
 }
 
@@ -10461,7 +11487,8 @@ pub fn _kshiftli_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
     a.unbounded_shr(COUNT)
 }
 
@@ -10472,7 +11499,8 @@ pub fn _kshiftri_mask32<const COUNT: u32>(a: __mmask32) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
     a.unbounded_shr(COUNT)
 }
 
@@ -10484,7 +11512,8 @@ pub fn _kshiftri_mask64<const COUNT: u32>(a: __mmask64) -> __mmask64 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) -> u8 {
     *and_not = (_kandn_mask32(a, b) == 0) as u8;
     (_kand_mask32(a, b) == 0) as u8
 }
@@ -10497,7 +11526,8 @@ pub unsafe fn _ktest_mask32_u8(a: __mmask32, b: __mmask32, and_not: *mut u8) ->
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) -> u8 {
     *and_not = (_kandn_mask64(a, b) == 0) as u8;
     (_kand_mask64(a, b) == 0) as u8
 }
@@ -10509,7 +11539,8 @@ pub unsafe fn _ktest_mask64_u8(a: __mmask64, b: __mmask64, and_not: *mut u8) ->
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
     (_kandn_mask32(a, b) == 0) as u8
 }
 
@@ -10520,7 +11551,8 @@ pub fn _ktestc_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
     (_kandn_mask64(a, b) == 0) as u8
 }
 
@@ -10531,7 +11563,8 @@ pub fn _ktestc_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
     (_kand_mask32(a, b) == 0) as u8
 }
 
@@ -10542,7 +11575,8 @@ pub fn _ktestz_mask32_u8(a: __mmask32, b: __mmask32) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
     (_kand_mask64(a, b) == 0) as u8
 }
 
@@ -10553,7 +11587,8 @@ pub fn _ktestz_mask64_u8(a: __mmask64, b: __mmask64) -> u8 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckwd
-pub fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
     ((a & 0xffff) << 16) | (b & 0xffff)
 }
 
@@ -10564,7 +11599,8 @@ pub fn _mm512_kunpackw(a: __mmask32, b: __mmask32) -> __mmask32 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckdq
-pub fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
     ((a & 0xffffffff) << 32) | (b & 0xffffffff)
 }
 
@@ -10575,7 +11611,8 @@ pub fn _mm512_kunpackd(a: __mmask64, b: __mmask64) -> __mmask64 {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
     unsafe {
         let a = a.as_i16x32();
         transmute::<i8x32, _>(simd_cast(a))
@@ -10589,7 +11626,8 @@ pub fn _mm512_cvtepi16_epi8(a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
     unsafe {
         let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
         transmute(simd_select_bitmask(k, convert, src.as_i8x32()))
@@ -10603,7 +11641,8 @@ pub fn _mm512_mask_cvtepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m2
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
     unsafe {
         let convert = _mm512_cvtepi16_epi8(a).as_i8x32();
         transmute(simd_select_bitmask(k, convert, i8x32::ZERO))
@@ -10617,7 +11656,8 @@ pub fn _mm512_maskz_cvtepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
     unsafe {
         let a = a.as_i16x16();
         transmute::<i8x16, _>(simd_cast(a))
@@ -10631,7 +11671,8 @@ pub fn _mm256_cvtepi16_epi8(a: __m256i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
     unsafe {
         let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
@@ -10645,7 +11686,8 @@ pub fn _mm256_mask_cvtepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m1
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
     unsafe {
         let convert = _mm256_cvtepi16_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
@@ -10659,7 +11701,8 @@ pub fn _mm256_maskz_cvtepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i16x8();
         let v256: i16x16 = simd_shuffle!(
@@ -10678,11 +11721,16 @@ pub fn _mm_cvtepi16_epi8(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
-    unsafe {
-        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
-        let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
-        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+    unsafe {
+        let a = _mm_cvtepi16_epi8(a).as_i8x16();
+        let src = simd_shuffle!(
+            src.as_i8x16(),
+            i8x16::ZERO,
+            [0, 1, 2, 3, 4, 5, 6, 7, 16, 16, 16, 16, 16, 16, 16, 16]
+        );
+        simd_select_bitmask(k as u16, a, src).as_m128i()
     }
 }
 
@@ -10693,12 +11741,9 @@ pub fn _mm_mask_cvtepi16_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
-    unsafe {
-        let convert = _mm_cvtepi16_epi8(a).as_i8x16();
-        let k: __mmask16 = 0b11111111_11111111 & k as __mmask16;
-        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
+    _mm_mask_cvtepi16_epi8(_mm_setzero_si128(), k, a)
 }
 
 /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
@@ -10708,13 +11753,14 @@ pub fn _mm_maskz_cvtepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
     unsafe {
-        transmute(vpmovswb(
-            a.as_i16x32(),
-            i8x32::ZERO,
-            0b11111111_11111111_11111111_11111111,
+        simd_cast::<_, i8x32>(simd_imax(
+            simd_imin(a.as_i16x32(), i16x32::splat(i8::MAX as _)),
+            i16x32::splat(i8::MIN as _),
         ))
+        .as_m256i()
     }
 }
 
@@ -10725,8 +11771,11 @@ pub fn _mm512_cvtsepi16_epi8(a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
-    unsafe { transmute(vpmovswb(a.as_i16x32(), src.as_i8x32(), k)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
+    unsafe {
+        simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), src.as_i8x32()).as_m256i()
+    }
 }
 
 /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -10736,8 +11785,9 @@ pub fn _mm512_mask_cvtsepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
-    unsafe { transmute(vpmovswb(a.as_i16x32(), i8x32::ZERO, k)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
+    unsafe { simd_select_bitmask(k, _mm512_cvtsepi16_epi8(a).as_i8x32(), i8x32::ZERO).as_m256i() }
 }
 
 /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
@@ -10747,8 +11797,15 @@ pub fn _mm512_maskz_cvtsepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
-    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, 0b11111111_11111111)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
+    unsafe {
+        simd_cast::<_, i8x16>(simd_imax(
+            simd_imin(a.as_i16x16(), i16x16::splat(i8::MAX as _)),
+            i16x16::splat(i8::MIN as _),
+        ))
+        .as_m128i()
+    }
 }
 
 /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -10758,8 +11815,11 @@ pub fn _mm256_cvtsepi16_epi8(a: __m256i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
-    unsafe { transmute(vpmovswb256(a.as_i16x16(), src.as_i8x16(), k)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
+    unsafe {
+        simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), src.as_i8x16()).as_m128i()
+    }
 }
 
 /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -10769,8 +11829,9 @@ pub fn _mm256_mask_cvtsepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
-pub fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
-    unsafe { transmute(vpmovswb256(a.as_i16x16(), i8x16::ZERO, k)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtsepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
+    unsafe { simd_select_bitmask(k, _mm256_cvtsepi16_epi8(a).as_i8x16(), i8x16::ZERO).as_m128i() }
 }
 
 /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
@@ -10813,13 +11874,10 @@ pub fn _mm_maskz_cvtsepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
     unsafe {
-        transmute(vpmovuswb(
-            a.as_u16x32(),
-            u8x32::ZERO,
-            0b11111111_11111111_11111111_11111111,
-        ))
+        simd_cast::<_, u8x32>(simd_imin(a.as_u16x32(), u16x32::splat(u8::MAX as _))).as_m256i()
     }
 }
 
@@ -10830,8 +11888,11 @@ pub fn _mm512_cvtusepi16_epi8(a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
-    unsafe { transmute(vpmovuswb(a.as_u16x32(), src.as_u8x32(), k)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __m256i {
+    unsafe {
+        simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), src.as_u8x32()).as_m256i()
+    }
 }
 
 /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -10841,8 +11902,9 @@ pub fn _mm512_mask_cvtusepi16_epi8(src: __m256i, k: __mmask32, a: __m512i) -> __
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
-    unsafe { transmute(vpmovuswb(a.as_u16x32(), u8x32::ZERO, k)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
+    unsafe { simd_select_bitmask(k, _mm512_cvtusepi16_epi8(a).as_u8x32(), u8x32::ZERO).as_m256i() }
 }
 
 /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
@@ -10852,13 +11914,10 @@ pub fn _mm512_maskz_cvtusepi16_epi8(k: __mmask32, a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
     unsafe {
-        transmute(vpmovuswb256(
-            a.as_u16x16(),
-            u8x16::ZERO,
-            0b11111111_11111111,
-        ))
+        simd_cast::<_, u8x16>(simd_imin(a.as_u16x16(), u16x16::splat(u8::MAX as _))).as_m128i()
     }
 }
 
@@ -10869,8 +11928,11 @@ pub fn _mm256_cvtusepi16_epi8(a: __m256i) -> __m128i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
-    unsafe { transmute(vpmovuswb256(a.as_u16x16(), src.as_u8x16(), k)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __m128i {
+    unsafe {
+        simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), src.as_u8x16()).as_m128i()
+    }
 }
 
 /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -10880,8 +11942,9 @@ pub fn _mm256_mask_cvtusepi16_epi8(src: __m128i, k: __mmask16, a: __m256i) -> __
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
-pub fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
-    unsafe { transmute(vpmovuswb256(a.as_u16x16(), u8x16::ZERO, k)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtusepi16_epi8(k: __mmask16, a: __m256i) -> __m128i {
+    unsafe { simd_select_bitmask(k, _mm256_cvtusepi16_epi8(a).as_u8x16(), u8x16::ZERO).as_m128i() }
 }
 
 /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
@@ -10924,7 +11987,8 @@ pub fn _mm_maskz_cvtusepi16_epi8(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbw))]
-pub fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
     unsafe {
         let a = a.as_i8x32();
         transmute::<i16x32, _>(simd_cast(a))
@@ -10938,7 +12002,8 @@ pub fn _mm512_cvtepi8_epi16(a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbw))]
-pub fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
@@ -10952,7 +12017,8 @@ pub fn _mm512_mask_cvtepi8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m5
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbw))]
-pub fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi8_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
@@ -10966,7 +12032,8 @@ pub fn _mm512_maskz_cvtepi8_epi16(k: __mmask32, a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbw))]
-pub fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
@@ -10980,7 +12047,8 @@ pub fn _mm256_mask_cvtepi8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbw))]
-pub fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi8_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
@@ -10994,7 +12062,8 @@ pub fn _mm256_maskz_cvtepi8_epi16(k: __mmask16, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbw))]
-pub fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi8_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
@@ -11008,7 +12077,8 @@ pub fn _mm_mask_cvtepi8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbw))]
-pub fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi8_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
@@ -11022,7 +12092,8 @@ pub fn _mm_maskz_cvtepi8_epi16(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbw))]
-pub fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
     unsafe {
         let a = a.as_u8x32();
         transmute::<i16x32, _>(simd_cast(a))
@@ -11036,7 +12107,8 @@ pub fn _mm512_cvtepu8_epi16(a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbw))]
-pub fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, convert, src.as_i16x32()))
@@ -11050,7 +12122,8 @@ pub fn _mm512_mask_cvtepu8_epi16(src: __m512i, k: __mmask32, a: __m256i) -> __m5
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbw))]
-pub fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu8_epi16(a).as_i16x32();
         transmute(simd_select_bitmask(k, convert, i16x32::ZERO))
@@ -11064,7 +12137,8 @@ pub fn _mm512_maskz_cvtepu8_epi16(k: __mmask32, a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbw))]
-pub fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
@@ -11078,7 +12152,8 @@ pub fn _mm256_mask_cvtepu8_epi16(src: __m256i, k: __mmask16, a: __m128i) -> __m2
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbw))]
-pub fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu8_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
@@ -11092,7 +12167,8 @@ pub fn _mm256_maskz_cvtepu8_epi16(k: __mmask16, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbw))]
-pub fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu8_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
@@ -11106,7 +12182,8 @@ pub fn _mm_mask_cvtepu8_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbw))]
-pub fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu8_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
@@ -11121,7 +12198,8 @@ pub fn _mm_maskz_cvtepu8_epi16(k: __mmask8, a: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslldq, IMM8 = 3))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         const fn mask(shift: i32, i: u32) -> u32 {
@@ -11216,7 +12294,8 @@ pub fn _mm512_bslli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrldq, IMM8 = 3))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         const fn mask(shift: i32, i: u32) -> u32 {
@@ -11313,7 +12392,8 @@ pub fn _mm512_bsrli_epi128<const IMM8: i32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
     const fn mask(shift: u32, i: u32) -> u32 {
         let shift = shift % 16;
         let mod_i = i % 16;
@@ -11423,7 +12503,8 @@ pub fn _mm512_alignr_epi8<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_alignr_epi8<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_alignr_epi8<const IMM8: i32>(
     src: __m512i,
     k: __mmask64,
     a: __m512i,
@@ -11444,7 +12525,12 @@ pub fn _mm512_mask_alignr_epi8<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_alignr_epi8<const IMM8: i32>(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_alignr_epi8<const IMM8: i32>(
+    k: __mmask64,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm512_alignr_epi8::<IMM8>(a, b);
@@ -11460,7 +12546,8 @@ pub fn _mm512_maskz_alignr_epi8<const IMM8: i32>(k: __mmask64, a: __m512i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(4)]
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
-pub fn _mm256_mask_alignr_epi8<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_alignr_epi8<const IMM8: i32>(
     src: __m256i,
     k: __mmask32,
     a: __m256i,
@@ -11481,7 +12568,12 @@ pub fn _mm256_mask_alignr_epi8<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
-pub fn _mm256_maskz_alignr_epi8<const IMM8: i32>(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_alignr_epi8<const IMM8: i32>(
+    k: __mmask32,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm256_alignr_epi8::<IMM8>(a, b);
@@ -11497,7 +12589,8 @@ pub fn _mm256_maskz_alignr_epi8<const IMM8: i32>(k: __mmask32, a: __m256i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(4)]
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
-pub fn _mm_mask_alignr_epi8<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_alignr_epi8<const IMM8: i32>(
     src: __m128i,
     k: __mmask16,
     a: __m128i,
@@ -11518,7 +12611,12 @@ pub fn _mm_mask_alignr_epi8<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 5))]
-pub fn _mm_maskz_alignr_epi8<const IMM8: i32>(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_alignr_epi8<const IMM8: i32>(
+    k: __mmask16,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm_alignr_epi8::<IMM8>(a, b);
@@ -11545,7 +12643,14 @@ pub unsafe fn _mm512_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32,
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
 pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
-    vpmovswbmem256(mem_addr, a.as_i16x16(), k);
+    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
+
+    let max = simd_splat(i16::from(i8::MAX));
+    let min = simd_splat(i16::from(i8::MIN));
+
+    let v = simd_imax(simd_imin(a.as_i16x16(), max), min);
+    let truncated: i8x16 = simd_cast(v);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
 }
 
 /// Convert packed signed 16-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@@ -11556,7 +12661,14 @@ pub unsafe fn _mm256_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16,
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovswb))]
 pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
-    vpmovswbmem128(mem_addr, a.as_i16x8(), k);
+    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
+
+    let max = simd_splat(i16::from(i8::MAX));
+    let min = simd_splat(i16::from(i8::MIN));
+
+    let v = simd_imax(simd_imin(a.as_i16x8(), max), min);
+    let truncated: i8x8 = simd_cast(v);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
 }
 
 /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@@ -11566,8 +12678,11 @@ pub unsafe fn _mm_mask_cvtsepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a:
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
-    vpmovwbmem(mem_addr, a.as_i16x32(), k);
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32, a: __m512i) {
+    let result = _mm512_cvtepi16_epi8(a).as_i8x32();
+    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
 }
 
 /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@@ -11577,8 +12692,11 @@ pub unsafe fn _mm512_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32,
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
-    vpmovwbmem256(mem_addr, a.as_i16x16(), k);
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
+    let result = _mm256_cvtepi16_epi8(a).as_i8x16();
+    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
 }
 
 /// Convert packed 16-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@@ -11588,8 +12706,15 @@ pub unsafe fn _mm256_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16,
 #[target_feature(enable = "avx512bw,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovwb))]
-pub unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
-    vpmovwbmem128(mem_addr, a.as_i16x8(), k);
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_cvtepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
+    let result: i8x8 = simd_shuffle!(
+        _mm_cvtepi16_epi8(a).as_i8x16(),
+        i8x16::ZERO,
+        [0, 1, 2, 3, 4, 5, 6, 7]
+    );
+    let mask = simd_select_bitmask(k, i8x8::splat(!0), i8x8::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, result);
 }
 
 /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@@ -11611,7 +12736,12 @@ pub unsafe fn _mm512_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask32
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
 pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m256i) {
-    vpmovuswbmem256(mem_addr, a.as_i16x16(), k);
+    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
+    let mem_addr = mem_addr.cast::<u8>();
+    let max = simd_splat(u16::from(u8::MAX));
+
+    let truncated: u8x16 = simd_cast(simd_imin(a.as_u16x16(), max));
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
 }
 
 /// Convert packed unsigned 16-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
@@ -11622,7 +12752,15 @@ pub unsafe fn _mm256_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask16
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovuswb))]
 pub unsafe fn _mm_mask_cvtusepi16_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
-    vpmovuswbmem128(mem_addr, a.as_i16x8(), k);
+    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
+    let mem_addr = mem_addr.cast::<u8>();
+    let max = simd_splat(u16::from(u8::MAX));
+
+    let v = a.as_u16x8();
+    let v = simd_imin(v, max);
+
+    let truncated: u8x8 = simd_cast(v);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, truncated);
 }
 
 #[allow(improper_ctypes)]
@@ -11630,48 +12768,20 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
     fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
 
+    #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
+    fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
     #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
-    fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;
-
-    #[link_name = "llvm.x86.avx512.packssdw.512"]
-    fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
-    #[link_name = "llvm.x86.avx512.packsswb.512"]
-    fn vpacksswb(a: i16x32, b: i16x32) -> i8x64;
-    #[link_name = "llvm.x86.avx512.packusdw.512"]
-    fn vpackusdw(a: i32x16, b: i32x16) -> u16x32;
-    #[link_name = "llvm.x86.avx512.packuswb.512"]
-    fn vpackuswb(a: i16x32, b: i16x32) -> u8x64;
+    fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
 
     #[link_name = "llvm.x86.avx512.psll.w.512"]
     fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
 
-    #[link_name = "llvm.x86.avx512.psllv.w.512"]
-    fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
-    #[link_name = "llvm.x86.avx512.psllv.w.256"]
-    fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx512.psllv.w.128"]
-    fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.avx512.psrl.w.512"]
     fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
 
-    #[link_name = "llvm.x86.avx512.psrlv.w.512"]
-    fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
-    #[link_name = "llvm.x86.avx512.psrlv.w.256"]
-    fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx512.psrlv.w.128"]
-    fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.avx512.psra.w.512"]
     fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
 
-    #[link_name = "llvm.x86.avx512.psrav.w.512"]
-    fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
-    #[link_name = "llvm.x86.avx512.psrav.w.256"]
-    fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
-    #[link_name = "llvm.x86.avx512.psrav.w.128"]
-    fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
     fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
     #[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
@@ -11699,71 +12809,22 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.dbpsadbw.128"]
     fn vdbpsadbw128(a: u8x16, b: u8x16, imm8: i32) -> u16x8;
 
-    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.512"]
-    fn vpmovswb(a: i16x32, src: i8x32, mask: u32) -> i8x32;
-    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.256"]
-    fn vpmovswb256(a: i16x16, src: i8x16, mask: u16) -> i8x16;
     #[link_name = "llvm.x86.avx512.mask.pmovs.wb.128"]
     fn vpmovswb128(a: i16x8, src: i8x16, mask: u8) -> i8x16;
 
-    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.512"]
-    fn vpmovuswb(a: u16x32, src: u8x32, mask: u32) -> u8x32;
-    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.256"]
-    fn vpmovuswb256(a: u16x16, src: u8x16, mask: u16) -> u8x16;
     #[link_name = "llvm.x86.avx512.mask.pmovus.wb.128"]
     fn vpmovuswb128(a: u16x8, src: u8x16, mask: u8) -> u8x16;
 
     #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.512"]
     fn vpmovswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
-    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.256"]
-    fn vpmovswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
-    #[link_name = "llvm.x86.avx512.mask.pmovs.wb.mem.128"]
-    fn vpmovswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
-
-    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.512"]
-    fn vpmovwbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
-    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.256"]
-    fn vpmovwbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
-    #[link_name = "llvm.x86.avx512.mask.pmov.wb.mem.128"]
-    fn vpmovwbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
 
     #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.512"]
     fn vpmovuswbmem(mem_addr: *mut i8, a: i16x32, mask: u32);
-    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.256"]
-    fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
-    #[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
-    fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
-
-    #[link_name = "llvm.x86.avx512.mask.loadu.b.128"]
-    fn loaddqu8_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
-    #[link_name = "llvm.x86.avx512.mask.loadu.w.128"]
-    fn loaddqu16_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
-    #[link_name = "llvm.x86.avx512.mask.loadu.b.256"]
-    fn loaddqu8_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
-    #[link_name = "llvm.x86.avx512.mask.loadu.w.256"]
-    fn loaddqu16_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
-    #[link_name = "llvm.x86.avx512.mask.loadu.b.512"]
-    fn loaddqu8_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
-    #[link_name = "llvm.x86.avx512.mask.loadu.w.512"]
-    fn loaddqu16_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;
-
-    #[link_name = "llvm.x86.avx512.mask.storeu.b.128"]
-    fn storedqu8_128(mem_addr: *mut i8, a: i8x16, mask: u16);
-    #[link_name = "llvm.x86.avx512.mask.storeu.w.128"]
-    fn storedqu16_128(mem_addr: *mut i16, a: i16x8, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.b.256"]
-    fn storedqu8_256(mem_addr: *mut i8, a: i8x32, mask: u32);
-    #[link_name = "llvm.x86.avx512.mask.storeu.w.256"]
-    fn storedqu16_256(mem_addr: *mut i16, a: i16x16, mask: u16);
-    #[link_name = "llvm.x86.avx512.mask.storeu.b.512"]
-    fn storedqu8_512(mem_addr: *mut i8, a: i8x64, mask: u64);
-    #[link_name = "llvm.x86.avx512.mask.storeu.w.512"]
-    fn storedqu16_512(mem_addr: *mut i16, a: i16x32, mask: u32);
-
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
 
     use stdarch_test::simd_test;
 
@@ -11772,7 +12833,7 @@ mod tests {
     use crate::mem::{self};
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_abs_epi16() {
+    const fn test_mm512_abs_epi16() {
         let a = _mm512_set1_epi16(-1);
         let r = _mm512_abs_epi16(a);
         let e = _mm512_set1_epi16(1);
@@ -11780,7 +12841,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_abs_epi16() {
+    const fn test_mm512_mask_abs_epi16() {
         let a = _mm512_set1_epi16(-1);
         let r = _mm512_mask_abs_epi16(a, 0, a);
         assert_eq_m512i(r, a);
@@ -11792,7 +12853,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_abs_epi16() {
+    const fn test_mm512_maskz_abs_epi16() {
         let a = _mm512_set1_epi16(-1);
         let r = _mm512_maskz_abs_epi16(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -11804,7 +12865,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_abs_epi16() {
+    const fn test_mm256_mask_abs_epi16() {
         let a = _mm256_set1_epi16(-1);
         let r = _mm256_mask_abs_epi16(a, 0, a);
         assert_eq_m256i(r, a);
@@ -11814,7 +12875,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_abs_epi16() {
+    const fn test_mm256_maskz_abs_epi16() {
         let a = _mm256_set1_epi16(-1);
         let r = _mm256_maskz_abs_epi16(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -11824,7 +12885,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_abs_epi16() {
+    const fn test_mm_mask_abs_epi16() {
         let a = _mm_set1_epi16(-1);
         let r = _mm_mask_abs_epi16(a, 0, a);
         assert_eq_m128i(r, a);
@@ -11834,7 +12895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_abs_epi16() {
+    const fn test_mm_maskz_abs_epi16() {
         let a = _mm_set1_epi16(-1);
         let r = _mm_maskz_abs_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -11844,7 +12905,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_abs_epi8() {
+    const fn test_mm512_abs_epi8() {
         let a = _mm512_set1_epi8(-1);
         let r = _mm512_abs_epi8(a);
         let e = _mm512_set1_epi8(1);
@@ -11852,7 +12913,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_abs_epi8() {
+    const fn test_mm512_mask_abs_epi8() {
         let a = _mm512_set1_epi8(-1);
         let r = _mm512_mask_abs_epi8(a, 0, a);
         assert_eq_m512i(r, a);
@@ -11870,7 +12931,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_abs_epi8() {
+    const fn test_mm512_maskz_abs_epi8() {
         let a = _mm512_set1_epi8(-1);
         let r = _mm512_maskz_abs_epi8(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -11887,7 +12948,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_abs_epi8() {
+    const fn test_mm256_mask_abs_epi8() {
         let a = _mm256_set1_epi8(-1);
         let r = _mm256_mask_abs_epi8(a, 0, a);
         assert_eq_m256i(r, a);
@@ -11899,7 +12960,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_abs_epi8() {
+    const fn test_mm256_maskz_abs_epi8() {
         let a = _mm256_set1_epi8(-1);
         let r = _mm256_maskz_abs_epi8(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -11911,7 +12972,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_abs_epi8() {
+    const fn test_mm_mask_abs_epi8() {
         let a = _mm_set1_epi8(-1);
         let r = _mm_mask_abs_epi8(a, 0, a);
         assert_eq_m128i(r, a);
@@ -11921,7 +12982,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_abs_epi8() {
+    const fn test_mm_maskz_abs_epi8() {
         let a = _mm_set1_epi8(-1);
         let r = _mm_maskz_abs_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -11932,7 +12993,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_add_epi16() {
+    const fn test_mm512_add_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(2);
         let r = _mm512_add_epi16(a, b);
@@ -11941,7 +13002,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_add_epi16() {
+    const fn test_mm512_mask_add_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(2);
         let r = _mm512_mask_add_epi16(a, 0, a, b);
@@ -11954,7 +13015,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_add_epi16() {
+    const fn test_mm512_maskz_add_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(2);
         let r = _mm512_maskz_add_epi16(0, a, b);
@@ -11967,7 +13028,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_add_epi16() {
+    const fn test_mm256_mask_add_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(2);
         let r = _mm256_mask_add_epi16(a, 0, a, b);
@@ -11978,7 +13039,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_add_epi16() {
+    const fn test_mm256_maskz_add_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(2);
         let r = _mm256_maskz_add_epi16(0, a, b);
@@ -11989,7 +13050,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_add_epi16() {
+    const fn test_mm_mask_add_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(2);
         let r = _mm_mask_add_epi16(a, 0, a, b);
@@ -12000,7 +13061,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_add_epi16() {
+    const fn test_mm_maskz_add_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(2);
         let r = _mm_maskz_add_epi16(0, a, b);
@@ -12011,7 +13072,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_add_epi8() {
+    const fn test_mm512_add_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(2);
         let r = _mm512_add_epi8(a, b);
@@ -12020,7 +13081,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_add_epi8() {
+    const fn test_mm512_mask_add_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(2);
         let r = _mm512_mask_add_epi8(a, 0, a, b);
@@ -12040,7 +13101,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_add_epi8() {
+    const fn test_mm512_maskz_add_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(2);
         let r = _mm512_maskz_add_epi8(0, a, b);
@@ -12059,7 +13120,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_add_epi8() {
+    const fn test_mm256_mask_add_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(2);
         let r = _mm256_mask_add_epi8(a, 0, a, b);
@@ -12072,7 +13133,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_add_epi8() {
+    const fn test_mm256_maskz_add_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(2);
         let r = _mm256_maskz_add_epi8(0, a, b);
@@ -12085,7 +13146,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_add_epi8() {
+    const fn test_mm_mask_add_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(2);
         let r = _mm_mask_add_epi8(a, 0, a, b);
@@ -12096,7 +13157,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_add_epi8() {
+    const fn test_mm_maskz_add_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(2);
         let r = _mm_maskz_add_epi8(0, a, b);
@@ -12107,7 +13168,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_adds_epu16() {
+    const fn test_mm512_adds_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(u16::MAX as i16);
         let r = _mm512_adds_epu16(a, b);
@@ -12116,7 +13177,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_adds_epu16() {
+    const fn test_mm512_mask_adds_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(u16::MAX as i16);
         let r = _mm512_mask_adds_epu16(a, 0, a, b);
@@ -12129,7 +13190,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_adds_epu16() {
+    const fn test_mm512_maskz_adds_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(u16::MAX as i16);
         let r = _mm512_maskz_adds_epu16(0, a, b);
@@ -12142,7 +13203,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_adds_epu16() {
+    const fn test_mm256_mask_adds_epu16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(u16::MAX as i16);
         let r = _mm256_mask_adds_epu16(a, 0, a, b);
@@ -12154,7 +13215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_adds_epu16() {
+    const fn test_mm256_maskz_adds_epu16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(u16::MAX as i16);
         let r = _mm256_maskz_adds_epu16(0, a, b);
@@ -12166,7 +13227,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_adds_epu16() {
+    const fn test_mm_mask_adds_epu16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(u16::MAX as i16);
         let r = _mm_mask_adds_epu16(a, 0, a, b);
@@ -12178,7 +13239,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_adds_epu16() {
+    const fn test_mm_maskz_adds_epu16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(u16::MAX as i16);
         let r = _mm_maskz_adds_epu16(0, a, b);
@@ -12190,7 +13251,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_adds_epu8() {
+    const fn test_mm512_adds_epu8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(u8::MAX as i8);
         let r = _mm512_adds_epu8(a, b);
@@ -12199,7 +13260,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_adds_epu8() {
+    const fn test_mm512_mask_adds_epu8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(u8::MAX as i8);
         let r = _mm512_mask_adds_epu8(a, 0, a, b);
@@ -12219,7 +13280,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_adds_epu8() {
+    const fn test_mm512_maskz_adds_epu8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(u8::MAX as i8);
         let r = _mm512_maskz_adds_epu8(0, a, b);
@@ -12238,7 +13299,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_adds_epu8() {
+    const fn test_mm256_mask_adds_epu8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(u8::MAX as i8);
         let r = _mm256_mask_adds_epu8(a, 0, a, b);
@@ -12251,7 +13312,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_adds_epu8() {
+    const fn test_mm256_maskz_adds_epu8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(u8::MAX as i8);
         let r = _mm256_maskz_adds_epu8(0, a, b);
@@ -12264,7 +13325,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_adds_epu8() {
+    const fn test_mm_mask_adds_epu8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(u8::MAX as i8);
         let r = _mm_mask_adds_epu8(a, 0, a, b);
@@ -12276,7 +13337,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_adds_epu8() {
+    const fn test_mm_maskz_adds_epu8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(u8::MAX as i8);
         let r = _mm_maskz_adds_epu8(0, a, b);
@@ -12288,7 +13349,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_adds_epi16() {
+    const fn test_mm512_adds_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(i16::MAX);
         let r = _mm512_adds_epi16(a, b);
@@ -12297,7 +13358,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_adds_epi16() {
+    const fn test_mm512_mask_adds_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(i16::MAX);
         let r = _mm512_mask_adds_epi16(a, 0, a, b);
@@ -12310,7 +13371,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_adds_epi16() {
+    const fn test_mm512_maskz_adds_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(i16::MAX);
         let r = _mm512_maskz_adds_epi16(0, a, b);
@@ -12323,7 +13384,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_adds_epi16() {
+    const fn test_mm256_mask_adds_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(i16::MAX);
         let r = _mm256_mask_adds_epi16(a, 0, a, b);
@@ -12335,7 +13396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_adds_epi16() {
+    const fn test_mm256_maskz_adds_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(i16::MAX);
         let r = _mm256_maskz_adds_epi16(0, a, b);
@@ -12347,7 +13408,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_adds_epi16() {
+    const fn test_mm_mask_adds_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(i16::MAX);
         let r = _mm_mask_adds_epi16(a, 0, a, b);
@@ -12358,7 +13419,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_adds_epi16() {
+    const fn test_mm_maskz_adds_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(i16::MAX);
         let r = _mm_maskz_adds_epi16(0, a, b);
@@ -12369,7 +13430,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_adds_epi8() {
+    const fn test_mm512_adds_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(i8::MAX);
         let r = _mm512_adds_epi8(a, b);
@@ -12378,7 +13439,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_adds_epi8() {
+    const fn test_mm512_mask_adds_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(i8::MAX);
         let r = _mm512_mask_adds_epi8(a, 0, a, b);
@@ -12398,7 +13459,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_adds_epi8() {
+    const fn test_mm512_maskz_adds_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(i8::MAX);
         let r = _mm512_maskz_adds_epi8(0, a, b);
@@ -12417,7 +13478,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_adds_epi8() {
+    const fn test_mm256_mask_adds_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(i8::MAX);
         let r = _mm256_mask_adds_epi8(a, 0, a, b);
@@ -12430,7 +13491,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_adds_epi8() {
+    const fn test_mm256_maskz_adds_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(i8::MAX);
         let r = _mm256_maskz_adds_epi8(0, a, b);
@@ -12443,7 +13504,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_adds_epi8() {
+    const fn test_mm_mask_adds_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(i8::MAX);
         let r = _mm_mask_adds_epi8(a, 0, a, b);
@@ -12455,7 +13516,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_adds_epi8() {
+    const fn test_mm_maskz_adds_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(i8::MAX);
         let r = _mm_maskz_adds_epi8(0, a, b);
@@ -12467,7 +13528,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_sub_epi16() {
+    const fn test_mm512_sub_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(2);
         let r = _mm512_sub_epi16(a, b);
@@ -12476,7 +13537,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_sub_epi16() {
+    const fn test_mm512_mask_sub_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(2);
         let r = _mm512_mask_sub_epi16(a, 0, a, b);
@@ -12489,7 +13550,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_sub_epi16() {
+    const fn test_mm512_maskz_sub_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(2);
         let r = _mm512_maskz_sub_epi16(0, a, b);
@@ -12502,7 +13563,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_sub_epi16() {
+    const fn test_mm256_mask_sub_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(2);
         let r = _mm256_mask_sub_epi16(a, 0, a, b);
@@ -12513,7 +13574,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_sub_epi16() {
+    const fn test_mm256_maskz_sub_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(2);
         let r = _mm256_maskz_sub_epi16(0, a, b);
@@ -12524,7 +13585,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_sub_epi16() {
+    const fn test_mm_mask_sub_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(2);
         let r = _mm_mask_sub_epi16(a, 0, a, b);
@@ -12535,7 +13596,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_sub_epi16() {
+    const fn test_mm_maskz_sub_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(2);
         let r = _mm_maskz_sub_epi16(0, a, b);
@@ -12546,7 +13607,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_sub_epi8() {
+    const fn test_mm512_sub_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(2);
         let r = _mm512_sub_epi8(a, b);
@@ -12555,7 +13616,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_sub_epi8() {
+    const fn test_mm512_mask_sub_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(2);
         let r = _mm512_mask_sub_epi8(a, 0, a, b);
@@ -12575,7 +13636,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_sub_epi8() {
+    const fn test_mm512_maskz_sub_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(2);
         let r = _mm512_maskz_sub_epi8(0, a, b);
@@ -12594,7 +13655,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_sub_epi8() {
+    const fn test_mm256_mask_sub_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(2);
         let r = _mm256_mask_sub_epi8(a, 0, a, b);
@@ -12607,7 +13668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_sub_epi8() {
+    const fn test_mm256_maskz_sub_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(2);
         let r = _mm256_maskz_sub_epi8(0, a, b);
@@ -12620,7 +13681,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_sub_epi8() {
+    const fn test_mm_mask_sub_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(2);
         let r = _mm_mask_sub_epi8(a, 0, a, b);
@@ -12631,7 +13692,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_sub_epi8() {
+    const fn test_mm_maskz_sub_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(2);
         let r = _mm_maskz_sub_epi8(0, a, b);
@@ -12642,7 +13703,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_subs_epu16() {
+    const fn test_mm512_subs_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(u16::MAX as i16);
         let r = _mm512_subs_epu16(a, b);
@@ -12651,7 +13712,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_subs_epu16() {
+    const fn test_mm512_mask_subs_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(u16::MAX as i16);
         let r = _mm512_mask_subs_epu16(a, 0, a, b);
@@ -12664,7 +13725,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_subs_epu16() {
+    const fn test_mm512_maskz_subs_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(u16::MAX as i16);
         let r = _mm512_maskz_subs_epu16(0, a, b);
@@ -12677,7 +13738,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_subs_epu16() {
+    const fn test_mm256_mask_subs_epu16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(u16::MAX as i16);
         let r = _mm256_mask_subs_epu16(a, 0, a, b);
@@ -12688,7 +13749,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_subs_epu16() {
+    const fn test_mm256_maskz_subs_epu16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(u16::MAX as i16);
         let r = _mm256_maskz_subs_epu16(0, a, b);
@@ -12699,7 +13760,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_subs_epu16() {
+    const fn test_mm_mask_subs_epu16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(u16::MAX as i16);
         let r = _mm_mask_subs_epu16(a, 0, a, b);
@@ -12710,7 +13771,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_subs_epu16() {
+    const fn test_mm_maskz_subs_epu16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(u16::MAX as i16);
         let r = _mm_maskz_subs_epu16(0, a, b);
@@ -12721,7 +13782,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_subs_epu8() {
+    const fn test_mm512_subs_epu8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(u8::MAX as i8);
         let r = _mm512_subs_epu8(a, b);
@@ -12730,7 +13791,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_subs_epu8() {
+    const fn test_mm512_mask_subs_epu8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(u8::MAX as i8);
         let r = _mm512_mask_subs_epu8(a, 0, a, b);
@@ -12750,7 +13811,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_subs_epu8() {
+    const fn test_mm512_maskz_subs_epu8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(u8::MAX as i8);
         let r = _mm512_maskz_subs_epu8(0, a, b);
@@ -12769,7 +13830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_subs_epu8() {
+    const fn test_mm256_mask_subs_epu8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(u8::MAX as i8);
         let r = _mm256_mask_subs_epu8(a, 0, a, b);
@@ -12782,7 +13843,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_subs_epu8() {
+    const fn test_mm256_maskz_subs_epu8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(u8::MAX as i8);
         let r = _mm256_maskz_subs_epu8(0, a, b);
@@ -12795,7 +13856,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_subs_epu8() {
+    const fn test_mm_mask_subs_epu8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(u8::MAX as i8);
         let r = _mm_mask_subs_epu8(a, 0, a, b);
@@ -12806,7 +13867,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_subs_epu8() {
+    const fn test_mm_maskz_subs_epu8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(u8::MAX as i8);
         let r = _mm_maskz_subs_epu8(0, a, b);
@@ -12817,7 +13878,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_subs_epi16() {
+    const fn test_mm512_subs_epi16() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(i16::MAX);
         let r = _mm512_subs_epi16(a, b);
@@ -12826,7 +13887,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_subs_epi16() {
+    const fn test_mm512_mask_subs_epi16() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(i16::MAX);
         let r = _mm512_mask_subs_epi16(a, 0, a, b);
@@ -12839,7 +13900,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_subs_epi16() {
+    const fn test_mm512_maskz_subs_epi16() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(i16::MAX);
         let r = _mm512_maskz_subs_epi16(0, a, b);
@@ -12852,7 +13913,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_subs_epi16() {
+    const fn test_mm256_mask_subs_epi16() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(i16::MAX);
         let r = _mm256_mask_subs_epi16(a, 0, a, b);
@@ -12864,7 +13925,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_subs_epi16() {
+    const fn test_mm256_maskz_subs_epi16() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(i16::MAX);
         let r = _mm256_maskz_subs_epi16(0, a, b);
@@ -12876,7 +13937,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_subs_epi16() {
+    const fn test_mm_mask_subs_epi16() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(i16::MAX);
         let r = _mm_mask_subs_epi16(a, 0, a, b);
@@ -12887,7 +13948,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_subs_epi16() {
+    const fn test_mm_maskz_subs_epi16() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(i16::MAX);
         let r = _mm_maskz_subs_epi16(0, a, b);
@@ -12898,7 +13959,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_subs_epi8() {
+    const fn test_mm512_subs_epi8() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(i8::MAX);
         let r = _mm512_subs_epi8(a, b);
@@ -12907,7 +13968,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_subs_epi8() {
+    const fn test_mm512_mask_subs_epi8() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(i8::MAX);
         let r = _mm512_mask_subs_epi8(a, 0, a, b);
@@ -12927,7 +13988,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_subs_epi8() {
+    const fn test_mm512_maskz_subs_epi8() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(i8::MAX);
         let r = _mm512_maskz_subs_epi8(0, a, b);
@@ -12946,7 +14007,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_subs_epi8() {
+    const fn test_mm256_mask_subs_epi8() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(i8::MAX);
         let r = _mm256_mask_subs_epi8(a, 0, a, b);
@@ -12959,7 +14020,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_subs_epi8() {
+    const fn test_mm256_maskz_subs_epi8() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(i8::MAX);
         let r = _mm256_maskz_subs_epi8(0, a, b);
@@ -12972,7 +14033,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_subs_epi8() {
+    const fn test_mm_mask_subs_epi8() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(i8::MAX);
         let r = _mm_mask_subs_epi8(a, 0, a, b);
@@ -12984,7 +14045,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_subs_epi8() {
+    const fn test_mm_maskz_subs_epi8() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(i8::MAX);
         let r = _mm_maskz_subs_epi8(0, a, b);
@@ -12996,7 +14057,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mulhi_epu16() {
+    const fn test_mm512_mulhi_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mulhi_epu16(a, b);
@@ -13005,7 +14066,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_mulhi_epu16() {
+    const fn test_mm512_mask_mulhi_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mask_mulhi_epu16(a, 0, a, b);
@@ -13018,7 +14079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_mulhi_epu16() {
+    const fn test_mm512_maskz_mulhi_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_mulhi_epu16(0, a, b);
@@ -13031,7 +14092,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_mulhi_epu16() {
+    const fn test_mm256_mask_mulhi_epu16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_mask_mulhi_epu16(a, 0, a, b);
@@ -13042,7 +14103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_mulhi_epu16() {
+    const fn test_mm256_maskz_mulhi_epu16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_mulhi_epu16(0, a, b);
@@ -13053,7 +14114,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_mulhi_epu16() {
+    const fn test_mm_mask_mulhi_epu16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_mask_mulhi_epu16(a, 0, a, b);
@@ -13064,7 +14125,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_mulhi_epu16() {
+    const fn test_mm_maskz_mulhi_epu16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_mulhi_epu16(0, a, b);
@@ -13075,7 +14136,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mulhi_epi16() {
+    const fn test_mm512_mulhi_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mulhi_epi16(a, b);
@@ -13084,7 +14145,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_mulhi_epi16() {
+    const fn test_mm512_mask_mulhi_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mask_mulhi_epi16(a, 0, a, b);
@@ -13097,7 +14158,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_mulhi_epi16() {
+    const fn test_mm512_maskz_mulhi_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_mulhi_epi16(0, a, b);
@@ -13110,7 +14171,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_mulhi_epi16() {
+    const fn test_mm256_mask_mulhi_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_mask_mulhi_epi16(a, 0, a, b);
@@ -13121,7 +14182,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_mulhi_epi16() {
+    const fn test_mm256_maskz_mulhi_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_mulhi_epi16(0, a, b);
@@ -13132,7 +14193,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_mulhi_epi16() {
+    const fn test_mm_mask_mulhi_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_mask_mulhi_epi16(a, 0, a, b);
@@ -13143,7 +14204,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_mulhi_epi16() {
+    const fn test_mm_maskz_mulhi_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_mulhi_epi16(0, a, b);
@@ -13154,7 +14215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mulhrs_epi16() {
+    fn test_mm512_mulhrs_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mulhrs_epi16(a, b);
@@ -13163,7 +14224,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_mulhrs_epi16() {
+    fn test_mm512_mask_mulhrs_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mask_mulhrs_epi16(a, 0, a, b);
@@ -13176,7 +14237,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_mulhrs_epi16() {
+    fn test_mm512_maskz_mulhrs_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_mulhrs_epi16(0, a, b);
@@ -13189,7 +14250,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_mulhrs_epi16() {
+    fn test_mm256_mask_mulhrs_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_mask_mulhrs_epi16(a, 0, a, b);
@@ -13200,7 +14261,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_mulhrs_epi16() {
+    fn test_mm256_maskz_mulhrs_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_mulhrs_epi16(0, a, b);
@@ -13211,7 +14272,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_mulhrs_epi16() {
+    fn test_mm_mask_mulhrs_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_mask_mulhrs_epi16(a, 0, a, b);
@@ -13222,7 +14283,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_mulhrs_epi16() {
+    fn test_mm_maskz_mulhrs_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_mulhrs_epi16(0, a, b);
@@ -13233,7 +14294,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mullo_epi16() {
+    const fn test_mm512_mullo_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mullo_epi16(a, b);
@@ -13242,7 +14303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_mullo_epi16() {
+    const fn test_mm512_mask_mullo_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mask_mullo_epi16(a, 0, a, b);
@@ -13255,7 +14316,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_mullo_epi16() {
+    const fn test_mm512_maskz_mullo_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_mullo_epi16(0, a, b);
@@ -13268,7 +14329,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_mullo_epi16() {
+    const fn test_mm256_mask_mullo_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_mask_mullo_epi16(a, 0, a, b);
@@ -13279,7 +14340,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_mullo_epi16() {
+    const fn test_mm256_maskz_mullo_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_mullo_epi16(0, a, b);
@@ -13290,7 +14351,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_mullo_epi16() {
+    const fn test_mm_mask_mullo_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_mask_mullo_epi16(a, 0, a, b);
@@ -13301,7 +14362,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_mullo_epi16() {
+    const fn test_mm_maskz_mullo_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_mullo_epi16(0, a, b);
@@ -13312,7 +14373,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_max_epu16() {
+    const fn test_mm512_max_epu16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13326,8 +14387,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_epu16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_max_epu16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13343,8 +14404,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_epu16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_max_epu16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13360,8 +14421,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_epu16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_max_epu16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_mask_max_epu16(a, 0, a, b);
@@ -13371,8 +14432,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_epu16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_max_epu16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_maskz_max_epu16(0, a, b);
@@ -13382,8 +14443,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_epu16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_max_epu16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_mask_max_epu16(a, 0, a, b);
@@ -13393,8 +14454,8 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_epu16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_max_epu16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_maskz_max_epu16(0, a, b);
@@ -13405,7 +14466,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_max_epu8() {
+    const fn test_mm512_max_epu8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -13425,8 +14486,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_epu8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_max_epu8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -13453,8 +14514,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_epu8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_max_epu8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -13480,8 +14541,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_epu8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_max_epu8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13497,8 +14558,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_epu8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_max_epu8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13514,8 +14575,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_epu8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_max_epu8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_mask_max_epu8(a, 0, a, b);
@@ -13525,8 +14586,8 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_epu8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_max_epu8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_maskz_max_epu8(0, a, b);
@@ -13537,7 +14598,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_max_epi16() {
+    const fn test_mm512_max_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13551,8 +14612,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_epi16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_max_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13568,8 +14629,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_epi16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_max_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13585,8 +14646,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_max_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_mask_max_epi16(a, 0, a, b);
@@ -13596,8 +14657,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_max_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_maskz_max_epi16(0, a, b);
@@ -13607,8 +14668,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_max_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_mask_max_epi16(a, 0, a, b);
@@ -13618,8 +14679,8 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_max_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_maskz_max_epi16(0, a, b);
@@ -13630,7 +14691,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_max_epi8() {
+    const fn test_mm512_max_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -13650,8 +14711,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_epi8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_max_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -13678,8 +14739,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_epi8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_max_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -13705,8 +14766,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_max_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13722,8 +14783,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_max_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13739,8 +14800,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_max_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_mask_max_epi8(a, 0, a, b);
@@ -13750,8 +14811,8 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_max_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_maskz_max_epi8(0, a, b);
@@ -13762,7 +14823,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_min_epu16() {
+    const fn test_mm512_min_epu16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13776,8 +14837,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_epu16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_min_epu16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13793,8 +14854,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_epu16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_min_epu16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13810,8 +14871,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_epu16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_min_epu16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_mask_min_epu16(a, 0, a, b);
@@ -13821,8 +14882,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_epu16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_min_epu16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_maskz_min_epu16(0, a, b);
@@ -13832,8 +14893,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_epu16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_min_epu16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_mask_min_epu16(a, 0, a, b);
@@ -13843,8 +14904,8 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_epu16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_min_epu16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_maskz_min_epu16(0, a, b);
@@ -13855,7 +14916,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_min_epu8() {
+    const fn test_mm512_min_epu8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -13875,8 +14936,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_epu8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_min_epu8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -13903,8 +14964,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_epu8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_min_epu8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -13930,8 +14991,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_epu8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_min_epu8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13947,8 +15008,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_epu8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_min_epu8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -13964,8 +15025,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_epu8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_min_epu8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_mask_min_epu8(a, 0, a, b);
@@ -13975,8 +15036,8 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_epu8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_min_epu8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_maskz_min_epu8(0, a, b);
@@ -13987,7 +15048,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_min_epi16() {
+    const fn test_mm512_min_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -14001,8 +15062,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_epi16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_min_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -14018,8 +15079,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_epi16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_min_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -14035,8 +15096,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_min_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_mask_min_epi16(a, 0, a, b);
@@ -14046,8 +15107,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_min_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_maskz_min_epi16(0, a, b);
@@ -14057,8 +15118,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_min_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_mask_min_epi16(a, 0, a, b);
@@ -14068,8 +15129,8 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_min_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_maskz_min_epi16(0, a, b);
@@ -14080,7 +15141,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_min_epi8() {
+    const fn test_mm512_min_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -14100,8 +15161,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_epi8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_min_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -14128,8 +15189,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_epi8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_min_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -14155,8 +15216,8 @@ mod tests {
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_min_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -14172,8 +15233,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_min_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -14189,8 +15250,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_min_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_mask_min_epi8(a, 0, a, b);
@@ -14200,8 +15261,8 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_min_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_maskz_min_epi8(0, a, b);
@@ -14212,7 +15273,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmplt_epu16_mask() {
+    const fn test_mm512_cmplt_epu16_mask() {
         let a = _mm512_set1_epi16(-2);
         let b = _mm512_set1_epi16(-1);
         let m = _mm512_cmplt_epu16_mask(a, b);
@@ -14220,7 +15281,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmplt_epu16_mask() {
+    const fn test_mm512_mask_cmplt_epu16_mask() {
         let a = _mm512_set1_epi16(-2);
         let b = _mm512_set1_epi16(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14229,7 +15290,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmplt_epu16_mask() {
+    const fn test_mm256_cmplt_epu16_mask() {
         let a = _mm256_set1_epi16(-2);
         let b = _mm256_set1_epi16(-1);
         let m = _mm256_cmplt_epu16_mask(a, b);
@@ -14237,7 +15298,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmplt_epu16_mask() {
+    const fn test_mm256_mask_cmplt_epu16_mask() {
         let a = _mm256_set1_epi16(-2);
         let b = _mm256_set1_epi16(-1);
         let mask = 0b01010101_01010101;
@@ -14246,7 +15307,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmplt_epu16_mask() {
+    const fn test_mm_cmplt_epu16_mask() {
         let a = _mm_set1_epi16(-2);
         let b = _mm_set1_epi16(-1);
         let m = _mm_cmplt_epu16_mask(a, b);
@@ -14254,7 +15315,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmplt_epu16_mask() {
+    const fn test_mm_mask_cmplt_epu16_mask() {
         let a = _mm_set1_epi16(-2);
         let b = _mm_set1_epi16(-1);
         let mask = 0b01010101;
@@ -14263,7 +15324,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmplt_epu8_mask() {
+    const fn test_mm512_cmplt_epu8_mask() {
         let a = _mm512_set1_epi8(-2);
         let b = _mm512_set1_epi8(-1);
         let m = _mm512_cmplt_epu8_mask(a, b);
@@ -14274,7 +15335,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmplt_epu8_mask() {
+    const fn test_mm512_mask_cmplt_epu8_mask() {
         let a = _mm512_set1_epi8(-2);
         let b = _mm512_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -14286,7 +15347,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmplt_epu8_mask() {
+    const fn test_mm256_cmplt_epu8_mask() {
         let a = _mm256_set1_epi8(-2);
         let b = _mm256_set1_epi8(-1);
         let m = _mm256_cmplt_epu8_mask(a, b);
@@ -14294,7 +15355,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmplt_epu8_mask() {
+    const fn test_mm256_mask_cmplt_epu8_mask() {
         let a = _mm256_set1_epi8(-2);
         let b = _mm256_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14303,7 +15364,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmplt_epu8_mask() {
+    const fn test_mm_cmplt_epu8_mask() {
         let a = _mm_set1_epi8(-2);
         let b = _mm_set1_epi8(-1);
         let m = _mm_cmplt_epu8_mask(a, b);
@@ -14311,7 +15372,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmplt_epu8_mask() {
+    const fn test_mm_mask_cmplt_epu8_mask() {
         let a = _mm_set1_epi8(-2);
         let b = _mm_set1_epi8(-1);
         let mask = 0b01010101_01010101;
@@ -14320,7 +15381,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmplt_epi16_mask() {
+    const fn test_mm512_cmplt_epi16_mask() {
         let a = _mm512_set1_epi16(-2);
         let b = _mm512_set1_epi16(-1);
         let m = _mm512_cmplt_epi16_mask(a, b);
@@ -14328,7 +15389,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmplt_epi16_mask() {
+    const fn test_mm512_mask_cmplt_epi16_mask() {
         let a = _mm512_set1_epi16(-2);
         let b = _mm512_set1_epi16(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14337,7 +15398,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmplt_epi16_mask() {
+    const fn test_mm256_cmplt_epi16_mask() {
         let a = _mm256_set1_epi16(-2);
         let b = _mm256_set1_epi16(-1);
         let m = _mm256_cmplt_epi16_mask(a, b);
@@ -14345,7 +15406,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmplt_epi16_mask() {
+    const fn test_mm256_mask_cmplt_epi16_mask() {
         let a = _mm256_set1_epi16(-2);
         let b = _mm256_set1_epi16(-1);
         let mask = 0b01010101_01010101;
@@ -14354,7 +15415,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmplt_epi16_mask() {
+    const fn test_mm_cmplt_epi16_mask() {
         let a = _mm_set1_epi16(-2);
         let b = _mm_set1_epi16(-1);
         let m = _mm_cmplt_epi16_mask(a, b);
@@ -14362,7 +15423,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmplt_epi16_mask() {
+    const fn test_mm_mask_cmplt_epi16_mask() {
         let a = _mm_set1_epi16(-2);
         let b = _mm_set1_epi16(-1);
         let mask = 0b01010101;
@@ -14371,7 +15432,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmplt_epi8_mask() {
+    const fn test_mm512_cmplt_epi8_mask() {
         let a = _mm512_set1_epi8(-2);
         let b = _mm512_set1_epi8(-1);
         let m = _mm512_cmplt_epi8_mask(a, b);
@@ -14382,7 +15443,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmplt_epi8_mask() {
+    const fn test_mm512_mask_cmplt_epi8_mask() {
         let a = _mm512_set1_epi8(-2);
         let b = _mm512_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -14394,7 +15455,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmplt_epi8_mask() {
+    const fn test_mm256_cmplt_epi8_mask() {
         let a = _mm256_set1_epi8(-2);
         let b = _mm256_set1_epi8(-1);
         let m = _mm256_cmplt_epi8_mask(a, b);
@@ -14402,7 +15463,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmplt_epi8_mask() {
+    const fn test_mm256_mask_cmplt_epi8_mask() {
         let a = _mm256_set1_epi8(-2);
         let b = _mm256_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14411,7 +15472,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmplt_epi8_mask() {
+    const fn test_mm_cmplt_epi8_mask() {
         let a = _mm_set1_epi8(-2);
         let b = _mm_set1_epi8(-1);
         let m = _mm_cmplt_epi8_mask(a, b);
@@ -14419,7 +15480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmplt_epi8_mask() {
+    const fn test_mm_mask_cmplt_epi8_mask() {
         let a = _mm_set1_epi8(-2);
         let b = _mm_set1_epi8(-1);
         let mask = 0b01010101_01010101;
@@ -14428,7 +15489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpgt_epu16_mask() {
+    const fn test_mm512_cmpgt_epu16_mask() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(1);
         let m = _mm512_cmpgt_epu16_mask(a, b);
@@ -14436,7 +15497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpgt_epu16_mask() {
+    const fn test_mm512_mask_cmpgt_epu16_mask() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14445,7 +15506,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpgt_epu16_mask() {
+    const fn test_mm256_cmpgt_epu16_mask() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(1);
         let m = _mm256_cmpgt_epu16_mask(a, b);
@@ -14453,7 +15514,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpgt_epu16_mask() {
+    const fn test_mm256_mask_cmpgt_epu16_mask() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(1);
         let mask = 0b01010101_01010101;
@@ -14462,7 +15523,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpgt_epu16_mask() {
+    const fn test_mm_cmpgt_epu16_mask() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(1);
         let m = _mm_cmpgt_epu16_mask(a, b);
@@ -14470,7 +15531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpgt_epu16_mask() {
+    const fn test_mm_mask_cmpgt_epu16_mask() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(1);
         let mask = 0b01010101;
@@ -14479,7 +15540,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpgt_epu8_mask() {
+    const fn test_mm512_cmpgt_epu8_mask() {
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(1);
         let m = _mm512_cmpgt_epu8_mask(a, b);
@@ -14490,7 +15551,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpgt_epu8_mask() {
+    const fn test_mm512_mask_cmpgt_epu8_mask() {
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -14502,7 +15563,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpgt_epu8_mask() {
+    const fn test_mm256_cmpgt_epu8_mask() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(1);
         let m = _mm256_cmpgt_epu8_mask(a, b);
@@ -14510,7 +15571,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpgt_epu8_mask() {
+    const fn test_mm256_mask_cmpgt_epu8_mask() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14519,7 +15580,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpgt_epu8_mask() {
+    const fn test_mm_cmpgt_epu8_mask() {
         let a = _mm_set1_epi8(2);
         let b = _mm_set1_epi8(1);
         let m = _mm_cmpgt_epu8_mask(a, b);
@@ -14527,7 +15588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpgt_epu8_mask() {
+    const fn test_mm_mask_cmpgt_epu8_mask() {
         let a = _mm_set1_epi8(2);
         let b = _mm_set1_epi8(1);
         let mask = 0b01010101_01010101;
@@ -14536,7 +15597,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpgt_epi16_mask() {
+    const fn test_mm512_cmpgt_epi16_mask() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(-1);
         let m = _mm512_cmpgt_epi16_mask(a, b);
@@ -14544,7 +15605,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpgt_epi16_mask() {
+    const fn test_mm512_mask_cmpgt_epi16_mask() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14553,7 +15614,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpgt_epi16_mask() {
+    const fn test_mm256_cmpgt_epi16_mask() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(-1);
         let m = _mm256_cmpgt_epi16_mask(a, b);
@@ -14561,7 +15622,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpgt_epi16_mask() {
+    const fn test_mm256_mask_cmpgt_epi16_mask() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(-1);
         let mask = 0b001010101_01010101;
@@ -14570,7 +15631,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpgt_epi16_mask() {
+    const fn test_mm_cmpgt_epi16_mask() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(-1);
         let m = _mm_cmpgt_epi16_mask(a, b);
@@ -14578,7 +15639,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpgt_epi16_mask() {
+    const fn test_mm_mask_cmpgt_epi16_mask() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(-1);
         let mask = 0b01010101;
@@ -14587,7 +15648,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpgt_epi8_mask() {
+    const fn test_mm512_cmpgt_epi8_mask() {
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(-1);
         let m = _mm512_cmpgt_epi8_mask(a, b);
@@ -14598,7 +15659,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpgt_epi8_mask() {
+    const fn test_mm512_mask_cmpgt_epi8_mask() {
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -14610,7 +15671,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpgt_epi8_mask() {
+    const fn test_mm256_cmpgt_epi8_mask() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(-1);
         let m = _mm256_cmpgt_epi8_mask(a, b);
@@ -14618,7 +15679,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpgt_epi8_mask() {
+    const fn test_mm256_mask_cmpgt_epi8_mask() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14627,7 +15688,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpgt_epi8_mask() {
+    const fn test_mm_cmpgt_epi8_mask() {
         let a = _mm_set1_epi8(2);
         let b = _mm_set1_epi8(-1);
         let m = _mm_cmpgt_epi8_mask(a, b);
@@ -14635,7 +15696,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpgt_epi8_mask() {
+    const fn test_mm_mask_cmpgt_epi8_mask() {
         let a = _mm_set1_epi8(2);
         let b = _mm_set1_epi8(-1);
         let mask = 0b01010101_01010101;
@@ -14644,7 +15705,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmple_epu16_mask() {
+    const fn test_mm512_cmple_epu16_mask() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(-1);
         let m = _mm512_cmple_epu16_mask(a, b);
@@ -14652,7 +15713,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmple_epu16_mask() {
+    const fn test_mm512_mask_cmple_epu16_mask() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14661,7 +15722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmple_epu16_mask() {
+    const fn test_mm256_cmple_epu16_mask() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(-1);
         let m = _mm256_cmple_epu16_mask(a, b);
@@ -14669,7 +15730,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmple_epu16_mask() {
+    const fn test_mm256_mask_cmple_epu16_mask() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(-1);
         let mask = 0b01010101_01010101;
@@ -14678,7 +15739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmple_epu16_mask() {
+    const fn test_mm_cmple_epu16_mask() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(-1);
         let m = _mm_cmple_epu16_mask(a, b);
@@ -14686,7 +15747,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmple_epu16_mask() {
+    const fn test_mm_mask_cmple_epu16_mask() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(-1);
         let mask = 0b01010101;
@@ -14695,7 +15756,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmple_epu8_mask() {
+    const fn test_mm512_cmple_epu8_mask() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(-1);
         let m = _mm512_cmple_epu8_mask(a, b);
@@ -14706,7 +15767,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmple_epu8_mask() {
+    const fn test_mm512_mask_cmple_epu8_mask() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -14718,7 +15779,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmple_epu8_mask() {
+    const fn test_mm256_cmple_epu8_mask() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(-1);
         let m = _mm256_cmple_epu8_mask(a, b);
@@ -14726,7 +15787,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmple_epu8_mask() {
+    const fn test_mm256_mask_cmple_epu8_mask() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14735,7 +15796,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmple_epu8_mask() {
+    const fn test_mm_cmple_epu8_mask() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(-1);
         let m = _mm_cmple_epu8_mask(a, b);
@@ -14743,7 +15804,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmple_epu8_mask() {
+    const fn test_mm_mask_cmple_epu8_mask() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(-1);
         let mask = 0b01010101_01010101;
@@ -14752,7 +15813,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmple_epi16_mask() {
+    const fn test_mm512_cmple_epi16_mask() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(-1);
         let m = _mm512_cmple_epi16_mask(a, b);
@@ -14760,7 +15821,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmple_epi16_mask() {
+    const fn test_mm512_mask_cmple_epi16_mask() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14769,7 +15830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmple_epi16_mask() {
+    const fn test_mm256_cmple_epi16_mask() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(-1);
         let m = _mm256_cmple_epi16_mask(a, b);
@@ -14777,7 +15838,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmple_epi16_mask() {
+    const fn test_mm256_mask_cmple_epi16_mask() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(-1);
         let mask = 0b01010101_01010101;
@@ -14786,7 +15847,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmple_epi16_mask() {
+    const fn test_mm_cmple_epi16_mask() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(-1);
         let m = _mm_cmple_epi16_mask(a, b);
@@ -14794,7 +15855,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmple_epi16_mask() {
+    const fn test_mm_mask_cmple_epi16_mask() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(-1);
         let mask = 0b01010101;
@@ -14803,7 +15864,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmple_epi8_mask() {
+    const fn test_mm512_cmple_epi8_mask() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(-1);
         let m = _mm512_cmple_epi8_mask(a, b);
@@ -14814,7 +15875,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmple_epi8_mask() {
+    const fn test_mm512_mask_cmple_epi8_mask() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -14826,7 +15887,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmple_epi8_mask() {
+    const fn test_mm256_cmple_epi8_mask() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(-1);
         let m = _mm256_cmple_epi8_mask(a, b);
@@ -14834,7 +15895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmple_epi8_mask() {
+    const fn test_mm256_mask_cmple_epi8_mask() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14843,7 +15904,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmple_epi8_mask() {
+    const fn test_mm_cmple_epi8_mask() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(-1);
         let m = _mm_cmple_epi8_mask(a, b);
@@ -14851,7 +15912,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmple_epi8_mask() {
+    const fn test_mm_mask_cmple_epi8_mask() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(-1);
         let mask = 0b01010101_01010101;
@@ -14860,7 +15921,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpge_epu16_mask() {
+    const fn test_mm512_cmpge_epu16_mask() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let m = _mm512_cmpge_epu16_mask(a, b);
@@ -14868,7 +15929,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpge_epu16_mask() {
+    const fn test_mm512_mask_cmpge_epu16_mask() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14877,7 +15938,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpge_epu16_mask() {
+    const fn test_mm256_cmpge_epu16_mask() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let m = _mm256_cmpge_epu16_mask(a, b);
@@ -14885,7 +15946,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpge_epu16_mask() {
+    const fn test_mm256_mask_cmpge_epu16_mask() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let mask = 0b01010101_01010101;
@@ -14894,7 +15955,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpge_epu16_mask() {
+    const fn test_mm_cmpge_epu16_mask() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let m = _mm_cmpge_epu16_mask(a, b);
@@ -14902,7 +15963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpge_epu16_mask() {
+    const fn test_mm_mask_cmpge_epu16_mask() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let mask = 0b01010101;
@@ -14911,7 +15972,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpge_epu8_mask() {
+    const fn test_mm512_cmpge_epu8_mask() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let m = _mm512_cmpge_epu8_mask(a, b);
@@ -14922,7 +15983,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpge_epu8_mask() {
+    const fn test_mm512_mask_cmpge_epu8_mask() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -14934,7 +15995,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpge_epu8_mask() {
+    const fn test_mm256_cmpge_epu8_mask() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let m = _mm256_cmpge_epu8_mask(a, b);
@@ -14942,7 +16003,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpge_epu8_mask() {
+    const fn test_mm256_mask_cmpge_epu8_mask() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14951,7 +16012,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpge_epu8_mask() {
+    const fn test_mm_cmpge_epu8_mask() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let m = _mm_cmpge_epu8_mask(a, b);
@@ -14959,7 +16020,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpge_epu8_mask() {
+    const fn test_mm_mask_cmpge_epu8_mask() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let mask = 0b01010101_01010101;
@@ -14968,7 +16029,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpge_epi16_mask() {
+    const fn test_mm512_cmpge_epi16_mask() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(-1);
         let m = _mm512_cmpge_epi16_mask(a, b);
@@ -14976,7 +16037,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpge_epi16_mask() {
+    const fn test_mm512_mask_cmpge_epi16_mask() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -14985,7 +16046,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpge_epi16_mask() {
+    const fn test_mm256_cmpge_epi16_mask() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(-1);
         let m = _mm256_cmpge_epi16_mask(a, b);
@@ -14993,7 +16054,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpge_epi16_mask() {
+    const fn test_mm256_mask_cmpge_epi16_mask() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(-1);
         let mask = 0b01010101_01010101;
@@ -15002,7 +16063,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpge_epi16_mask() {
+    const fn test_mm_cmpge_epi16_mask() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(-1);
         let m = _mm_cmpge_epi16_mask(a, b);
@@ -15010,7 +16071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpge_epi16_mask() {
+    const fn test_mm_mask_cmpge_epi16_mask() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(-1);
         let mask = 0b01010101;
@@ -15019,7 +16080,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpge_epi8_mask() {
+    const fn test_mm512_cmpge_epi8_mask() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(-1);
         let m = _mm512_cmpge_epi8_mask(a, b);
@@ -15030,7 +16091,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpge_epi8_mask() {
+    const fn test_mm512_mask_cmpge_epi8_mask() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -15042,7 +16103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpge_epi8_mask() {
+    const fn test_mm256_cmpge_epi8_mask() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(-1);
         let m = _mm256_cmpge_epi8_mask(a, b);
@@ -15050,7 +16111,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpge_epi8_mask() {
+    const fn test_mm256_mask_cmpge_epi8_mask() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15059,7 +16120,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpge_epi8_mask() {
+    const fn test_mm_cmpge_epi8_mask() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(-1);
         let m = _mm_cmpge_epi8_mask(a, b);
@@ -15067,7 +16128,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpge_epi8_mask() {
+    const fn test_mm_mask_cmpge_epi8_mask() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(-1);
         let mask = 0b01010101_01010101;
@@ -15076,7 +16137,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpeq_epu16_mask() {
+    const fn test_mm512_cmpeq_epu16_mask() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let m = _mm512_cmpeq_epu16_mask(a, b);
@@ -15084,7 +16145,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpeq_epu16_mask() {
+    const fn test_mm512_mask_cmpeq_epu16_mask() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15093,7 +16154,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpeq_epu16_mask() {
+    const fn test_mm256_cmpeq_epu16_mask() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let m = _mm256_cmpeq_epu16_mask(a, b);
@@ -15101,7 +16162,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpeq_epu16_mask() {
+    const fn test_mm256_mask_cmpeq_epu16_mask() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let mask = 0b01010101_01010101;
@@ -15110,7 +16171,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpeq_epu16_mask() {
+    const fn test_mm_cmpeq_epu16_mask() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let m = _mm_cmpeq_epu16_mask(a, b);
@@ -15118,7 +16179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpeq_epu16_mask() {
+    const fn test_mm_mask_cmpeq_epu16_mask() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let mask = 0b01010101;
@@ -15127,7 +16188,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpeq_epu8_mask() {
+    const fn test_mm512_cmpeq_epu8_mask() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let m = _mm512_cmpeq_epu8_mask(a, b);
@@ -15138,7 +16199,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpeq_epu8_mask() {
+    const fn test_mm512_mask_cmpeq_epu8_mask() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -15150,7 +16211,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpeq_epu8_mask() {
+    const fn test_mm256_cmpeq_epu8_mask() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let m = _mm256_cmpeq_epu8_mask(a, b);
@@ -15158,7 +16219,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpeq_epu8_mask() {
+    const fn test_mm256_mask_cmpeq_epu8_mask() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15167,7 +16228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpeq_epu8_mask() {
+    const fn test_mm_cmpeq_epu8_mask() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let m = _mm_cmpeq_epu8_mask(a, b);
@@ -15175,7 +16236,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpeq_epu8_mask() {
+    const fn test_mm_mask_cmpeq_epu8_mask() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let mask = 0b01010101_01010101;
@@ -15184,7 +16245,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpeq_epi16_mask() {
+    const fn test_mm512_cmpeq_epi16_mask() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(-1);
         let m = _mm512_cmpeq_epi16_mask(a, b);
@@ -15192,7 +16253,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpeq_epi16_mask() {
+    const fn test_mm512_mask_cmpeq_epi16_mask() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15201,7 +16262,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpeq_epi16_mask() {
+    const fn test_mm256_cmpeq_epi16_mask() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(-1);
         let m = _mm256_cmpeq_epi16_mask(a, b);
@@ -15209,7 +16270,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpeq_epi16_mask() {
+    const fn test_mm256_mask_cmpeq_epi16_mask() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(-1);
         let mask = 0b01010101_01010101;
@@ -15218,7 +16279,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpeq_epi16_mask() {
+    const fn test_mm_cmpeq_epi16_mask() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(-1);
         let m = _mm_cmpeq_epi16_mask(a, b);
@@ -15226,7 +16287,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpeq_epi16_mask() {
+    const fn test_mm_mask_cmpeq_epi16_mask() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(-1);
         let mask = 0b01010101;
@@ -15235,7 +16296,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpeq_epi8_mask() {
+    const fn test_mm512_cmpeq_epi8_mask() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(-1);
         let m = _mm512_cmpeq_epi8_mask(a, b);
@@ -15246,7 +16307,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpeq_epi8_mask() {
+    const fn test_mm512_mask_cmpeq_epi8_mask() {
         let a = _mm512_set1_epi8(-1);
         let b = _mm512_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -15258,7 +16319,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpeq_epi8_mask() {
+    const fn test_mm256_cmpeq_epi8_mask() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(-1);
         let m = _mm256_cmpeq_epi8_mask(a, b);
@@ -15266,7 +16327,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpeq_epi8_mask() {
+    const fn test_mm256_mask_cmpeq_epi8_mask() {
         let a = _mm256_set1_epi8(-1);
         let b = _mm256_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15275,7 +16336,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpeq_epi8_mask() {
+    const fn test_mm_cmpeq_epi8_mask() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(-1);
         let m = _mm_cmpeq_epi8_mask(a, b);
@@ -15283,7 +16344,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpeq_epi8_mask() {
+    const fn test_mm_mask_cmpeq_epi8_mask() {
         let a = _mm_set1_epi8(-1);
         let b = _mm_set1_epi8(-1);
         let mask = 0b01010101_01010101;
@@ -15292,7 +16353,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpneq_epu16_mask() {
+    const fn test_mm512_cmpneq_epu16_mask() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(1);
         let m = _mm512_cmpneq_epu16_mask(a, b);
@@ -15300,7 +16361,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpneq_epu16_mask() {
+    const fn test_mm512_mask_cmpneq_epu16_mask() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15309,7 +16370,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpneq_epu16_mask() {
+    const fn test_mm256_cmpneq_epu16_mask() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(1);
         let m = _mm256_cmpneq_epu16_mask(a, b);
@@ -15317,7 +16378,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpneq_epu16_mask() {
+    const fn test_mm256_mask_cmpneq_epu16_mask() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(1);
         let mask = 0b01010101_01010101;
@@ -15326,7 +16387,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpneq_epu16_mask() {
+    const fn test_mm_cmpneq_epu16_mask() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(1);
         let m = _mm_cmpneq_epu16_mask(a, b);
@@ -15334,7 +16395,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpneq_epu16_mask() {
+    const fn test_mm_mask_cmpneq_epu16_mask() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(1);
         let mask = 0b01010101;
@@ -15343,7 +16404,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpneq_epu8_mask() {
+    const fn test_mm512_cmpneq_epu8_mask() {
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(1);
         let m = _mm512_cmpneq_epu8_mask(a, b);
@@ -15354,7 +16415,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpneq_epu8_mask() {
+    const fn test_mm512_mask_cmpneq_epu8_mask() {
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -15366,7 +16427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpneq_epu8_mask() {
+    const fn test_mm256_cmpneq_epu8_mask() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(1);
         let m = _mm256_cmpneq_epu8_mask(a, b);
@@ -15374,7 +16435,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpneq_epu8_mask() {
+    const fn test_mm256_mask_cmpneq_epu8_mask() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15383,7 +16444,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpneq_epu8_mask() {
+    const fn test_mm_cmpneq_epu8_mask() {
         let a = _mm_set1_epi8(2);
         let b = _mm_set1_epi8(1);
         let m = _mm_cmpneq_epu8_mask(a, b);
@@ -15391,7 +16452,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpneq_epu8_mask() {
+    const fn test_mm_mask_cmpneq_epu8_mask() {
         let a = _mm_set1_epi8(2);
         let b = _mm_set1_epi8(1);
         let mask = 0b01010101_01010101;
@@ -15400,7 +16461,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpneq_epi16_mask() {
+    const fn test_mm512_cmpneq_epi16_mask() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(-1);
         let m = _mm512_cmpneq_epi16_mask(a, b);
@@ -15408,7 +16469,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpneq_epi16_mask() {
+    const fn test_mm512_mask_cmpneq_epi16_mask() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15417,7 +16478,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpneq_epi16_mask() {
+    const fn test_mm256_cmpneq_epi16_mask() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(-1);
         let m = _mm256_cmpneq_epi16_mask(a, b);
@@ -15425,7 +16486,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpneq_epi16_mask() {
+    const fn test_mm256_mask_cmpneq_epi16_mask() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(-1);
         let mask = 0b01010101_01010101;
@@ -15434,7 +16495,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpneq_epi16_mask() {
+    const fn test_mm_cmpneq_epi16_mask() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(-1);
         let m = _mm_cmpneq_epi16_mask(a, b);
@@ -15442,7 +16503,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpneq_epi16_mask() {
+    const fn test_mm_mask_cmpneq_epi16_mask() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(-1);
         let mask = 0b01010101;
@@ -15451,7 +16512,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmpneq_epi8_mask() {
+    const fn test_mm512_cmpneq_epi8_mask() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(-1);
         let m = _mm512_cmpneq_epi8_mask(a, b);
@@ -15462,7 +16523,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmpneq_epi8_mask() {
+    const fn test_mm512_mask_cmpneq_epi8_mask() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -15474,7 +16535,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmpneq_epi8_mask() {
+    const fn test_mm256_cmpneq_epi8_mask() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(-1);
         let m = _mm256_cmpneq_epi8_mask(a, b);
@@ -15482,7 +16543,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmpneq_epi8_mask() {
+    const fn test_mm256_mask_cmpneq_epi8_mask() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(-1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15491,7 +16552,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmpneq_epi8_mask() {
+    const fn test_mm_cmpneq_epi8_mask() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(-1);
         let m = _mm_cmpneq_epi8_mask(a, b);
@@ -15499,7 +16560,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmpneq_epi8_mask() {
+    const fn test_mm_mask_cmpneq_epi8_mask() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(-1);
         let mask = 0b01010101_01010101;
@@ -15508,7 +16569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmp_epu16_mask() {
+    const fn test_mm512_cmp_epu16_mask() {
         let a = _mm512_set1_epi16(0);
         let b = _mm512_set1_epi16(1);
         let m = _mm512_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
@@ -15516,7 +16577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmp_epu16_mask() {
+    const fn test_mm512_mask_cmp_epu16_mask() {
         let a = _mm512_set1_epi16(0);
         let b = _mm512_set1_epi16(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15525,7 +16586,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmp_epu16_mask() {
+    const fn test_mm256_cmp_epu16_mask() {
         let a = _mm256_set1_epi16(0);
         let b = _mm256_set1_epi16(1);
         let m = _mm256_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
@@ -15533,7 +16594,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_epu16_mask() {
+    const fn test_mm256_mask_cmp_epu16_mask() {
         let a = _mm256_set1_epi16(0);
         let b = _mm256_set1_epi16(1);
         let mask = 0b01010101_01010101;
@@ -15542,7 +16603,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmp_epu16_mask() {
+    const fn test_mm_cmp_epu16_mask() {
         let a = _mm_set1_epi16(0);
         let b = _mm_set1_epi16(1);
         let m = _mm_cmp_epu16_mask::<_MM_CMPINT_LT>(a, b);
@@ -15550,7 +16611,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmp_epu16_mask() {
+    const fn test_mm_mask_cmp_epu16_mask() {
         let a = _mm_set1_epi16(0);
         let b = _mm_set1_epi16(1);
         let mask = 0b01010101;
@@ -15559,7 +16620,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmp_epu8_mask() {
+    const fn test_mm512_cmp_epu8_mask() {
         let a = _mm512_set1_epi8(0);
         let b = _mm512_set1_epi8(1);
         let m = _mm512_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
@@ -15570,7 +16631,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmp_epu8_mask() {
+    const fn test_mm512_mask_cmp_epu8_mask() {
         let a = _mm512_set1_epi8(0);
         let b = _mm512_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -15582,7 +16643,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmp_epu8_mask() {
+    const fn test_mm256_cmp_epu8_mask() {
         let a = _mm256_set1_epi8(0);
         let b = _mm256_set1_epi8(1);
         let m = _mm256_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
@@ -15590,7 +16651,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_epu8_mask() {
+    const fn test_mm256_mask_cmp_epu8_mask() {
         let a = _mm256_set1_epi8(0);
         let b = _mm256_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15599,7 +16660,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmp_epu8_mask() {
+    const fn test_mm_cmp_epu8_mask() {
         let a = _mm_set1_epi8(0);
         let b = _mm_set1_epi8(1);
         let m = _mm_cmp_epu8_mask::<_MM_CMPINT_LT>(a, b);
@@ -15607,7 +16668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmp_epu8_mask() {
+    const fn test_mm_mask_cmp_epu8_mask() {
         let a = _mm_set1_epi8(0);
         let b = _mm_set1_epi8(1);
         let mask = 0b01010101_01010101;
@@ -15616,7 +16677,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmp_epi16_mask() {
+    const fn test_mm512_cmp_epi16_mask() {
         let a = _mm512_set1_epi16(0);
         let b = _mm512_set1_epi16(1);
         let m = _mm512_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
@@ -15624,7 +16685,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmp_epi16_mask() {
+    const fn test_mm512_mask_cmp_epi16_mask() {
         let a = _mm512_set1_epi16(0);
         let b = _mm512_set1_epi16(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15633,7 +16694,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmp_epi16_mask() {
+    const fn test_mm256_cmp_epi16_mask() {
         let a = _mm256_set1_epi16(0);
         let b = _mm256_set1_epi16(1);
         let m = _mm256_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
@@ -15641,7 +16702,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_epi16_mask() {
+    const fn test_mm256_mask_cmp_epi16_mask() {
         let a = _mm256_set1_epi16(0);
         let b = _mm256_set1_epi16(1);
         let mask = 0b01010101_01010101;
@@ -15650,7 +16711,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmp_epi16_mask() {
+    const fn test_mm_cmp_epi16_mask() {
         let a = _mm_set1_epi16(0);
         let b = _mm_set1_epi16(1);
         let m = _mm_cmp_epi16_mask::<_MM_CMPINT_LT>(a, b);
@@ -15658,7 +16719,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmp_epi16_mask() {
+    const fn test_mm_mask_cmp_epi16_mask() {
         let a = _mm_set1_epi16(0);
         let b = _mm_set1_epi16(1);
         let mask = 0b01010101;
@@ -15667,7 +16728,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cmp_epi8_mask() {
+    const fn test_mm512_cmp_epi8_mask() {
         let a = _mm512_set1_epi8(0);
         let b = _mm512_set1_epi8(1);
         let m = _mm512_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
@@ -15678,7 +16739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cmp_epi8_mask() {
+    const fn test_mm512_mask_cmp_epi8_mask() {
         let a = _mm512_set1_epi8(0);
         let b = _mm512_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101_01010101_01010101_01010101_01010101;
@@ -15690,7 +16751,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cmp_epi8_mask() {
+    const fn test_mm256_cmp_epi8_mask() {
         let a = _mm256_set1_epi8(0);
         let b = _mm256_set1_epi8(1);
         let m = _mm256_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
@@ -15698,7 +16759,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_epi8_mask() {
+    const fn test_mm256_mask_cmp_epi8_mask() {
         let a = _mm256_set1_epi8(0);
         let b = _mm256_set1_epi8(1);
         let mask = 0b01010101_01010101_01010101_01010101;
@@ -15707,7 +16768,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cmp_epi8_mask() {
+    const fn test_mm_cmp_epi8_mask() {
         let a = _mm_set1_epi8(0);
         let b = _mm_set1_epi8(1);
         let m = _mm_cmp_epi8_mask::<_MM_CMPINT_LT>(a, b);
@@ -15715,7 +16776,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cmp_epi8_mask() {
+    const fn test_mm_mask_cmp_epi8_mask() {
         let a = _mm_set1_epi8(0);
         let b = _mm_set1_epi8(1);
         let mask = 0b01010101_01010101;
@@ -15724,91 +16785,91 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_add_epi16() {
+    const fn test_mm256_reduce_add_epi16() {
         let a = _mm256_set1_epi16(1);
         let e = _mm256_reduce_add_epi16(a);
         assert_eq!(16, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_add_epi16() {
+    const fn test_mm256_mask_reduce_add_epi16() {
         let a = _mm256_set1_epi16(1);
         let e = _mm256_mask_reduce_add_epi16(0b11111111_00000000, a);
         assert_eq!(8, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_add_epi16() {
+    const fn test_mm_reduce_add_epi16() {
         let a = _mm_set1_epi16(1);
         let e = _mm_reduce_add_epi16(a);
         assert_eq!(8, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_add_epi16() {
+    const fn test_mm_mask_reduce_add_epi16() {
         let a = _mm_set1_epi16(1);
         let e = _mm_mask_reduce_add_epi16(0b11110000, a);
         assert_eq!(4, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_add_epi8() {
+    const fn test_mm256_reduce_add_epi8() {
         let a = _mm256_set1_epi8(1);
         let e = _mm256_reduce_add_epi8(a);
         assert_eq!(32, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_add_epi8() {
+    const fn test_mm256_mask_reduce_add_epi8() {
         let a = _mm256_set1_epi8(1);
         let e = _mm256_mask_reduce_add_epi8(0b11111111_00000000_11111111_00000000, a);
         assert_eq!(16, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_add_epi8() {
+    const fn test_mm_reduce_add_epi8() {
         let a = _mm_set1_epi8(1);
         let e = _mm_reduce_add_epi8(a);
         assert_eq!(16, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_add_epi8() {
+    const fn test_mm_mask_reduce_add_epi8() {
         let a = _mm_set1_epi8(1);
         let e = _mm_mask_reduce_add_epi8(0b11111111_00000000, a);
         assert_eq!(8, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_and_epi16() {
+    const fn test_mm256_reduce_and_epi16() {
         let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e = _mm256_reduce_and_epi16(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_and_epi16() {
+    const fn test_mm256_mask_reduce_and_epi16() {
         let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e = _mm256_mask_reduce_and_epi16(0b11111111_00000000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_and_epi16() {
+    const fn test_mm_reduce_and_epi16() {
         let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
         let e = _mm_reduce_and_epi16(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_and_epi16() {
+    const fn test_mm_mask_reduce_and_epi16() {
         let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
         let e = _mm_mask_reduce_and_epi16(0b11110000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_and_epi8() {
+    const fn test_mm256_reduce_and_epi8() {
         let a = _mm256_set_epi8(
             1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
             2, 2, 2,
@@ -15818,7 +16879,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_and_epi8() {
+    const fn test_mm256_mask_reduce_and_epi8() {
         let a = _mm256_set_epi8(
             1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
             2, 2, 2,
@@ -15828,49 +16889,49 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_and_epi8() {
+    const fn test_mm_reduce_and_epi8() {
         let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e = _mm_reduce_and_epi8(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_and_epi8() {
+    const fn test_mm_mask_reduce_and_epi8() {
         let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e = _mm_mask_reduce_and_epi8(0b11111111_00000000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_mul_epi16() {
+    const fn test_mm256_reduce_mul_epi16() {
         let a = _mm256_set_epi16(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
         let e = _mm256_reduce_mul_epi16(a);
         assert_eq!(256, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_mul_epi16() {
+    const fn test_mm256_mask_reduce_mul_epi16() {
         let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e = _mm256_mask_reduce_mul_epi16(0b11111111_00000000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_mul_epi16() {
+    const fn test_mm_reduce_mul_epi16() {
         let a = _mm_set_epi16(2, 2, 2, 2, 1, 1, 1, 1);
         let e = _mm_reduce_mul_epi16(a);
         assert_eq!(16, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_mul_epi16() {
+    const fn test_mm_mask_reduce_mul_epi16() {
         let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
         let e = _mm_mask_reduce_mul_epi16(0b11110000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_mul_epi8() {
+    const fn test_mm256_reduce_mul_epi8() {
         let a = _mm256_set_epi8(
             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
             2, 2, 2,
@@ -15880,7 +16941,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_mul_epi8() {
+    const fn test_mm256_mask_reduce_mul_epi8() {
         let a = _mm256_set_epi8(
             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
             2, 2, 2,
@@ -15890,49 +16951,49 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_mul_epi8() {
+    const fn test_mm_reduce_mul_epi8() {
         let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
         let e = _mm_reduce_mul_epi8(a);
         assert_eq!(8, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_mul_epi8() {
+    const fn test_mm_mask_reduce_mul_epi8() {
         let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2);
         let e = _mm_mask_reduce_mul_epi8(0b11111111_00000000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_max_epi16() {
+    const fn test_mm256_reduce_max_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i16 = _mm256_reduce_max_epi16(a);
         assert_eq!(15, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_max_epi16() {
+    const fn test_mm256_mask_reduce_max_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i16 = _mm256_mask_reduce_max_epi16(0b11111111_00000000, a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_max_epi16() {
+    const fn test_mm_reduce_max_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let e: i16 = _mm_reduce_max_epi16(a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_max_epi16() {
+    const fn test_mm_mask_reduce_max_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let e: i16 = _mm_mask_reduce_max_epi16(0b11110000, a);
         assert_eq!(3, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_max_epi8() {
+    const fn test_mm256_reduce_max_epi8() {
         let a = _mm256_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31,
@@ -15942,7 +17003,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_max_epi8() {
+    const fn test_mm256_mask_reduce_max_epi8() {
         let a = _mm256_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31,
@@ -15952,49 +17013,49 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_max_epi8() {
+    const fn test_mm_reduce_max_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i8 = _mm_reduce_max_epi8(a);
         assert_eq!(15, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_max_epi8() {
+    const fn test_mm_mask_reduce_max_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i8 = _mm_mask_reduce_max_epi8(0b11111111_00000000, a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_max_epu16() {
+    const fn test_mm256_reduce_max_epu16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u16 = _mm256_reduce_max_epu16(a);
         assert_eq!(15, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_max_epu16() {
+    const fn test_mm256_mask_reduce_max_epu16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u16 = _mm256_mask_reduce_max_epu16(0b11111111_00000000, a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_max_epu16() {
+    const fn test_mm_reduce_max_epu16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let e: u16 = _mm_reduce_max_epu16(a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_max_epu16() {
+    const fn test_mm_mask_reduce_max_epu16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let e: u16 = _mm_mask_reduce_max_epu16(0b11110000, a);
         assert_eq!(3, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_max_epu8() {
+    const fn test_mm256_reduce_max_epu8() {
         let a = _mm256_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31,
@@ -16004,7 +17065,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_max_epu8() {
+    const fn test_mm256_mask_reduce_max_epu8() {
         let a = _mm256_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31,
@@ -16014,49 +17075,49 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_max_epu8() {
+    const fn test_mm_reduce_max_epu8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u8 = _mm_reduce_max_epu8(a);
         assert_eq!(15, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_max_epu8() {
+    const fn test_mm_mask_reduce_max_epu8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u8 = _mm_mask_reduce_max_epu8(0b11111111_00000000, a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_min_epi16() {
+    const fn test_mm256_reduce_min_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i16 = _mm256_reduce_min_epi16(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_min_epi16() {
+    const fn test_mm256_mask_reduce_min_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i16 = _mm256_mask_reduce_min_epi16(0b11111111_00000000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_min_epi16() {
+    const fn test_mm_reduce_min_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let e: i16 = _mm_reduce_min_epi16(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_min_epi16() {
+    const fn test_mm_mask_reduce_min_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let e: i16 = _mm_mask_reduce_min_epi16(0b11110000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_min_epi8() {
+    const fn test_mm256_reduce_min_epi8() {
         let a = _mm256_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31,
@@ -16066,7 +17127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_min_epi8() {
+    const fn test_mm256_mask_reduce_min_epi8() {
         let a = _mm256_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31,
@@ -16076,49 +17137,49 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_min_epi8() {
+    const fn test_mm_reduce_min_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i8 = _mm_reduce_min_epi8(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_min_epi8() {
+    const fn test_mm_mask_reduce_min_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i8 = _mm_mask_reduce_min_epi8(0b11111111_00000000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_min_epu16() {
+    const fn test_mm256_reduce_min_epu16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u16 = _mm256_reduce_min_epu16(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_min_epu16() {
+    const fn test_mm256_mask_reduce_min_epu16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u16 = _mm256_mask_reduce_min_epu16(0b11111111_00000000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_min_epu16() {
+    const fn test_mm_reduce_min_epu16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let e: u16 = _mm_reduce_min_epu16(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_min_epu16() {
+    const fn test_mm_mask_reduce_min_epu16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let e: u16 = _mm_mask_reduce_min_epu16(0b11110000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_min_epu8() {
+    const fn test_mm256_reduce_min_epu8() {
         let a = _mm256_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31,
@@ -16128,7 +17189,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_min_epu8() {
+    const fn test_mm256_mask_reduce_min_epu8() {
         let a = _mm256_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31,
@@ -16138,49 +17199,49 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_min_epu8() {
+    const fn test_mm_reduce_min_epu8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u8 = _mm_reduce_min_epu8(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_min_epu8() {
+    const fn test_mm_mask_reduce_min_epu8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u8 = _mm_mask_reduce_min_epu8(0b11111111_00000000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_or_epi16() {
+    const fn test_mm256_reduce_or_epi16() {
         let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e = _mm256_reduce_or_epi16(a);
         assert_eq!(3, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_or_epi16() {
+    const fn test_mm256_mask_reduce_or_epi16() {
         let a = _mm256_set_epi16(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e = _mm256_mask_reduce_or_epi16(0b11111111_00000000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_or_epi16() {
+    const fn test_mm_reduce_or_epi16() {
         let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
         let e = _mm_reduce_or_epi16(a);
         assert_eq!(3, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_or_epi16() {
+    const fn test_mm_mask_reduce_or_epi16() {
         let a = _mm_set_epi16(1, 1, 1, 1, 2, 2, 2, 2);
         let e = _mm_mask_reduce_or_epi16(0b11110000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_reduce_or_epi8() {
+    const fn test_mm256_reduce_or_epi8() {
         let a = _mm256_set_epi8(
             1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
             2, 2, 2,
@@ -16190,7 +17251,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_or_epi8() {
+    const fn test_mm256_mask_reduce_or_epi8() {
         let a = _mm256_set_epi8(
             1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
             2, 2, 2,
@@ -16200,51 +17261,51 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_reduce_or_epi8() {
+    const fn test_mm_reduce_or_epi8() {
         let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e = _mm_reduce_or_epi8(a);
         assert_eq!(3, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_reduce_or_epi8() {
+    const fn test_mm_mask_reduce_or_epi8() {
         let a = _mm_set_epi8(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e = _mm_mask_reduce_or_epi8(0b11111111_00000000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_loadu_epi16() {
+    const fn test_mm512_loadu_epi16() {
         #[rustfmt::skip]
         let a: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let r = _mm512_loadu_epi16(&a[0]);
+        let r = unsafe { _mm512_loadu_epi16(&a[0]) };
         #[rustfmt::skip]
         let e = _mm512_set_epi16(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_loadu_epi16() {
+    const fn test_mm256_loadu_epi16() {
         let a: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let r = _mm256_loadu_epi16(&a[0]);
+        let r = unsafe { _mm256_loadu_epi16(&a[0]) };
         let e = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_loadu_epi16() {
+    const fn test_mm_loadu_epi16() {
         let a: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
-        let r = _mm_loadu_epi16(&a[0]);
+        let r = unsafe { _mm_loadu_epi16(&a[0]) };
         let e = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_loadu_epi8() {
+    const fn test_mm512_loadu_epi8() {
         #[rustfmt::skip]
         let a: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let r = _mm512_loadu_epi8(&a[0]);
+        let r = unsafe { _mm512_loadu_epi8(&a[0]) };
         #[rustfmt::skip]
         let e = _mm512_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
                                 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
@@ -16252,73 +17313,85 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_loadu_epi8() {
+    const fn test_mm256_loadu_epi8() {
         #[rustfmt::skip]
         let a: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
-        let r = _mm256_loadu_epi8(&a[0]);
+        let r = unsafe { _mm256_loadu_epi8(&a[0]) };
         #[rustfmt::skip]
         let e = _mm256_set_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_loadu_epi8() {
+    const fn test_mm_loadu_epi8() {
         let a: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let r = _mm_loadu_epi8(&a[0]);
+        let r = unsafe { _mm_loadu_epi8(&a[0]) };
         let e = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_storeu_epi16() {
+    const fn test_mm512_storeu_epi16() {
         let a = _mm512_set1_epi16(9);
         let mut r = _mm512_undefined_epi32();
-        _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
+        unsafe {
+            _mm512_storeu_epi16(&mut r as *mut _ as *mut i16, a);
+        }
         assert_eq_m512i(r, a);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_storeu_epi16() {
+    const fn test_mm256_storeu_epi16() {
         let a = _mm256_set1_epi16(9);
         let mut r = _mm256_set1_epi32(0);
-        _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
+        unsafe {
+            _mm256_storeu_epi16(&mut r as *mut _ as *mut i16, a);
+        }
         assert_eq_m256i(r, a);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_storeu_epi16() {
+    const fn test_mm_storeu_epi16() {
         let a = _mm_set1_epi16(9);
         let mut r = _mm_set1_epi32(0);
-        _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
+        unsafe {
+            _mm_storeu_epi16(&mut r as *mut _ as *mut i16, a);
+        }
         assert_eq_m128i(r, a);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_storeu_epi8() {
+    const fn test_mm512_storeu_epi8() {
         let a = _mm512_set1_epi8(9);
         let mut r = _mm512_undefined_epi32();
-        _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
+        unsafe {
+            _mm512_storeu_epi8(&mut r as *mut _ as *mut i8, a);
+        }
         assert_eq_m512i(r, a);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_storeu_epi8() {
+    const fn test_mm256_storeu_epi8() {
         let a = _mm256_set1_epi8(9);
         let mut r = _mm256_set1_epi32(0);
-        _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
+        unsafe {
+            _mm256_storeu_epi8(&mut r as *mut _ as *mut i8, a);
+        }
         assert_eq_m256i(r, a);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_storeu_epi8() {
+    const fn test_mm_storeu_epi8() {
         let a = _mm_set1_epi8(9);
         let mut r = _mm_set1_epi32(0);
-        _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
+        unsafe {
+            _mm_storeu_epi8(&mut r as *mut _ as *mut i8, a);
+        }
         assert_eq_m128i(r, a);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw")]
-    unsafe fn test_mm512_mask_loadu_epi16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_loadu_epi16() {
         let src = _mm512_set1_epi16(42);
         let a = &[
             1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@@ -16326,52 +17399,54 @@ mod tests {
         ];
         let p = a.as_ptr();
         let m = 0b10101010_11001100_11101000_11001010;
-        let r = _mm512_mask_loadu_epi16(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_loadu_epi16(src, m, black_box(p)) };
         let e = &[
             42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
             23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
         ];
-        let e = _mm512_loadu_epi16(e.as_ptr());
+        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw")]
-    unsafe fn test_mm512_maskz_loadu_epi16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_loadu_epi16() {
         let a = &[
             1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31, 32,
         ];
         let p = a.as_ptr();
         let m = 0b10101010_11001100_11101000_11001010;
-        let r = _mm512_maskz_loadu_epi16(m, black_box(p));
+        let r = unsafe { _mm512_maskz_loadu_epi16(m, black_box(p)) };
         let e = &[
             0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
             26, 0, 28, 0, 30, 0, 32,
         ];
-        let e = _mm512_loadu_epi16(e.as_ptr());
+        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw")]
-    unsafe fn test_mm512_mask_storeu_epi16() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_storeu_epi16() {
         let mut r = [42_i16; 32];
         let a = &[
             1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31, 32,
         ];
-        let a = _mm512_loadu_epi16(a.as_ptr());
+        let a = unsafe { _mm512_loadu_epi16(a.as_ptr()) };
         let m = 0b10101010_11001100_11101000_11001010;
-        _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm512_mask_storeu_epi16(r.as_mut_ptr(), m, a);
+        }
         let e = &[
             42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
             23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
         ];
-        let e = _mm512_loadu_epi16(e.as_ptr());
-        assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
+        let e = unsafe { _mm512_loadu_epi16(e.as_ptr()) };
+        assert_eq_m512i(unsafe { _mm512_loadu_epi16(r.as_ptr()) }, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw")]
-    unsafe fn test_mm512_mask_loadu_epi8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_loadu_epi8() {
         let src = _mm512_set1_epi8(42);
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@@ -16380,18 +17455,18 @@ mod tests {
         ];
         let p = a.as_ptr();
         let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
-        let r = _mm512_mask_loadu_epi8(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_loadu_epi8(src, m, black_box(p)) };
         let e = &[
             42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
             23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
             45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
         ];
-        let e = _mm512_loadu_epi8(e.as_ptr());
+        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw")]
-    unsafe fn test_mm512_maskz_loadu_epi8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_maskz_loadu_epi8() {
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
@@ -16399,77 +17474,81 @@ mod tests {
         ];
         let p = a.as_ptr();
         let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
-        let r = _mm512_maskz_loadu_epi8(m, black_box(p));
+        let r = unsafe { _mm512_maskz_loadu_epi8(m, black_box(p)) };
         let e = &[
             0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
             26, 0, 28, 0, 30, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 41, 42, 43, 44, 45, 46, 47, 48, 49,
             50, 51, 52, 53, 54, 55, 56, 0, 0, 0, 0, 0, 0, 0, 0,
         ];
-        let e = _mm512_loadu_epi8(e.as_ptr());
+        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
         assert_eq_m512i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw")]
-    unsafe fn test_mm512_mask_storeu_epi8() {
+    #[simd_test(enable = "avx512bw")]
+    const fn test_mm512_mask_storeu_epi8() {
         let mut r = [42_i8; 64];
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
             46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
         ];
-        let a = _mm512_loadu_epi8(a.as_ptr());
+        let a = unsafe { _mm512_loadu_epi8(a.as_ptr()) };
         let m = 0b00000000_11111111_11111111_00000000_10101010_11001100_11101000_11001010;
-        _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm512_mask_storeu_epi8(r.as_mut_ptr(), m, a);
+        }
         let e = &[
             42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
             23, 24, 42, 26, 42, 28, 42, 30, 42, 32, 42, 42, 42, 42, 42, 42, 42, 42, 41, 42, 43, 44,
             45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 42, 42, 42, 42, 42, 42, 42, 42,
         ];
-        let e = _mm512_loadu_epi8(e.as_ptr());
-        assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
+        let e = unsafe { _mm512_loadu_epi8(e.as_ptr()) };
+        assert_eq_m512i(unsafe { _mm512_loadu_epi8(r.as_ptr()) }, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_loadu_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_loadu_epi16() {
         let src = _mm256_set1_epi16(42);
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm256_mask_loadu_epi16(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_loadu_epi16(src, m, black_box(p)) };
         let e = &[
             42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
         ];
-        let e = _mm256_loadu_epi16(e.as_ptr());
+        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_loadu_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_loadu_epi16() {
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm256_maskz_loadu_epi16(m, black_box(p));
+        let r = unsafe { _mm256_maskz_loadu_epi16(m, black_box(p)) };
         let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
-        let e = _mm256_loadu_epi16(e.as_ptr());
+        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_storeu_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_storeu_epi16() {
         let mut r = [42_i16; 16];
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let a = _mm256_loadu_epi16(a.as_ptr());
+        let a = unsafe { _mm256_loadu_epi16(a.as_ptr()) };
         let m = 0b11101000_11001010;
-        _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_storeu_epi16(r.as_mut_ptr(), m, a);
+        }
         let e = &[
             42_i16, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
         ];
-        let e = _mm256_loadu_epi16(e.as_ptr());
-        assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
+        let e = unsafe { _mm256_loadu_epi16(e.as_ptr()) };
+        assert_eq_m256i(unsafe { _mm256_loadu_epi16(r.as_ptr()) }, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_loadu_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_loadu_epi8() {
         let src = _mm256_set1_epi8(42);
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@@ -16477,126 +17556,128 @@ mod tests {
         ];
         let p = a.as_ptr();
         let m = 0b10101010_11001100_11101000_11001010;
-        let r = _mm256_mask_loadu_epi8(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_loadu_epi8(src, m, black_box(p)) };
         let e = &[
             42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
             23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
         ];
-        let e = _mm256_loadu_epi8(e.as_ptr());
+        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_loadu_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_maskz_loadu_epi8() {
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31, 32,
         ];
         let p = a.as_ptr();
         let m = 0b10101010_11001100_11101000_11001010;
-        let r = _mm256_maskz_loadu_epi8(m, black_box(p));
+        let r = unsafe { _mm256_maskz_loadu_epi8(m, black_box(p)) };
         let e = &[
             0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16, 0, 0, 19, 20, 0, 0, 23, 24, 0,
             26, 0, 28, 0, 30, 0, 32,
         ];
-        let e = _mm256_loadu_epi8(e.as_ptr());
+        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_storeu_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm256_mask_storeu_epi8() {
         let mut r = [42_i8; 32];
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31, 32,
         ];
-        let a = _mm256_loadu_epi8(a.as_ptr());
+        let a = unsafe { _mm256_loadu_epi8(a.as_ptr()) };
         let m = 0b10101010_11001100_11101000_11001010;
-        _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_storeu_epi8(r.as_mut_ptr(), m, a);
+        }
         let e = &[
             42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16, 42, 42, 19, 20, 42, 42,
             23, 24, 42, 26, 42, 28, 42, 30, 42, 32,
         ];
-        let e = _mm256_loadu_epi8(e.as_ptr());
-        assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
+        let e = unsafe { _mm256_loadu_epi8(e.as_ptr()) };
+        assert_eq_m256i(unsafe { _mm256_loadu_epi8(r.as_ptr()) }, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_loadu_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_loadu_epi16() {
         let src = _mm_set1_epi16(42);
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm_mask_loadu_epi16(src, m, black_box(p));
+        let r = unsafe { _mm_mask_loadu_epi16(src, m, black_box(p)) };
         let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
-        let e = _mm_loadu_epi16(e.as_ptr());
+        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_loadu_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_loadu_epi16() {
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm_maskz_loadu_epi16(m, black_box(p));
+        let r = unsafe { _mm_maskz_loadu_epi16(m, black_box(p)) };
         let e = &[0_i16, 2, 0, 4, 0, 0, 7, 8];
-        let e = _mm_loadu_epi16(e.as_ptr());
+        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_storeu_epi16() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_storeu_epi16() {
         let mut r = [42_i16; 8];
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
-        let a = _mm_loadu_epi16(a.as_ptr());
+        let a = unsafe { _mm_loadu_epi16(a.as_ptr()) };
         let m = 0b11001010;
-        _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a);
+        unsafe { _mm_mask_storeu_epi16(r.as_mut_ptr(), m, a) };
         let e = &[42_i16, 2, 42, 4, 42, 42, 7, 8];
-        let e = _mm_loadu_epi16(e.as_ptr());
-        assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
+        let e = unsafe { _mm_loadu_epi16(e.as_ptr()) };
+        assert_eq_m128i(unsafe { _mm_loadu_epi16(r.as_ptr()) }, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_loadu_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_loadu_epi8() {
         let src = _mm_set1_epi8(42);
         let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm_mask_loadu_epi8(src, m, black_box(p));
+        let r = unsafe { _mm_mask_loadu_epi8(src, m, black_box(p)) };
         let e = &[
             42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
         ];
-        let e = _mm_loadu_epi8(e.as_ptr());
+        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_loadu_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_maskz_loadu_epi8() {
         let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm_maskz_loadu_epi8(m, black_box(p));
+        let r = unsafe { _mm_maskz_loadu_epi8(m, black_box(p)) };
         let e = &[0_i8, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16];
-        let e = _mm_loadu_epi8(e.as_ptr());
+        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f,avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_storeu_epi8() {
+    #[simd_test(enable = "avx512bw,avx512vl")]
+    const fn test_mm_mask_storeu_epi8() {
         let mut r = [42_i8; 16];
         let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
-        let a = _mm_loadu_epi8(a.as_ptr());
+        let a = unsafe { _mm_loadu_epi8(a.as_ptr()) };
         let m = 0b11101000_11001010;
-        _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a);
+        unsafe { _mm_mask_storeu_epi8(r.as_mut_ptr(), m, a) };
         let e = &[
             42_i8, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16,
         ];
-        let e = _mm_loadu_epi8(e.as_ptr());
-        assert_eq_m128i(_mm_loadu_epi8(r.as_ptr()), e);
+        let e = unsafe { _mm_loadu_epi8(e.as_ptr()) };
+        assert_eq_m128i(unsafe { _mm_loadu_epi8(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_madd_epi16() {
+    fn test_mm512_madd_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_madd_epi16(a, b);
@@ -16605,7 +17686,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_madd_epi16() {
+    fn test_mm512_mask_madd_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mask_madd_epi16(a, 0, a, b);
@@ -16633,7 +17714,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_madd_epi16() {
+    fn test_mm512_maskz_madd_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_madd_epi16(0, a, b);
@@ -16644,7 +17725,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_madd_epi16() {
+    fn test_mm256_mask_madd_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_mask_madd_epi16(a, 0, a, b);
@@ -16664,7 +17745,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_madd_epi16() {
+    fn test_mm256_maskz_madd_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_madd_epi16(0, a, b);
@@ -16675,7 +17756,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_madd_epi16() {
+    fn test_mm_mask_madd_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_mask_madd_epi16(a, 0, a, b);
@@ -16686,7 +17767,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_madd_epi16() {
+    fn test_mm_maskz_madd_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_madd_epi16(0, a, b);
@@ -16697,7 +17778,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maddubs_epi16() {
+    fn test_mm512_maddubs_epi16() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let r = _mm512_maddubs_epi16(a, b);
@@ -16706,7 +17787,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_maddubs_epi16() {
+    fn test_mm512_mask_maddubs_epi16() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let src = _mm512_set1_epi16(1);
@@ -16720,7 +17801,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_maddubs_epi16() {
+    fn test_mm512_maskz_maddubs_epi16() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let r = _mm512_maskz_maddubs_epi16(0, a, b);
@@ -16733,7 +17814,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_maddubs_epi16() {
+    fn test_mm256_mask_maddubs_epi16() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let src = _mm256_set1_epi16(1);
@@ -16745,7 +17826,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_maddubs_epi16() {
+    fn test_mm256_maskz_maddubs_epi16() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let r = _mm256_maskz_maddubs_epi16(0, a, b);
@@ -16756,7 +17837,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_maddubs_epi16() {
+    fn test_mm_mask_maddubs_epi16() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let src = _mm_set1_epi16(1);
@@ -16768,7 +17849,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_maddubs_epi16() {
+    fn test_mm_maskz_maddubs_epi16() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let r = _mm_maskz_maddubs_epi16(0, a, b);
@@ -16779,7 +17860,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_packs_epi32() {
+    const fn test_mm512_packs_epi32() {
         let a = _mm512_set1_epi32(i32::MAX);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_packs_epi32(a, b);
@@ -16790,7 +17871,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_packs_epi32() {
+    const fn test_mm512_mask_packs_epi32() {
         let a = _mm512_set1_epi32(i32::MAX);
         let b = _mm512_set1_epi32(1 << 16 | 1);
         let r = _mm512_mask_packs_epi32(a, 0, a, b);
@@ -16803,7 +17884,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_packs_epi32() {
+    const fn test_mm512_maskz_packs_epi32() {
         let a = _mm512_set1_epi32(i32::MAX);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_maskz_packs_epi32(0, a, b);
@@ -16816,7 +17897,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_packs_epi32() {
+    const fn test_mm256_mask_packs_epi32() {
         let a = _mm256_set1_epi32(i32::MAX);
         let b = _mm256_set1_epi32(1 << 16 | 1);
         let r = _mm256_mask_packs_epi32(a, 0, a, b);
@@ -16828,7 +17909,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_packs_epi32() {
+    fn test_mm256_maskz_packs_epi32() {
         let a = _mm256_set1_epi32(i32::MAX);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_maskz_packs_epi32(0, a, b);
@@ -16840,7 +17921,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_packs_epi32() {
+    const fn test_mm_mask_packs_epi32() {
         let a = _mm_set1_epi32(i32::MAX);
         let b = _mm_set1_epi32(1 << 16 | 1);
         let r = _mm_mask_packs_epi32(a, 0, a, b);
@@ -16851,7 +17932,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_packs_epi32() {
+    const fn test_mm_maskz_packs_epi32() {
         let a = _mm_set1_epi32(i32::MAX);
         let b = _mm_set1_epi32(1);
         let r = _mm_maskz_packs_epi32(0, a, b);
@@ -16862,7 +17943,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_packs_epi16() {
+    const fn test_mm512_packs_epi16() {
         let a = _mm512_set1_epi16(i16::MAX);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_packs_epi16(a, b);
@@ -16875,7 +17956,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_packs_epi16() {
+    const fn test_mm512_mask_packs_epi16() {
         let a = _mm512_set1_epi16(i16::MAX);
         let b = _mm512_set1_epi16(1 << 8 | 1);
         let r = _mm512_mask_packs_epi16(a, 0, a, b);
@@ -16895,7 +17976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_packs_epi16() {
+    const fn test_mm512_maskz_packs_epi16() {
         let a = _mm512_set1_epi16(i16::MAX);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_packs_epi16(0, a, b);
@@ -16914,7 +17995,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_packs_epi16() {
+    const fn test_mm256_mask_packs_epi16() {
         let a = _mm256_set1_epi16(i16::MAX);
         let b = _mm256_set1_epi16(1 << 8 | 1);
         let r = _mm256_mask_packs_epi16(a, 0, a, b);
@@ -16927,7 +18008,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_packs_epi16() {
+    const fn test_mm256_maskz_packs_epi16() {
         let a = _mm256_set1_epi16(i16::MAX);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_packs_epi16(0, a, b);
@@ -16940,7 +18021,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_packs_epi16() {
+    const fn test_mm_mask_packs_epi16() {
         let a = _mm_set1_epi16(i16::MAX);
         let b = _mm_set1_epi16(1 << 8 | 1);
         let r = _mm_mask_packs_epi16(a, 0, a, b);
@@ -16952,7 +18033,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_packs_epi16() {
+    const fn test_mm_maskz_packs_epi16() {
         let a = _mm_set1_epi16(i16::MAX);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_packs_epi16(0, a, b);
@@ -16964,7 +18045,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_packus_epi32() {
+    const fn test_mm512_packus_epi32() {
         let a = _mm512_set1_epi32(-1);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_packus_epi32(a, b);
@@ -16975,7 +18056,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_packus_epi32() {
+    const fn test_mm512_mask_packus_epi32() {
         let a = _mm512_set1_epi32(-1);
         let b = _mm512_set1_epi32(1 << 16 | 1);
         let r = _mm512_mask_packus_epi32(a, 0, a, b);
@@ -16988,7 +18069,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_packus_epi32() {
+    const fn test_mm512_maskz_packus_epi32() {
         let a = _mm512_set1_epi32(-1);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_maskz_packus_epi32(0, a, b);
@@ -17001,7 +18082,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_packus_epi32() {
+    const fn test_mm256_mask_packus_epi32() {
         let a = _mm256_set1_epi32(-1);
         let b = _mm256_set1_epi32(1 << 16 | 1);
         let r = _mm256_mask_packus_epi32(a, 0, a, b);
@@ -17012,7 +18093,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_packus_epi32() {
+    const fn test_mm256_maskz_packus_epi32() {
         let a = _mm256_set1_epi32(-1);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_maskz_packus_epi32(0, a, b);
@@ -17023,7 +18104,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_packus_epi32() {
+    const fn test_mm_mask_packus_epi32() {
         let a = _mm_set1_epi32(-1);
         let b = _mm_set1_epi32(1 << 16 | 1);
         let r = _mm_mask_packus_epi32(a, 0, a, b);
@@ -17034,7 +18115,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_packus_epi32() {
+    const fn test_mm_maskz_packus_epi32() {
         let a = _mm_set1_epi32(-1);
         let b = _mm_set1_epi32(1);
         let r = _mm_maskz_packus_epi32(0, a, b);
@@ -17045,7 +18126,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_packus_epi16() {
+    const fn test_mm512_packus_epi16() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_packus_epi16(a, b);
@@ -17058,7 +18139,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_packus_epi16() {
+    const fn test_mm512_mask_packus_epi16() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(1 << 8 | 1);
         let r = _mm512_mask_packus_epi16(a, 0, a, b);
@@ -17078,7 +18159,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_packus_epi16() {
+    const fn test_mm512_maskz_packus_epi16() {
         let a = _mm512_set1_epi16(-1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_packus_epi16(0, a, b);
@@ -17097,7 +18178,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_packus_epi16() {
+    const fn test_mm256_mask_packus_epi16() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(1 << 8 | 1);
         let r = _mm256_mask_packus_epi16(a, 0, a, b);
@@ -17110,7 +18191,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_packus_epi16() {
+    const fn test_mm256_maskz_packus_epi16() {
         let a = _mm256_set1_epi16(-1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_packus_epi16(0, a, b);
@@ -17123,7 +18204,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_packus_epi16() {
+    const fn test_mm_mask_packus_epi16() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(1 << 8 | 1);
         let r = _mm_mask_packus_epi16(a, 0, a, b);
@@ -17134,7 +18215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_packus_epi16() {
+    const fn test_mm_maskz_packus_epi16() {
         let a = _mm_set1_epi16(-1);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_packus_epi16(0, a, b);
@@ -17145,7 +18226,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_avg_epu16() {
+    const fn test_mm512_avg_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_avg_epu16(a, b);
@@ -17154,7 +18235,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_avg_epu16() {
+    const fn test_mm512_mask_avg_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_mask_avg_epu16(a, 0, a, b);
@@ -17167,7 +18248,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_avg_epu16() {
+    const fn test_mm512_maskz_avg_epu16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1);
         let r = _mm512_maskz_avg_epu16(0, a, b);
@@ -17180,7 +18261,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_avg_epu16() {
+    const fn test_mm256_mask_avg_epu16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_mask_avg_epu16(a, 0, a, b);
@@ -17191,7 +18272,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_avg_epu16() {
+    const fn test_mm256_maskz_avg_epu16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1);
         let r = _mm256_maskz_avg_epu16(0, a, b);
@@ -17202,7 +18283,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_avg_epu16() {
+    const fn test_mm_mask_avg_epu16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_mask_avg_epu16(a, 0, a, b);
@@ -17213,7 +18294,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_avg_epu16() {
+    const fn test_mm_maskz_avg_epu16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1);
         let r = _mm_maskz_avg_epu16(0, a, b);
@@ -17224,7 +18305,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_avg_epu8() {
+    const fn test_mm512_avg_epu8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let r = _mm512_avg_epu8(a, b);
@@ -17233,7 +18314,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_avg_epu8() {
+    const fn test_mm512_mask_avg_epu8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let r = _mm512_mask_avg_epu8(a, 0, a, b);
@@ -17253,7 +18334,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_avg_epu8() {
+    const fn test_mm512_maskz_avg_epu8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let r = _mm512_maskz_avg_epu8(0, a, b);
@@ -17272,7 +18353,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_avg_epu8() {
+    const fn test_mm256_mask_avg_epu8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let r = _mm256_mask_avg_epu8(a, 0, a, b);
@@ -17285,7 +18366,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_avg_epu8() {
+    const fn test_mm256_maskz_avg_epu8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let r = _mm256_maskz_avg_epu8(0, a, b);
@@ -17298,7 +18379,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_avg_epu8() {
+    const fn test_mm_mask_avg_epu8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let r = _mm_mask_avg_epu8(a, 0, a, b);
@@ -17309,7 +18390,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_avg_epu8() {
+    const fn test_mm_maskz_avg_epu8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let r = _mm_maskz_avg_epu8(0, a, b);
@@ -17320,7 +18401,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_sll_epi16() {
+    fn test_mm512_sll_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm512_sll_epi16(a, count);
@@ -17329,7 +18410,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_sll_epi16() {
+    fn test_mm512_mask_sll_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm512_mask_sll_epi16(a, 0, a, count);
@@ -17340,7 +18421,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_sll_epi16() {
+    fn test_mm512_maskz_sll_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm512_maskz_sll_epi16(0, a, count);
@@ -17351,7 +18432,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_sll_epi16() {
+    fn test_mm256_mask_sll_epi16() {
         let a = _mm256_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm256_mask_sll_epi16(a, 0, a, count);
@@ -17362,7 +18443,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_sll_epi16() {
+    fn test_mm256_maskz_sll_epi16() {
         let a = _mm256_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm256_maskz_sll_epi16(0, a, count);
@@ -17373,7 +18454,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_sll_epi16() {
+    fn test_mm_mask_sll_epi16() {
         let a = _mm_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm_mask_sll_epi16(a, 0, a, count);
@@ -17384,7 +18465,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_sll_epi16() {
+    fn test_mm_maskz_sll_epi16() {
         let a = _mm_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm_maskz_sll_epi16(0, a, count);
@@ -17395,7 +18476,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_slli_epi16() {
+    const fn test_mm512_slli_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let r = _mm512_slli_epi16::<1>(a);
         let e = _mm512_set1_epi16(0);
@@ -17403,7 +18484,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_slli_epi16() {
+    const fn test_mm512_mask_slli_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let r = _mm512_mask_slli_epi16::<1>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -17413,7 +18494,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_slli_epi16() {
+    const fn test_mm512_maskz_slli_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let r = _mm512_maskz_slli_epi16::<1>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -17423,7 +18504,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_slli_epi16() {
+    const fn test_mm256_mask_slli_epi16() {
         let a = _mm256_set1_epi16(1 << 15);
         let r = _mm256_mask_slli_epi16::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -17433,7 +18514,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_slli_epi16() {
+    const fn test_mm256_maskz_slli_epi16() {
         let a = _mm256_set1_epi16(1 << 15);
         let r = _mm256_maskz_slli_epi16::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -17443,7 +18524,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_slli_epi16() {
+    const fn test_mm_mask_slli_epi16() {
         let a = _mm_set1_epi16(1 << 15);
         let r = _mm_mask_slli_epi16::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -17453,7 +18534,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_slli_epi16() {
+    const fn test_mm_maskz_slli_epi16() {
         let a = _mm_set1_epi16(1 << 15);
         let r = _mm_maskz_slli_epi16::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -17463,7 +18544,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_sllv_epi16() {
+    const fn test_mm512_sllv_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let count = _mm512_set1_epi16(2);
         let r = _mm512_sllv_epi16(a, count);
@@ -17472,7 +18553,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_sllv_epi16() {
+    const fn test_mm512_mask_sllv_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let count = _mm512_set1_epi16(2);
         let r = _mm512_mask_sllv_epi16(a, 0, a, count);
@@ -17483,7 +18564,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_sllv_epi16() {
+    const fn test_mm512_maskz_sllv_epi16() {
         let a = _mm512_set1_epi16(1 << 15);
         let count = _mm512_set1_epi16(2);
         let r = _mm512_maskz_sllv_epi16(0, a, count);
@@ -17494,7 +18575,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_sllv_epi16() {
+    const fn test_mm256_sllv_epi16() {
         let a = _mm256_set1_epi16(1 << 15);
         let count = _mm256_set1_epi16(2);
         let r = _mm256_sllv_epi16(a, count);
@@ -17503,7 +18584,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_sllv_epi16() {
+    const fn test_mm256_mask_sllv_epi16() {
         let a = _mm256_set1_epi16(1 << 15);
         let count = _mm256_set1_epi16(2);
         let r = _mm256_mask_sllv_epi16(a, 0, a, count);
@@ -17514,7 +18595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_sllv_epi16() {
+    const fn test_mm256_maskz_sllv_epi16() {
         let a = _mm256_set1_epi16(1 << 15);
         let count = _mm256_set1_epi16(2);
         let r = _mm256_maskz_sllv_epi16(0, a, count);
@@ -17525,7 +18606,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_sllv_epi16() {
+    const fn test_mm_sllv_epi16() {
         let a = _mm_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm_sllv_epi16(a, count);
@@ -17534,7 +18615,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_sllv_epi16() {
+    const fn test_mm_mask_sllv_epi16() {
         let a = _mm_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm_mask_sllv_epi16(a, 0, a, count);
@@ -17545,7 +18626,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_sllv_epi16() {
+    const fn test_mm_maskz_sllv_epi16() {
         let a = _mm_set1_epi16(1 << 15);
         let count = _mm_set1_epi16(2);
         let r = _mm_maskz_sllv_epi16(0, a, count);
@@ -17556,7 +18637,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_srl_epi16() {
+    fn test_mm512_srl_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm512_srl_epi16(a, count);
@@ -17565,7 +18646,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_srl_epi16() {
+    fn test_mm512_mask_srl_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm512_mask_srl_epi16(a, 0, a, count);
@@ -17576,7 +18657,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_srl_epi16() {
+    fn test_mm512_maskz_srl_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm512_maskz_srl_epi16(0, a, count);
@@ -17587,7 +18668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_srl_epi16() {
+    fn test_mm256_mask_srl_epi16() {
         let a = _mm256_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm256_mask_srl_epi16(a, 0, a, count);
@@ -17598,7 +18679,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_srl_epi16() {
+    fn test_mm256_maskz_srl_epi16() {
         let a = _mm256_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm256_maskz_srl_epi16(0, a, count);
@@ -17609,7 +18690,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_srl_epi16() {
+    fn test_mm_mask_srl_epi16() {
         let a = _mm_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm_mask_srl_epi16(a, 0, a, count);
@@ -17620,7 +18701,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_srl_epi16() {
+    fn test_mm_maskz_srl_epi16() {
         let a = _mm_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm_maskz_srl_epi16(0, a, count);
@@ -17631,7 +18712,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_srli_epi16() {
+    const fn test_mm512_srli_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let r = _mm512_srli_epi16::<2>(a);
         let e = _mm512_set1_epi16(0);
@@ -17639,7 +18720,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_srli_epi16() {
+    const fn test_mm512_mask_srli_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let r = _mm512_mask_srli_epi16::<2>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -17649,7 +18730,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_srli_epi16() {
+    const fn test_mm512_maskz_srli_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let r = _mm512_maskz_srli_epi16::<2>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -17659,7 +18740,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_srli_epi16() {
+    const fn test_mm256_mask_srli_epi16() {
         let a = _mm256_set1_epi16(1 << 1);
         let r = _mm256_mask_srli_epi16::<2>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -17669,7 +18750,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_srli_epi16() {
+    const fn test_mm256_maskz_srli_epi16() {
         let a = _mm256_set1_epi16(1 << 1);
         let r = _mm256_maskz_srli_epi16::<2>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -17679,7 +18760,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_srli_epi16() {
+    const fn test_mm_mask_srli_epi16() {
         let a = _mm_set1_epi16(1 << 1);
         let r = _mm_mask_srli_epi16::<2>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -17689,7 +18770,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_srli_epi16() {
+    const fn test_mm_maskz_srli_epi16() {
         let a = _mm_set1_epi16(1 << 1);
         let r = _mm_maskz_srli_epi16::<2>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -17699,7 +18780,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_srlv_epi16() {
+    const fn test_mm512_srlv_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let count = _mm512_set1_epi16(2);
         let r = _mm512_srlv_epi16(a, count);
@@ -17708,7 +18789,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_srlv_epi16() {
+    const fn test_mm512_mask_srlv_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let count = _mm512_set1_epi16(2);
         let r = _mm512_mask_srlv_epi16(a, 0, a, count);
@@ -17719,7 +18800,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_srlv_epi16() {
+    const fn test_mm512_maskz_srlv_epi16() {
         let a = _mm512_set1_epi16(1 << 1);
         let count = _mm512_set1_epi16(2);
         let r = _mm512_maskz_srlv_epi16(0, a, count);
@@ -17730,7 +18811,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_srlv_epi16() {
+    const fn test_mm256_srlv_epi16() {
         let a = _mm256_set1_epi16(1 << 1);
         let count = _mm256_set1_epi16(2);
         let r = _mm256_srlv_epi16(a, count);
@@ -17739,7 +18820,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_srlv_epi16() {
+    const fn test_mm256_mask_srlv_epi16() {
         let a = _mm256_set1_epi16(1 << 1);
         let count = _mm256_set1_epi16(2);
         let r = _mm256_mask_srlv_epi16(a, 0, a, count);
@@ -17750,7 +18831,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_srlv_epi16() {
+    const fn test_mm256_maskz_srlv_epi16() {
         let a = _mm256_set1_epi16(1 << 1);
         let count = _mm256_set1_epi16(2);
         let r = _mm256_maskz_srlv_epi16(0, a, count);
@@ -17761,7 +18842,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_srlv_epi16() {
+    const fn test_mm_srlv_epi16() {
         let a = _mm_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm_srlv_epi16(a, count);
@@ -17770,7 +18851,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_srlv_epi16() {
+    const fn test_mm_mask_srlv_epi16() {
         let a = _mm_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm_mask_srlv_epi16(a, 0, a, count);
@@ -17781,7 +18862,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_srlv_epi16() {
+    const fn test_mm_maskz_srlv_epi16() {
         let a = _mm_set1_epi16(1 << 1);
         let count = _mm_set1_epi16(2);
         let r = _mm_maskz_srlv_epi16(0, a, count);
@@ -17792,7 +18873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_sra_epi16() {
+    fn test_mm512_sra_epi16() {
         let a = _mm512_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm512_sra_epi16(a, count);
@@ -17801,7 +18882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_sra_epi16() {
+    fn test_mm512_mask_sra_epi16() {
         let a = _mm512_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm512_mask_sra_epi16(a, 0, a, count);
@@ -17812,7 +18893,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_sra_epi16() {
+    fn test_mm512_maskz_sra_epi16() {
         let a = _mm512_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm512_maskz_sra_epi16(0, a, count);
@@ -17823,7 +18904,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_sra_epi16() {
+    fn test_mm256_mask_sra_epi16() {
         let a = _mm256_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm256_mask_sra_epi16(a, 0, a, count);
@@ -17834,7 +18915,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_sra_epi16() {
+    fn test_mm256_maskz_sra_epi16() {
         let a = _mm256_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm256_maskz_sra_epi16(0, a, count);
@@ -17845,7 +18926,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_sra_epi16() {
+    fn test_mm_mask_sra_epi16() {
         let a = _mm_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm_mask_sra_epi16(a, 0, a, count);
@@ -17856,7 +18937,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_sra_epi16() {
+    fn test_mm_maskz_sra_epi16() {
         let a = _mm_set1_epi16(8);
         let count = _mm_set1_epi16(1);
         let r = _mm_maskz_sra_epi16(0, a, count);
@@ -17867,7 +18948,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_srai_epi16() {
+    const fn test_mm512_srai_epi16() {
         let a = _mm512_set1_epi16(8);
         let r = _mm512_srai_epi16::<2>(a);
         let e = _mm512_set1_epi16(2);
@@ -17875,7 +18956,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_srai_epi16() {
+    const fn test_mm512_mask_srai_epi16() {
         let a = _mm512_set1_epi16(8);
         let r = _mm512_mask_srai_epi16::<2>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -17885,7 +18966,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_srai_epi16() {
+    const fn test_mm512_maskz_srai_epi16() {
         let a = _mm512_set1_epi16(8);
         let r = _mm512_maskz_srai_epi16::<2>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -17895,7 +18976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_srai_epi16() {
+    const fn test_mm256_mask_srai_epi16() {
         let a = _mm256_set1_epi16(8);
         let r = _mm256_mask_srai_epi16::<2>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -17905,7 +18986,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_srai_epi16() {
+    const fn test_mm256_maskz_srai_epi16() {
         let a = _mm256_set1_epi16(8);
         let r = _mm256_maskz_srai_epi16::<2>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -17915,7 +18996,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_srai_epi16() {
+    const fn test_mm_mask_srai_epi16() {
         let a = _mm_set1_epi16(8);
         let r = _mm_mask_srai_epi16::<2>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -17925,7 +19006,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_srai_epi16() {
+    const fn test_mm_maskz_srai_epi16() {
         let a = _mm_set1_epi16(8);
         let r = _mm_maskz_srai_epi16::<2>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -17935,7 +19016,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_srav_epi16() {
+    const fn test_mm512_srav_epi16() {
         let a = _mm512_set1_epi16(8);
         let count = _mm512_set1_epi16(2);
         let r = _mm512_srav_epi16(a, count);
@@ -17944,7 +19025,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_srav_epi16() {
+    const fn test_mm512_mask_srav_epi16() {
         let a = _mm512_set1_epi16(8);
         let count = _mm512_set1_epi16(2);
         let r = _mm512_mask_srav_epi16(a, 0, a, count);
@@ -17955,7 +19036,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_srav_epi16() {
+    const fn test_mm512_maskz_srav_epi16() {
         let a = _mm512_set1_epi16(8);
         let count = _mm512_set1_epi16(2);
         let r = _mm512_maskz_srav_epi16(0, a, count);
@@ -17966,7 +19047,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_srav_epi16() {
+    const fn test_mm256_srav_epi16() {
         let a = _mm256_set1_epi16(8);
         let count = _mm256_set1_epi16(2);
         let r = _mm256_srav_epi16(a, count);
@@ -17975,7 +19056,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_srav_epi16() {
+    const fn test_mm256_mask_srav_epi16() {
         let a = _mm256_set1_epi16(8);
         let count = _mm256_set1_epi16(2);
         let r = _mm256_mask_srav_epi16(a, 0, a, count);
@@ -17986,7 +19067,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_srav_epi16() {
+    const fn test_mm256_maskz_srav_epi16() {
         let a = _mm256_set1_epi16(8);
         let count = _mm256_set1_epi16(2);
         let r = _mm256_maskz_srav_epi16(0, a, count);
@@ -17997,7 +19078,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_srav_epi16() {
+    const fn test_mm_srav_epi16() {
         let a = _mm_set1_epi16(8);
         let count = _mm_set1_epi16(2);
         let r = _mm_srav_epi16(a, count);
@@ -18006,7 +19087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_srav_epi16() {
+    const fn test_mm_mask_srav_epi16() {
         let a = _mm_set1_epi16(8);
         let count = _mm_set1_epi16(2);
         let r = _mm_mask_srav_epi16(a, 0, a, count);
@@ -18017,7 +19098,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_srav_epi16() {
+    const fn test_mm_maskz_srav_epi16() {
         let a = _mm_set1_epi16(8);
         let count = _mm_set1_epi16(2);
         let r = _mm_maskz_srav_epi16(0, a, count);
@@ -18028,7 +19109,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_permutex2var_epi16() {
+    fn test_mm512_permutex2var_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -18046,7 +19127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_permutex2var_epi16() {
+    fn test_mm512_mask_permutex2var_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -18066,7 +19147,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_permutex2var_epi16() {
+    fn test_mm512_maskz_permutex2var_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -18086,7 +19167,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask2_permutex2var_epi16() {
+    fn test_mm512_mask2_permutex2var_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -18106,7 +19187,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_permutex2var_epi16() {
+    fn test_mm256_permutex2var_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
@@ -18119,7 +19200,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_permutex2var_epi16() {
+    fn test_mm256_mask_permutex2var_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
@@ -18134,7 +19215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_permutex2var_epi16() {
+    fn test_mm256_maskz_permutex2var_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
@@ -18149,7 +19230,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask2_permutex2var_epi16() {
+    fn test_mm256_mask2_permutex2var_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm256_set_epi16(1, 1<<4, 2, 1<<4, 3, 1<<4, 4, 1<<4, 5, 1<<4, 6, 1<<4, 7, 1<<4, 8, 1<<4);
@@ -18165,7 +19246,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_permutex2var_epi16() {
+    fn test_mm_permutex2var_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm_set1_epi16(100);
@@ -18175,7 +19256,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_permutex2var_epi16() {
+    fn test_mm_mask_permutex2var_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm_set1_epi16(100);
@@ -18187,7 +19268,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_permutex2var_epi16() {
+    fn test_mm_maskz_permutex2var_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm_set1_epi16(100);
@@ -18199,7 +19280,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask2_permutex2var_epi16() {
+    fn test_mm_mask2_permutex2var_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm_set_epi16(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm_set1_epi16(100);
@@ -18211,7 +19292,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_permutexvar_epi16() {
+    fn test_mm512_permutexvar_epi16() {
         let idx = _mm512_set1_epi16(1);
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -18222,7 +19303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_permutexvar_epi16() {
+    fn test_mm512_mask_permutexvar_epi16() {
         let idx = _mm512_set1_epi16(1);
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -18235,7 +19316,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_permutexvar_epi16() {
+    fn test_mm512_maskz_permutexvar_epi16() {
         let idx = _mm512_set1_epi16(1);
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -18248,7 +19329,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_permutexvar_epi16() {
+    fn test_mm256_permutexvar_epi16() {
         let idx = _mm256_set1_epi16(1);
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_permutexvar_epi16(idx, a);
@@ -18257,7 +19338,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_permutexvar_epi16() {
+    fn test_mm256_mask_permutexvar_epi16() {
         let idx = _mm256_set1_epi16(1);
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_mask_permutexvar_epi16(a, 0, idx, a);
@@ -18268,7 +19349,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_permutexvar_epi16() {
+    fn test_mm256_maskz_permutexvar_epi16() {
         let idx = _mm256_set1_epi16(1);
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_permutexvar_epi16(0, idx, a);
@@ -18279,7 +19360,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_permutexvar_epi16() {
+    fn test_mm_permutexvar_epi16() {
         let idx = _mm_set1_epi16(1);
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_permutexvar_epi16(idx, a);
@@ -18288,7 +19369,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_permutexvar_epi16() {
+    fn test_mm_mask_permutexvar_epi16() {
         let idx = _mm_set1_epi16(1);
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_mask_permutexvar_epi16(a, 0, idx, a);
@@ -18299,7 +19380,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_permutexvar_epi16() {
+    fn test_mm_maskz_permutexvar_epi16() {
         let idx = _mm_set1_epi16(1);
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_maskz_permutexvar_epi16(0, idx, a);
@@ -18310,7 +19391,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_blend_epi16() {
+    const fn test_mm512_mask_blend_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(2);
         let r = _mm512_mask_blend_epi16(0b11111111_00000000_11111111_00000000, a, b);
@@ -18321,7 +19402,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_blend_epi16() {
+    const fn test_mm256_mask_blend_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(2);
         let r = _mm256_mask_blend_epi16(0b11111111_00000000, a, b);
@@ -18330,7 +19411,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_blend_epi16() {
+    const fn test_mm_mask_blend_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(2);
         let r = _mm_mask_blend_epi16(0b11110000, a, b);
@@ -18339,7 +19420,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_blend_epi8() {
+    const fn test_mm512_mask_blend_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(2);
         let r = _mm512_mask_blend_epi8(
@@ -18356,7 +19437,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_blend_epi8() {
+    const fn test_mm256_mask_blend_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(2);
         let r = _mm256_mask_blend_epi8(0b11111111_00000000_11111111_00000000, a, b);
@@ -18367,7 +19448,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_blend_epi8() {
+    const fn test_mm_mask_blend_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(2);
         let r = _mm_mask_blend_epi8(0b11111111_00000000, a, b);
@@ -18376,7 +19457,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_broadcastw_epi16() {
+    const fn test_mm512_broadcastw_epi16() {
         let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_broadcastw_epi16(a);
         let e = _mm512_set1_epi16(24);
@@ -18384,7 +19465,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_broadcastw_epi16() {
+    const fn test_mm512_mask_broadcastw_epi16() {
         let src = _mm512_set1_epi16(1);
         let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_mask_broadcastw_epi16(src, 0, a);
@@ -18395,7 +19476,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_broadcastw_epi16() {
+    const fn test_mm512_maskz_broadcastw_epi16() {
         let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_maskz_broadcastw_epi16(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -18405,7 +19486,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_broadcastw_epi16() {
+    const fn test_mm256_mask_broadcastw_epi16() {
         let src = _mm256_set1_epi16(1);
         let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm256_mask_broadcastw_epi16(src, 0, a);
@@ -18416,7 +19497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcastw_epi16() {
+    const fn test_mm256_maskz_broadcastw_epi16() {
         let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm256_maskz_broadcastw_epi16(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -18426,7 +19507,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_broadcastw_epi16() {
+    const fn test_mm_mask_broadcastw_epi16() {
         let src = _mm_set1_epi16(1);
         let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm_mask_broadcastw_epi16(src, 0, a);
@@ -18437,7 +19518,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_broadcastw_epi16() {
+    const fn test_mm_maskz_broadcastw_epi16() {
         let a = _mm_set_epi16(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm_maskz_broadcastw_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -18447,7 +19528,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_broadcastb_epi8() {
+    const fn test_mm512_broadcastb_epi8() {
         let a = _mm_set_epi8(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
         );
@@ -18457,7 +19538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_broadcastb_epi8() {
+    const fn test_mm512_mask_broadcastb_epi8() {
         let src = _mm512_set1_epi8(1);
         let a = _mm_set_epi8(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -18474,7 +19555,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_broadcastb_epi8() {
+    const fn test_mm512_maskz_broadcastb_epi8() {
         let a = _mm_set_epi8(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
         );
@@ -18489,7 +19570,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_broadcastb_epi8() {
+    const fn test_mm256_mask_broadcastb_epi8() {
         let src = _mm256_set1_epi8(1);
         let a = _mm_set_epi8(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -18502,7 +19583,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcastb_epi8() {
+    const fn test_mm256_maskz_broadcastb_epi8() {
         let a = _mm_set_epi8(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
         );
@@ -18514,7 +19595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_broadcastb_epi8() {
+    const fn test_mm_mask_broadcastb_epi8() {
         let src = _mm_set1_epi8(1);
         let a = _mm_set_epi8(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -18527,7 +19608,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_broadcastb_epi8() {
+    const fn test_mm_maskz_broadcastb_epi8() {
         let a = _mm_set_epi8(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
         );
@@ -18539,7 +19620,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_unpackhi_epi16() {
+    const fn test_mm512_unpackhi_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18554,7 +19635,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_unpackhi_epi16() {
+    const fn test_mm512_mask_unpackhi_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18571,7 +19652,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_unpackhi_epi16() {
+    const fn test_mm512_maskz_unpackhi_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18588,7 +19669,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_unpackhi_epi16() {
+    const fn test_mm256_mask_unpackhi_epi16() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm256_set_epi16(
             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
@@ -18601,7 +19682,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_unpackhi_epi16() {
+    const fn test_mm256_maskz_unpackhi_epi16() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm256_set_epi16(
             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
@@ -18614,7 +19695,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_unpackhi_epi16() {
+    const fn test_mm_mask_unpackhi_epi16() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
         let r = _mm_mask_unpackhi_epi16(a, 0, a, b);
@@ -18625,7 +19706,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_unpackhi_epi16() {
+    const fn test_mm_maskz_unpackhi_epi16() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
         let r = _mm_maskz_unpackhi_epi16(0, a, b);
@@ -18636,7 +19717,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_unpackhi_epi8() {
+    const fn test_mm512_unpackhi_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -18657,7 +19738,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_unpackhi_epi8() {
+    const fn test_mm512_mask_unpackhi_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -18685,7 +19766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_unpackhi_epi8() {
+    const fn test_mm512_maskz_unpackhi_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -18712,7 +19793,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_unpackhi_epi8() {
+    const fn test_mm256_mask_unpackhi_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18729,7 +19810,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_unpackhi_epi8() {
+    const fn test_mm256_maskz_unpackhi_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18746,7 +19827,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_unpackhi_epi8() {
+    const fn test_mm_mask_unpackhi_epi8() {
         let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm_set_epi8(
             65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
@@ -18759,7 +19840,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_unpackhi_epi8() {
+    const fn test_mm_maskz_unpackhi_epi8() {
         let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm_set_epi8(
             65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
@@ -18772,7 +19853,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_unpacklo_epi16() {
+    const fn test_mm512_unpacklo_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18787,7 +19868,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_unpacklo_epi16() {
+    const fn test_mm512_mask_unpacklo_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18804,7 +19885,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_unpacklo_epi16() {
+    const fn test_mm512_maskz_unpacklo_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18821,7 +19902,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_unpacklo_epi16() {
+    const fn test_mm256_mask_unpacklo_epi16() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm256_set_epi16(
             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
@@ -18834,7 +19915,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_unpacklo_epi16() {
+    const fn test_mm256_maskz_unpacklo_epi16() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm256_set_epi16(
             33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
@@ -18847,7 +19928,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_unpacklo_epi16() {
+    const fn test_mm_mask_unpacklo_epi16() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
         let r = _mm_mask_unpacklo_epi16(a, 0, a, b);
@@ -18858,7 +19939,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_unpacklo_epi16() {
+    const fn test_mm_maskz_unpacklo_epi16() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi16(33, 34, 35, 36, 37, 38, 39, 40);
         let r = _mm_maskz_unpacklo_epi16(0, a, b);
@@ -18869,7 +19950,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_unpacklo_epi8() {
+    const fn test_mm512_unpacklo_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -18890,7 +19971,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_unpacklo_epi8() {
+    const fn test_mm512_mask_unpacklo_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -18918,7 +19999,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_unpacklo_epi8() {
+    const fn test_mm512_maskz_unpacklo_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -18945,7 +20026,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_unpacklo_epi8() {
+    const fn test_mm256_mask_unpacklo_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18962,7 +20043,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_unpacklo_epi8() {
+    const fn test_mm256_maskz_unpacklo_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16,
                                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
@@ -18979,7 +20060,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_unpacklo_epi8() {
+    const fn test_mm_mask_unpacklo_epi8() {
         let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm_set_epi8(
             65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
@@ -18994,7 +20075,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_unpacklo_epi8() {
+    const fn test_mm_maskz_unpacklo_epi8() {
         let a = _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm_set_epi8(
             65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
@@ -19009,7 +20090,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_mov_epi16() {
+    const fn test_mm512_mask_mov_epi16() {
         let src = _mm512_set1_epi16(1);
         let a = _mm512_set1_epi16(2);
         let r = _mm512_mask_mov_epi16(src, 0, a);
@@ -19019,7 +20100,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_mov_epi16() {
+    const fn test_mm512_maskz_mov_epi16() {
         let a = _mm512_set1_epi16(2);
         let r = _mm512_maskz_mov_epi16(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -19028,7 +20109,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_mov_epi16() {
+    const fn test_mm256_mask_mov_epi16() {
         let src = _mm256_set1_epi16(1);
         let a = _mm256_set1_epi16(2);
         let r = _mm256_mask_mov_epi16(src, 0, a);
@@ -19038,7 +20119,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_mov_epi16() {
+    const fn test_mm256_maskz_mov_epi16() {
         let a = _mm256_set1_epi16(2);
         let r = _mm256_maskz_mov_epi16(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -19047,7 +20128,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_mov_epi16() {
+    const fn test_mm_mask_mov_epi16() {
         let src = _mm_set1_epi16(1);
         let a = _mm_set1_epi16(2);
         let r = _mm_mask_mov_epi16(src, 0, a);
@@ -19057,7 +20138,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_mov_epi16() {
+    const fn test_mm_maskz_mov_epi16() {
         let a = _mm_set1_epi16(2);
         let r = _mm_maskz_mov_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -19066,7 +20147,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_mov_epi8() {
+    const fn test_mm512_mask_mov_epi8() {
         let src = _mm512_set1_epi8(1);
         let a = _mm512_set1_epi8(2);
         let r = _mm512_mask_mov_epi8(src, 0, a);
@@ -19080,7 +20161,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_mov_epi8() {
+    const fn test_mm512_maskz_mov_epi8() {
         let a = _mm512_set1_epi8(2);
         let r = _mm512_maskz_mov_epi8(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -19092,7 +20173,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_mov_epi8() {
+    const fn test_mm256_mask_mov_epi8() {
         let src = _mm256_set1_epi8(1);
         let a = _mm256_set1_epi8(2);
         let r = _mm256_mask_mov_epi8(src, 0, a);
@@ -19102,7 +20183,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_mov_epi8() {
+    const fn test_mm256_maskz_mov_epi8() {
         let a = _mm256_set1_epi8(2);
         let r = _mm256_maskz_mov_epi8(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -19111,7 +20192,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_mov_epi8() {
+    const fn test_mm_mask_mov_epi8() {
         let src = _mm_set1_epi8(1);
         let a = _mm_set1_epi8(2);
         let r = _mm_mask_mov_epi8(src, 0, a);
@@ -19121,7 +20202,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_mov_epi8() {
+    const fn test_mm_maskz_mov_epi8() {
         let a = _mm_set1_epi8(2);
         let r = _mm_maskz_mov_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -19130,7 +20211,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_set1_epi16() {
+    const fn test_mm512_mask_set1_epi16() {
         let src = _mm512_set1_epi16(2);
         let a: i16 = 11;
         let r = _mm512_mask_set1_epi16(src, 0, a);
@@ -19141,7 +20222,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_set1_epi16() {
+    const fn test_mm512_maskz_set1_epi16() {
         let a: i16 = 11;
         let r = _mm512_maskz_set1_epi16(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -19151,7 +20232,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_set1_epi16() {
+    const fn test_mm256_mask_set1_epi16() {
         let src = _mm256_set1_epi16(2);
         let a: i16 = 11;
         let r = _mm256_mask_set1_epi16(src, 0, a);
@@ -19162,7 +20243,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_set1_epi16() {
+    const fn test_mm256_maskz_set1_epi16() {
         let a: i16 = 11;
         let r = _mm256_maskz_set1_epi16(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -19172,7 +20253,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_set1_epi16() {
+    const fn test_mm_mask_set1_epi16() {
         let src = _mm_set1_epi16(2);
         let a: i16 = 11;
         let r = _mm_mask_set1_epi16(src, 0, a);
@@ -19183,7 +20264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_set1_epi16() {
+    const fn test_mm_maskz_set1_epi16() {
         let a: i16 = 11;
         let r = _mm_maskz_set1_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -19193,7 +20274,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_set1_epi8() {
+    const fn test_mm512_mask_set1_epi8() {
         let src = _mm512_set1_epi8(2);
         let a: i8 = 11;
         let r = _mm512_mask_set1_epi8(src, 0, a);
@@ -19208,7 +20289,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_set1_epi8() {
+    const fn test_mm512_maskz_set1_epi8() {
         let a: i8 = 11;
         let r = _mm512_maskz_set1_epi8(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -19221,7 +20302,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_set1_epi8() {
+    const fn test_mm256_mask_set1_epi8() {
         let src = _mm256_set1_epi8(2);
         let a: i8 = 11;
         let r = _mm256_mask_set1_epi8(src, 0, a);
@@ -19232,7 +20313,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_set1_epi8() {
+    const fn test_mm256_maskz_set1_epi8() {
         let a: i8 = 11;
         let r = _mm256_maskz_set1_epi8(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -19242,7 +20323,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_set1_epi8() {
+    const fn test_mm_mask_set1_epi8() {
         let src = _mm_set1_epi8(2);
         let a: i8 = 11;
         let r = _mm_mask_set1_epi8(src, 0, a);
@@ -19253,7 +20334,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_set1_epi8() {
+    const fn test_mm_maskz_set1_epi8() {
         let a: i8 = 11;
         let r = _mm_maskz_set1_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -19263,7 +20344,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_shufflelo_epi16() {
+    const fn test_mm512_shufflelo_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -19279,7 +20360,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_shufflelo_epi16() {
+    const fn test_mm512_mask_shufflelo_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -19301,7 +20382,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_shufflelo_epi16() {
+    const fn test_mm512_maskz_shufflelo_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -19320,7 +20401,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_shufflelo_epi16() {
+    const fn test_mm256_mask_shufflelo_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -19330,7 +20411,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_shufflelo_epi16() {
+    const fn test_mm256_maskz_shufflelo_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -19340,7 +20421,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_shufflelo_epi16() {
+    const fn test_mm_mask_shufflelo_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_mask_shufflelo_epi16::<0b00_01_01_11>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -19350,7 +20431,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_shufflelo_epi16() {
+    const fn test_mm_maskz_shufflelo_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_maskz_shufflelo_epi16::<0b00_01_01_11>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -19360,7 +20441,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_shufflehi_epi16() {
+    const fn test_mm512_shufflehi_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -19376,7 +20457,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_shufflehi_epi16() {
+    const fn test_mm512_mask_shufflehi_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -19398,7 +20479,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_shufflehi_epi16() {
+    const fn test_mm512_maskz_shufflehi_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -19417,7 +20498,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_shufflehi_epi16() {
+    const fn test_mm256_mask_shufflehi_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -19427,7 +20508,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_shufflehi_epi16() {
+    const fn test_mm256_maskz_shufflehi_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -19437,7 +20518,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_shufflehi_epi16() {
+    const fn test_mm_mask_shufflehi_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_mask_shufflehi_epi16::<0b00_01_01_11>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -19447,7 +20528,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_shufflehi_epi16() {
+    const fn test_mm_maskz_shufflehi_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_maskz_shufflehi_epi16::<0b00_01_01_11>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -19457,7 +20538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_shuffle_epi8() {
+    fn test_mm512_shuffle_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -19474,7 +20555,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_shuffle_epi8() {
+    fn test_mm512_mask_shuffle_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -19498,7 +20579,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_shuffle_epi8() {
+    fn test_mm512_maskz_shuffle_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -19521,7 +20602,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_shuffle_epi8() {
+    fn test_mm256_mask_shuffle_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -19536,7 +20617,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_shuffle_epi8() {
+    fn test_mm256_maskz_shuffle_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -19551,7 +20632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_shuffle_epi8() {
+    fn test_mm_mask_shuffle_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_set1_epi8(1);
         let r = _mm_mask_shuffle_epi8(a, 0, a, b);
@@ -19564,7 +20645,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_shuffle_epi8() {
+    fn test_mm_maskz_shuffle_epi8() {
         #[rustfmt::skip]
         let a = _mm_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15);
         let b = _mm_set1_epi8(1);
@@ -19578,7 +20659,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_test_epi16_mask() {
+    const fn test_mm512_test_epi16_mask() {
         let a = _mm512_set1_epi16(1 << 0);
         let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm512_test_epi16_mask(a, b);
@@ -19587,7 +20668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_test_epi16_mask() {
+    const fn test_mm512_mask_test_epi16_mask() {
         let a = _mm512_set1_epi16(1 << 0);
         let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm512_mask_test_epi16_mask(0, a, b);
@@ -19598,7 +20679,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_test_epi16_mask() {
+    const fn test_mm256_test_epi16_mask() {
         let a = _mm256_set1_epi16(1 << 0);
         let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm256_test_epi16_mask(a, b);
@@ -19607,7 +20688,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_test_epi16_mask() {
+    const fn test_mm256_mask_test_epi16_mask() {
         let a = _mm256_set1_epi16(1 << 0);
         let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm256_mask_test_epi16_mask(0, a, b);
@@ -19618,7 +20699,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_test_epi16_mask() {
+    const fn test_mm_test_epi16_mask() {
         let a = _mm_set1_epi16(1 << 0);
         let b = _mm_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm_test_epi16_mask(a, b);
@@ -19627,7 +20708,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_test_epi16_mask() {
+    const fn test_mm_mask_test_epi16_mask() {
         let a = _mm_set1_epi16(1 << 0);
         let b = _mm_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm_mask_test_epi16_mask(0, a, b);
@@ -19638,7 +20719,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_test_epi8_mask() {
+    const fn test_mm512_test_epi8_mask() {
         let a = _mm512_set1_epi8(1 << 0);
         let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm512_test_epi8_mask(a, b);
@@ -19648,7 +20729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_test_epi8_mask() {
+    const fn test_mm512_mask_test_epi8_mask() {
         let a = _mm512_set1_epi8(1 << 0);
         let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm512_mask_test_epi8_mask(0, a, b);
@@ -19664,7 +20745,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_test_epi8_mask() {
+    const fn test_mm256_test_epi8_mask() {
         let a = _mm256_set1_epi8(1 << 0);
         let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm256_test_epi8_mask(a, b);
@@ -19673,7 +20754,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_test_epi8_mask() {
+    const fn test_mm256_mask_test_epi8_mask() {
         let a = _mm256_set1_epi8(1 << 0);
         let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm256_mask_test_epi8_mask(0, a, b);
@@ -19684,7 +20765,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_test_epi8_mask() {
+    const fn test_mm_test_epi8_mask() {
         let a = _mm_set1_epi8(1 << 0);
         let b = _mm_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm_test_epi8_mask(a, b);
@@ -19693,7 +20774,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_test_epi8_mask() {
+    const fn test_mm_mask_test_epi8_mask() {
         let a = _mm_set1_epi8(1 << 0);
         let b = _mm_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm_mask_test_epi8_mask(0, a, b);
@@ -19704,7 +20785,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_testn_epi16_mask() {
+    const fn test_mm512_testn_epi16_mask() {
         let a = _mm512_set1_epi16(1 << 0);
         let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm512_testn_epi16_mask(a, b);
@@ -19713,7 +20794,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_testn_epi16_mask() {
+    const fn test_mm512_mask_testn_epi16_mask() {
         let a = _mm512_set1_epi16(1 << 0);
         let b = _mm512_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm512_mask_testn_epi16_mask(0, a, b);
@@ -19724,7 +20805,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_testn_epi16_mask() {
+    const fn test_mm256_testn_epi16_mask() {
         let a = _mm256_set1_epi16(1 << 0);
         let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm256_testn_epi16_mask(a, b);
@@ -19733,7 +20814,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_testn_epi16_mask() {
+    const fn test_mm256_mask_testn_epi16_mask() {
         let a = _mm256_set1_epi16(1 << 0);
         let b = _mm256_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm256_mask_testn_epi16_mask(0, a, b);
@@ -19744,7 +20825,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_testn_epi16_mask() {
+    const fn test_mm_testn_epi16_mask() {
         let a = _mm_set1_epi16(1 << 0);
         let b = _mm_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm_testn_epi16_mask(a, b);
@@ -19753,7 +20834,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_testn_epi16_mask() {
+    const fn test_mm_mask_testn_epi16_mask() {
         let a = _mm_set1_epi16(1 << 0);
         let b = _mm_set1_epi16(1 << 0 | 1 << 1);
         let r = _mm_mask_testn_epi16_mask(0, a, b);
@@ -19764,7 +20845,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_testn_epi8_mask() {
+    const fn test_mm512_testn_epi8_mask() {
         let a = _mm512_set1_epi8(1 << 0);
         let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm512_testn_epi8_mask(a, b);
@@ -19774,7 +20855,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_testn_epi8_mask() {
+    const fn test_mm512_mask_testn_epi8_mask() {
         let a = _mm512_set1_epi8(1 << 0);
         let b = _mm512_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm512_mask_testn_epi8_mask(0, a, b);
@@ -19790,7 +20871,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_testn_epi8_mask() {
+    const fn test_mm256_testn_epi8_mask() {
         let a = _mm256_set1_epi8(1 << 0);
         let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm256_testn_epi8_mask(a, b);
@@ -19799,7 +20880,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_testn_epi8_mask() {
+    const fn test_mm256_mask_testn_epi8_mask() {
         let a = _mm256_set1_epi8(1 << 0);
         let b = _mm256_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm256_mask_testn_epi8_mask(0, a, b);
@@ -19810,7 +20891,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_testn_epi8_mask() {
+    const fn test_mm_testn_epi8_mask() {
         let a = _mm_set1_epi8(1 << 0);
         let b = _mm_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm_testn_epi8_mask(a, b);
@@ -19819,7 +20900,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_testn_epi8_mask() {
+    const fn test_mm_mask_testn_epi8_mask() {
         let a = _mm_set1_epi8(1 << 0);
         let b = _mm_set1_epi8(1 << 0 | 1 << 1);
         let r = _mm_mask_testn_epi8_mask(0, a, b);
@@ -19830,42 +20911,46 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_store_mask64() {
+    const fn test_store_mask64() {
         let a: __mmask64 =
             0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
         let mut r = 0;
-        _store_mask64(&mut r, a);
+        unsafe {
+            _store_mask64(&mut r, a);
+        }
         assert_eq!(r, a);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_store_mask32() {
+    const fn test_store_mask32() {
         let a: __mmask32 = 0b11111111_00000000_11111111_00000000;
         let mut r = 0;
-        _store_mask32(&mut r, a);
+        unsafe {
+            _store_mask32(&mut r, a);
+        }
         assert_eq!(r, a);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_load_mask64() {
+    const fn test_load_mask64() {
         let p: __mmask64 =
             0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
-        let r = _load_mask64(&p);
+        let r = unsafe { _load_mask64(&p) };
         let e: __mmask64 =
             0b11111111_00000000_11111111_00000000_11111111_00000000_11111111_00000000;
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_load_mask32() {
+    const fn test_load_mask32() {
         let p: __mmask32 = 0b11111111_00000000_11111111_00000000;
-        let r = _load_mask32(&p);
+        let r = unsafe { _load_mask32(&p) };
         let e: __mmask32 = 0b11111111_00000000_11111111_00000000;
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_sad_epu8() {
+    fn test_mm512_sad_epu8() {
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(4);
         let r = _mm512_sad_epu8(a, b);
@@ -19874,7 +20959,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_dbsad_epu8() {
+    fn test_mm512_dbsad_epu8() {
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(4);
         let r = _mm512_dbsad_epu8::<0>(a, b);
@@ -19883,7 +20968,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_dbsad_epu8() {
+    fn test_mm512_mask_dbsad_epu8() {
         let src = _mm512_set1_epi16(1);
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(4);
@@ -19895,7 +20980,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_dbsad_epu8() {
+    fn test_mm512_maskz_dbsad_epu8() {
         let a = _mm512_set1_epi8(2);
         let b = _mm512_set1_epi8(4);
         let r = _mm512_maskz_dbsad_epu8::<0>(0, a, b);
@@ -19906,7 +20991,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_dbsad_epu8() {
+    fn test_mm256_dbsad_epu8() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
         let r = _mm256_dbsad_epu8::<0>(a, b);
@@ -19915,7 +21000,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_dbsad_epu8() {
+    fn test_mm256_mask_dbsad_epu8() {
         let src = _mm256_set1_epi16(1);
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
@@ -19927,7 +21012,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_dbsad_epu8() {
+    fn test_mm256_maskz_dbsad_epu8() {
         let a = _mm256_set1_epi8(2);
         let b = _mm256_set1_epi8(4);
         let r = _mm256_maskz_dbsad_epu8::<0>(0, a, b);
@@ -19938,7 +21023,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_dbsad_epu8() {
+    fn test_mm_dbsad_epu8() {
         let a = _mm_set1_epi8(2);
         let b = _mm_set1_epi8(4);
         let r = _mm_dbsad_epu8::<0>(a, b);
@@ -19947,7 +21032,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_dbsad_epu8() {
+    fn test_mm_mask_dbsad_epu8() {
         let src = _mm_set1_epi16(1);
         let a = _mm_set1_epi8(2);
         let b = _mm_set1_epi8(4);
@@ -19959,7 +21044,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_dbsad_epu8() {
+    fn test_mm_maskz_dbsad_epu8() {
         let a = _mm_set1_epi8(2);
         let b = _mm_set1_epi8(4);
         let r = _mm_maskz_dbsad_epu8::<0>(0, a, b);
@@ -19970,7 +21055,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_movepi16_mask() {
+    const fn test_mm512_movepi16_mask() {
         let a = _mm512_set1_epi16(1 << 15);
         let r = _mm512_movepi16_mask(a);
         let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
@@ -19978,7 +21063,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_movepi16_mask() {
+    const fn test_mm256_movepi16_mask() {
         let a = _mm256_set1_epi16(1 << 15);
         let r = _mm256_movepi16_mask(a);
         let e: __mmask16 = 0b11111111_11111111;
@@ -19986,7 +21071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_movepi16_mask() {
+    const fn test_mm_movepi16_mask() {
         let a = _mm_set1_epi16(1 << 15);
         let r = _mm_movepi16_mask(a);
         let e: __mmask8 = 0b11111111;
@@ -19994,7 +21079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_movepi8_mask() {
+    const fn test_mm512_movepi8_mask() {
         let a = _mm512_set1_epi8(1 << 7);
         let r = _mm512_movepi8_mask(a);
         let e: __mmask64 =
@@ -20003,7 +21088,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_movepi8_mask() {
+    const fn test_mm256_movepi8_mask() {
         let a = _mm256_set1_epi8(1 << 7);
         let r = _mm256_movepi8_mask(a);
         let e: __mmask32 = 0b11111111_11111111_11111111_11111111;
@@ -20011,7 +21096,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_movepi8_mask() {
+    const fn test_mm_movepi8_mask() {
         let a = _mm_set1_epi8(1 << 7);
         let r = _mm_movepi8_mask(a);
         let e: __mmask16 = 0b11111111_11111111;
@@ -20019,7 +21104,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_movm_epi16() {
+    const fn test_mm512_movm_epi16() {
         let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
         let r = _mm512_movm_epi16(a);
         let e = _mm512_set1_epi16(
@@ -20044,7 +21129,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_movm_epi16() {
+    const fn test_mm256_movm_epi16() {
         let a: __mmask16 = 0b11111111_11111111;
         let r = _mm256_movm_epi16(a);
         let e = _mm256_set1_epi16(
@@ -20069,7 +21154,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_movm_epi16() {
+    const fn test_mm_movm_epi16() {
         let a: __mmask8 = 0b11111111;
         let r = _mm_movm_epi16(a);
         let e = _mm_set1_epi16(
@@ -20094,7 +21179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_movm_epi8() {
+    const fn test_mm512_movm_epi8() {
         let a: __mmask64 =
             0b11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111;
         let r = _mm512_movm_epi8(a);
@@ -20104,7 +21189,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_movm_epi8() {
+    const fn test_mm256_movm_epi8() {
         let a: __mmask32 = 0b11111111_11111111_11111111_11111111;
         let r = _mm256_movm_epi8(a);
         let e =
@@ -20113,7 +21198,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_movm_epi8() {
+    const fn test_mm_movm_epi8() {
         let a: __mmask16 = 0b11111111_11111111;
         let r = _mm_movm_epi8(a);
         let e =
@@ -20122,7 +21207,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_cvtmask32_u32() {
+    const fn test_cvtmask32_u32() {
         let a: __mmask32 = 0b11001100_00110011_01100110_10011001;
         let r = _cvtmask32_u32(a);
         let e: u32 = 0b11001100_00110011_01100110_10011001;
@@ -20130,7 +21215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_cvtu32_mask32() {
+    const fn test_cvtu32_mask32() {
         let a: u32 = 0b11001100_00110011_01100110_10011001;
         let r = _cvtu32_mask32(a);
         let e: __mmask32 = 0b11001100_00110011_01100110_10011001;
@@ -20138,7 +21223,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kadd_mask32() {
+    const fn test_kadd_mask32() {
         let a: __mmask32 = 11;
         let b: __mmask32 = 22;
         let r = _kadd_mask32(a, b);
@@ -20147,7 +21232,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kadd_mask64() {
+    const fn test_kadd_mask64() {
         let a: __mmask64 = 11;
         let b: __mmask64 = 22;
         let r = _kadd_mask64(a, b);
@@ -20156,7 +21241,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kand_mask32() {
+    const fn test_kand_mask32() {
         let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
         let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
         let r = _kand_mask32(a, b);
@@ -20165,7 +21250,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kand_mask64() {
+    const fn test_kand_mask64() {
         let a: __mmask64 =
             0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
         let b: __mmask64 =
@@ -20177,7 +21262,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_knot_mask32() {
+    const fn test_knot_mask32() {
         let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
         let r = _knot_mask32(a);
         let e: __mmask32 = 0b00110011_11001100_00110011_11001100;
@@ -20185,7 +21270,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_knot_mask64() {
+    const fn test_knot_mask64() {
         let a: __mmask64 =
             0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
         let r = _knot_mask64(a);
@@ -20195,7 +21280,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kandn_mask32() {
+    const fn test_kandn_mask32() {
         let a: __mmask32 = 0b11001100_00110011_11001100_00110011;
         let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
         let r = _kandn_mask32(a, b);
@@ -20204,7 +21289,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kandn_mask64() {
+    const fn test_kandn_mask64() {
         let a: __mmask64 =
             0b11001100_00110011_11001100_00110011_11001100_00110011_11001100_00110011;
         let b: __mmask64 =
@@ -20216,7 +21301,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kor_mask32() {
+    const fn test_kor_mask32() {
         let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
         let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
         let r = _kor_mask32(a, b);
@@ -20225,7 +21310,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kor_mask64() {
+    const fn test_kor_mask64() {
         let a: __mmask64 =
             0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
         let b: __mmask64 =
@@ -20237,7 +21322,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kxor_mask32() {
+    const fn test_kxor_mask32() {
         let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
         let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
         let r = _kxor_mask32(a, b);
@@ -20246,7 +21331,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kxor_mask64() {
+    const fn test_kxor_mask64() {
         let a: __mmask64 =
             0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
         let b: __mmask64 =
@@ -20258,7 +21343,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kxnor_mask32() {
+    const fn test_kxnor_mask32() {
         let a: __mmask32 = 0b00110011_11001100_00110011_11001100;
         let b: __mmask32 = 0b11001100_00110011_11001100_00110011;
         let r = _kxnor_mask32(a, b);
@@ -20267,7 +21352,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kxnor_mask64() {
+    const fn test_kxnor_mask64() {
         let a: __mmask64 =
             0b00110011_11001100_00110011_11001100_00110011_11001100_00110011_11001100;
         let b: __mmask64 =
@@ -20279,27 +21364,27 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kortest_mask32_u8() {
+    const fn test_kortest_mask32_u8() {
         let a: __mmask32 = 0b0110100101101001_0110100101101001;
         let b: __mmask32 = 0b1011011010110110_1011011010110110;
         let mut all_ones: u8 = 0;
-        let r = _kortest_mask32_u8(a, b, &mut all_ones);
+        let r = unsafe { _kortest_mask32_u8(a, b, &mut all_ones) };
         assert_eq!(r, 0);
         assert_eq!(all_ones, 1);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kortest_mask64_u8() {
+    const fn test_kortest_mask64_u8() {
         let a: __mmask64 = 0b0110100101101001_0110100101101001;
         let b: __mmask64 = 0b1011011010110110_1011011010110110;
         let mut all_ones: u8 = 0;
-        let r = _kortest_mask64_u8(a, b, &mut all_ones);
+        let r = unsafe { _kortest_mask64_u8(a, b, &mut all_ones) };
         assert_eq!(r, 0);
         assert_eq!(all_ones, 0);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kortestc_mask32_u8() {
+    const fn test_kortestc_mask32_u8() {
         let a: __mmask32 = 0b0110100101101001_0110100101101001;
         let b: __mmask32 = 0b1011011010110110_1011011010110110;
         let r = _kortestc_mask32_u8(a, b);
@@ -20307,7 +21392,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kortestc_mask64_u8() {
+    const fn test_kortestc_mask64_u8() {
         let a: __mmask64 = 0b0110100101101001_0110100101101001;
         let b: __mmask64 = 0b1011011010110110_1011011010110110;
         let r = _kortestc_mask64_u8(a, b);
@@ -20315,7 +21400,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kortestz_mask32_u8() {
+    const fn test_kortestz_mask32_u8() {
         let a: __mmask32 = 0b0110100101101001_0110100101101001;
         let b: __mmask32 = 0b1011011010110110_1011011010110110;
         let r = _kortestz_mask32_u8(a, b);
@@ -20323,7 +21408,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kortestz_mask64_u8() {
+    const fn test_kortestz_mask64_u8() {
         let a: __mmask64 = 0b0110100101101001_0110100101101001;
         let b: __mmask64 = 0b1011011010110110_1011011010110110;
         let r = _kortestz_mask64_u8(a, b);
@@ -20331,7 +21416,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kshiftli_mask32() {
+    const fn test_kshiftli_mask32() {
         let a: __mmask32 = 0b0110100101101001_0110100101101001;
         let r = _kshiftli_mask32::<3>(a);
         let e: __mmask32 = 0b0100101101001011_0100101101001000;
@@ -20351,7 +21436,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kshiftli_mask64() {
+    const fn test_kshiftli_mask64() {
         let a: __mmask64 = 0b0110100101101001_0110100101101001;
         let r = _kshiftli_mask64::<3>(a);
         let e: __mmask64 = 0b0110100101101001011_0100101101001000;
@@ -20371,7 +21456,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kshiftri_mask32() {
+    const fn test_kshiftri_mask32() {
         let a: __mmask32 = 0b1010100101101001_0110100101101001;
         let r = _kshiftri_mask32::<3>(a);
         let e: __mmask32 = 0b0001010100101101_0010110100101101;
@@ -20391,7 +21476,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_kshiftri_mask64() {
+    const fn test_kshiftri_mask64() {
         let a: __mmask64 = 0b1010100101101001011_0100101101001000;
         let r = _kshiftri_mask64::<3>(a);
         let e: __mmask64 = 0b1010100101101001_0110100101101001;
@@ -20415,17 +21500,17 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_ktest_mask32_u8() {
+    const fn test_ktest_mask32_u8() {
         let a: __mmask32 = 0b0110100100111100_0110100100111100;
         let b: __mmask32 = 0b1001011011000011_1001011011000011;
         let mut and_not: u8 = 0;
-        let r = _ktest_mask32_u8(a, b, &mut and_not);
+        let r = unsafe { _ktest_mask32_u8(a, b, &mut and_not) };
         assert_eq!(r, 1);
         assert_eq!(and_not, 0);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_ktestc_mask32_u8() {
+    const fn test_ktestc_mask32_u8() {
         let a: __mmask32 = 0b0110100100111100_0110100100111100;
         let b: __mmask32 = 0b1001011011000011_1001011011000011;
         let r = _ktestc_mask32_u8(a, b);
@@ -20433,7 +21518,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_ktestz_mask32_u8() {
+    const fn test_ktestz_mask32_u8() {
         let a: __mmask32 = 0b0110100100111100_0110100100111100;
         let b: __mmask32 = 0b1001011011000011_1001011011000011;
         let r = _ktestz_mask32_u8(a, b);
@@ -20441,17 +21526,17 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_ktest_mask64_u8() {
+    const fn test_ktest_mask64_u8() {
         let a: __mmask64 = 0b0110100100111100_0110100100111100;
         let b: __mmask64 = 0b1001011011000011_1001011011000011;
         let mut and_not: u8 = 0;
-        let r = _ktest_mask64_u8(a, b, &mut and_not);
+        let r = unsafe { _ktest_mask64_u8(a, b, &mut and_not) };
         assert_eq!(r, 1);
         assert_eq!(and_not, 0);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_ktestc_mask64_u8() {
+    const fn test_ktestc_mask64_u8() {
         let a: __mmask64 = 0b0110100100111100_0110100100111100;
         let b: __mmask64 = 0b1001011011000011_1001011011000011;
         let r = _ktestc_mask64_u8(a, b);
@@ -20459,7 +21544,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_ktestz_mask64_u8() {
+    const fn test_ktestz_mask64_u8() {
         let a: __mmask64 = 0b0110100100111100_0110100100111100;
         let b: __mmask64 = 0b1001011011000011_1001011011000011;
         let r = _ktestz_mask64_u8(a, b);
@@ -20467,7 +21552,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_kunpackw() {
+    const fn test_mm512_kunpackw() {
         let a: u32 = 0x00110011;
         let b: u32 = 0x00001011;
         let r = _mm512_kunpackw(a, b);
@@ -20476,7 +21561,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_kunpackd() {
+    const fn test_mm512_kunpackd() {
         let a: u64 = 0x11001100_00110011;
         let b: u64 = 0x00101110_00001011;
         let r = _mm512_kunpackd(a, b);
@@ -20485,7 +21570,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cvtepi16_epi8() {
+    const fn test_mm512_cvtepi16_epi8() {
         let a = _mm512_set1_epi16(2);
         let r = _mm512_cvtepi16_epi8(a);
         let e = _mm256_set1_epi8(2);
@@ -20493,7 +21578,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cvtepi16_epi8() {
+    const fn test_mm512_mask_cvtepi16_epi8() {
         let src = _mm256_set1_epi8(1);
         let a = _mm512_set1_epi16(2);
         let r = _mm512_mask_cvtepi16_epi8(src, 0, a);
@@ -20504,7 +21589,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_cvtepi16_epi8() {
+    const fn test_mm512_maskz_cvtepi16_epi8() {
         let a = _mm512_set1_epi16(2);
         let r = _mm512_maskz_cvtepi16_epi8(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -20514,7 +21599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cvtepi16_epi8() {
+    const fn test_mm256_cvtepi16_epi8() {
         let a = _mm256_set1_epi16(2);
         let r = _mm256_cvtepi16_epi8(a);
         let e = _mm_set1_epi8(2);
@@ -20522,7 +21607,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi16_epi8() {
+    const fn test_mm256_mask_cvtepi16_epi8() {
         let src = _mm_set1_epi8(1);
         let a = _mm256_set1_epi16(2);
         let r = _mm256_mask_cvtepi16_epi8(src, 0, a);
@@ -20533,7 +21618,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi16_epi8() {
+    const fn test_mm256_maskz_cvtepi16_epi8() {
         let a = _mm256_set1_epi16(2);
         let r = _mm256_maskz_cvtepi16_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -20543,7 +21628,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cvtepi16_epi8() {
+    const fn test_mm_cvtepi16_epi8() {
         let a = _mm_set1_epi16(2);
         let r = _mm_cvtepi16_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2);
@@ -20551,7 +21636,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi16_epi8() {
+    const fn test_mm_mask_cvtepi16_epi8() {
         let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
         let a = _mm_set1_epi16(2);
         let r = _mm_mask_cvtepi16_epi8(src, 0, a);
@@ -20562,7 +21647,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi16_epi8() {
+    const fn test_mm_maskz_cvtepi16_epi8() {
         let a = _mm_set1_epi16(2);
         let r = _mm_maskz_cvtepi16_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -20572,7 +21657,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cvtsepi16_epi8() {
+    fn test_mm512_cvtsepi16_epi8() {
         let a = _mm512_set1_epi16(i16::MAX);
         let r = _mm512_cvtsepi16_epi8(a);
         let e = _mm256_set1_epi8(i8::MAX);
@@ -20580,7 +21665,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cvtsepi16_epi8() {
+    fn test_mm512_mask_cvtsepi16_epi8() {
         let src = _mm256_set1_epi8(1);
         let a = _mm512_set1_epi16(i16::MAX);
         let r = _mm512_mask_cvtsepi16_epi8(src, 0, a);
@@ -20591,7 +21676,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cvtsepi16_epi8() {
+    fn test_mm256_cvtsepi16_epi8() {
         let a = _mm256_set1_epi16(i16::MAX);
         let r = _mm256_cvtsepi16_epi8(a);
         let e = _mm_set1_epi8(i8::MAX);
@@ -20599,7 +21684,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi16_epi8() {
+    fn test_mm256_mask_cvtsepi16_epi8() {
         let src = _mm_set1_epi8(1);
         let a = _mm256_set1_epi16(i16::MAX);
         let r = _mm256_mask_cvtsepi16_epi8(src, 0, a);
@@ -20610,7 +21695,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtsepi16_epi8() {
+    fn test_mm256_maskz_cvtsepi16_epi8() {
         let a = _mm256_set1_epi16(i16::MAX);
         let r = _mm256_maskz_cvtsepi16_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -20620,7 +21705,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cvtsepi16_epi8() {
+    fn test_mm_cvtsepi16_epi8() {
         let a = _mm_set1_epi16(i16::MAX);
         let r = _mm_cvtsepi16_epi8(a);
         #[rustfmt::skip]
@@ -20629,7 +21714,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi16_epi8() {
+    fn test_mm_mask_cvtsepi16_epi8() {
         let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
         let a = _mm_set1_epi16(i16::MAX);
         let r = _mm_mask_cvtsepi16_epi8(src, 0, a);
@@ -20641,7 +21726,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_cvtsepi16_epi8() {
+    fn test_mm_maskz_cvtsepi16_epi8() {
         let a = _mm_set1_epi16(i16::MAX);
         let r = _mm_maskz_cvtsepi16_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -20652,7 +21737,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_cvtsepi16_epi8() {
+    fn test_mm512_maskz_cvtsepi16_epi8() {
         let a = _mm512_set1_epi16(i16::MAX);
         let r = _mm512_maskz_cvtsepi16_epi8(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -20662,7 +21747,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cvtusepi16_epi8() {
+    fn test_mm512_cvtusepi16_epi8() {
         let a = _mm512_set1_epi16(i16::MIN);
         let r = _mm512_cvtusepi16_epi8(a);
         let e = _mm256_set1_epi8(-1);
@@ -20670,7 +21755,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cvtusepi16_epi8() {
+    fn test_mm512_mask_cvtusepi16_epi8() {
         let src = _mm256_set1_epi8(1);
         let a = _mm512_set1_epi16(i16::MIN);
         let r = _mm512_mask_cvtusepi16_epi8(src, 0, a);
@@ -20681,7 +21766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_cvtusepi16_epi8() {
+    fn test_mm512_maskz_cvtusepi16_epi8() {
         let a = _mm512_set1_epi16(i16::MIN);
         let r = _mm512_maskz_cvtusepi16_epi8(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -20691,7 +21776,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_cvtusepi16_epi8() {
+    fn test_mm256_cvtusepi16_epi8() {
         let a = _mm256_set1_epi16(i16::MIN);
         let r = _mm256_cvtusepi16_epi8(a);
         let e = _mm_set1_epi8(-1);
@@ -20699,7 +21784,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi16_epi8() {
+    fn test_mm256_mask_cvtusepi16_epi8() {
         let src = _mm_set1_epi8(1);
         let a = _mm256_set1_epi16(i16::MIN);
         let r = _mm256_mask_cvtusepi16_epi8(src, 0, a);
@@ -20710,7 +21795,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtusepi16_epi8() {
+    fn test_mm256_maskz_cvtusepi16_epi8() {
         let a = _mm256_set1_epi16(i16::MIN);
         let r = _mm256_maskz_cvtusepi16_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -20720,7 +21805,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_cvtusepi16_epi8() {
+    fn test_mm_cvtusepi16_epi8() {
         let a = _mm_set1_epi16(i16::MIN);
         let r = _mm_cvtusepi16_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
@@ -20728,7 +21813,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi16_epi8() {
+    fn test_mm_mask_cvtusepi16_epi8() {
         let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1);
         let a = _mm_set1_epi16(i16::MIN);
         let r = _mm_mask_cvtusepi16_epi8(src, 0, a);
@@ -20739,7 +21824,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_cvtusepi16_epi8() {
+    fn test_mm_maskz_cvtusepi16_epi8() {
         let a = _mm_set1_epi16(i16::MIN);
         let r = _mm_maskz_cvtusepi16_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -20749,7 +21834,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cvtepi8_epi16() {
+    const fn test_mm512_cvtepi8_epi16() {
         let a = _mm256_set1_epi8(2);
         let r = _mm512_cvtepi8_epi16(a);
         let e = _mm512_set1_epi16(2);
@@ -20757,7 +21842,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cvtepi8_epi16() {
+    const fn test_mm512_mask_cvtepi8_epi16() {
         let src = _mm512_set1_epi16(1);
         let a = _mm256_set1_epi8(2);
         let r = _mm512_mask_cvtepi8_epi16(src, 0, a);
@@ -20768,7 +21853,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_cvtepi8_epi16() {
+    const fn test_mm512_maskz_cvtepi8_epi16() {
         let a = _mm256_set1_epi8(2);
         let r = _mm512_maskz_cvtepi8_epi16(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -20778,7 +21863,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi8_epi16() {
+    const fn test_mm256_mask_cvtepi8_epi16() {
         let src = _mm256_set1_epi16(1);
         let a = _mm_set1_epi8(2);
         let r = _mm256_mask_cvtepi8_epi16(src, 0, a);
@@ -20789,7 +21874,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi8_epi16() {
+    const fn test_mm256_maskz_cvtepi8_epi16() {
         let a = _mm_set1_epi8(2);
         let r = _mm256_maskz_cvtepi8_epi16(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -20799,7 +21884,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi8_epi16() {
+    const fn test_mm_mask_cvtepi8_epi16() {
         let src = _mm_set1_epi16(1);
         let a = _mm_set1_epi8(2);
         let r = _mm_mask_cvtepi8_epi16(src, 0, a);
@@ -20810,7 +21895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi8_epi16() {
+    const fn test_mm_maskz_cvtepi8_epi16() {
         let a = _mm_set1_epi8(2);
         let r = _mm_maskz_cvtepi8_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -20820,7 +21905,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_cvtepu8_epi16() {
+    const fn test_mm512_cvtepu8_epi16() {
         let a = _mm256_set1_epi8(2);
         let r = _mm512_cvtepu8_epi16(a);
         let e = _mm512_set1_epi16(2);
@@ -20828,7 +21913,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cvtepu8_epi16() {
+    const fn test_mm512_mask_cvtepu8_epi16() {
         let src = _mm512_set1_epi16(1);
         let a = _mm256_set1_epi8(2);
         let r = _mm512_mask_cvtepu8_epi16(src, 0, a);
@@ -20839,7 +21924,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_cvtepu8_epi16() {
+    const fn test_mm512_maskz_cvtepu8_epi16() {
         let a = _mm256_set1_epi8(2);
         let r = _mm512_maskz_cvtepu8_epi16(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -20849,7 +21934,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu8_epi16() {
+    const fn test_mm256_mask_cvtepu8_epi16() {
         let src = _mm256_set1_epi16(1);
         let a = _mm_set1_epi8(2);
         let r = _mm256_mask_cvtepu8_epi16(src, 0, a);
@@ -20860,7 +21945,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu8_epi16() {
+    const fn test_mm256_maskz_cvtepu8_epi16() {
         let a = _mm_set1_epi8(2);
         let r = _mm256_maskz_cvtepu8_epi16(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -20870,7 +21955,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu8_epi16() {
+    const fn test_mm_mask_cvtepu8_epi16() {
         let src = _mm_set1_epi16(1);
         let a = _mm_set1_epi8(2);
         let r = _mm_mask_cvtepu8_epi16(src, 0, a);
@@ -20881,7 +21966,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu8_epi16() {
+    const fn test_mm_maskz_cvtepu8_epi16() {
         let a = _mm_set1_epi8(2);
         let r = _mm_maskz_cvtepu8_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -20891,7 +21976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_bslli_epi128() {
+    const fn test_mm512_bslli_epi128() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(
             1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
@@ -20911,7 +21996,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_bsrli_epi128() {
+    const fn test_mm512_bsrli_epi128() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
@@ -20931,7 +22016,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_alignr_epi8() {
+    const fn test_mm512_alignr_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(
             1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
@@ -20952,7 +22037,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_alignr_epi8() {
+    const fn test_mm512_mask_alignr_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(
             1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
@@ -20980,7 +22065,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_maskz_alignr_epi8() {
+    const fn test_mm512_maskz_alignr_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(
             1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
@@ -21007,7 +22092,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_alignr_epi8() {
+    const fn test_mm256_mask_alignr_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(
             1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
@@ -21026,7 +22111,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_alignr_epi8() {
+    const fn test_mm256_maskz_alignr_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(
             1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
@@ -21045,7 +22130,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_alignr_epi8() {
+    const fn test_mm_mask_alignr_epi8() {
         let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
         let b = _mm_set1_epi8(1);
         let r = _mm_mask_alignr_epi8::<14>(a, 0, a, b);
@@ -21056,7 +22141,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_alignr_epi8() {
+    const fn test_mm_maskz_alignr_epi8() {
         let a = _mm_set_epi8(1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0);
         let b = _mm_set1_epi8(1);
         let r = _mm_maskz_alignr_epi8::<14>(0, a, b);
@@ -21067,32 +22152,38 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cvtsepi16_storeu_epi8() {
+    fn test_mm512_mask_cvtsepi16_storeu_epi8() {
         let a = _mm512_set1_epi16(i16::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtsepi16_storeu_epi8(
-            &mut r as *mut _ as *mut i8,
-            0b11111111_11111111_11111111_11111111,
-            a,
-        );
+        unsafe {
+            _mm512_mask_cvtsepi16_storeu_epi8(
+                &mut r as *mut _ as *mut i8,
+                0b11111111_11111111_11111111_11111111,
+                a,
+            );
+        }
         let e = _mm256_set1_epi8(i8::MAX);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi16_storeu_epi8() {
+    fn test_mm256_mask_cvtsepi16_storeu_epi8() {
         let a = _mm256_set1_epi16(i16::MAX);
         let mut r = _mm_undefined_si128();
-        _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        unsafe {
+            _mm256_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        }
         let e = _mm_set1_epi8(i8::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi16_storeu_epi8() {
+    fn test_mm_mask_cvtsepi16_storeu_epi8() {
         let a = _mm_set1_epi16(i16::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtsepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0, 0, 0, 0, 0,
@@ -21102,68 +22193,80 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cvtepi16_storeu_epi8() {
+    fn test_mm512_mask_cvtepi16_storeu_epi8() {
         let a = _mm512_set1_epi16(8);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtepi16_storeu_epi8(
-            &mut r as *mut _ as *mut i8,
-            0b11111111_11111111_11111111_11111111,
-            a,
-        );
+        unsafe {
+            _mm512_mask_cvtepi16_storeu_epi8(
+                &mut r as *mut _ as *mut i8,
+                0b11111111_11111111_11111111_11111111,
+                a,
+            );
+        }
         let e = _mm256_set1_epi8(8);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi16_storeu_epi8() {
+    fn test_mm256_mask_cvtepi16_storeu_epi8() {
         let a = _mm256_set1_epi16(8);
         let mut r = _mm_undefined_si128();
-        _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        unsafe {
+            _mm256_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        }
         let e = _mm_set1_epi8(8);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi16_storeu_epi8() {
+    fn test_mm_mask_cvtepi16_storeu_epi8() {
         let a = _mm_set1_epi16(8);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_mm512_mask_cvtusepi16_storeu_epi8() {
+    fn test_mm512_mask_cvtusepi16_storeu_epi8() {
         let a = _mm512_set1_epi16(i16::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtusepi16_storeu_epi8(
-            &mut r as *mut _ as *mut i8,
-            0b11111111_11111111_11111111_11111111,
-            a,
-        );
+        unsafe {
+            _mm512_mask_cvtusepi16_storeu_epi8(
+                &mut r as *mut _ as *mut i8,
+                0b11111111_11111111_11111111_11111111,
+                a,
+            );
+        }
         let e = _mm256_set1_epi8(u8::MAX as i8);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi16_storeu_epi8() {
+    fn test_mm256_mask_cvtusepi16_storeu_epi8() {
         let a = _mm256_set1_epi16(i16::MAX);
         let mut r = _mm_undefined_si128();
-        _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        unsafe {
+            _mm256_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        }
         let e = _mm_set1_epi8(u8::MAX as i8);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi16_storeu_epi8() {
+    fn test_mm_mask_cvtusepi16_storeu_epi8() {
         let a = _mm_set1_epi16(i16::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtusepi16_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
             0, 0, 0, 0,
-            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, 
+            u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
             u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
         );
         assert_eq_m128i(r, e);
diff --git a/crates/core_arch/src/x86/avx512cd.rs b/crates/core_arch/src/x86/avx512cd.rs
index 78735fcc90..4082433e70 100644
--- a/crates/core_arch/src/x86/avx512cd.rs
+++ b/crates/core_arch/src/x86/avx512cd.rs
@@ -11,7 +11,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "avx512cd")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
-pub fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
     _mm512_set1_epi32(k as i32)
 }
 
@@ -22,7 +23,8 @@ pub fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
-pub fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
     _mm256_set1_epi32(k as i32)
 }
 
@@ -33,7 +35,8 @@ pub fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
-pub fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
     _mm_set1_epi32(k as i32)
 }
 
@@ -44,7 +47,8 @@ pub fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
 #[target_feature(enable = "avx512cd")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
-pub fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
     _mm512_set1_epi64(k as i64)
 }
 
@@ -55,7 +59,8 @@ pub fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
-pub fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
     _mm256_set1_epi64x(k as i64)
 }
 
@@ -66,7 +71,8 @@ pub fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
-pub fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
     _mm_set1_epi64x(k as i64)
 }
 
@@ -311,7 +317,8 @@ pub fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512cd")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntd))]
-pub fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
     unsafe { transmute(simd_ctlz(a.as_i32x16())) }
 }
 
@@ -322,7 +329,8 @@ pub fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512cd")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntd))]
-pub fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, zerocount, src.as_i32x16()))
@@ -336,7 +344,8 @@ pub fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512
 #[target_feature(enable = "avx512cd")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntd))]
-pub fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, zerocount, i32x16::ZERO))
@@ -350,7 +359,8 @@ pub fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntd))]
-pub fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
     unsafe { transmute(simd_ctlz(a.as_i32x8())) }
 }
 
@@ -361,7 +371,8 @@ pub fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntd))]
-pub fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, zerocount, src.as_i32x8()))
@@ -375,7 +386,8 @@ pub fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntd))]
-pub fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, zerocount, i32x8::ZERO))
@@ -389,7 +401,8 @@ pub fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntd))]
-pub fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
     unsafe { transmute(simd_ctlz(a.as_i32x4())) }
 }
 
@@ -400,7 +413,8 @@ pub fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntd))]
-pub fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, zerocount, src.as_i32x4()))
@@ -414,7 +428,8 @@ pub fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntd))]
-pub fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, zerocount, i32x4::ZERO))
@@ -428,7 +443,8 @@ pub fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512cd")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntq))]
-pub fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
     unsafe { transmute(simd_ctlz(a.as_i64x8())) }
 }
 
@@ -439,7 +455,8 @@ pub fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512cd")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntq))]
-pub fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, zerocount, src.as_i64x8()))
@@ -453,7 +470,8 @@ pub fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i
 #[target_feature(enable = "avx512cd")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntq))]
-pub fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, zerocount, i64x8::ZERO))
@@ -467,7 +485,8 @@ pub fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntq))]
-pub fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
     unsafe { transmute(simd_ctlz(a.as_i64x4())) }
 }
 
@@ -478,7 +497,8 @@ pub fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntq))]
-pub fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, zerocount, src.as_i64x4()))
@@ -492,7 +512,8 @@ pub fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntq))]
-pub fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, zerocount, i64x4::ZERO))
@@ -506,7 +527,8 @@ pub fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntq))]
-pub fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
     unsafe { transmute(simd_ctlz(a.as_i64x2())) }
 }
 
@@ -517,7 +539,8 @@ pub fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntq))]
-pub fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, zerocount, src.as_i64x2()))
@@ -531,7 +554,8 @@ pub fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512cd,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vplzcntq))]
-pub fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, zerocount, i64x2::ZERO))
@@ -557,12 +581,13 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
 
     use crate::core_arch::x86::*;
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_broadcastmw_epi32() {
+    const fn test_mm512_broadcastmw_epi32() {
         let a: __mmask16 = 2;
         let r = _mm512_broadcastmw_epi32(a);
         let e = _mm512_set1_epi32(2);
@@ -570,7 +595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_broadcastmw_epi32() {
+    const fn test_mm256_broadcastmw_epi32() {
         let a: __mmask16 = 2;
         let r = _mm256_broadcastmw_epi32(a);
         let e = _mm256_set1_epi32(2);
@@ -578,7 +603,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_broadcastmw_epi32() {
+    const fn test_mm_broadcastmw_epi32() {
         let a: __mmask16 = 2;
         let r = _mm_broadcastmw_epi32(a);
         let e = _mm_set1_epi32(2);
@@ -586,7 +611,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_broadcastmb_epi64() {
+    const fn test_mm512_broadcastmb_epi64() {
         let a: __mmask8 = 2;
         let r = _mm512_broadcastmb_epi64(a);
         let e = _mm512_set1_epi64(2);
@@ -594,7 +619,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_broadcastmb_epi64() {
+    const fn test_mm256_broadcastmb_epi64() {
         let a: __mmask8 = 2;
         let r = _mm256_broadcastmb_epi64(a);
         let e = _mm256_set1_epi64x(2);
@@ -602,7 +627,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_broadcastmb_epi64() {
+    const fn test_mm_broadcastmb_epi64() {
         let a: __mmask8 = 2;
         let r = _mm_broadcastmb_epi64(a);
         let e = _mm_set1_epi64x(2);
@@ -610,7 +635,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_conflict_epi32() {
+    fn test_mm512_conflict_epi32() {
         let a = _mm512_set1_epi32(1);
         let r = _mm512_conflict_epi32(a);
         let e = _mm512_set_epi32(
@@ -695,7 +720,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_mask_conflict_epi32() {
+    fn test_mm512_mask_conflict_epi32() {
         let a = _mm512_set1_epi32(1);
         let r = _mm512_mask_conflict_epi32(a, 0, a);
         assert_eq_m512i(r, a);
@@ -782,7 +807,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_maskz_conflict_epi32() {
+    fn test_mm512_maskz_conflict_epi32() {
         let a = _mm512_set1_epi32(1);
         let r = _mm512_maskz_conflict_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -869,7 +894,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_conflict_epi32() {
+    fn test_mm256_conflict_epi32() {
         let a = _mm256_set1_epi32(1);
         let r = _mm256_conflict_epi32(a);
         let e = _mm256_set_epi32(
@@ -886,7 +911,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_mask_conflict_epi32() {
+    fn test_mm256_mask_conflict_epi32() {
         let a = _mm256_set1_epi32(1);
         let r = _mm256_mask_conflict_epi32(a, 0, a);
         assert_eq_m256i(r, a);
@@ -905,7 +930,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_maskz_conflict_epi32() {
+    fn test_mm256_maskz_conflict_epi32() {
         let a = _mm256_set1_epi32(1);
         let r = _mm256_maskz_conflict_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -924,7 +949,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_conflict_epi32() {
+    fn test_mm_conflict_epi32() {
         let a = _mm_set1_epi32(1);
         let r = _mm_conflict_epi32(a);
         let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
@@ -932,7 +957,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_mask_conflict_epi32() {
+    fn test_mm_mask_conflict_epi32() {
         let a = _mm_set1_epi32(1);
         let r = _mm_mask_conflict_epi32(a, 0, a);
         assert_eq_m128i(r, a);
@@ -942,7 +967,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_maskz_conflict_epi32() {
+    fn test_mm_maskz_conflict_epi32() {
         let a = _mm_set1_epi32(1);
         let r = _mm_maskz_conflict_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -952,7 +977,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_conflict_epi64() {
+    fn test_mm512_conflict_epi64() {
         let a = _mm512_set1_epi64(1);
         let r = _mm512_conflict_epi64(a);
         let e = _mm512_set_epi64(
@@ -969,7 +994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_mask_conflict_epi64() {
+    fn test_mm512_mask_conflict_epi64() {
         let a = _mm512_set1_epi64(1);
         let r = _mm512_mask_conflict_epi64(a, 0, a);
         assert_eq_m512i(r, a);
@@ -988,7 +1013,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_maskz_conflict_epi64() {
+    fn test_mm512_maskz_conflict_epi64() {
         let a = _mm512_set1_epi64(1);
         let r = _mm512_maskz_conflict_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -1007,7 +1032,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_conflict_epi64() {
+    fn test_mm256_conflict_epi64() {
         let a = _mm256_set1_epi64x(1);
         let r = _mm256_conflict_epi64(a);
         let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
@@ -1015,7 +1040,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_mask_conflict_epi64() {
+    fn test_mm256_mask_conflict_epi64() {
         let a = _mm256_set1_epi64x(1);
         let r = _mm256_mask_conflict_epi64(a, 0, a);
         assert_eq_m256i(r, a);
@@ -1025,7 +1050,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_maskz_conflict_epi64() {
+    fn test_mm256_maskz_conflict_epi64() {
         let a = _mm256_set1_epi64x(1);
         let r = _mm256_maskz_conflict_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -1035,7 +1060,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_conflict_epi64() {
+    fn test_mm_conflict_epi64() {
         let a = _mm_set1_epi64x(1);
         let r = _mm_conflict_epi64(a);
         let e = _mm_set_epi64x(1 << 0, 0);
@@ -1043,7 +1068,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_mask_conflict_epi64() {
+    fn test_mm_mask_conflict_epi64() {
         let a = _mm_set1_epi64x(1);
         let r = _mm_mask_conflict_epi64(a, 0, a);
         assert_eq_m128i(r, a);
@@ -1053,7 +1078,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_maskz_conflict_epi64() {
+    fn test_mm_maskz_conflict_epi64() {
         let a = _mm_set1_epi64x(1);
         let r = _mm_maskz_conflict_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -1063,7 +1088,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_lzcnt_epi32() {
+    const fn test_mm512_lzcnt_epi32() {
         let a = _mm512_set1_epi32(1);
         let r = _mm512_lzcnt_epi32(a);
         let e = _mm512_set1_epi32(31);
@@ -1071,7 +1096,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_mask_lzcnt_epi32() {
+    const fn test_mm512_mask_lzcnt_epi32() {
         let a = _mm512_set1_epi32(1);
         let r = _mm512_mask_lzcnt_epi32(a, 0, a);
         assert_eq_m512i(r, a);
@@ -1081,7 +1106,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_maskz_lzcnt_epi32() {
+    const fn test_mm512_maskz_lzcnt_epi32() {
         let a = _mm512_set1_epi32(2);
         let r = _mm512_maskz_lzcnt_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -1091,7 +1116,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_lzcnt_epi32() {
+    const fn test_mm256_lzcnt_epi32() {
         let a = _mm256_set1_epi32(1);
         let r = _mm256_lzcnt_epi32(a);
         let e = _mm256_set1_epi32(31);
@@ -1099,7 +1124,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_mask_lzcnt_epi32() {
+    const fn test_mm256_mask_lzcnt_epi32() {
         let a = _mm256_set1_epi32(1);
         let r = _mm256_mask_lzcnt_epi32(a, 0, a);
         assert_eq_m256i(r, a);
@@ -1109,7 +1134,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_maskz_lzcnt_epi32() {
+    const fn test_mm256_maskz_lzcnt_epi32() {
         let a = _mm256_set1_epi32(1);
         let r = _mm256_maskz_lzcnt_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -1119,7 +1144,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_lzcnt_epi32() {
+    const fn test_mm_lzcnt_epi32() {
         let a = _mm_set1_epi32(1);
         let r = _mm_lzcnt_epi32(a);
         let e = _mm_set1_epi32(31);
@@ -1127,7 +1152,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_mask_lzcnt_epi32() {
+    const fn test_mm_mask_lzcnt_epi32() {
         let a = _mm_set1_epi32(1);
         let r = _mm_mask_lzcnt_epi32(a, 0, a);
         assert_eq_m128i(r, a);
@@ -1137,7 +1162,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_maskz_lzcnt_epi32() {
+    const fn test_mm_maskz_lzcnt_epi32() {
         let a = _mm_set1_epi32(1);
         let r = _mm_maskz_lzcnt_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -1147,7 +1172,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_lzcnt_epi64() {
+    const fn test_mm512_lzcnt_epi64() {
         let a = _mm512_set1_epi64(1);
         let r = _mm512_lzcnt_epi64(a);
         let e = _mm512_set1_epi64(63);
@@ -1155,7 +1180,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_mask_lzcnt_epi64() {
+    const fn test_mm512_mask_lzcnt_epi64() {
         let a = _mm512_set1_epi64(1);
         let r = _mm512_mask_lzcnt_epi64(a, 0, a);
         assert_eq_m512i(r, a);
@@ -1165,7 +1190,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd")]
-    unsafe fn test_mm512_maskz_lzcnt_epi64() {
+    const fn test_mm512_maskz_lzcnt_epi64() {
         let a = _mm512_set1_epi64(2);
         let r = _mm512_maskz_lzcnt_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -1175,7 +1200,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_lzcnt_epi64() {
+    const fn test_mm256_lzcnt_epi64() {
         let a = _mm256_set1_epi64x(1);
         let r = _mm256_lzcnt_epi64(a);
         let e = _mm256_set1_epi64x(63);
@@ -1183,7 +1208,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_mask_lzcnt_epi64() {
+    const fn test_mm256_mask_lzcnt_epi64() {
         let a = _mm256_set1_epi64x(1);
         let r = _mm256_mask_lzcnt_epi64(a, 0, a);
         assert_eq_m256i(r, a);
@@ -1193,7 +1218,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm256_maskz_lzcnt_epi64() {
+    const fn test_mm256_maskz_lzcnt_epi64() {
         let a = _mm256_set1_epi64x(1);
         let r = _mm256_maskz_lzcnt_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -1203,7 +1228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_lzcnt_epi64() {
+    const fn test_mm_lzcnt_epi64() {
         let a = _mm_set1_epi64x(1);
         let r = _mm_lzcnt_epi64(a);
         let e = _mm_set1_epi64x(63);
@@ -1211,7 +1236,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_mask_lzcnt_epi64() {
+    const fn test_mm_mask_lzcnt_epi64() {
         let a = _mm_set1_epi64x(1);
         let r = _mm_mask_lzcnt_epi64(a, 0, a);
         assert_eq_m128i(r, a);
@@ -1221,7 +1246,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512cd,avx512vl")]
-    unsafe fn test_mm_maskz_lzcnt_epi64() {
+    const fn test_mm_maskz_lzcnt_epi64() {
         let a = _mm_set1_epi64x(1);
         let r = _mm_maskz_lzcnt_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
diff --git a/crates/core_arch/src/x86/avx512dq.rs b/crates/core_arch/src/x86/avx512dq.rs
index afeb548a55..0b322c8b83 100644
--- a/crates/core_arch/src/x86/avx512dq.rs
+++ b/crates/core_arch/src/x86/avx512dq.rs
@@ -15,7 +15,8 @@ use crate::{
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let and = _mm_and_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, and, src.as_f64x2()))
@@ -30,7 +31,8 @@ pub fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let and = _mm_and_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, and, f64x2::ZERO))
@@ -46,7 +48,8 @@ pub fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let and = _mm256_and_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, and, src.as_f64x4()))
@@ -61,7 +64,8 @@ pub fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) ->
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let and = _mm256_and_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, and, f64x4::ZERO))
@@ -76,7 +80,8 @@ pub fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandp))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
 }
 
@@ -89,7 +94,8 @@ pub fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let and = _mm512_and_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, and, src.as_f64x8()))
@@ -104,7 +110,8 @@ pub fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) ->
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let and = _mm512_and_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, and, f64x8::ZERO))
@@ -120,7 +127,8 @@ pub fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let and = _mm_and_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, and, src.as_f32x4()))
@@ -135,7 +143,8 @@ pub fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let and = _mm_and_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, and, f32x4::ZERO))
@@ -151,7 +160,8 @@ pub fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let and = _mm256_and_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, and, src.as_f32x8()))
@@ -166,7 +176,8 @@ pub fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let and = _mm256_and_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, and, f32x8::ZERO))
@@ -181,7 +192,8 @@ pub fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
     unsafe {
         transmute(simd_and(
             transmute::<_, u32x16>(a),
@@ -199,7 +211,8 @@ pub fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let and = _mm512_and_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, and, src.as_f32x16()))
@@ -214,7 +227,8 @@ pub fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let and = _mm512_and_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, and, f32x16::ZERO))
@@ -232,7 +246,8 @@ pub fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandnpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let andnot = _mm_andnot_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
@@ -248,7 +263,8 @@ pub fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) ->
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandnpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let andnot = _mm_andnot_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, andnot, f64x2::ZERO))
@@ -264,7 +280,8 @@ pub fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandnpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let andnot = _mm256_andnot_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
@@ -280,7 +297,8 @@ pub fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d)
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandnpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let andnot = _mm256_andnot_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, andnot, f64x4::ZERO))
@@ -295,7 +313,8 @@ pub fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandnp))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) }
 }
 
@@ -308,7 +327,8 @@ pub fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandnpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let andnot = _mm512_andnot_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
@@ -324,7 +344,8 @@ pub fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d)
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandnpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let andnot = _mm512_andnot_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, andnot, f64x8::ZERO))
@@ -340,7 +361,8 @@ pub fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let andnot = _mm_andnot_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
@@ -356,7 +378,8 @@ pub fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let andnot = _mm_andnot_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, andnot, f32x4::ZERO))
@@ -372,7 +395,8 @@ pub fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let andnot = _mm256_andnot_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
@@ -388,7 +412,8 @@ pub fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) ->
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let andnot = _mm256_andnot_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, andnot, f32x8::ZERO))
@@ -403,7 +428,8 @@ pub fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
     unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) }
 }
 
@@ -416,7 +442,8 @@ pub fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let andnot = _mm512_andnot_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
@@ -432,7 +459,8 @@ pub fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) ->
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vandnps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let andnot = _mm512_andnot_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, andnot, f32x16::ZERO))
@@ -450,7 +478,8 @@ pub fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let or = _mm_or_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, or, src.as_f64x2()))
@@ -465,7 +494,8 @@ pub fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m1
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let or = _mm_or_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, or, f64x2::ZERO))
@@ -481,7 +511,8 @@ pub fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let or = _mm256_or_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, or, src.as_f64x4()))
@@ -496,7 +527,8 @@ pub fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> _
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let or = _mm256_or_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, or, f64x4::ZERO))
@@ -511,7 +543,8 @@ pub fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vorp))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
 }
 
@@ -524,7 +557,8 @@ pub fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let or = _mm512_or_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, or, src.as_f64x8()))
@@ -539,7 +573,8 @@ pub fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> _
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let or = _mm512_or_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, or, f64x8::ZERO))
@@ -555,7 +590,8 @@ pub fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let or = _mm_or_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, or, src.as_f32x4()))
@@ -570,7 +606,8 @@ pub fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let or = _mm_or_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, or, f32x4::ZERO))
@@ -586,7 +623,8 @@ pub fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let or = _mm256_or_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, or, src.as_f32x8()))
@@ -601,7 +639,8 @@ pub fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m2
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let or = _mm256_or_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, or, f32x8::ZERO))
@@ -616,7 +655,8 @@ pub fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
     unsafe {
         transmute(simd_or(
             transmute::<_, u32x16>(a),
@@ -634,7 +674,8 @@ pub fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let or = _mm512_or_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, or, src.as_f32x16()))
@@ -649,7 +690,8 @@ pub fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let or = _mm512_or_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, or, f32x16::ZERO))
@@ -667,7 +709,8 @@ pub fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vxorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let xor = _mm_xor_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
@@ -682,7 +725,8 @@ pub fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vxorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let xor = _mm_xor_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, xor, f64x2::ZERO))
@@ -698,7 +742,8 @@ pub fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vxorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let xor = _mm256_xor_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
@@ -713,7 +758,8 @@ pub fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) ->
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vxorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let xor = _mm256_xor_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, xor, f64x4::ZERO))
@@ -728,7 +774,8 @@ pub fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vxorp))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
 }
 
@@ -741,7 +788,8 @@ pub fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vxorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let xor = _mm512_xor_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
@@ -756,7 +804,8 @@ pub fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) ->
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vxorpd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let xor = _mm512_xor_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, xor, f64x8::ZERO))
@@ -772,7 +821,8 @@ pub fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let xor = _mm_xor_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
@@ -787,7 +837,8 @@ pub fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let xor = _mm_xor_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, xor, f32x4::ZERO))
@@ -803,7 +854,8 @@ pub fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let xor = _mm256_xor_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
@@ -818,7 +870,8 @@ pub fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let xor = _mm256_xor_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, xor, f32x8::ZERO))
@@ -833,7 +886,8 @@ pub fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
     unsafe {
         transmute(simd_xor(
             transmute::<_, u32x16>(a),
@@ -851,7 +905,8 @@ pub fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let xor = _mm512_xor_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
@@ -866,7 +921,8 @@ pub fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vxorps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let xor = _mm512_xor_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, xor, f32x16::ZERO))
@@ -882,7 +938,8 @@ pub fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
     unsafe {
         let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
         transmute(b)
@@ -897,7 +954,8 @@ pub fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vbroadcastf32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
     unsafe {
         let b = _mm256_broadcast_f32x2(a).as_f32x8();
         transmute(simd_select_bitmask(k, b, src.as_f32x8()))
@@ -912,7 +970,8 @@ pub fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m25
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vbroadcastf32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
     unsafe {
         let b = _mm256_broadcast_f32x2(a).as_f32x8();
         transmute(simd_select_bitmask(k, b, f32x8::ZERO))
@@ -926,7 +985,8 @@ pub fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
     unsafe {
         let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
         transmute(b)
@@ -941,7 +1001,8 @@ pub fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vbroadcastf32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
     unsafe {
         let b = _mm512_broadcast_f32x2(a).as_f32x16();
         transmute(simd_select_bitmask(k, b, src.as_f32x16()))
@@ -956,7 +1017,8 @@ pub fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m5
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vbroadcastf32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
     unsafe {
         let b = _mm512_broadcast_f32x2(a).as_f32x16();
         transmute(simd_select_bitmask(k, b, f32x16::ZERO))
@@ -970,7 +1032,8 @@ pub fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
     unsafe {
         let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
         transmute(b)
@@ -984,7 +1047,8 @@ pub fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
     unsafe {
         let b = _mm512_broadcast_f32x8(a).as_f32x16();
         transmute(simd_select_bitmask(k, b, src.as_f32x16()))
@@ -998,7 +1062,8 @@ pub fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m5
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
     unsafe {
         let b = _mm512_broadcast_f32x8(a).as_f32x16();
         transmute(simd_select_bitmask(k, b, f32x16::ZERO))
@@ -1012,7 +1077,8 @@ pub fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
     unsafe {
         let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
         transmute(b)
@@ -1026,7 +1092,8 @@ pub fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
     unsafe {
         let b = _mm256_broadcast_f64x2(a).as_f64x4();
         transmute(simd_select_bitmask(k, b, src.as_f64x4()))
@@ -1040,7 +1107,8 @@ pub fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
     unsafe {
         let b = _mm256_broadcast_f64x2(a).as_f64x4();
         transmute(simd_select_bitmask(k, b, f64x4::ZERO))
@@ -1054,7 +1122,8 @@ pub fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
     unsafe {
         let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
         transmute(b)
@@ -1068,7 +1137,8 @@ pub fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
     unsafe {
         let b = _mm512_broadcast_f64x2(a).as_f64x8();
         transmute(simd_select_bitmask(k, b, src.as_f64x8()))
@@ -1082,7 +1152,8 @@ pub fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
     unsafe {
         let b = _mm512_broadcast_f64x2(a).as_f64x8();
         transmute(simd_select_bitmask(k, b, f64x8::ZERO))
@@ -1095,7 +1166,8 @@ pub fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i32x4();
         let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
@@ -1111,7 +1183,8 @@ pub fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vbroadcasti32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let b = _mm_broadcast_i32x2(a).as_i32x4();
         transmute(simd_select_bitmask(k, b, src.as_i32x4()))
@@ -1126,7 +1199,8 @@ pub fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vbroadcasti32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let b = _mm_broadcast_i32x2(a).as_i32x4();
         transmute(simd_select_bitmask(k, b, i32x4::ZERO))
@@ -1139,7 +1213,8 @@ pub fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_i32x4();
         let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
@@ -1155,7 +1230,8 @@ pub fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vbroadcasti32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let b = _mm256_broadcast_i32x2(a).as_i32x8();
         transmute(simd_select_bitmask(k, b, src.as_i32x8()))
@@ -1170,7 +1246,8 @@ pub fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vbroadcasti32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let b = _mm256_broadcast_i32x2(a).as_i32x8();
         transmute(simd_select_bitmask(k, b, i32x8::ZERO))
@@ -1183,7 +1260,8 @@ pub fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
     unsafe {
         let a = a.as_i32x4();
         let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
@@ -1199,7 +1277,8 @@ pub fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vbroadcasti32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let b = _mm512_broadcast_i32x2(a).as_i32x16();
         transmute(simd_select_bitmask(k, b, src.as_i32x16()))
@@ -1214,7 +1293,8 @@ pub fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vbroadcasti32x2))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let b = _mm512_broadcast_i32x2(a).as_i32x16();
         transmute(simd_select_bitmask(k, b, i32x16::ZERO))
@@ -1227,7 +1307,8 @@ pub fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
     unsafe {
         let a = a.as_i32x8();
         let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
@@ -1242,7 +1323,8 @@ pub fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
     unsafe {
         let b = _mm512_broadcast_i32x8(a).as_i32x16();
         transmute(simd_select_bitmask(k, b, src.as_i32x16()))
@@ -1256,7 +1338,8 @@ pub fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
     unsafe {
         let b = _mm512_broadcast_i32x8(a).as_i32x16();
         transmute(simd_select_bitmask(k, b, i32x16::ZERO))
@@ -1269,7 +1352,8 @@ pub fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_i64x2();
         let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
@@ -1284,7 +1368,8 @@ pub fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let b = _mm256_broadcast_i64x2(a).as_i64x4();
         transmute(simd_select_bitmask(k, b, src.as_i64x4()))
@@ -1298,7 +1383,8 @@ pub fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let b = _mm256_broadcast_i64x2(a).as_i64x4();
         transmute(simd_select_bitmask(k, b, i64x4::ZERO))
@@ -1311,7 +1397,8 @@ pub fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
     unsafe {
         let a = a.as_i64x2();
         let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
@@ -1326,7 +1413,8 @@ pub fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let b = _mm512_broadcast_i64x2(a).as_i64x8();
         transmute(simd_select_bitmask(k, b, src.as_i64x8()))
@@ -1340,7 +1428,8 @@ pub fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let b = _mm512_broadcast_i64x2(a).as_i64x8();
         transmute(simd_select_bitmask(k, b, i64x8::ZERO))
@@ -1357,7 +1446,8 @@ pub fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         match IMM8 & 1 {
@@ -1377,7 +1467,12 @@ pub fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
 #[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m512) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(
+    src: __m256,
+    k: __mmask8,
+    a: __m512,
+) -> __m256 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm512_extractf32x8_ps::<IMM8>(a);
@@ -1395,7 +1490,8 @@ pub fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(src: __m256, k: __mmask8, a:
 #[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm512_extractf32x8_ps::<IMM8>(a);
@@ -1411,7 +1507,8 @@ pub fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) ->
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         match IMM8 & 1 {
@@ -1431,7 +1528,8 @@ pub fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
 #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
     src: __m128d,
     k: __mmask8,
     a: __m256d,
@@ -1453,7 +1551,8 @@ pub fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm256_extractf64x2_pd::<IMM8>(a);
@@ -1469,7 +1568,8 @@ pub fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) ->
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         match IMM8 & 3 {
@@ -1491,7 +1591,8 @@ pub fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
 #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
     src: __m128d,
     k: __mmask8,
     a: __m512d,
@@ -1513,7 +1614,8 @@ pub fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
@@ -1529,7 +1631,8 @@ pub fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) ->
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let a = a.as_i32x16();
@@ -1550,7 +1653,8 @@ pub fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
 #[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m512i,
@@ -1571,7 +1675,8 @@ pub fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
@@ -1587,7 +1692,8 @@ pub fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i)
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let a = a.as_i64x4();
@@ -1607,7 +1713,8 @@ pub fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
 #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m256i,
@@ -1628,7 +1735,8 @@ pub fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
@@ -1644,7 +1752,8 @@ pub fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i)
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let a = a.as_i64x8();
@@ -1666,7 +1775,8 @@ pub fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
 #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m512i,
@@ -1687,7 +1797,8 @@ pub fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
@@ -1705,7 +1816,8 @@ pub fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i)
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm512_castps256_ps512(b);
@@ -1738,7 +1850,8 @@ pub fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
 #[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_insertf32x8<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_insertf32x8<const IMM8: i32>(
     src: __m512,
     k: __mmask16,
     a: __m512,
@@ -1761,7 +1874,12 @@ pub fn _mm512_mask_insertf32x8<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_insertf32x8<const IMM8: i32>(k: __mmask16, a: __m512, b: __m256) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_insertf32x8<const IMM8: i32>(
+    k: __mmask16,
+    a: __m512,
+    b: __m256,
+) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16();
@@ -1777,7 +1895,8 @@ pub fn _mm512_maskz_insertf32x8<const IMM8: i32>(k: __mmask16, a: __m512, b: __m
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm256_castpd128_pd256(b);
@@ -1798,7 +1917,8 @@ pub fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
 #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_insertf64x2<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_insertf64x2<const IMM8: i32>(
     src: __m256d,
     k: __mmask8,
     a: __m256d,
@@ -1821,7 +1941,12 @@ pub fn _mm256_mask_insertf64x2<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_insertf64x2<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256d,
+    b: __m128d,
+) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4();
@@ -1837,7 +1962,8 @@ pub fn _mm256_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let b = _mm512_castpd128_pd512(b);
@@ -1860,7 +1986,8 @@ pub fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
 #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
 #[rustc_legacy_const_generics(4)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_insertf64x2<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_insertf64x2<const IMM8: i32>(
     src: __m512d,
     k: __mmask8,
     a: __m512d,
@@ -1883,7 +2010,12 @@ pub fn _mm512_mask_insertf64x2<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_insertf64x2<const IMM8: i32>(
+    k: __mmask8,
+    a: __m512d,
+    b: __m128d,
+) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8();
@@ -1899,7 +2031,8 @@ pub fn _mm512_maskz_insertf64x2<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let a = a.as_i32x16();
@@ -1934,7 +2067,8 @@ pub fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
 #[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_inserti32x8<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_inserti32x8<const IMM8: i32>(
     src: __m512i,
     k: __mmask16,
     a: __m512i,
@@ -1957,7 +2091,12 @@ pub fn _mm512_mask_inserti32x8<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_inserti32x8<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_inserti32x8<const IMM8: i32>(
+    k: __mmask16,
+    a: __m512i,
+    b: __m256i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16();
@@ -1973,7 +2112,8 @@ pub fn _mm512_maskz_inserti32x8<const IMM8: i32>(k: __mmask16, a: __m512i, b: __
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let a = a.as_i64x4();
@@ -1995,7 +2135,8 @@ pub fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
 #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_inserti64x2<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_inserti64x2<const IMM8: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -2018,7 +2159,12 @@ pub fn _mm256_mask_inserti64x2<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_inserti64x2<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m128i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4();
@@ -2034,7 +2180,8 @@ pub fn _mm256_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let a = a.as_i64x8();
@@ -2058,7 +2205,8 @@ pub fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
 #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
 #[rustc_legacy_const_generics(4)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_inserti64x2<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_inserti64x2<const IMM8: i32>(
     src: __m512i,
     k: __mmask8,
     a: __m512i,
@@ -2081,7 +2229,12 @@ pub fn _mm512_mask_inserti64x2<const IMM8: i32>(
 #[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
 #[rustc_legacy_const_generics(3)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_inserti64x2<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_inserti64x2<const IMM8: i32>(
+    k: __mmask8,
+    a: __m512i,
+    b: __m128i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8();
@@ -4333,7 +4486,8 @@ pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmullq))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -4346,7 +4500,8 @@ pub fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmullq))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let b = _mm_mullo_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, b, src.as_i64x2()))
@@ -4362,7 +4517,8 @@ pub fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmullq))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let b = _mm_mullo_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, b, i64x2::ZERO))
@@ -4377,7 +4533,8 @@ pub fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmullq))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -4390,7 +4547,8 @@ pub fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmullq))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let b = _mm256_mullo_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, b, src.as_i64x4()))
@@ -4406,7 +4564,8 @@ pub fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmullq))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let b = _mm256_mullo_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, b, i64x4::ZERO))
@@ -4421,7 +4580,8 @@ pub fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vpmullq))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -4434,7 +4594,8 @@ pub fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vpmullq))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let b = _mm512_mullo_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, b, src.as_i64x8()))
@@ -4450,7 +4611,8 @@ pub fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vpmullq))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let b = _mm512_mullo_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, b, i64x8::ZERO))
@@ -4465,7 +4627,8 @@ pub fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _cvtmask8_u32(a: __mmask8) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _cvtmask8_u32(a: __mmask8) -> u32 {
     a as u32
 }
 
@@ -4475,7 +4638,8 @@ pub fn _cvtmask8_u32(a: __mmask8) -> u32 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _cvtu32_mask8(a: u32) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _cvtu32_mask8(a: u32) -> __mmask8 {
     a as __mmask8
 }
 
@@ -4485,8 +4649,9 @@ pub fn _cvtu32_mask8(a: u32) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
-    a + b
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+    a.wrapping_add(b)
 }
 
 /// Add 8-bit masks a and b, and store the result in dst.
@@ -4495,8 +4660,9 @@ pub fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
-    a + b
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
+    a.wrapping_add(b)
 }
 
 /// Bitwise AND of 8-bit masks a and b, and store the result in dst.
@@ -4505,7 +4671,8 @@ pub fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
     a & b
 }
 
@@ -4515,7 +4682,8 @@ pub fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
     _knot_mask8(a) & b
 }
 
@@ -4525,7 +4693,8 @@ pub fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _knot_mask8(a: __mmask8) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _knot_mask8(a: __mmask8) -> __mmask8 {
     a ^ 0b11111111
 }
 
@@ -4535,7 +4704,8 @@ pub fn _knot_mask8(a: __mmask8) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
     a | b
 }
 
@@ -4545,7 +4715,8 @@ pub fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
     _knot_mask8(_kxor_mask8(a, b))
 }
 
@@ -4555,7 +4726,8 @@ pub fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
     a ^ b
 }
 
@@ -4566,7 +4738,8 @@ pub fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 {
     let tmp = _kor_mask8(a, b);
     *all_ones = (tmp == 0xff) as u8;
     (tmp == 0) as u8
@@ -4579,7 +4752,8 @@ pub unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) ->
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
     (_kor_mask8(a, b) == 0xff) as u8
 }
 
@@ -4590,7 +4764,8 @@ pub fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
     (_kor_mask8(a, b) == 0) as u8
 }
 
@@ -4601,7 +4776,8 @@ pub fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
     a.unbounded_shl(COUNT)
 }
 
@@ -4612,7 +4788,8 @@ pub fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
 #[target_feature(enable = "avx512dq")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
     a.unbounded_shr(COUNT)
 }
 
@@ -4624,7 +4801,8 @@ pub fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 {
     *and_not = (_kandn_mask16(a, b) == 0) as u8;
     (_kand_mask16(a, b) == 0) as u8
 }
@@ -4637,7 +4815,8 @@ pub unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) ->
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 {
     *and_not = (_kandn_mask8(a, b) == 0) as u8;
     (_kand_mask8(a, b) == 0) as u8
 }
@@ -4649,7 +4828,8 @@ pub unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
     (_kandn_mask16(a, b) == 0) as u8
 }
 
@@ -4660,7 +4840,8 @@ pub fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
     (_kandn_mask8(a, b) == 0) as u8
 }
 
@@ -4671,7 +4852,8 @@ pub fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
     (_kand_mask16(a, b) == 0) as u8
 }
 
@@ -4682,7 +4864,8 @@ pub fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
     (_kand_mask8(a, b) == 0) as u8
 }
 
@@ -4692,7 +4875,8 @@ pub fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 {
     *mem_addr
 }
 
@@ -4702,7 +4886,8 @@ pub unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) {
     *mem_addr = a;
 }
 
@@ -4713,7 +4898,8 @@ pub unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_movepi32_mask(a: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movepi32_mask(a: __m128i) -> __mmask8 {
     let zero = _mm_setzero_si128();
     _mm_cmplt_epi32_mask(a, zero)
 }
@@ -4725,7 +4911,8 @@ pub fn _mm_movepi32_mask(a: __m128i) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 {
     let zero = _mm256_setzero_si256();
     _mm256_cmplt_epi32_mask(a, zero)
 }
@@ -4737,7 +4924,8 @@ pub fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 {
     let zero = _mm512_setzero_si512();
     _mm512_cmplt_epi32_mask(a, zero)
 }
@@ -4749,7 +4937,8 @@ pub fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_movepi64_mask(a: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movepi64_mask(a: __m128i) -> __mmask8 {
     let zero = _mm_setzero_si128();
     _mm_cmplt_epi64_mask(a, zero)
 }
@@ -4761,7 +4950,8 @@ pub fn _mm_movepi64_mask(a: __m128i) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 {
     let zero = _mm256_setzero_si256();
     _mm256_cmplt_epi64_mask(a, zero)
 }
@@ -4773,7 +4963,8 @@ pub fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512dq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 {
     let zero = _mm512_setzero_si512();
     _mm512_cmplt_epi64_mask(a, zero)
 }
@@ -4786,7 +4977,8 @@ pub fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmovm2d))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_movm_epi32(k: __mmask8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movm_epi32(k: __mmask8) -> __m128i {
     let ones = _mm_set1_epi32(-1);
     _mm_maskz_mov_epi32(k, ones)
 }
@@ -4799,7 +4991,8 @@ pub fn _mm_movm_epi32(k: __mmask8) -> __m128i {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmovm2d))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_movm_epi32(k: __mmask8) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movm_epi32(k: __mmask8) -> __m256i {
     let ones = _mm256_set1_epi32(-1);
     _mm256_maskz_mov_epi32(k, ones)
 }
@@ -4812,7 +5005,8 @@ pub fn _mm256_movm_epi32(k: __mmask8) -> __m256i {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vpmovm2d))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_movm_epi32(k: __mmask16) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movm_epi32(k: __mmask16) -> __m512i {
     let ones = _mm512_set1_epi32(-1);
     _mm512_maskz_mov_epi32(k, ones)
 }
@@ -4825,7 +5019,8 @@ pub fn _mm512_movm_epi32(k: __mmask16) -> __m512i {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmovm2q))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm_movm_epi64(k: __mmask8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movm_epi64(k: __mmask8) -> __m128i {
     let ones = _mm_set1_epi64x(-1);
     _mm_maskz_mov_epi64(k, ones)
 }
@@ -4838,7 +5033,8 @@ pub fn _mm_movm_epi64(k: __mmask8) -> __m128i {
 #[target_feature(enable = "avx512dq,avx512vl")]
 #[cfg_attr(test, assert_instr(vpmovm2q))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_movm_epi64(k: __mmask8) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_movm_epi64(k: __mmask8) -> __m256i {
     let ones = _mm256_set1_epi64x(-1);
     _mm256_maskz_mov_epi64(k, ones)
 }
@@ -4851,7 +5047,8 @@ pub fn _mm256_movm_epi64(k: __mmask8) -> __m256i {
 #[target_feature(enable = "avx512dq")]
 #[cfg_attr(test, assert_instr(vpmovm2q))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_movm_epi64(k: __mmask8) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movm_epi64(k: __mmask8) -> __m512i {
     let ones = _mm512_set1_epi64(-1);
     _mm512_maskz_mov_epi64(k, ones)
 }
@@ -6668,7 +6865,7 @@ pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm_mask_fpclass_pd_mask::<IMM8>(0xff, a)
+    unsafe { vfpclasspd_128(a.as_f64x2(), IMM8) }
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6692,10 +6889,7 @@ pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1))
-    }
+    _mm_fpclass_pd_mask::<IMM8>(a) & k1
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6719,7 +6913,7 @@ pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a)
+    unsafe { vfpclasspd_256(a.as_f64x4(), IMM8) }
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6743,10 +6937,7 @@ pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1))
-    }
+    _mm256_fpclass_pd_mask::<IMM8>(a) & k1
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6770,7 +6961,7 @@ pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a)
+    unsafe { vfpclasspd_512(a.as_f64x8(), IMM8) }
 }
 
 /// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
@@ -6794,10 +6985,7 @@ pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1))
-    }
+    _mm512_fpclass_pd_mask::<IMM8>(a) & k1
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -6821,7 +7009,7 @@ pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm_mask_fpclass_ps_mask::<IMM8>(0xff, a)
+    unsafe { vfpclassps_128(a.as_f32x4(), IMM8) }
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -6845,10 +7033,7 @@ pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1))
-    }
+    _mm_fpclass_ps_mask::<IMM8>(a) & k1
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -6872,7 +7057,7 @@ pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a)
+    unsafe { vfpclassps_256(a.as_f32x8(), IMM8) }
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -6896,10 +7081,7 @@ pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1))
-    }
+    _mm256_fpclass_ps_mask::<IMM8>(a) & k1
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -6923,7 +7105,7 @@ pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
     static_assert_uimm_bits!(IMM8, 8);
-    _mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a)
+    unsafe { vfpclassps_512(a.as_f32x16(), IMM8) }
 }
 
 /// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
@@ -6947,10 +7129,7 @@ pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1))
-    }
+    _mm512_fpclass_ps_mask::<IMM8>(a) & k1
 }
 
 /// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
@@ -7180,19 +7359,19 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.mask.reduce.ss"]
     fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
 
-    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"]
-    fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
-    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"]
-    fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8;
-    #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"]
-    fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8;
+    #[link_name = "llvm.x86.avx512.fpclass.pd.128"]
+    fn vfpclasspd_128(a: f64x2, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512.fpclass.pd.256"]
+    fn vfpclasspd_256(a: f64x4, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512.fpclass.pd.512"]
+    fn vfpclasspd_512(a: f64x8, imm8: i32) -> __mmask8;
 
-    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"]
-    fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
-    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"]
-    fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8;
-    #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"]
-    fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16;
+    #[link_name = "llvm.x86.avx512.fpclass.ps.128"]
+    fn vfpclassps_128(a: f32x4, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512.fpclass.ps.256"]
+    fn vfpclassps_256(a: f32x8, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512.fpclass.ps.512"]
+    fn vfpclassps_512(a: f32x16, imm8: i32) -> __mmask16;
 
     #[link_name = "llvm.x86.avx512.mask.fpclass.sd"]
     fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
@@ -7203,30 +7382,29 @@ unsafe extern "C" {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::core_arch::assert_eq_const as assert_eq;
+    use crate::core_arch::x86::*;
 
     use stdarch_test::simd_test;
 
-    use crate::core_arch::x86::*;
-    use crate::mem::transmute;
-
-    const OPRND1_64: f64 = unsafe { transmute(0x3333333333333333_u64) };
-    const OPRND2_64: f64 = unsafe { transmute(0x5555555555555555_u64) };
+    const OPRND1_64: f64 = f64::from_bits(0x3333333333333333);
+    const OPRND2_64: f64 = f64::from_bits(0x5555555555555555);
 
-    const AND_64: f64 = unsafe { transmute(0x1111111111111111_u64) };
-    const ANDN_64: f64 = unsafe { transmute(0x4444444444444444_u64) };
-    const OR_64: f64 = unsafe { transmute(0x7777777777777777_u64) };
-    const XOR_64: f64 = unsafe { transmute(0x6666666666666666_u64) };
+    const AND_64: f64 = f64::from_bits(0x1111111111111111);
+    const ANDN_64: f64 = f64::from_bits(0x4444444444444444);
+    const OR_64: f64 = f64::from_bits(0x7777777777777777);
+    const XOR_64: f64 = f64::from_bits(0x6666666666666666);
 
-    const OPRND1_32: f32 = unsafe { transmute(0x33333333_u32) };
-    const OPRND2_32: f32 = unsafe { transmute(0x55555555_u32) };
+    const OPRND1_32: f32 = f32::from_bits(0x33333333);
+    const OPRND2_32: f32 = f32::from_bits(0x55555555);
 
-    const AND_32: f32 = unsafe { transmute(0x11111111_u32) };
-    const ANDN_32: f32 = unsafe { transmute(0x44444444_u32) };
-    const OR_32: f32 = unsafe { transmute(0x77777777_u32) };
-    const XOR_32: f32 = unsafe { transmute(0x66666666_u32) };
+    const AND_32: f32 = f32::from_bits(0x11111111);
+    const ANDN_32: f32 = f32::from_bits(0x44444444);
+    const OR_32: f32 = f32::from_bits(0x77777777);
+    const XOR_32: f32 = f32::from_bits(0x66666666);
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_and_pd() {
+    const fn test_mm_mask_and_pd() {
         let a = _mm_set1_pd(OPRND1_64);
         let b = _mm_set1_pd(OPRND2_64);
         let src = _mm_set_pd(1., 2.);
@@ -7236,7 +7414,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_and_pd() {
+    const fn test_mm_maskz_and_pd() {
         let a = _mm_set1_pd(OPRND1_64);
         let b = _mm_set1_pd(OPRND2_64);
         let r = _mm_maskz_and_pd(0b01, a, b);
@@ -7245,7 +7423,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_and_pd() {
+    const fn test_mm256_mask_and_pd() {
         let a = _mm256_set1_pd(OPRND1_64);
         let b = _mm256_set1_pd(OPRND2_64);
         let src = _mm256_set_pd(1., 2., 3., 4.);
@@ -7255,7 +7433,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_and_pd() {
+    const fn test_mm256_maskz_and_pd() {
         let a = _mm256_set1_pd(OPRND1_64);
         let b = _mm256_set1_pd(OPRND2_64);
         let r = _mm256_maskz_and_pd(0b0101, a, b);
@@ -7264,7 +7442,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_and_pd() {
+    const fn test_mm512_and_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let r = _mm512_and_pd(a, b);
@@ -7273,7 +7451,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_and_pd() {
+    const fn test_mm512_mask_and_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -7283,7 +7461,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_and_pd() {
+    const fn test_mm512_maskz_and_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let r = _mm512_maskz_and_pd(0b01010101, a, b);
@@ -7292,7 +7470,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_and_ps() {
+    const fn test_mm_mask_and_ps() {
         let a = _mm_set1_ps(OPRND1_32);
         let b = _mm_set1_ps(OPRND2_32);
         let src = _mm_set_ps(1., 2., 3., 4.);
@@ -7302,7 +7480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_and_ps() {
+    const fn test_mm_maskz_and_ps() {
         let a = _mm_set1_ps(OPRND1_32);
         let b = _mm_set1_ps(OPRND2_32);
         let r = _mm_maskz_and_ps(0b0101, a, b);
@@ -7311,7 +7489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_and_ps() {
+    const fn test_mm256_mask_and_ps() {
         let a = _mm256_set1_ps(OPRND1_32);
         let b = _mm256_set1_ps(OPRND2_32);
         let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -7321,7 +7499,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_and_ps() {
+    const fn test_mm256_maskz_and_ps() {
         let a = _mm256_set1_ps(OPRND1_32);
         let b = _mm256_set1_ps(OPRND2_32);
         let r = _mm256_maskz_and_ps(0b01010101, a, b);
@@ -7330,7 +7508,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_and_ps() {
+    const fn test_mm512_and_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let r = _mm512_and_ps(a, b);
@@ -7339,7 +7517,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_and_ps() {
+    const fn test_mm512_mask_and_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let src = _mm512_set_ps(
@@ -7354,7 +7532,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_and_ps() {
+    const fn test_mm512_maskz_and_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let r = _mm512_maskz_and_ps(0b0101010101010101, a, b);
@@ -7366,7 +7544,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_andnot_pd() {
+    const fn test_mm_mask_andnot_pd() {
         let a = _mm_set1_pd(OPRND1_64);
         let b = _mm_set1_pd(OPRND2_64);
         let src = _mm_set_pd(1., 2.);
@@ -7376,7 +7554,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_andnot_pd() {
+    const fn test_mm_maskz_andnot_pd() {
         let a = _mm_set1_pd(OPRND1_64);
         let b = _mm_set1_pd(OPRND2_64);
         let r = _mm_maskz_andnot_pd(0b01, a, b);
@@ -7385,7 +7563,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_andnot_pd() {
+    const fn test_mm256_mask_andnot_pd() {
         let a = _mm256_set1_pd(OPRND1_64);
         let b = _mm256_set1_pd(OPRND2_64);
         let src = _mm256_set_pd(1., 2., 3., 4.);
@@ -7395,7 +7573,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_andnot_pd() {
+    const fn test_mm256_maskz_andnot_pd() {
         let a = _mm256_set1_pd(OPRND1_64);
         let b = _mm256_set1_pd(OPRND2_64);
         let r = _mm256_maskz_andnot_pd(0b0101, a, b);
@@ -7404,7 +7582,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_andnot_pd() {
+    const fn test_mm512_andnot_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let r = _mm512_andnot_pd(a, b);
@@ -7413,7 +7591,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_andnot_pd() {
+    const fn test_mm512_mask_andnot_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -7423,7 +7601,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_andnot_pd() {
+    const fn test_mm512_maskz_andnot_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let r = _mm512_maskz_andnot_pd(0b01010101, a, b);
@@ -7432,7 +7610,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_andnot_ps() {
+    const fn test_mm_mask_andnot_ps() {
         let a = _mm_set1_ps(OPRND1_32);
         let b = _mm_set1_ps(OPRND2_32);
         let src = _mm_set_ps(1., 2., 3., 4.);
@@ -7442,7 +7620,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_andnot_ps() {
+    const fn test_mm_maskz_andnot_ps() {
         let a = _mm_set1_ps(OPRND1_32);
         let b = _mm_set1_ps(OPRND2_32);
         let r = _mm_maskz_andnot_ps(0b0101, a, b);
@@ -7451,7 +7629,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_andnot_ps() {
+    const fn test_mm256_mask_andnot_ps() {
         let a = _mm256_set1_ps(OPRND1_32);
         let b = _mm256_set1_ps(OPRND2_32);
         let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -7461,7 +7639,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_andnot_ps() {
+    const fn test_mm256_maskz_andnot_ps() {
         let a = _mm256_set1_ps(OPRND1_32);
         let b = _mm256_set1_ps(OPRND2_32);
         let r = _mm256_maskz_andnot_ps(0b01010101, a, b);
@@ -7470,7 +7648,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_andnot_ps() {
+    const fn test_mm512_andnot_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let r = _mm512_andnot_ps(a, b);
@@ -7479,7 +7657,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_andnot_ps() {
+    const fn test_mm512_mask_andnot_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let src = _mm512_set_ps(
@@ -7494,7 +7672,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_andnot_ps() {
+    const fn test_mm512_maskz_andnot_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b);
@@ -7506,7 +7684,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_or_pd() {
+    const fn test_mm_mask_or_pd() {
         let a = _mm_set1_pd(OPRND1_64);
         let b = _mm_set1_pd(OPRND2_64);
         let src = _mm_set_pd(1., 2.);
@@ -7516,7 +7694,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_or_pd() {
+    const fn test_mm_maskz_or_pd() {
         let a = _mm_set1_pd(OPRND1_64);
         let b = _mm_set1_pd(OPRND2_64);
         let r = _mm_maskz_or_pd(0b01, a, b);
@@ -7525,7 +7703,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_or_pd() {
+    const fn test_mm256_mask_or_pd() {
         let a = _mm256_set1_pd(OPRND1_64);
         let b = _mm256_set1_pd(OPRND2_64);
         let src = _mm256_set_pd(1., 2., 3., 4.);
@@ -7535,7 +7713,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_or_pd() {
+    const fn test_mm256_maskz_or_pd() {
         let a = _mm256_set1_pd(OPRND1_64);
         let b = _mm256_set1_pd(OPRND2_64);
         let r = _mm256_maskz_or_pd(0b0101, a, b);
@@ -7544,7 +7722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_or_pd() {
+    const fn test_mm512_or_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let r = _mm512_or_pd(a, b);
@@ -7553,7 +7731,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_or_pd() {
+    const fn test_mm512_mask_or_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -7563,7 +7741,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_or_pd() {
+    const fn test_mm512_maskz_or_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let r = _mm512_maskz_or_pd(0b01010101, a, b);
@@ -7572,7 +7750,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_or_ps() {
+    const fn test_mm_mask_or_ps() {
         let a = _mm_set1_ps(OPRND1_32);
         let b = _mm_set1_ps(OPRND2_32);
         let src = _mm_set_ps(1., 2., 3., 4.);
@@ -7582,7 +7760,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_or_ps() {
+    const fn test_mm_maskz_or_ps() {
         let a = _mm_set1_ps(OPRND1_32);
         let b = _mm_set1_ps(OPRND2_32);
         let r = _mm_maskz_or_ps(0b0101, a, b);
@@ -7591,7 +7769,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_or_ps() {
+    const fn test_mm256_mask_or_ps() {
         let a = _mm256_set1_ps(OPRND1_32);
         let b = _mm256_set1_ps(OPRND2_32);
         let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -7601,7 +7779,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_or_ps() {
+    const fn test_mm256_maskz_or_ps() {
         let a = _mm256_set1_ps(OPRND1_32);
         let b = _mm256_set1_ps(OPRND2_32);
         let r = _mm256_maskz_or_ps(0b01010101, a, b);
@@ -7610,7 +7788,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_or_ps() {
+    const fn test_mm512_or_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let r = _mm512_or_ps(a, b);
@@ -7619,7 +7797,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_or_ps() {
+    const fn test_mm512_mask_or_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let src = _mm512_set_ps(
@@ -7634,7 +7812,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_or_ps() {
+    const fn test_mm512_maskz_or_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let r = _mm512_maskz_or_ps(0b0101010101010101, a, b);
@@ -7645,7 +7823,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_xor_pd() {
+    const fn test_mm_mask_xor_pd() {
         let a = _mm_set1_pd(OPRND1_64);
         let b = _mm_set1_pd(OPRND2_64);
         let src = _mm_set_pd(1., 2.);
@@ -7655,7 +7833,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_xor_pd() {
+    const fn test_mm_maskz_xor_pd() {
         let a = _mm_set1_pd(OPRND1_64);
         let b = _mm_set1_pd(OPRND2_64);
         let r = _mm_maskz_xor_pd(0b01, a, b);
@@ -7664,7 +7842,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_xor_pd() {
+    const fn test_mm256_mask_xor_pd() {
         let a = _mm256_set1_pd(OPRND1_64);
         let b = _mm256_set1_pd(OPRND2_64);
         let src = _mm256_set_pd(1., 2., 3., 4.);
@@ -7674,7 +7852,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_xor_pd() {
+    const fn test_mm256_maskz_xor_pd() {
         let a = _mm256_set1_pd(OPRND1_64);
         let b = _mm256_set1_pd(OPRND2_64);
         let r = _mm256_maskz_xor_pd(0b0101, a, b);
@@ -7683,7 +7861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_xor_pd() {
+    const fn test_mm512_xor_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let r = _mm512_xor_pd(a, b);
@@ -7692,7 +7870,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_xor_pd() {
+    const fn test_mm512_mask_xor_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -7702,7 +7880,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_xor_pd() {
+    const fn test_mm512_maskz_xor_pd() {
         let a = _mm512_set1_pd(OPRND1_64);
         let b = _mm512_set1_pd(OPRND2_64);
         let r = _mm512_maskz_xor_pd(0b01010101, a, b);
@@ -7711,7 +7889,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_xor_ps() {
+    const fn test_mm_mask_xor_ps() {
         let a = _mm_set1_ps(OPRND1_32);
         let b = _mm_set1_ps(OPRND2_32);
         let src = _mm_set_ps(1., 2., 3., 4.);
@@ -7721,7 +7899,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_xor_ps() {
+    const fn test_mm_maskz_xor_ps() {
         let a = _mm_set1_ps(OPRND1_32);
         let b = _mm_set1_ps(OPRND2_32);
         let r = _mm_maskz_xor_ps(0b0101, a, b);
@@ -7730,7 +7908,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_xor_ps() {
+    const fn test_mm256_mask_xor_ps() {
         let a = _mm256_set1_ps(OPRND1_32);
         let b = _mm256_set1_ps(OPRND2_32);
         let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -7740,7 +7918,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_xor_ps() {
+    const fn test_mm256_maskz_xor_ps() {
         let a = _mm256_set1_ps(OPRND1_32);
         let b = _mm256_set1_ps(OPRND2_32);
         let r = _mm256_maskz_xor_ps(0b01010101, a, b);
@@ -7749,7 +7927,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_xor_ps() {
+    const fn test_mm512_xor_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let r = _mm512_xor_ps(a, b);
@@ -7758,7 +7936,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_xor_ps() {
+    const fn test_mm512_mask_xor_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let src = _mm512_set_ps(
@@ -7773,7 +7951,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_xor_ps() {
+    const fn test_mm512_maskz_xor_ps() {
         let a = _mm512_set1_ps(OPRND1_32);
         let b = _mm512_set1_ps(OPRND2_32);
         let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b);
@@ -7785,7 +7963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_broadcast_f32x2() {
+    const fn test_mm256_broadcast_f32x2() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_broadcast_f32x2(a);
         let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.);
@@ -7793,7 +7971,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_broadcast_f32x2() {
+    const fn test_mm256_mask_broadcast_f32x2() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.);
         let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a);
@@ -7802,7 +7980,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcast_f32x2() {
+    const fn test_mm256_maskz_broadcast_f32x2() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_maskz_broadcast_f32x2(0b01101001, a);
         let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.);
@@ -7810,7 +7988,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_broadcast_f32x2() {
+    const fn test_mm512_broadcast_f32x2() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm512_broadcast_f32x2(a);
         let e = _mm512_set_ps(
@@ -7820,7 +7998,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_broadcast_f32x2() {
+    const fn test_mm512_mask_broadcast_f32x2() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm512_set_ps(
             5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
@@ -7833,7 +8011,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_broadcast_f32x2() {
+    const fn test_mm512_maskz_broadcast_f32x2() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a);
         let e = _mm512_set_ps(
@@ -7843,7 +8021,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_broadcast_f32x8() {
+    const fn test_mm512_broadcast_f32x8() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_broadcast_f32x8(a);
         let e = _mm512_set_ps(
@@ -7853,7 +8031,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_broadcast_f32x8() {
+    const fn test_mm512_mask_broadcast_f32x8() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_ps(
             9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
@@ -7866,7 +8044,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_broadcast_f32x8() {
+    const fn test_mm512_maskz_broadcast_f32x8() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a);
         let e = _mm512_set_ps(
@@ -7876,7 +8054,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_broadcast_f64x2() {
+    const fn test_mm256_broadcast_f64x2() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm256_broadcast_f64x2(a);
         let e = _mm256_set_pd(1., 2., 1., 2.);
@@ -7884,7 +8062,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_broadcast_f64x2() {
+    const fn test_mm256_mask_broadcast_f64x2() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm256_set_pd(3., 4., 5., 6.);
         let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a);
@@ -7893,7 +8071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcast_f64x2() {
+    const fn test_mm256_maskz_broadcast_f64x2() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm256_maskz_broadcast_f64x2(0b0110, a);
         let e = _mm256_set_pd(0., 2., 1., 0.);
@@ -7901,7 +8079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_broadcast_f64x2() {
+    const fn test_mm512_broadcast_f64x2() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm512_broadcast_f64x2(a);
         let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.);
@@ -7909,7 +8087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_broadcast_f64x2() {
+    const fn test_mm512_mask_broadcast_f64x2() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
         let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a);
@@ -7918,7 +8096,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_broadcast_f64x2() {
+    const fn test_mm512_maskz_broadcast_f64x2() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm512_maskz_broadcast_f64x2(0b01101001, a);
         let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.);
@@ -7926,7 +8104,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_broadcast_i32x2() {
+    const fn test_mm_broadcast_i32x2() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_broadcast_i32x2(a);
         let e = _mm_set_epi32(3, 4, 3, 4);
@@ -7934,7 +8112,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_broadcast_i32x2() {
+    const fn test_mm_mask_broadcast_i32x2() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let b = _mm_set_epi32(5, 6, 7, 8);
         let r = _mm_mask_broadcast_i32x2(b, 0b0110, a);
@@ -7943,7 +8121,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_broadcast_i32x2() {
+    const fn test_mm_maskz_broadcast_i32x2() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_maskz_broadcast_i32x2(0b0110, a);
         let e = _mm_set_epi32(0, 4, 3, 0);
@@ -7951,7 +8129,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_broadcast_i32x2() {
+    const fn test_mm256_broadcast_i32x2() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm256_broadcast_i32x2(a);
         let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4);
@@ -7959,7 +8137,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_broadcast_i32x2() {
+    const fn test_mm256_mask_broadcast_i32x2() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
         let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a);
@@ -7968,7 +8146,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcast_i32x2() {
+    const fn test_mm256_maskz_broadcast_i32x2() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm256_maskz_broadcast_i32x2(0b01101001, a);
         let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4);
@@ -7976,7 +8154,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_broadcast_i32x2() {
+    const fn test_mm512_broadcast_i32x2() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm512_broadcast_i32x2(a);
         let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4);
@@ -7984,7 +8162,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_broadcast_i32x2() {
+    const fn test_mm512_mask_broadcast_i32x2() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
         let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a);
@@ -7993,7 +8171,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_broadcast_i32x2() {
+    const fn test_mm512_maskz_broadcast_i32x2() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a);
         let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0);
@@ -8001,7 +8179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_broadcast_i32x8() {
+    const fn test_mm512_broadcast_i32x8() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_broadcast_i32x8(a);
         let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
@@ -8009,7 +8187,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_broadcast_i32x8() {
+    const fn test_mm512_mask_broadcast_i32x8() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi32(
             9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
@@ -8020,7 +8198,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_broadcast_i32x8() {
+    const fn test_mm512_maskz_broadcast_i32x8() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a);
         let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0);
@@ -8028,7 +8206,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_broadcast_i64x2() {
+    const fn test_mm256_broadcast_i64x2() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm256_broadcast_i64x2(a);
         let e = _mm256_set_epi64x(1, 2, 1, 2);
@@ -8036,7 +8214,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_broadcast_i64x2() {
+    const fn test_mm256_mask_broadcast_i64x2() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm256_set_epi64x(3, 4, 5, 6);
         let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a);
@@ -8045,7 +8223,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcast_i64x2() {
+    const fn test_mm256_maskz_broadcast_i64x2() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm256_maskz_broadcast_i64x2(0b0110, a);
         let e = _mm256_set_epi64x(0, 2, 1, 0);
@@ -8053,7 +8231,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_broadcast_i64x2() {
+    const fn test_mm512_broadcast_i64x2() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm512_broadcast_i64x2(a);
         let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2);
@@ -8061,7 +8239,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_broadcast_i64x2() {
+    const fn test_mm512_mask_broadcast_i64x2() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10);
         let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a);
@@ -8070,7 +8248,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_broadcast_i64x2() {
+    const fn test_mm512_maskz_broadcast_i64x2() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm512_maskz_broadcast_i64x2(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2);
@@ -8078,7 +8256,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_extractf32x8_ps() {
+    const fn test_mm512_extractf32x8_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -8088,7 +8266,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_extractf32x8_ps() {
+    const fn test_mm512_mask_extractf32x8_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -8099,7 +8277,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_extractf32x8_ps() {
+    const fn test_mm512_maskz_extractf32x8_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -8109,7 +8287,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_extractf64x2_pd() {
+    const fn test_mm256_extractf64x2_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_extractf64x2_pd::<1>(a);
         let e = _mm_set_pd(1., 2.);
@@ -8117,7 +8295,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_extractf64x2_pd() {
+    const fn test_mm256_mask_extractf64x2_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm_set_pd(5., 6.);
         let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a);
@@ -8126,7 +8304,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_extractf64x2_pd() {
+    const fn test_mm256_maskz_extractf64x2_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a);
         let e = _mm_set_pd(0., 2.);
@@ -8134,7 +8312,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_extractf64x2_pd() {
+    const fn test_mm512_extractf64x2_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_extractf64x2_pd::<2>(a);
         let e = _mm_set_pd(3., 4.);
@@ -8142,7 +8320,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_extractf64x2_pd() {
+    const fn test_mm512_mask_extractf64x2_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm_set_pd(9., 10.);
         let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a);
@@ -8151,7 +8329,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_extractf64x2_pd() {
+    const fn test_mm512_maskz_extractf64x2_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a);
         let e = _mm_set_pd(0., 4.);
@@ -8159,7 +8337,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_extracti32x8_epi32() {
+    const fn test_mm512_extracti32x8_epi32() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_extracti32x8_epi32::<1>(a);
         let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
@@ -8167,7 +8345,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_extracti32x8_epi32() {
+    const fn test_mm512_mask_extracti32x8_epi32() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a);
@@ -8176,7 +8354,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_extracti32x8_epi32() {
+    const fn test_mm512_maskz_extracti32x8_epi32() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a);
         let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8);
@@ -8184,7 +8362,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_extracti64x2_epi64() {
+    const fn test_mm256_extracti64x2_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_extracti64x2_epi64::<1>(a);
         let e = _mm_set_epi64x(1, 2);
@@ -8192,7 +8370,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_extracti64x2_epi64() {
+    const fn test_mm256_mask_extracti64x2_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm_set_epi64x(5, 6);
         let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a);
@@ -8201,7 +8379,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_extracti64x2_epi64() {
+    const fn test_mm256_maskz_extracti64x2_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a);
         let e = _mm_set_epi64x(0, 2);
@@ -8209,7 +8387,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_extracti64x2_epi64() {
+    const fn test_mm512_extracti64x2_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_extracti64x2_epi64::<2>(a);
         let e = _mm_set_epi64x(3, 4);
@@ -8217,7 +8395,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_extracti64x2_epi64() {
+    const fn test_mm512_mask_extracti64x2_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi64x(9, 10);
         let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a);
@@ -8226,7 +8404,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_extracti64x2_epi64() {
+    const fn test_mm512_maskz_extracti64x2_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a);
         let e = _mm_set_epi64x(0, 4);
@@ -8234,7 +8412,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_insertf32x8() {
+    const fn test_mm512_insertf32x8() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -8247,7 +8425,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_insertf32x8() {
+    const fn test_mm512_mask_insertf32x8() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -8263,7 +8441,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_insertf32x8() {
+    const fn test_mm512_maskz_insertf32x8() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -8276,7 +8454,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_insertf64x2() {
+    const fn test_mm256_insertf64x2() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm_set_pd(5., 6.);
         let r = _mm256_insertf64x2::<1>(a, b);
@@ -8285,7 +8463,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_insertf64x2() {
+    const fn test_mm256_mask_insertf64x2() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm_set_pd(5., 6.);
         let src = _mm256_set_pd(7., 8., 9., 10.);
@@ -8295,7 +8473,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_insertf64x2() {
+    const fn test_mm256_maskz_insertf64x2() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm_set_pd(5., 6.);
         let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b);
@@ -8304,7 +8482,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_insertf64x2() {
+    const fn test_mm512_insertf64x2() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm_set_pd(9., 10.);
         let r = _mm512_insertf64x2::<2>(a, b);
@@ -8313,7 +8491,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_insertf64x2() {
+    const fn test_mm512_mask_insertf64x2() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm_set_pd(9., 10.);
         let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.);
@@ -8323,7 +8501,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_insertf64x2() {
+    const fn test_mm512_maskz_insertf64x2() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm_set_pd(9., 10.);
         let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b);
@@ -8332,7 +8510,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_inserti32x8() {
+    const fn test_mm512_inserti32x8() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_inserti32x8::<1>(a, b);
@@ -8343,7 +8521,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_inserti32x8() {
+    const fn test_mm512_mask_inserti32x8() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
         let src = _mm512_set_epi32(
@@ -8357,7 +8535,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_inserti32x8() {
+    const fn test_mm512_maskz_inserti32x8() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b);
@@ -8366,7 +8544,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_inserti64x2() {
+    const fn test_mm256_inserti64x2() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm_set_epi64x(5, 6);
         let r = _mm256_inserti64x2::<1>(a, b);
@@ -8375,7 +8553,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_inserti64x2() {
+    const fn test_mm256_mask_inserti64x2() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm_set_epi64x(5, 6);
         let src = _mm256_set_epi64x(7, 8, 9, 10);
@@ -8385,7 +8563,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_inserti64x2() {
+    const fn test_mm256_maskz_inserti64x2() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm_set_epi64x(5, 6);
         let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b);
@@ -8394,7 +8572,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_inserti64x2() {
+    const fn test_mm512_inserti64x2() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi64x(9, 10);
         let r = _mm512_inserti64x2::<2>(a, b);
@@ -8403,7 +8581,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_inserti64x2() {
+    const fn test_mm512_mask_inserti64x2() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi64x(9, 10);
         let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18);
@@ -8413,7 +8591,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_inserti64x2() {
+    const fn test_mm512_maskz_inserti64x2() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi64x(9, 10);
         let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b);
@@ -8422,7 +8600,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvt_roundepi64_pd() {
+    fn test_mm512_cvt_roundepi64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -8430,7 +8608,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvt_roundepi64_pd() {
+    fn test_mm512_mask_cvt_roundepi64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -8441,7 +8619,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvt_roundepi64_pd() {
+    fn test_mm512_maskz_cvt_roundepi64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01101001, a,
@@ -8451,7 +8629,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvtepi64_pd() {
+    fn test_mm_cvtepi64_pd() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_cvtepi64_pd(a);
         let e = _mm_set_pd(1., 2.);
@@ -8459,7 +8637,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi64_pd() {
+    fn test_mm_mask_cvtepi64_pd() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_mask_cvtepi64_pd(b, 0b01, a);
@@ -8468,7 +8646,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi64_pd() {
+    fn test_mm_maskz_cvtepi64_pd() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_maskz_cvtepi64_pd(0b01, a);
         let e = _mm_set_pd(0., 2.);
@@ -8476,7 +8654,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvtepi64_pd() {
+    fn test_mm256_cvtepi64_pd() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_cvtepi64_pd(a);
         let e = _mm256_set_pd(1., 2., 3., 4.);
@@ -8484,7 +8662,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi64_pd() {
+    fn test_mm256_mask_cvtepi64_pd() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm256_set_pd(5., 6., 7., 8.);
         let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a);
@@ -8493,7 +8671,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi64_pd() {
+    fn test_mm256_maskz_cvtepi64_pd() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_maskz_cvtepi64_pd(0b0110, a);
         let e = _mm256_set_pd(0., 2., 3., 0.);
@@ -8501,7 +8679,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtepi64_pd() {
+    fn test_mm512_cvtepi64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvtepi64_pd(a);
         let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -8509,7 +8687,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtepi64_pd() {
+    fn test_mm512_mask_cvtepi64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a);
@@ -8518,7 +8696,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtepi64_pd() {
+    fn test_mm512_maskz_cvtepi64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvtepi64_pd(0b01101001, a);
         let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
@@ -8526,7 +8704,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvt_roundepi64_ps() {
+    fn test_mm512_cvt_roundepi64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -8534,7 +8712,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvt_roundepi64_ps() {
+    fn test_mm512_mask_cvt_roundepi64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -8545,7 +8723,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvt_roundepi64_ps() {
+    fn test_mm512_maskz_cvt_roundepi64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01101001, a,
@@ -8555,7 +8733,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvtepi64_ps() {
+    fn test_mm_cvtepi64_ps() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_cvtepi64_ps(a);
         let e = _mm_set_ps(0., 0., 1., 2.);
@@ -8563,7 +8741,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi64_ps() {
+    fn test_mm_mask_cvtepi64_ps() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_ps(3., 4., 5., 6.);
         let r = _mm_mask_cvtepi64_ps(b, 0b01, a);
@@ -8572,7 +8750,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi64_ps() {
+    fn test_mm_maskz_cvtepi64_ps() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_maskz_cvtepi64_ps(0b01, a);
         let e = _mm_set_ps(0., 0., 0., 2.);
@@ -8580,7 +8758,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvtepi64_ps() {
+    fn test_mm256_cvtepi64_ps() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_cvtepi64_ps(a);
         let e = _mm_set_ps(1., 2., 3., 4.);
@@ -8588,7 +8766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi64_ps() {
+    fn test_mm256_mask_cvtepi64_ps() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm_set_ps(5., 6., 7., 8.);
         let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a);
@@ -8597,7 +8775,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi64_ps() {
+    fn test_mm256_maskz_cvtepi64_ps() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_maskz_cvtepi64_ps(0b0110, a);
         let e = _mm_set_ps(0., 2., 3., 0.);
@@ -8605,7 +8783,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtepi64_ps() {
+    fn test_mm512_cvtepi64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvtepi64_ps(a);
         let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -8613,7 +8791,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtepi64_ps() {
+    fn test_mm512_mask_cvtepi64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a);
@@ -8622,7 +8800,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtepi64_ps() {
+    fn test_mm512_maskz_cvtepi64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvtepi64_ps(0b01101001, a);
         let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
@@ -8630,7 +8808,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvt_roundepu64_pd() {
+    fn test_mm512_cvt_roundepu64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -8638,7 +8816,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvt_roundepu64_pd() {
+    fn test_mm512_mask_cvt_roundepu64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -8649,7 +8827,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvt_roundepu64_pd() {
+    fn test_mm512_maskz_cvt_roundepu64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01101001, a,
@@ -8659,7 +8837,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvtepu64_pd() {
+    fn test_mm_cvtepu64_pd() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_cvtepu64_pd(a);
         let e = _mm_set_pd(1., 2.);
@@ -8667,7 +8845,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu64_pd() {
+    fn test_mm_mask_cvtepu64_pd() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_mask_cvtepu64_pd(b, 0b01, a);
@@ -8676,7 +8854,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu64_pd() {
+    fn test_mm_maskz_cvtepu64_pd() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_maskz_cvtepu64_pd(0b01, a);
         let e = _mm_set_pd(0., 2.);
@@ -8684,7 +8862,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvtepu64_pd() {
+    fn test_mm256_cvtepu64_pd() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_cvtepu64_pd(a);
         let e = _mm256_set_pd(1., 2., 3., 4.);
@@ -8692,7 +8870,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu64_pd() {
+    fn test_mm256_mask_cvtepu64_pd() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm256_set_pd(5., 6., 7., 8.);
         let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a);
@@ -8701,7 +8879,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu64_pd() {
+    fn test_mm256_maskz_cvtepu64_pd() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_maskz_cvtepu64_pd(0b0110, a);
         let e = _mm256_set_pd(0., 2., 3., 0.);
@@ -8709,7 +8887,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtepu64_pd() {
+    fn test_mm512_cvtepu64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvtepu64_pd(a);
         let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -8717,7 +8895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtepu64_pd() {
+    fn test_mm512_mask_cvtepu64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a);
@@ -8726,7 +8904,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtepu64_pd() {
+    fn test_mm512_maskz_cvtepu64_pd() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvtepu64_pd(0b01101001, a);
         let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
@@ -8734,7 +8912,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvt_roundepu64_ps() {
+    fn test_mm512_cvt_roundepu64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -8742,7 +8920,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvt_roundepu64_ps() {
+    fn test_mm512_mask_cvt_roundepu64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -8753,7 +8931,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvt_roundepu64_ps() {
+    fn test_mm512_maskz_cvt_roundepu64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01101001, a,
@@ -8763,7 +8941,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvtepu64_ps() {
+    fn test_mm_cvtepu64_ps() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_cvtepu64_ps(a);
         let e = _mm_set_ps(0., 0., 1., 2.);
@@ -8771,7 +8949,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu64_ps() {
+    fn test_mm_mask_cvtepu64_ps() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_ps(3., 4., 5., 6.);
         let r = _mm_mask_cvtepu64_ps(b, 0b01, a);
@@ -8780,7 +8958,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu64_ps() {
+    fn test_mm_maskz_cvtepu64_ps() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_maskz_cvtepu64_ps(0b01, a);
         let e = _mm_set_ps(0., 0., 0., 2.);
@@ -8788,7 +8966,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvtepu64_ps() {
+    fn test_mm256_cvtepu64_ps() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_cvtepu64_ps(a);
         let e = _mm_set_ps(1., 2., 3., 4.);
@@ -8796,7 +8974,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu64_ps() {
+    fn test_mm256_mask_cvtepu64_ps() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm_set_ps(5., 6., 7., 8.);
         let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a);
@@ -8805,7 +8983,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu64_ps() {
+    fn test_mm256_maskz_cvtepu64_ps() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_maskz_cvtepu64_ps(0b0110, a);
         let e = _mm_set_ps(0., 2., 3., 0.);
@@ -8813,7 +8991,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtepu64_ps() {
+    fn test_mm512_cvtepu64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvtepu64_ps(a);
         let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
@@ -8821,7 +8999,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtepu64_ps() {
+    fn test_mm512_mask_cvtepu64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a);
@@ -8830,7 +9008,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtepu64_ps() {
+    fn test_mm512_maskz_cvtepu64_ps() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvtepu64_ps(0b01101001, a);
         let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
@@ -8838,7 +9016,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvt_roundpd_epi64() {
+    fn test_mm512_cvt_roundpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -8846,7 +9024,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvt_roundpd_epi64() {
+    fn test_mm512_mask_cvt_roundpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -8857,7 +9035,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvt_roundpd_epi64() {
+    fn test_mm512_maskz_cvt_roundpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01101001, a,
@@ -8867,7 +9045,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvtpd_epi64() {
+    fn test_mm_cvtpd_epi64() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_cvtpd_epi64(a);
         let e = _mm_set_epi64x(1, 2);
@@ -8875,7 +9053,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvtpd_epi64() {
+    fn test_mm_mask_cvtpd_epi64() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_epi64x(3, 4);
         let r = _mm_mask_cvtpd_epi64(b, 0b01, a);
@@ -8884,7 +9062,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvtpd_epi64() {
+    fn test_mm_maskz_cvtpd_epi64() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_maskz_cvtpd_epi64(0b01, a);
         let e = _mm_set_epi64x(0, 2);
@@ -8892,7 +9070,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvtpd_epi64() {
+    fn test_mm256_cvtpd_epi64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_cvtpd_epi64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -8900,7 +9078,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvtpd_epi64() {
+    fn test_mm256_mask_cvtpd_epi64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a);
@@ -8909,7 +9087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtpd_epi64() {
+    fn test_mm256_maskz_cvtpd_epi64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_maskz_cvtpd_epi64(0b0110, a);
         let e = _mm256_set_epi64x(0, 2, 3, 0);
@@ -8917,7 +9095,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtpd_epi64() {
+    fn test_mm512_cvtpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvtpd_epi64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -8925,7 +9103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtpd_epi64() {
+    fn test_mm512_mask_cvtpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a);
@@ -8934,7 +9112,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtpd_epi64() {
+    fn test_mm512_maskz_cvtpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvtpd_epi64(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -8942,7 +9120,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvt_roundps_epi64() {
+    fn test_mm512_cvt_roundps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -8950,7 +9128,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvt_roundps_epi64() {
+    fn test_mm512_mask_cvt_roundps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -8961,7 +9139,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvt_roundps_epi64() {
+    fn test_mm512_maskz_cvt_roundps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01101001, a,
@@ -8971,7 +9149,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvtps_epi64() {
+    fn test_mm_cvtps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_cvtps_epi64(a);
         let e = _mm_set_epi64x(3, 4);
@@ -8979,7 +9157,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvtps_epi64() {
+    fn test_mm_mask_cvtps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_epi64x(5, 6);
         let r = _mm_mask_cvtps_epi64(b, 0b01, a);
@@ -8988,7 +9166,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvtps_epi64() {
+    fn test_mm_maskz_cvtps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_maskz_cvtps_epi64(0b01, a);
         let e = _mm_set_epi64x(0, 4);
@@ -8996,7 +9174,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvtps_epi64() {
+    fn test_mm256_cvtps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_cvtps_epi64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -9004,7 +9182,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvtps_epi64() {
+    fn test_mm256_mask_cvtps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_mask_cvtps_epi64(b, 0b0110, a);
@@ -9013,7 +9191,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtps_epi64() {
+    fn test_mm256_maskz_cvtps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_maskz_cvtps_epi64(0b0110, a);
         let e = _mm256_set_epi64x(0, 2, 3, 0);
@@ -9021,7 +9199,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtps_epi64() {
+    fn test_mm512_cvtps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvtps_epi64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9029,7 +9207,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtps_epi64() {
+    fn test_mm512_mask_cvtps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a);
@@ -9038,7 +9216,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtps_epi64() {
+    fn test_mm512_maskz_cvtps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvtps_epi64(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9046,7 +9224,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvt_roundpd_epu64() {
+    fn test_mm512_cvt_roundpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9054,7 +9232,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvt_roundpd_epu64() {
+    fn test_mm512_mask_cvt_roundpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -9065,7 +9243,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvt_roundpd_epu64() {
+    fn test_mm512_maskz_cvt_roundpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01101001, a,
@@ -9075,7 +9253,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvtpd_epu64() {
+    fn test_mm_cvtpd_epu64() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_cvtpd_epu64(a);
         let e = _mm_set_epi64x(1, 2);
@@ -9083,7 +9261,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvtpd_epu64() {
+    fn test_mm_mask_cvtpd_epu64() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_epi64x(3, 4);
         let r = _mm_mask_cvtpd_epu64(b, 0b01, a);
@@ -9092,7 +9270,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvtpd_epu64() {
+    fn test_mm_maskz_cvtpd_epu64() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_maskz_cvtpd_epu64(0b01, a);
         let e = _mm_set_epi64x(0, 2);
@@ -9100,7 +9278,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvtpd_epu64() {
+    fn test_mm256_cvtpd_epu64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_cvtpd_epu64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -9108,7 +9286,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvtpd_epu64() {
+    fn test_mm256_mask_cvtpd_epu64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a);
@@ -9117,7 +9295,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtpd_epu64() {
+    fn test_mm256_maskz_cvtpd_epu64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_maskz_cvtpd_epu64(0b0110, a);
         let e = _mm256_set_epi64x(0, 2, 3, 0);
@@ -9125,7 +9303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtpd_epu64() {
+    fn test_mm512_cvtpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvtpd_epu64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9133,7 +9311,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtpd_epu64() {
+    fn test_mm512_mask_cvtpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a);
@@ -9142,7 +9320,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtpd_epu64() {
+    fn test_mm512_maskz_cvtpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvtpd_epu64(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9150,7 +9328,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvt_roundps_epu64() {
+    fn test_mm512_cvt_roundps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9158,7 +9336,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvt_roundps_epu64() {
+    fn test_mm512_mask_cvt_roundps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -9169,7 +9347,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvt_roundps_epu64() {
+    fn test_mm512_maskz_cvt_roundps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01101001, a,
@@ -9179,7 +9357,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvtps_epu64() {
+    fn test_mm_cvtps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_cvtps_epu64(a);
         let e = _mm_set_epi64x(3, 4);
@@ -9187,7 +9365,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvtps_epu64() {
+    fn test_mm_mask_cvtps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_epi64x(5, 6);
         let r = _mm_mask_cvtps_epu64(b, 0b01, a);
@@ -9196,7 +9374,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvtps_epu64() {
+    fn test_mm_maskz_cvtps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_maskz_cvtps_epu64(0b01, a);
         let e = _mm_set_epi64x(0, 4);
@@ -9204,7 +9382,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvtps_epu64() {
+    fn test_mm256_cvtps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_cvtps_epu64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -9212,7 +9390,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvtps_epu64() {
+    fn test_mm256_mask_cvtps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_mask_cvtps_epu64(b, 0b0110, a);
@@ -9221,7 +9399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtps_epu64() {
+    fn test_mm256_maskz_cvtps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_maskz_cvtps_epu64(0b0110, a);
         let e = _mm256_set_epi64x(0, 2, 3, 0);
@@ -9229,7 +9407,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtps_epu64() {
+    fn test_mm512_cvtps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvtps_epu64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9237,7 +9415,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtps_epu64() {
+    fn test_mm512_mask_cvtps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a);
@@ -9246,7 +9424,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtps_epu64() {
+    fn test_mm512_maskz_cvtps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvtps_epu64(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9254,7 +9432,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtt_roundpd_epi64() {
+    fn test_mm512_cvtt_roundpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9262,7 +9440,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtt_roundpd_epi64() {
+    fn test_mm512_mask_cvtt_roundpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
@@ -9271,7 +9449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtt_roundpd_epi64() {
+    fn test_mm512_maskz_cvtt_roundpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9279,7 +9457,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvttpd_epi64() {
+    fn test_mm_cvttpd_epi64() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_cvttpd_epi64(a);
         let e = _mm_set_epi64x(1, 2);
@@ -9287,7 +9465,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvttpd_epi64() {
+    fn test_mm_mask_cvttpd_epi64() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_epi64x(3, 4);
         let r = _mm_mask_cvttpd_epi64(b, 0b01, a);
@@ -9296,7 +9474,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvttpd_epi64() {
+    fn test_mm_maskz_cvttpd_epi64() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_maskz_cvttpd_epi64(0b01, a);
         let e = _mm_set_epi64x(0, 2);
@@ -9304,7 +9482,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvttpd_epi64() {
+    fn test_mm256_cvttpd_epi64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_cvttpd_epi64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -9312,7 +9490,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvttpd_epi64() {
+    fn test_mm256_mask_cvttpd_epi64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a);
@@ -9321,7 +9499,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttpd_epi64() {
+    fn test_mm256_maskz_cvttpd_epi64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_maskz_cvttpd_epi64(0b0110, a);
         let e = _mm256_set_epi64x(0, 2, 3, 0);
@@ -9329,7 +9507,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvttpd_epi64() {
+    fn test_mm512_cvttpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvttpd_epi64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9337,7 +9515,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvttpd_epi64() {
+    fn test_mm512_mask_cvttpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a);
@@ -9346,7 +9524,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvttpd_epi64() {
+    fn test_mm512_maskz_cvttpd_epi64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvttpd_epi64(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9354,7 +9532,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtt_roundps_epi64() {
+    fn test_mm512_cvtt_roundps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9362,7 +9540,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtt_roundps_epi64() {
+    fn test_mm512_mask_cvtt_roundps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
@@ -9371,7 +9549,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtt_roundps_epi64() {
+    fn test_mm512_maskz_cvtt_roundps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9379,7 +9557,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvttps_epi64() {
+    fn test_mm_cvttps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_cvttps_epi64(a);
         let e = _mm_set_epi64x(3, 4);
@@ -9387,7 +9565,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvttps_epi64() {
+    fn test_mm_mask_cvttps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_epi64x(5, 6);
         let r = _mm_mask_cvttps_epi64(b, 0b01, a);
@@ -9396,7 +9574,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvttps_epi64() {
+    fn test_mm_maskz_cvttps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_maskz_cvttps_epi64(0b01, a);
         let e = _mm_set_epi64x(0, 4);
@@ -9404,7 +9582,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvttps_epi64() {
+    fn test_mm256_cvttps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_cvttps_epi64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -9412,7 +9590,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvttps_epi64() {
+    fn test_mm256_mask_cvttps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_mask_cvttps_epi64(b, 0b0110, a);
@@ -9421,7 +9599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttps_epi64() {
+    fn test_mm256_maskz_cvttps_epi64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_maskz_cvttps_epi64(0b0110, a);
         let e = _mm256_set_epi64x(0, 2, 3, 0);
@@ -9429,7 +9607,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvttps_epi64() {
+    fn test_mm512_cvttps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvttps_epi64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9437,7 +9615,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvttps_epi64() {
+    fn test_mm512_mask_cvttps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a);
@@ -9446,7 +9624,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvttps_epi64() {
+    fn test_mm512_maskz_cvttps_epi64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvttps_epi64(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9454,7 +9632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtt_roundpd_epu64() {
+    fn test_mm512_cvtt_roundpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9462,7 +9640,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtt_roundpd_epu64() {
+    fn test_mm512_mask_cvtt_roundpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
@@ -9471,7 +9649,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtt_roundpd_epu64() {
+    fn test_mm512_maskz_cvtt_roundpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9479,7 +9657,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvttpd_epu64() {
+    fn test_mm_cvttpd_epu64() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_cvttpd_epu64(a);
         let e = _mm_set_epi64x(1, 2);
@@ -9487,7 +9665,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvttpd_epu64() {
+    fn test_mm_mask_cvttpd_epu64() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_epi64x(3, 4);
         let r = _mm_mask_cvttpd_epu64(b, 0b01, a);
@@ -9496,7 +9674,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvttpd_epu64() {
+    fn test_mm_maskz_cvttpd_epu64() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_maskz_cvttpd_epu64(0b01, a);
         let e = _mm_set_epi64x(0, 2);
@@ -9504,7 +9682,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvttpd_epu64() {
+    fn test_mm256_cvttpd_epu64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_cvttpd_epu64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -9512,7 +9690,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvttpd_epu64() {
+    fn test_mm256_mask_cvttpd_epu64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a);
@@ -9521,7 +9699,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttpd_epu64() {
+    fn test_mm256_maskz_cvttpd_epu64() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_maskz_cvttpd_epu64(0b0110, a);
         let e = _mm256_set_epi64x(0, 2, 3, 0);
@@ -9529,7 +9707,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvttpd_epu64() {
+    fn test_mm512_cvttpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvttpd_epu64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9537,7 +9715,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvttpd_epu64() {
+    fn test_mm512_mask_cvttpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a);
@@ -9546,7 +9724,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvttpd_epu64() {
+    fn test_mm512_maskz_cvttpd_epu64() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvttpd_epu64(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9554,7 +9732,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvtt_roundps_epu64() {
+    fn test_mm512_cvtt_roundps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9562,7 +9740,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvtt_roundps_epu64() {
+    fn test_mm512_mask_cvtt_roundps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
@@ -9571,7 +9749,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvtt_roundps_epu64() {
+    fn test_mm512_maskz_cvtt_roundps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9579,7 +9757,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_cvttps_epu64() {
+    fn test_mm_cvttps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_cvttps_epu64(a);
         let e = _mm_set_epi64x(3, 4);
@@ -9587,7 +9765,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_cvttps_epu64() {
+    fn test_mm_mask_cvttps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_epi64x(5, 6);
         let r = _mm_mask_cvttps_epu64(b, 0b01, a);
@@ -9596,7 +9774,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_cvttps_epu64() {
+    fn test_mm_maskz_cvttps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_maskz_cvttps_epu64(0b01, a);
         let e = _mm_set_epi64x(0, 4);
@@ -9604,7 +9782,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_cvttps_epu64() {
+    fn test_mm256_cvttps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_cvttps_epu64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -9612,7 +9790,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_cvttps_epu64() {
+    fn test_mm256_mask_cvttps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_mask_cvttps_epu64(b, 0b0110, a);
@@ -9621,7 +9799,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttps_epu64() {
+    fn test_mm256_maskz_cvttps_epu64() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm256_maskz_cvttps_epu64(0b0110, a);
         let e = _mm256_set_epi64x(0, 2, 3, 0);
@@ -9629,7 +9807,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_cvttps_epu64() {
+    fn test_mm512_cvttps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_cvttps_epu64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -9637,7 +9815,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_cvttps_epu64() {
+    fn test_mm512_mask_cvttps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a);
@@ -9646,7 +9824,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_cvttps_epu64() {
+    fn test_mm512_maskz_cvttps_epu64() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_cvttps_epu64(0b01101001, a);
         let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
@@ -9654,7 +9832,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mullo_epi64() {
+    const fn test_mm_mullo_epi64() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_epi64x(3, 4);
         let r = _mm_mullo_epi64(a, b);
@@ -9663,7 +9841,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_mullo_epi64() {
+    const fn test_mm_mask_mullo_epi64() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_epi64x(3, 4);
         let c = _mm_set_epi64x(5, 6);
@@ -9673,7 +9851,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_mullo_epi64() {
+    const fn test_mm_maskz_mullo_epi64() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_epi64x(3, 4);
         let r = _mm_maskz_mullo_epi64(0b01, a, b);
@@ -9682,7 +9860,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mullo_epi64() {
+    const fn test_mm256_mullo_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_mullo_epi64(a, b);
@@ -9691,7 +9869,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_mullo_epi64() {
+    const fn test_mm256_mask_mullo_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let c = _mm256_set_epi64x(9, 10, 11, 12);
@@ -9701,7 +9879,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_mullo_epi64() {
+    const fn test_mm256_maskz_mullo_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm256_set_epi64x(5, 6, 7, 8);
         let r = _mm256_maskz_mullo_epi64(0b0110, a, b);
@@ -9710,7 +9888,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mullo_epi64() {
+    const fn test_mm512_mullo_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mullo_epi64(a, b);
@@ -9719,7 +9897,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_mullo_epi64() {
+    const fn test_mm512_mask_mullo_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
@@ -9729,7 +9907,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_mullo_epi64() {
+    const fn test_mm512_maskz_mullo_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_mullo_epi64(0b01101001, a, b);
@@ -9738,7 +9916,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_cvtmask8_u32() {
+    const fn test_cvtmask8_u32() {
         let a: __mmask8 = 0b01101001;
         let r = _cvtmask8_u32(a);
         let e: u32 = 0b01101001;
@@ -9746,7 +9924,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_cvtu32_mask8() {
+    const fn test_cvtu32_mask8() {
         let a: u32 = 0b01101001;
         let r = _cvtu32_mask8(a);
         let e: __mmask8 = 0b01101001;
@@ -9754,7 +9932,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kadd_mask16() {
+    const fn test_kadd_mask16() {
         let a: __mmask16 = 27549;
         let b: __mmask16 = 23434;
         let r = _kadd_mask16(a, b);
@@ -9763,7 +9941,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kadd_mask8() {
+    const fn test_kadd_mask8() {
         let a: __mmask8 = 98;
         let b: __mmask8 = 117;
         let r = _kadd_mask8(a, b);
@@ -9772,7 +9950,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kand_mask8() {
+    const fn test_kand_mask8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10110011;
         let r = _kand_mask8(a, b);
@@ -9781,7 +9959,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kandn_mask8() {
+    const fn test_kandn_mask8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10110011;
         let r = _kandn_mask8(a, b);
@@ -9790,7 +9968,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_knot_mask8() {
+    const fn test_knot_mask8() {
         let a: __mmask8 = 0b01101001;
         let r = _knot_mask8(a);
         let e: __mmask8 = 0b10010110;
@@ -9798,7 +9976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kor_mask8() {
+    const fn test_kor_mask8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10110011;
         let r = _kor_mask8(a, b);
@@ -9807,7 +9985,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kxnor_mask8() {
+    const fn test_kxnor_mask8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10110011;
         let r = _kxnor_mask8(a, b);
@@ -9816,7 +9994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kxor_mask8() {
+    const fn test_kxor_mask8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10110011;
         let r = _kxor_mask8(a, b);
@@ -9825,17 +10003,17 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kortest_mask8_u8() {
+    const fn test_kortest_mask8_u8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10110110;
         let mut all_ones: u8 = 0;
-        let r = _kortest_mask8_u8(a, b, &mut all_ones);
+        let r = unsafe { _kortest_mask8_u8(a, b, &mut all_ones) };
         assert_eq!(r, 0);
         assert_eq!(all_ones, 1);
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kortestc_mask8_u8() {
+    const fn test_kortestc_mask8_u8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10110110;
         let r = _kortestc_mask8_u8(a, b);
@@ -9843,7 +10021,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kortestz_mask8_u8() {
+    const fn test_kortestz_mask8_u8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10110110;
         let r = _kortestz_mask8_u8(a, b);
@@ -9851,7 +10029,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kshiftli_mask8() {
+    const fn test_kshiftli_mask8() {
         let a: __mmask8 = 0b01101001;
         let r = _kshiftli_mask8::<3>(a);
         let e: __mmask8 = 0b01001000;
@@ -9871,7 +10049,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kshiftri_mask8() {
+    const fn test_kshiftri_mask8() {
         let a: __mmask8 = 0b10101001;
         let r = _kshiftri_mask8::<3>(a);
         let e: __mmask8 = 0b00010101;
@@ -9891,17 +10069,17 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_ktest_mask8_u8() {
+    const fn test_ktest_mask8_u8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10010110;
         let mut and_not: u8 = 0;
-        let r = _ktest_mask8_u8(a, b, &mut and_not);
+        let r = unsafe { _ktest_mask8_u8(a, b, &mut and_not) };
         assert_eq!(r, 1);
         assert_eq!(and_not, 0);
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_ktestc_mask8_u8() {
+    const fn test_ktestc_mask8_u8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10010110;
         let r = _ktestc_mask8_u8(a, b);
@@ -9909,7 +10087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_ktestz_mask8_u8() {
+    const fn test_ktestz_mask8_u8() {
         let a: __mmask8 = 0b01101001;
         let b: __mmask8 = 0b10010110;
         let r = _ktestz_mask8_u8(a, b);
@@ -9917,17 +10095,17 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_ktest_mask16_u8() {
+    const fn test_ktest_mask16_u8() {
         let a: __mmask16 = 0b0110100100111100;
         let b: __mmask16 = 0b1001011011000011;
         let mut and_not: u8 = 0;
-        let r = _ktest_mask16_u8(a, b, &mut and_not);
+        let r = unsafe { _ktest_mask16_u8(a, b, &mut and_not) };
         assert_eq!(r, 1);
         assert_eq!(and_not, 0);
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_ktestc_mask16_u8() {
+    const fn test_ktestc_mask16_u8() {
         let a: __mmask16 = 0b0110100100111100;
         let b: __mmask16 = 0b1001011011000011;
         let r = _ktestc_mask16_u8(a, b);
@@ -9935,7 +10113,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_ktestz_mask16_u8() {
+    const fn test_ktestz_mask16_u8() {
         let a: __mmask16 = 0b0110100100111100;
         let b: __mmask16 = 0b1001011011000011;
         let r = _ktestz_mask16_u8(a, b);
@@ -9943,24 +10121,26 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_load_mask8() {
+    const fn test_load_mask8() {
         let a: __mmask8 = 0b01101001;
-        let r = _load_mask8(&a);
+        let r = unsafe { _load_mask8(&a) };
         let e: __mmask8 = 0b01101001;
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_store_mask8() {
+    const fn test_store_mask8() {
         let a: __mmask8 = 0b01101001;
         let mut r = 0;
-        _store_mask8(&mut r, a);
+        unsafe {
+            _store_mask8(&mut r, a);
+        }
         let e: __mmask8 = 0b01101001;
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_movepi32_mask() {
+    const fn test_mm_movepi32_mask() {
         let a = _mm_set_epi32(0, -2, -3, 4);
         let r = _mm_movepi32_mask(a);
         let e = 0b0110;
@@ -9968,7 +10148,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_movepi32_mask() {
+    const fn test_mm256_movepi32_mask() {
         let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8);
         let r = _mm256_movepi32_mask(a);
         let e = 0b01101001;
@@ -9976,7 +10156,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_movepi32_mask() {
+    const fn test_mm512_movepi32_mask() {
         let a = _mm512_set_epi32(
             0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16,
         );
@@ -9986,7 +10166,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_movepi64_mask() {
+    const fn test_mm_movepi64_mask() {
         let a = _mm_set_epi64x(0, -2);
         let r = _mm_movepi64_mask(a);
         let e = 0b01;
@@ -9994,7 +10174,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_movepi64_mask() {
+    const fn test_mm256_movepi64_mask() {
         let a = _mm256_set_epi64x(0, -2, -3, 4);
         let r = _mm256_movepi64_mask(a);
         let e = 0b0110;
@@ -10002,7 +10182,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_movepi64_mask() {
+    const fn test_mm512_movepi64_mask() {
         let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8);
         let r = _mm512_movepi64_mask(a);
         let e = 0b01101001;
@@ -10010,7 +10190,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_movm_epi32() {
+    const fn test_mm_movm_epi32() {
         let a = 0b0110;
         let r = _mm_movm_epi32(a);
         let e = _mm_set_epi32(0, -1, -1, 0);
@@ -10018,7 +10198,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_movm_epi32() {
+    const fn test_mm256_movm_epi32() {
         let a = 0b01101001;
         let r = _mm256_movm_epi32(a);
         let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1);
@@ -10026,7 +10206,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_movm_epi32() {
+    const fn test_mm512_movm_epi32() {
         let a = 0b0110100100111100;
         let r = _mm512_movm_epi32(a);
         let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0);
@@ -10034,7 +10214,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_movm_epi64() {
+    const fn test_mm_movm_epi64() {
         let a = 0b01;
         let r = _mm_movm_epi64(a);
         let e = _mm_set_epi64x(0, -1);
@@ -10042,7 +10222,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_movm_epi64() {
+    const fn test_mm256_movm_epi64() {
         let a = 0b0110;
         let r = _mm256_movm_epi64(a);
         let e = _mm256_set_epi64x(0, -1, -1, 0);
@@ -10050,7 +10230,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_movm_epi64() {
+    const fn test_mm512_movm_epi64() {
         let a = 0b01101001;
         let r = _mm512_movm_epi64(a);
         let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1);
@@ -10058,7 +10238,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_range_round_pd() {
+    fn test_mm512_range_round_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
         let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
@@ -10067,7 +10247,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_range_round_pd() {
+    fn test_mm512_mask_range_round_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
         let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
@@ -10077,7 +10257,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_range_round_pd() {
+    fn test_mm512_maskz_range_round_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
         let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b);
@@ -10086,7 +10266,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_range_pd() {
+    fn test_mm_range_pd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(2., 1.);
         let r = _mm_range_pd::<0b0101>(a, b);
@@ -10095,7 +10275,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_range_pd() {
+    fn test_mm_mask_range_pd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(2., 1.);
         let c = _mm_set_pd(3., 4.);
@@ -10105,7 +10285,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_range_pd() {
+    fn test_mm_maskz_range_pd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(2., 1.);
         let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b);
@@ -10114,7 +10294,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_range_pd() {
+    fn test_mm256_range_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_pd(2., 1., 4., 3.);
         let r = _mm256_range_pd::<0b0101>(a, b);
@@ -10123,7 +10303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_range_pd() {
+    fn test_mm256_mask_range_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_pd(2., 1., 4., 3.);
         let c = _mm256_set_pd(5., 6., 7., 8.);
@@ -10133,7 +10313,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_range_pd() {
+    fn test_mm256_maskz_range_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_pd(2., 1., 4., 3.);
         let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b);
@@ -10142,7 +10322,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_range_pd() {
+    fn test_mm512_range_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
         let r = _mm512_range_pd::<0b0101>(a, b);
@@ -10151,7 +10331,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_range_pd() {
+    fn test_mm512_mask_range_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
         let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
@@ -10161,7 +10341,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_range_pd() {
+    fn test_mm512_maskz_range_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
         let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b);
@@ -10170,7 +10350,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_range_round_ps() {
+    fn test_mm512_range_round_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -10185,7 +10365,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_range_round_ps() {
+    fn test_mm512_mask_range_round_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -10204,7 +10384,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_range_round_ps() {
+    fn test_mm512_maskz_range_round_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -10219,7 +10399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_range_ps() {
+    fn test_mm_range_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ps(2., 1., 4., 3.);
         let r = _mm_range_ps::<0b0101>(a, b);
@@ -10228,7 +10408,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_range_ps() {
+    fn test_mm_mask_range_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ps(2., 1., 4., 3.);
         let c = _mm_set_ps(5., 6., 7., 8.);
@@ -10238,7 +10418,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_range_ps() {
+    fn test_mm_maskz_range_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ps(2., 1., 4., 3.);
         let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b);
@@ -10247,7 +10427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_range_ps() {
+    fn test_mm256_range_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
         let r = _mm256_range_ps::<0b0101>(a, b);
@@ -10256,7 +10436,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_range_ps() {
+    fn test_mm256_mask_range_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
         let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
@@ -10266,7 +10446,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_range_ps() {
+    fn test_mm256_maskz_range_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
         let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b);
@@ -10275,7 +10455,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_range_ps() {
+    fn test_mm512_range_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -10290,7 +10470,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_range_ps() {
+    fn test_mm512_mask_range_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -10308,7 +10488,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_range_ps() {
+    fn test_mm512_maskz_range_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -10323,7 +10503,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_range_round_sd() {
+    fn test_mm_range_round_sd() {
         let a = _mm_set_sd(1.);
         let b = _mm_set_sd(2.);
         let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
@@ -10332,7 +10512,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_range_round_sd() {
+    fn test_mm_mask_range_round_sd() {
         let a = _mm_set_sd(1.);
         let b = _mm_set_sd(2.);
         let c = _mm_set_sd(3.);
@@ -10342,7 +10522,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_maskz_range_round_sd() {
+    fn test_mm_maskz_range_round_sd() {
         let a = _mm_set_sd(1.);
         let b = _mm_set_sd(2.);
         let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
@@ -10351,7 +10531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_range_sd() {
+    fn test_mm_mask_range_sd() {
         let a = _mm_set_sd(1.);
         let b = _mm_set_sd(2.);
         let c = _mm_set_sd(3.);
@@ -10361,7 +10541,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_maskz_range_sd() {
+    fn test_mm_maskz_range_sd() {
         let a = _mm_set_sd(1.);
         let b = _mm_set_sd(2.);
         let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b);
@@ -10370,7 +10550,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_range_round_ss() {
+    fn test_mm_range_round_ss() {
         let a = _mm_set_ss(1.);
         let b = _mm_set_ss(2.);
         let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b);
@@ -10379,7 +10559,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_range_round_ss() {
+    fn test_mm_mask_range_round_ss() {
         let a = _mm_set_ss(1.);
         let b = _mm_set_ss(2.);
         let c = _mm_set_ss(3.);
@@ -10389,7 +10569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_maskz_range_round_ss() {
+    fn test_mm_maskz_range_round_ss() {
         let a = _mm_set_ss(1.);
         let b = _mm_set_ss(2.);
         let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
@@ -10398,7 +10578,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_range_ss() {
+    fn test_mm_mask_range_ss() {
         let a = _mm_set_ss(1.);
         let b = _mm_set_ss(2.);
         let c = _mm_set_ss(3.);
@@ -10408,7 +10588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_maskz_range_ss() {
+    fn test_mm_maskz_range_ss() {
         let a = _mm_set_ss(1.);
         let b = _mm_set_ss(2.);
         let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b);
@@ -10417,7 +10597,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_reduce_round_pd() {
+    fn test_mm512_reduce_round_pd() {
         let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
         let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
         let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
@@ -10425,7 +10605,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_reduce_round_pd() {
+    fn test_mm512_mask_reduce_round_pd() {
         let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
         let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
         let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
@@ -10436,7 +10616,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_reduce_round_pd() {
+    fn test_mm512_maskz_reduce_round_pd() {
         let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
         let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
             0b01101001, a,
@@ -10446,7 +10626,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_reduce_pd() {
+    fn test_mm_reduce_pd() {
         let a = _mm_set_pd(0.25, 0.50);
         let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
         let e = _mm_set_pd(0.25, 0.);
@@ -10454,7 +10634,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_reduce_pd() {
+    fn test_mm_mask_reduce_pd() {
         let a = _mm_set_pd(0.25, 0.50);
         let src = _mm_set_pd(3., 4.);
         let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a);
@@ -10463,7 +10643,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_reduce_pd() {
+    fn test_mm_maskz_reduce_pd() {
         let a = _mm_set_pd(0.25, 0.50);
         let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a);
         let e = _mm_set_pd(0., 0.);
@@ -10471,7 +10651,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_reduce_pd() {
+    fn test_mm256_reduce_pd() {
         let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
         let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
         let e = _mm256_set_pd(0.25, 0., 0.25, 0.);
@@ -10479,7 +10659,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_pd() {
+    fn test_mm256_mask_reduce_pd() {
         let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
         let src = _mm256_set_pd(3., 4., 5., 6.);
         let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
@@ -10488,7 +10668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_reduce_pd() {
+    fn test_mm256_maskz_reduce_pd() {
         let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
         let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
         let e = _mm256_set_pd(0., 0., 0.25, 0.);
@@ -10496,7 +10676,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_reduce_pd() {
+    fn test_mm512_reduce_pd() {
         let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
         let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
         let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
@@ -10504,7 +10684,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_reduce_pd() {
+    fn test_mm512_mask_reduce_pd() {
         let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
         let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
         let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
@@ -10513,7 +10693,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_reduce_pd() {
+    fn test_mm512_maskz_reduce_pd() {
         let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
         let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
         let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
@@ -10521,7 +10701,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_reduce_round_ps() {
+    fn test_mm512_reduce_round_ps() {
         let a = _mm512_set_ps(
             0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
             4.0,
@@ -10534,7 +10714,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_reduce_round_ps() {
+    fn test_mm512_mask_reduce_round_ps() {
         let a = _mm512_set_ps(
             0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
             4.0,
@@ -10554,7 +10734,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_reduce_round_ps() {
+    fn test_mm512_maskz_reduce_round_ps() {
         let a = _mm512_set_ps(
             0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
             4.0,
@@ -10570,7 +10750,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_reduce_ps() {
+    fn test_mm_reduce_ps() {
         let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
         let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
         let e = _mm_set_ps(0.25, 0., 0.25, 0.);
@@ -10578,7 +10758,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_reduce_ps() {
+    fn test_mm_mask_reduce_ps() {
         let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
         let src = _mm_set_ps(2., 3., 4., 5.);
         let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
@@ -10587,7 +10767,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_maskz_reduce_ps() {
+    fn test_mm_maskz_reduce_ps() {
         let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
         let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
         let e = _mm_set_ps(0., 0., 0.25, 0.);
@@ -10595,7 +10775,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_reduce_ps() {
+    fn test_mm256_reduce_ps() {
         let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
         let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
         let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
@@ -10603,7 +10783,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_ps() {
+    fn test_mm256_mask_reduce_ps() {
         let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
         let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.);
         let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
@@ -10612,7 +10792,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_maskz_reduce_ps() {
+    fn test_mm256_maskz_reduce_ps() {
         let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
         let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
         let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
@@ -10620,7 +10800,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_reduce_ps() {
+    fn test_mm512_reduce_ps() {
         let a = _mm512_set_ps(
             0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
             4.0,
@@ -10633,7 +10813,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_reduce_ps() {
+    fn test_mm512_mask_reduce_ps() {
         let a = _mm512_set_ps(
             0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
             4.0,
@@ -10649,7 +10829,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_maskz_reduce_ps() {
+    fn test_mm512_maskz_reduce_ps() {
         let a = _mm512_set_ps(
             0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
             4.0,
@@ -10662,7 +10842,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_reduce_round_sd() {
+    fn test_mm_reduce_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_sd(0.25);
         let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
@@ -10671,7 +10851,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_reduce_round_sd() {
+    fn test_mm_mask_reduce_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_sd(0.25);
         let c = _mm_set_pd(3., 4.);
@@ -10683,7 +10863,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_maskz_reduce_round_sd() {
+    fn test_mm_maskz_reduce_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_sd(0.25);
         let r =
@@ -10693,7 +10873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_reduce_sd() {
+    fn test_mm_reduce_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_sd(0.25);
         let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
@@ -10702,7 +10882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_reduce_sd() {
+    fn test_mm_mask_reduce_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_sd(0.25);
         let c = _mm_set_pd(3., 4.);
@@ -10712,7 +10892,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_maskz_reduce_sd() {
+    fn test_mm_maskz_reduce_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_sd(0.25);
         let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
@@ -10721,7 +10901,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_reduce_round_ss() {
+    fn test_mm_reduce_round_ss() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ss(0.25);
         let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
@@ -10730,7 +10910,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_reduce_round_ss() {
+    fn test_mm_mask_reduce_round_ss() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ss(0.25);
         let c = _mm_set_ps(5., 6., 7., 8.);
@@ -10742,7 +10922,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_maskz_reduce_round_ss() {
+    fn test_mm_maskz_reduce_round_ss() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ss(0.25);
         let r =
@@ -10752,7 +10932,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_reduce_ss() {
+    fn test_mm_reduce_ss() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ss(0.25);
         let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
@@ -10761,7 +10941,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_reduce_ss() {
+    fn test_mm_mask_reduce_ss() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ss(0.25);
         let c = _mm_set_ps(5., 6., 7., 8.);
@@ -10771,7 +10951,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_maskz_reduce_ss() {
+    fn test_mm_maskz_reduce_ss() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ss(0.25);
         let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
@@ -10780,7 +10960,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_fpclass_pd_mask() {
+    fn test_mm_fpclass_pd_mask() {
         let a = _mm_set_pd(1., f64::INFINITY);
         let r = _mm_fpclass_pd_mask::<0x18>(a);
         let e = 0b01;
@@ -10788,7 +10968,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_fpclass_pd_mask() {
+    fn test_mm_mask_fpclass_pd_mask() {
         let a = _mm_set_pd(1., f64::INFINITY);
         let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a);
         let e = 0b00;
@@ -10796,7 +10976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_fpclass_pd_mask() {
+    fn test_mm256_fpclass_pd_mask() {
         let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
         let r = _mm256_fpclass_pd_mask::<0x18>(a);
         let e = 0b0110;
@@ -10804,7 +10984,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_fpclass_pd_mask() {
+    fn test_mm256_mask_fpclass_pd_mask() {
         let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
         let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a);
         let e = 0b0010;
@@ -10812,7 +10992,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_fpclass_pd_mask() {
+    fn test_mm512_fpclass_pd_mask() {
         let a = _mm512_set_pd(
             1.,
             f64::INFINITY,
@@ -10829,7 +11009,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_fpclass_pd_mask() {
+    fn test_mm512_mask_fpclass_pd_mask() {
         let a = _mm512_set_pd(
             1.,
             f64::INFINITY,
@@ -10846,7 +11026,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_fpclass_ps_mask() {
+    fn test_mm_fpclass_ps_mask() {
         let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
         let r = _mm_fpclass_ps_mask::<0x18>(a);
         let e = 0b0110;
@@ -10854,7 +11034,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm_mask_fpclass_ps_mask() {
+    fn test_mm_mask_fpclass_ps_mask() {
         let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
         let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a);
         let e = 0b0010;
@@ -10862,7 +11042,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_fpclass_ps_mask() {
+    fn test_mm256_fpclass_ps_mask() {
         let a = _mm256_set_ps(
             1.,
             f32::INFINITY,
@@ -10879,7 +11059,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq,avx512vl")]
-    unsafe fn test_mm256_mask_fpclass_ps_mask() {
+    fn test_mm256_mask_fpclass_ps_mask() {
         let a = _mm256_set_ps(
             1.,
             f32::INFINITY,
@@ -10896,7 +11076,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_fpclass_ps_mask() {
+    fn test_mm512_fpclass_ps_mask() {
         let a = _mm512_set_ps(
             1.,
             f32::INFINITY,
@@ -10921,7 +11101,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm512_mask_fpclass_ps_mask() {
+    fn test_mm512_mask_fpclass_ps_mask() {
         let a = _mm512_set_ps(
             1.,
             f32::INFINITY,
@@ -10946,7 +11126,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_fpclass_sd_mask() {
+    fn test_mm_fpclass_sd_mask() {
         let a = _mm_set_pd(1., f64::INFINITY);
         let r = _mm_fpclass_sd_mask::<0x18>(a);
         let e = 0b1;
@@ -10954,7 +11134,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_fpclass_sd_mask() {
+    fn test_mm_mask_fpclass_sd_mask() {
         let a = _mm_set_sd(f64::INFINITY);
         let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a);
         let e = 0b0;
@@ -10962,7 +11142,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_fpclass_ss_mask() {
+    fn test_mm_fpclass_ss_mask() {
         let a = _mm_set_ss(f32::INFINITY);
         let r = _mm_fpclass_ss_mask::<0x18>(a);
         let e = 0b1;
@@ -10970,7 +11150,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512dq")]
-    unsafe fn test_mm_mask_fpclass_ss_mask() {
+    fn test_mm_mask_fpclass_ss_mask() {
         let a = _mm_set_ss(f32::INFINITY);
         let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a);
         let e = 0b0;
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index b60df7dbc9..66ea63b674 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -17,7 +17,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsd))]
-pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_abs_epi32(a: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i32x16();
         let r = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
@@ -34,7 +35,8 @@ pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsd))]
-pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         let abs = _mm512_abs_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
@@ -50,7 +52,8 @@ pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsd))]
-pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         let abs = _mm512_abs_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
@@ -64,7 +67,8 @@ pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsd))]
-pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let abs = _mm256_abs_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
@@ -78,7 +82,8 @@ pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsd))]
-pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let abs = _mm256_abs_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
@@ -92,7 +97,8 @@ pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsd))]
-pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let abs = _mm_abs_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
@@ -106,7 +112,8 @@ pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsd))]
-pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let abs = _mm_abs_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
@@ -120,7 +127,8 @@ pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsq))]
-pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_abs_epi64(a: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i64x8();
         let r = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
@@ -135,7 +143,8 @@ pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsq))]
-pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         let abs = _mm512_abs_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
@@ -149,7 +158,8 @@ pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsq))]
-pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         let abs = _mm512_abs_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
@@ -163,7 +173,8 @@ pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsq))]
-pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_abs_epi64(a: __m256i) -> __m256i {
     unsafe {
         let a = a.as_i64x4();
         let r = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
@@ -178,7 +189,8 @@ pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsq))]
-pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let abs = _mm256_abs_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
@@ -192,7 +204,8 @@ pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsq))]
-pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let abs = _mm256_abs_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
@@ -206,7 +219,8 @@ pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsq))]
-pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_abs_epi64(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i64x2();
         let r = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
@@ -221,7 +235,8 @@ pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsq))]
-pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let abs = _mm_abs_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
@@ -235,7 +250,8 @@ pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpabsq))]
-pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let abs = _mm_abs_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
@@ -249,7 +265,8 @@ pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandd))]
-pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_abs_ps(v2: __m512) -> __m512 {
     unsafe { simd_fabs(v2) }
 }
 
@@ -260,7 +277,8 @@ pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandd))]
-pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
 }
 
@@ -271,7 +289,8 @@ pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_abs_pd(v2: __m512d) -> __m512d {
     unsafe { simd_fabs(v2) }
 }
 
@@ -282,7 +301,8 @@ pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
 }
 
@@ -293,7 +313,8 @@ pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
-pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         let mov = a.as_i32x16();
         transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
@@ -307,7 +328,8 @@ pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
-pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         let mov = a.as_i32x16();
         transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
@@ -321,7 +343,8 @@ pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
-pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let mov = a.as_i32x8();
         transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
@@ -335,7 +358,8 @@ pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
-pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let mov = a.as_i32x8();
         transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
@@ -349,7 +373,8 @@ pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
-pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let mov = a.as_i32x4();
         transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
@@ -363,7 +388,8 @@ pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
-pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let mov = a.as_i32x4();
         transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
@@ -377,7 +403,8 @@ pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
-pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         let mov = a.as_i64x8();
         transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
@@ -391,7 +418,8 @@ pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
-pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         let mov = a.as_i64x8();
         transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
@@ -405,7 +433,8 @@ pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
-pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let mov = a.as_i64x4();
         transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
@@ -419,7 +448,8 @@ pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
-pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let mov = a.as_i64x4();
         transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
@@ -433,7 +463,8 @@ pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
-pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let mov = a.as_i64x2();
         transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
@@ -447,7 +478,8 @@ pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
-pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let mov = a.as_i64x2();
         transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
@@ -461,7 +493,8 @@ pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovaps))]
-pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
     unsafe {
         let mov = a.as_f32x16();
         transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
@@ -475,7 +508,8 @@ pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovaps))]
-pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
     unsafe {
         let mov = a.as_f32x16();
         transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
@@ -489,7 +523,8 @@ pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovaps))]
-pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
     unsafe {
         let mov = a.as_f32x8();
         transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
@@ -503,7 +538,8 @@ pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovaps))]
-pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
     unsafe {
         let mov = a.as_f32x8();
         transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
@@ -517,7 +553,8 @@ pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovaps))]
-pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let mov = a.as_f32x4();
         transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
@@ -531,7 +568,8 @@ pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovaps))]
-pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let mov = a.as_f32x4();
         transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
@@ -545,7 +583,8 @@ pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovapd))]
-pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
     unsafe {
         let mov = a.as_f64x8();
         transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
@@ -559,7 +598,8 @@ pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovapd))]
-pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
     unsafe {
         let mov = a.as_f64x8();
         transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
@@ -573,7 +613,8 @@ pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovapd))]
-pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
     unsafe {
         let mov = a.as_f64x4();
         transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
@@ -587,7 +628,8 @@ pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovapd))]
-pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
     unsafe {
         let mov = a.as_f64x4();
         transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
@@ -601,7 +643,8 @@ pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovapd))]
-pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
     unsafe {
         let mov = a.as_f64x2();
         transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
@@ -615,7 +658,8 @@ pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovapd))]
-pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
     unsafe {
         let mov = a.as_f64x2();
         transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
@@ -629,7 +673,8 @@ pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddd))]
-pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -640,7 +685,8 @@ pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddd))]
-pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_add_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, add, src.as_i32x16()))
@@ -654,7 +700,8 @@ pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddd))]
-pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_add_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, add, i32x16::ZERO))
@@ -668,7 +715,8 @@ pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddd))]
-pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_add_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, add, src.as_i32x8()))
@@ -682,7 +730,8 @@ pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddd))]
-pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_add_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, add, i32x8::ZERO))
@@ -696,7 +745,8 @@ pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddd))]
-pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_add_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, add, src.as_i32x4()))
@@ -710,7 +760,8 @@ pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddd))]
-pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_add_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, add, i32x4::ZERO))
@@ -724,7 +775,8 @@ pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddq))]
-pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -735,7 +787,8 @@ pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddq))]
-pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_add_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, add, src.as_i64x8()))
@@ -749,7 +802,8 @@ pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddq))]
-pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let add = _mm512_add_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, add, i64x8::ZERO))
@@ -763,7 +817,8 @@ pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddq))]
-pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_add_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, add, src.as_i64x4()))
@@ -777,7 +832,8 @@ pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddq))]
-pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let add = _mm256_add_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, add, i64x4::ZERO))
@@ -791,7 +847,8 @@ pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddq))]
-pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_add_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, add, src.as_i64x2()))
@@ -805,7 +862,8 @@ pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpaddq))]
-pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let add = _mm_add_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, add, i64x2::ZERO))
@@ -819,7 +877,8 @@ pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddps))]
-pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
     unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
 }
 
@@ -830,7 +889,8 @@ pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddps))]
-pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let add = _mm512_add_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, add, src.as_f32x16()))
@@ -844,7 +904,8 @@ pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddps))]
-pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let add = _mm512_add_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, add, f32x16::ZERO))
@@ -858,7 +919,8 @@ pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddps))]
-pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let add = _mm256_add_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, add, src.as_f32x8()))
@@ -872,7 +934,8 @@ pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddps))]
-pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let add = _mm256_add_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, add, f32x8::ZERO))
@@ -886,7 +949,8 @@ pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddps))]
-pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let add = _mm_add_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, add, src.as_f32x4()))
@@ -900,7 +964,8 @@ pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddps))]
-pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let add = _mm_add_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, add, f32x4::ZERO))
@@ -914,7 +979,8 @@ pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddpd))]
-pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
 }
 
@@ -925,7 +991,8 @@ pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddpd))]
-pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let add = _mm512_add_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, add, src.as_f64x8()))
@@ -939,7 +1006,8 @@ pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddpd))]
-pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let add = _mm512_add_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, add, f64x8::ZERO))
@@ -953,7 +1021,8 @@ pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddpd))]
-pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let add = _mm256_add_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, add, src.as_f64x4()))
@@ -967,7 +1036,8 @@ pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddpd))]
-pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let add = _mm256_add_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, add, f64x4::ZERO))
@@ -981,7 +1051,8 @@ pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddpd))]
-pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let add = _mm_add_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, add, src.as_f64x2()))
@@ -995,7 +1066,8 @@ pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddpd))]
-pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let add = _mm_add_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, add, f64x2::ZERO))
@@ -1009,7 +1081,8 @@ pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubd))]
-pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -1020,7 +1093,8 @@ pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubd))]
-pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_sub_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
@@ -1034,7 +1108,8 @@ pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubd))]
-pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_sub_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
@@ -1048,7 +1123,8 @@ pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubd))]
-pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_sub_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
@@ -1062,7 +1138,8 @@ pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubd))]
-pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_sub_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
@@ -1076,7 +1153,8 @@ pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubd))]
-pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_sub_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
@@ -1090,7 +1168,8 @@ pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubd))]
-pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_sub_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
@@ -1104,7 +1183,8 @@ pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubq))]
-pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -1115,7 +1195,8 @@ pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubq))]
-pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_sub_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
@@ -1129,7 +1210,8 @@ pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubq))]
-pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let sub = _mm512_sub_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
@@ -1143,7 +1225,8 @@ pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubq))]
-pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_sub_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
@@ -1157,7 +1240,8 @@ pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubq))]
-pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let sub = _mm256_sub_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
@@ -1171,7 +1255,8 @@ pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubq))]
-pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_sub_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
@@ -1185,7 +1270,8 @@ pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsubq))]
-pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let sub = _mm_sub_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
@@ -1199,7 +1285,8 @@ pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubps))]
-pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
     unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
 }
 
@@ -1210,7 +1297,8 @@ pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubps))]
-pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let sub = _mm512_sub_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
@@ -1224,7 +1312,8 @@ pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubps))]
-pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let sub = _mm512_sub_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
@@ -1238,7 +1327,8 @@ pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubps))]
-pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let sub = _mm256_sub_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
@@ -1252,7 +1342,8 @@ pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubps))]
-pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let sub = _mm256_sub_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
@@ -1266,7 +1357,8 @@ pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubps))]
-pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let sub = _mm_sub_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
@@ -1280,7 +1372,8 @@ pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubps))]
-pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let sub = _mm_sub_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
@@ -1294,7 +1387,8 @@ pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubpd))]
-pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
 }
 
@@ -1305,7 +1399,8 @@ pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubpd))]
-pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let sub = _mm512_sub_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
@@ -1319,7 +1414,8 @@ pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubpd))]
-pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let sub = _mm512_sub_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
@@ -1333,7 +1429,8 @@ pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubpd))]
-pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let sub = _mm256_sub_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
@@ -1347,7 +1444,8 @@ pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubpd))]
-pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let sub = _mm256_sub_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
@@ -1361,7 +1459,8 @@ pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubpd))]
-pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let sub = _mm_sub_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
@@ -1375,7 +1474,8 @@ pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubpd))]
-pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let sub = _mm_sub_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
@@ -1389,7 +1489,8 @@ pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
-pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
         let b = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
@@ -1404,7 +1505,8 @@ pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
-pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let mul = _mm512_mul_epi32(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
@@ -1418,7 +1520,8 @@ pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
-pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let mul = _mm512_mul_epi32(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
@@ -1432,7 +1535,8 @@ pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
-pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let mul = _mm256_mul_epi32(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
@@ -1446,7 +1550,8 @@ pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
-pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let mul = _mm256_mul_epi32(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
@@ -1460,7 +1565,8 @@ pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
-pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mul_epi32(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
@@ -1474,7 +1580,8 @@ pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuldq))]
-pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mul_epi32(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
@@ -1488,7 +1595,8 @@ pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulld))]
-pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -1499,7 +1607,13 @@ pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulld))]
-pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mullo_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let mul = _mm512_mullo_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
@@ -1513,7 +1627,8 @@ pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulld))]
-pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let mul = _mm512_mullo_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
@@ -1527,7 +1642,8 @@ pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulld))]
-pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let mul = _mm256_mullo_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
@@ -1541,7 +1657,8 @@ pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulld))]
-pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let mul = _mm256_mullo_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
@@ -1555,7 +1672,8 @@ pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulld))]
-pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mullo_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
@@ -1569,7 +1687,8 @@ pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmulld))]
-pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mullo_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
@@ -1584,7 +1703,8 @@ pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -1596,7 +1716,13 @@ pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mullox_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let mul = _mm512_mullox_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
@@ -1610,11 +1736,12 @@ pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuludq))]
-pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = a.as_u64x8();
         let b = b.as_u64x8();
-        let mask = u64x8::splat(u32::MAX.into());
+        let mask = u64x8::splat(u32::MAX as u64);
         transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
     }
 }
@@ -1626,7 +1753,8 @@ pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuludq))]
-pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let mul = _mm512_mul_epu32(a, b).as_u64x8();
         transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
@@ -1640,7 +1768,8 @@ pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuludq))]
-pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let mul = _mm512_mul_epu32(a, b).as_u64x8();
         transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
@@ -1654,7 +1783,8 @@ pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuludq))]
-pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let mul = _mm256_mul_epu32(a, b).as_u64x4();
         transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
@@ -1668,7 +1798,8 @@ pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuludq))]
-pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let mul = _mm256_mul_epu32(a, b).as_u64x4();
         transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
@@ -1682,7 +1813,8 @@ pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuludq))]
-pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mul_epu32(a, b).as_u64x2();
         transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
@@ -1696,7 +1828,8 @@ pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmuludq))]
-pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let mul = _mm_mul_epu32(a, b).as_u64x2();
         transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
@@ -1710,7 +1843,8 @@ pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulps))]
-pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
     unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
 }
 
@@ -1721,7 +1855,8 @@ pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulps))]
-pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let mul = _mm512_mul_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
@@ -1735,7 +1870,8 @@ pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulps))]
-pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let mul = _mm512_mul_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
@@ -1749,7 +1885,8 @@ pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulps))]
-pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let mul = _mm256_mul_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
@@ -1763,7 +1900,8 @@ pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulps))]
-pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let mul = _mm256_mul_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
@@ -1777,7 +1915,8 @@ pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulps))]
-pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let mul = _mm_mul_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
@@ -1791,7 +1930,8 @@ pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulps))]
-pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let mul = _mm_mul_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
@@ -1805,7 +1945,8 @@ pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulpd))]
-pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
 }
 
@@ -1816,7 +1957,8 @@ pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulpd))]
-pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let mul = _mm512_mul_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
@@ -1830,7 +1972,8 @@ pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulpd))]
-pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let mul = _mm512_mul_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
@@ -1844,7 +1987,8 @@ pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulpd))]
-pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let mul = _mm256_mul_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
@@ -1858,7 +2002,8 @@ pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulpd))]
-pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let mul = _mm256_mul_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
@@ -1872,7 +2017,8 @@ pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulpd))]
-pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let mul = _mm_mul_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
@@ -1886,7 +2032,8 @@ pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulpd))]
-pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let mul = _mm_mul_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
@@ -1900,7 +2047,8 @@ pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivps))]
-pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
     unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
 }
 
@@ -1911,7 +2059,8 @@ pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivps))]
-pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let div = _mm512_div_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, div, src.as_f32x16()))
@@ -1925,7 +2074,8 @@ pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivps))]
-pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let div = _mm512_div_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, div, f32x16::ZERO))
@@ -1939,7 +2089,8 @@ pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivps))]
-pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let div = _mm256_div_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, div, src.as_f32x8()))
@@ -1953,7 +2104,8 @@ pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivps))]
-pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let div = _mm256_div_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, div, f32x8::ZERO))
@@ -1967,7 +2119,8 @@ pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivps))]
-pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let div = _mm_div_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, div, src.as_f32x4()))
@@ -1981,7 +2134,8 @@ pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivps))]
-pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let div = _mm_div_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, div, f32x4::ZERO))
@@ -1995,7 +2149,8 @@ pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivpd))]
-pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
 }
 
@@ -2006,7 +2161,8 @@ pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivpd))]
-pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let div = _mm512_div_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, div, src.as_f64x8()))
@@ -2020,7 +2176,8 @@ pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivpd))]
-pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let div = _mm512_div_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, div, f64x8::ZERO))
@@ -2034,7 +2191,8 @@ pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivpd))]
-pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let div = _mm256_div_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, div, src.as_f64x4()))
@@ -2048,7 +2206,8 @@ pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivpd))]
-pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let div = _mm256_div_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, div, f64x4::ZERO))
@@ -2062,7 +2221,8 @@ pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivpd))]
-pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let div = _mm_div_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, div, src.as_f64x2()))
@@ -2076,7 +2236,8 @@ pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivpd))]
-pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let div = _mm_div_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, div, f64x2::ZERO))
@@ -2090,12 +2251,9 @@ pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsd))]
-pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_i32x16();
-        let b = b.as_i32x16();
-        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imax(a.as_i32x16(), b.as_i32x16()).as_m512i() }
 }
 
 /// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2105,7 +2263,8 @@ pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsd))]
-pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, max, src.as_i32x16()))
@@ -2119,7 +2278,8 @@ pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsd))]
-pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, max, i32x16::ZERO))
@@ -2133,7 +2293,8 @@ pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsd))]
-pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, max, src.as_i32x8()))
@@ -2147,7 +2308,8 @@ pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsd))]
-pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, max, i32x8::ZERO))
@@ -2161,7 +2323,8 @@ pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsd))]
-pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, max, src.as_i32x4()))
@@ -2175,7 +2338,8 @@ pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsd))]
-pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, max, i32x4::ZERO))
@@ -2189,12 +2353,9 @@ pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsq))]
-pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_i64x8();
-        let b = b.as_i64x8();
-        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imax(a.as_i64x8(), b.as_i64x8()).as_m512i() }
 }
 
 /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2204,7 +2365,8 @@ pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsq))]
-pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, max, src.as_i64x8()))
@@ -2218,7 +2380,8 @@ pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsq))]
-pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, max, i64x8::ZERO))
@@ -2232,12 +2395,9 @@ pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsq))]
-pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_i64x4();
-        let b = b.as_i64x4();
-        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imax(a.as_i64x4(), b.as_i64x4()).as_m256i() }
 }
 
 /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2247,7 +2407,8 @@ pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsq))]
-pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, max, src.as_i64x4()))
@@ -2261,7 +2422,8 @@ pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsq))]
-pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, max, i64x4::ZERO))
@@ -2275,12 +2437,9 @@ pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsq))]
-pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_i64x2();
-        let b = b.as_i64x2();
-        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imax(a.as_i64x2(), b.as_i64x2()).as_m128i() }
 }
 
 /// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2290,7 +2449,8 @@ pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsq))]
-pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, max, src.as_i64x2()))
@@ -2304,7 +2464,8 @@ pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxsq))]
-pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, max, i64x2::ZERO))
@@ -2514,12 +2675,9 @@ pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxud))]
-pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_u32x16();
-        let b = b.as_u32x16();
-        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imax(a.as_u32x16(), b.as_u32x16()).as_m512i() }
 }
 
 /// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2529,7 +2687,8 @@ pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxud))]
-pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epu32(a, b).as_u32x16();
         transmute(simd_select_bitmask(k, max, src.as_u32x16()))
@@ -2543,7 +2702,8 @@ pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxud))]
-pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epu32(a, b).as_u32x16();
         transmute(simd_select_bitmask(k, max, u32x16::ZERO))
@@ -2557,7 +2717,8 @@ pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxud))]
-pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epu32(a, b).as_u32x8();
         transmute(simd_select_bitmask(k, max, src.as_u32x8()))
@@ -2571,7 +2732,8 @@ pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxud))]
-pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epu32(a, b).as_u32x8();
         transmute(simd_select_bitmask(k, max, u32x8::ZERO))
@@ -2585,7 +2747,8 @@ pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxud))]
-pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epu32(a, b).as_u32x4();
         transmute(simd_select_bitmask(k, max, src.as_u32x4()))
@@ -2599,7 +2762,8 @@ pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxud))]
-pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epu32(a, b).as_u32x4();
         transmute(simd_select_bitmask(k, max, u32x4::ZERO))
@@ -2613,12 +2777,9 @@ pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuq))]
-pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_u64x8();
-        let b = b.as_u64x8();
-        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imax(a.as_u64x8(), b.as_u64x8()).as_m512i() }
 }
 
 /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2628,7 +2789,8 @@ pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuq))]
-pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epu64(a, b).as_u64x8();
         transmute(simd_select_bitmask(k, max, src.as_u64x8()))
@@ -2642,7 +2804,8 @@ pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuq))]
-pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let max = _mm512_max_epu64(a, b).as_u64x8();
         transmute(simd_select_bitmask(k, max, u64x8::ZERO))
@@ -2656,12 +2819,9 @@ pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuq))]
-pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_u64x4();
-        let b = b.as_u64x4();
-        transmute(simd_select::<i64x4, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imax(a.as_u64x4(), b.as_u64x4()).as_m256i() }
 }
 
 /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2671,7 +2831,8 @@ pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuq))]
-pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epu64(a, b).as_u64x4();
         transmute(simd_select_bitmask(k, max, src.as_u64x4()))
@@ -2685,7 +2846,8 @@ pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuq))]
-pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let max = _mm256_max_epu64(a, b).as_u64x4();
         transmute(simd_select_bitmask(k, max, u64x4::ZERO))
@@ -2699,12 +2861,9 @@ pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuq))]
-pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_u64x2();
-        let b = b.as_u64x2();
-        transmute(simd_select::<i64x2, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imax(a.as_u64x2(), b.as_u64x2()).as_m128i() }
 }
 
 /// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2714,7 +2873,8 @@ pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuq))]
-pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epu64(a, b).as_u64x2();
         transmute(simd_select_bitmask(k, max, src.as_u64x2()))
@@ -2728,7 +2888,8 @@ pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmaxuq))]
-pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let max = _mm_max_epu64(a, b).as_u64x2();
         transmute(simd_select_bitmask(k, max, u64x2::ZERO))
@@ -2742,12 +2903,9 @@ pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsd))]
-pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_i32x16();
-        let b = b.as_i32x16();
-        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imin(a.as_i32x16(), b.as_i32x16()).as_m512i() }
 }
 
 /// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2757,7 +2915,8 @@ pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsd))]
-pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, min, src.as_i32x16()))
@@ -2771,7 +2930,8 @@ pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsd))]
-pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, min, i32x16::ZERO))
@@ -2785,7 +2945,8 @@ pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsd))]
-pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, min, src.as_i32x8()))
@@ -2799,7 +2960,8 @@ pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsd))]
-pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, min, i32x8::ZERO))
@@ -2813,7 +2975,8 @@ pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsd))]
-pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, min, src.as_i32x4()))
@@ -2827,7 +2990,8 @@ pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsd))]
-pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, min, i32x4::ZERO))
@@ -2841,12 +3005,9 @@ pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsq))]
-pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_i64x8();
-        let b = b.as_i64x8();
-        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imin(a.as_i64x8(), b.as_i64x8()).as_m512i() }
 }
 
 /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2856,7 +3017,8 @@ pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsq))]
-pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, min, src.as_i64x8()))
@@ -2870,7 +3032,8 @@ pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsq))]
-pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, min, i64x8::ZERO))
@@ -2884,12 +3047,9 @@ pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsq))]
-pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_i64x4();
-        let b = b.as_i64x4();
-        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imin(a.as_i64x4(), b.as_i64x4()).as_m256i() }
 }
 
 /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2899,7 +3059,8 @@ pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsq))]
-pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, min, src.as_i64x4()))
@@ -2913,7 +3074,8 @@ pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsq))]
-pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, min, i64x4::ZERO))
@@ -2927,12 +3089,9 @@ pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsq))]
-pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_i64x2();
-        let b = b.as_i64x2();
-        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imin(a.as_i64x2(), b.as_i64x2()).as_m128i() }
 }
 
 /// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2942,7 +3101,8 @@ pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsq))]
-pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, min, src.as_i64x2()))
@@ -2956,7 +3116,8 @@ pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminsq))]
-pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, min, i64x2::ZERO))
@@ -3166,12 +3327,9 @@ pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminud))]
-pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_u32x16();
-        let b = b.as_u32x16();
-        transmute(simd_select::<i32x16, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imin(a.as_u32x16(), b.as_u32x16()).as_m512i() }
 }
 
 /// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3181,7 +3339,8 @@ pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminud))]
-pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epu32(a, b).as_u32x16();
         transmute(simd_select_bitmask(k, min, src.as_u32x16()))
@@ -3195,7 +3354,8 @@ pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminud))]
-pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epu32(a, b).as_u32x16();
         transmute(simd_select_bitmask(k, min, u32x16::ZERO))
@@ -3209,7 +3369,8 @@ pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminud))]
-pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epu32(a, b).as_u32x8();
         transmute(simd_select_bitmask(k, min, src.as_u32x8()))
@@ -3223,7 +3384,8 @@ pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminud))]
-pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epu32(a, b).as_u32x8();
         transmute(simd_select_bitmask(k, min, u32x8::ZERO))
@@ -3237,7 +3399,8 @@ pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminud))]
-pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epu32(a, b).as_u32x4();
         transmute(simd_select_bitmask(k, min, src.as_u32x4()))
@@ -3251,7 +3414,8 @@ pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminud))]
-pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epu32(a, b).as_u32x4();
         transmute(simd_select_bitmask(k, min, u32x4::ZERO))
@@ -3265,12 +3429,9 @@ pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuq))]
-pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
-    unsafe {
-        let a = a.as_u64x8();
-        let b = b.as_u64x8();
-        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
+    unsafe { simd_imin(a.as_u64x8(), b.as_u64x8()).as_m512i() }
 }
 
 /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3280,7 +3441,8 @@ pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuq))]
-pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epu64(a, b).as_u64x8();
         transmute(simd_select_bitmask(k, min, src.as_u64x8()))
@@ -3294,7 +3456,8 @@ pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuq))]
-pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let min = _mm512_min_epu64(a, b).as_u64x8();
         transmute(simd_select_bitmask(k, min, u64x8::ZERO))
@@ -3308,12 +3471,9 @@ pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuq))]
-pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
-    unsafe {
-        let a = a.as_u64x4();
-        let b = b.as_u64x4();
-        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
+    unsafe { simd_imin(a.as_u64x4(), b.as_u64x4()).as_m256i() }
 }
 
 /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3323,7 +3483,8 @@ pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuq))]
-pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epu64(a, b).as_u64x4();
         transmute(simd_select_bitmask(k, min, src.as_u64x4()))
@@ -3337,7 +3498,8 @@ pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuq))]
-pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let min = _mm256_min_epu64(a, b).as_u64x4();
         transmute(simd_select_bitmask(k, min, u64x4::ZERO))
@@ -3351,12 +3513,9 @@ pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuq))]
-pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_u64x2();
-        let b = b.as_u64x2();
-        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imin(a.as_u64x2(), b.as_u64x2()).as_m128i() }
 }
 
 /// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -3366,7 +3525,8 @@ pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuq))]
-pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epu64(a, b).as_u64x2();
         transmute(simd_select_bitmask(k, min, src.as_u64x2()))
@@ -3380,7 +3540,8 @@ pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpminuq))]
-pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let min = _mm_min_epu64(a, b).as_u64x2();
         transmute(simd_select_bitmask(k, min, u64x2::ZERO))
@@ -3548,7 +3709,8 @@ pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_fma(a, b, c) }
 }
 
@@ -3559,7 +3721,8 @@ pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
 }
 
@@ -3570,7 +3733,8 @@ pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
 }
 
@@ -3581,7 +3745,8 @@ pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
 }
 
@@ -3592,7 +3757,8 @@ pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
 }
 
@@ -3603,7 +3769,8 @@ pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
 }
 
@@ -3614,7 +3781,8 @@ pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
 }
 
@@ -3625,7 +3793,8 @@ pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
 }
 
@@ -3636,7 +3805,8 @@ pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
 }
 
@@ -3647,7 +3817,8 @@ pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
-pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
 }
 
@@ -3658,7 +3829,8 @@ pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_fma(a, b, c) }
 }
 
@@ -3669,7 +3841,8 @@ pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
 }
 
@@ -3680,7 +3853,8 @@ pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
 }
 
@@ -3691,7 +3865,8 @@ pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
 }
 
@@ -3702,7 +3877,8 @@ pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
 }
 
@@ -3713,7 +3889,8 @@ pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
 }
 
@@ -3724,7 +3901,8 @@ pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
 }
 
@@ -3735,7 +3913,8 @@ pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
 }
 
@@ -3746,7 +3925,8 @@ pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
 }
 
@@ -3757,7 +3937,8 @@ pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
-pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
 }
 
@@ -3768,7 +3949,8 @@ pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
@@ -3779,7 +3961,8 @@ pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
 }
 
@@ -3790,7 +3973,8 @@ pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
 }
 
@@ -3801,7 +3985,8 @@ pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
 }
 
@@ -3812,7 +3997,8 @@ pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
 }
 
@@ -3823,7 +4009,8 @@ pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
 }
 
@@ -3834,7 +4021,8 @@ pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
 }
 
@@ -3845,7 +4033,8 @@ pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
 }
 
@@ -3856,7 +4045,8 @@ pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
 }
 
@@ -3867,7 +4057,8 @@ pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
-pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
 }
 
@@ -3878,7 +4069,8 @@ pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
@@ -3889,7 +4081,8 @@ pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
 }
 
@@ -3900,7 +4093,8 @@ pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
 }
 
@@ -3911,7 +4105,8 @@ pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
 }
 
@@ -3922,7 +4117,8 @@ pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
 }
 
@@ -3933,7 +4129,8 @@ pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
 }
 
@@ -3944,7 +4141,8 @@ pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
 }
 
@@ -3955,7 +4153,8 @@ pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
 }
 
@@ -3966,7 +4165,8 @@ pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
 }
 
@@ -3977,7 +4177,8 @@ pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
-pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
 }
 
@@ -3988,7 +4189,8 @@ pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -4007,7 +4209,8 @@ pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
 }
 
@@ -4018,7 +4221,8 @@ pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
 }
 
@@ -4029,7 +4233,8 @@ pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
 }
 
@@ -4040,7 +4245,8 @@ pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
 }
 
@@ -4051,7 +4257,8 @@ pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
 }
 
@@ -4062,7 +4269,8 @@ pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
 }
 
@@ -4073,7 +4281,8 @@ pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
 }
 
@@ -4084,7 +4293,8 @@ pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
 }
 
@@ -4095,7 +4305,8 @@ pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
-pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
 }
 
@@ -4106,7 +4317,8 @@ pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -4121,7 +4333,8 @@ pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
 }
 
@@ -4132,7 +4345,8 @@ pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
 }
 
@@ -4143,7 +4357,8 @@ pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
 }
 
@@ -4154,7 +4369,8 @@ pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
 }
 
@@ -4165,7 +4381,8 @@ pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
 }
 
@@ -4176,7 +4393,8 @@ pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
 }
 
@@ -4187,7 +4405,8 @@ pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
 }
 
@@ -4198,7 +4417,8 @@ pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
 }
 
@@ -4209,7 +4429,8 @@ pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
-pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
 }
 
@@ -4220,7 +4441,8 @@ pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -4239,7 +4461,8 @@ pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
 }
 
@@ -4250,7 +4473,8 @@ pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
 }
 
@@ -4261,7 +4485,8 @@ pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
 }
 
@@ -4272,7 +4497,8 @@ pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
 }
 
@@ -4283,7 +4509,8 @@ pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
 }
 
@@ -4294,7 +4521,8 @@ pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
 }
 
@@ -4305,7 +4533,8 @@ pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
 }
 
@@ -4316,7 +4545,8 @@ pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
 }
 
@@ -4327,7 +4557,8 @@ pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
-pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
 }
 
@@ -4338,7 +4569,8 @@ pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -4353,7 +4585,8 @@ pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
 }
 
@@ -4364,7 +4597,8 @@ pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
 }
 
@@ -4375,7 +4609,8 @@ pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
 }
 
@@ -4386,7 +4621,8 @@ pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
 }
 
@@ -4397,7 +4633,8 @@ pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
 }
 
@@ -4408,7 +4645,8 @@ pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
 }
 
@@ -4419,7 +4657,8 @@ pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
 }
 
@@ -4430,7 +4669,8 @@ pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
 }
 
@@ -4441,7 +4681,8 @@ pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
-pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
 }
 
@@ -4452,7 +4693,8 @@ pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
@@ -4463,7 +4705,8 @@ pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
 }
 
@@ -4474,7 +4717,8 @@ pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
 }
 
@@ -4485,7 +4729,8 @@ pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
 }
 
@@ -4496,7 +4741,8 @@ pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
 }
 
@@ -4507,7 +4753,8 @@ pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
 }
 
@@ -4518,7 +4765,8 @@ pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
 }
 
@@ -4529,7 +4777,8 @@ pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
 }
 
@@ -4540,7 +4789,8 @@ pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
 }
 
@@ -4551,7 +4801,8 @@ pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m1
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
-pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
 }
 
@@ -4562,7 +4813,8 @@ pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
@@ -4573,7 +4825,8 @@ pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
 }
 
@@ -4584,7 +4837,8 @@ pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
 }
 
@@ -4595,7 +4849,8 @@ pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
 }
 
@@ -4606,7 +4861,8 @@ pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
 }
 
@@ -4617,7 +4873,8 @@ pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
 }
 
@@ -4628,7 +4885,8 @@ pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
 }
 
@@ -4639,7 +4897,8 @@ pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
 }
 
@@ -4650,7 +4909,8 @@ pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
 }
 
@@ -4661,7 +4921,8 @@ pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
-pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
 }
 
@@ -4672,7 +4933,8 @@ pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
@@ -4683,7 +4945,8 @@ pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
 }
 
@@ -4694,7 +4957,8 @@ pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
 }
 
@@ -4705,7 +4969,8 @@ pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
     unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
 }
 
@@ -4716,7 +4981,8 @@ pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
 }
 
@@ -4727,7 +4993,8 @@ pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
 }
 
@@ -4738,7 +5005,8 @@ pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
     unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
 }
 
@@ -4749,7 +5017,8 @@ pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
 }
 
@@ -4760,7 +5029,8 @@ pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
 }
 
@@ -4771,7 +5041,8 @@ pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m1
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
-pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
 }
 
@@ -4782,7 +5053,8 @@ pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
@@ -4793,7 +5065,8 @@ pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
 }
 
@@ -4804,7 +5077,8 @@ pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) ->
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
 }
 
@@ -4815,7 +5089,8 @@ pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
     unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
 }
 
@@ -4826,7 +5101,8 @@ pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
 }
 
@@ -4837,7 +5113,8 @@ pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
 }
 
@@ -4848,7 +5125,8 @@ pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
     unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
 }
 
@@ -4859,7 +5137,8 @@ pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
 }
 
@@ -4870,7 +5149,8 @@ pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
 }
 
@@ -4881,7 +5161,8 @@ pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
-pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
 }
 
@@ -11213,10 +11494,7 @@ pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtpd2ps))]
 pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
-    unsafe {
-        let convert = _mm_cvtpd_ps(a);
-        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
-    }
+    unsafe { vcvtpd2ps128(a.as_f64x2(), src.as_f32x4(), k).as_m128() }
 }
 
 /// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -11323,10 +11601,7 @@ pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtpd2dq))]
 pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
-    unsafe {
-        let convert = _mm_cvtpd_epi32(a);
-        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
-    }
+    unsafe { vcvtpd2dq128(a.as_f64x2(), src.as_i32x4(), k).as_m128i() }
 }
 
 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -11516,7 +11791,8 @@ pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbd))]
-pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
     unsafe {
         let a = a.as_i8x16();
         transmute::<i32x16, _>(simd_cast(a))
@@ -11530,7 +11806,8 @@ pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbd))]
-pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
@@ -11544,7 +11821,8 @@ pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbd))]
-pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
@@ -11558,7 +11836,8 @@ pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbd))]
-pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
@@ -11572,7 +11851,8 @@ pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbd))]
-pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
@@ -11586,7 +11866,8 @@ pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbd))]
-pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi8_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
@@ -11600,7 +11881,8 @@ pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbd))]
-pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi8_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
@@ -11614,7 +11896,8 @@ pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbq))]
-pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
     unsafe {
         let a = a.as_i8x16();
         let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -11629,7 +11912,8 @@ pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbq))]
-pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
@@ -11643,7 +11927,8 @@ pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m51
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbq))]
-pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
@@ -11657,7 +11942,8 @@ pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbq))]
-pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
@@ -11671,7 +11957,8 @@ pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbq))]
-pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
@@ -11685,7 +11972,8 @@ pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbq))]
-pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi8_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
@@ -11699,7 +11987,8 @@ pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxbq))]
-pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi8_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
@@ -11713,7 +12002,8 @@ pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbd))]
-pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
     unsafe {
         let a = a.as_u8x16();
         transmute::<i32x16, _>(simd_cast(a))
@@ -11727,7 +12017,8 @@ pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbd))]
-pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
@@ -11741,7 +12032,8 @@ pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbd))]
-pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
@@ -11755,7 +12047,8 @@ pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbd))]
-pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
@@ -11769,7 +12062,8 @@ pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbd))]
-pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
@@ -11783,7 +12077,8 @@ pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbd))]
-pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu8_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
@@ -11797,7 +12092,8 @@ pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbd))]
-pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu8_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
@@ -11811,7 +12107,8 @@ pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbq))]
-pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
     unsafe {
         let a = a.as_u8x16();
         let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -11826,7 +12123,8 @@ pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbq))]
-pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
@@ -11840,7 +12138,8 @@ pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m51
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbq))]
-pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
@@ -11854,7 +12153,8 @@ pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbq))]
-pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
@@ -11868,7 +12168,8 @@ pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbq))]
-pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
@@ -11882,7 +12183,8 @@ pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbq))]
-pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu8_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
@@ -11896,7 +12198,8 @@ pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxbq))]
-pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu8_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
@@ -11910,7 +12213,8 @@ pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwd))]
-pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
     unsafe {
         let a = a.as_i16x16();
         transmute::<i32x16, _>(simd_cast(a))
@@ -11924,7 +12228,8 @@ pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwd))]
-pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
@@ -11938,7 +12243,8 @@ pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwd))]
-pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
@@ -11952,7 +12258,8 @@ pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwd))]
-pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
@@ -11966,7 +12273,8 @@ pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwd))]
-pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
@@ -11980,7 +12288,8 @@ pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwd))]
-pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi16_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
@@ -11994,7 +12303,8 @@ pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwd))]
-pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi16_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
@@ -12008,7 +12318,8 @@ pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwq))]
-pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
     unsafe {
         let a = a.as_i16x8();
         transmute::<i64x8, _>(simd_cast(a))
@@ -12022,7 +12333,8 @@ pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwq))]
-pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
@@ -12036,7 +12348,8 @@ pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwq))]
-pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
@@ -12050,7 +12363,8 @@ pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwq))]
-pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
@@ -12064,7 +12378,8 @@ pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwq))]
-pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
@@ -12078,7 +12393,8 @@ pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwq))]
-pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi16_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
@@ -12092,7 +12408,8 @@ pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxwq))]
-pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi16_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
@@ -12106,7 +12423,8 @@ pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwd))]
-pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
     unsafe {
         let a = a.as_u16x16();
         transmute::<i32x16, _>(simd_cast(a))
@@ -12120,7 +12438,8 @@ pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwd))]
-pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
@@ -12134,7 +12453,8 @@ pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwd))]
-pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
@@ -12148,7 +12468,8 @@ pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwd))]
-pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
@@ -12162,7 +12483,8 @@ pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwd))]
-pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
@@ -12176,7 +12498,8 @@ pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwd))]
-pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu16_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
@@ -12190,7 +12513,8 @@ pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwd))]
-pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu16_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
@@ -12204,7 +12528,8 @@ pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwq))]
-pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
     unsafe {
         let a = a.as_u16x8();
         transmute::<i64x8, _>(simd_cast(a))
@@ -12218,7 +12543,8 @@ pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwq))]
-pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
@@ -12232,7 +12558,8 @@ pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwq))]
-pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
@@ -12246,7 +12573,8 @@ pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwq))]
-pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
@@ -12260,7 +12588,8 @@ pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwq))]
-pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
@@ -12274,7 +12603,8 @@ pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwq))]
-pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu16_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
@@ -12288,7 +12618,8 @@ pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxwq))]
-pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu16_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
@@ -12302,7 +12633,8 @@ pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxdq))]
-pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
     unsafe {
         let a = a.as_i32x8();
         transmute::<i64x8, _>(simd_cast(a))
@@ -12316,7 +12648,8 @@ pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxdq))]
-pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
@@ -12330,7 +12663,8 @@ pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxdq))]
-pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
@@ -12344,7 +12678,8 @@ pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxdq))]
-pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
@@ -12358,7 +12693,8 @@ pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxdq))]
-pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
@@ -12372,7 +12708,8 @@ pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxdq))]
-pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi32_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
@@ -12386,7 +12723,8 @@ pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovsxdq))]
-pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepi32_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
@@ -12400,7 +12738,8 @@ pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxdq))]
-pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
     unsafe {
         let a = a.as_u32x8();
         transmute::<i64x8, _>(simd_cast(a))
@@ -12414,7 +12753,8 @@ pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxdq))]
-pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
@@ -12428,7 +12768,8 @@ pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxdq))]
-pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
     unsafe {
         let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
@@ -12442,7 +12783,8 @@ pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxdq))]
-pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
@@ -12456,7 +12798,8 @@ pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxdq))]
-pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
@@ -12470,7 +12813,8 @@ pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxdq))]
-pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu32_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
@@ -12484,7 +12828,8 @@ pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovzxdq))]
-pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let convert = _mm_cvtepu32_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
@@ -12498,7 +12843,8 @@ pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
-pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
     unsafe {
         let a = a.as_i32x16();
         transmute::<f32x16, _>(simd_cast(a))
@@ -12512,7 +12858,8 @@ pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
-pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
     unsafe {
         let convert = _mm512_cvtepi32_ps(a).as_f32x16();
         transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
@@ -12526,7 +12873,8 @@ pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
-pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
     unsafe {
         let convert = _mm512_cvtepi32_ps(a).as_f32x16();
         transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
@@ -12540,7 +12888,8 @@ pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
-pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
     unsafe {
         let convert = _mm256_cvtepi32_ps(a).as_f32x8();
         transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
@@ -12554,7 +12903,8 @@ pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
-pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
     unsafe {
         let convert = _mm256_cvtepi32_ps(a).as_f32x8();
         transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
@@ -12568,7 +12918,8 @@ pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
-pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
     unsafe {
         let convert = _mm_cvtepi32_ps(a).as_f32x4();
         transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
@@ -12582,7 +12933,8 @@ pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2ps))]
-pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
     unsafe {
         let convert = _mm_cvtepi32_ps(a).as_f32x4();
         transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
@@ -12596,7 +12948,8 @@ pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
-pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
     unsafe {
         let a = a.as_i32x8();
         transmute::<f64x8, _>(simd_cast(a))
@@ -12610,7 +12963,8 @@ pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
-pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
     unsafe {
         let convert = _mm512_cvtepi32_pd(a).as_f64x8();
         transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
@@ -12624,7 +12978,8 @@ pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
-pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
     unsafe {
         let convert = _mm512_cvtepi32_pd(a).as_f64x8();
         transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
@@ -12638,7 +12993,8 @@ pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
-pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
     unsafe {
         let convert = _mm256_cvtepi32_pd(a).as_f64x4();
         transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
@@ -12652,7 +13008,8 @@ pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
-pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
     unsafe {
         let convert = _mm256_cvtepi32_pd(a).as_f64x4();
         transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
@@ -12666,7 +13023,8 @@ pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
-pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
     unsafe {
         let convert = _mm_cvtepi32_pd(a).as_f64x2();
         transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
@@ -12680,7 +13038,8 @@ pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
-pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
     unsafe {
         let convert = _mm_cvtepi32_pd(a).as_f64x2();
         transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
@@ -12694,7 +13053,8 @@ pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2ps))]
-pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
     unsafe {
         let a = a.as_u32x16();
         transmute::<f32x16, _>(simd_cast(a))
@@ -12708,7 +13068,8 @@ pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2ps))]
-pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
     unsafe {
         let convert = _mm512_cvtepu32_ps(a).as_f32x16();
         transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
@@ -12722,7 +13083,8 @@ pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2ps))]
-pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
     unsafe {
         let convert = _mm512_cvtepu32_ps(a).as_f32x16();
         transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
@@ -12736,7 +13098,8 @@ pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
     unsafe {
         let a = a.as_u32x8();
         transmute::<f64x8, _>(simd_cast(a))
@@ -12750,7 +13113,8 @@ pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
     unsafe {
         let convert = _mm512_cvtepu32_pd(a).as_f64x8();
         transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
@@ -12764,7 +13128,8 @@ pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
     unsafe {
         let convert = _mm512_cvtepu32_pd(a).as_f64x8();
         transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
@@ -12778,7 +13143,8 @@ pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
     unsafe {
         let a = a.as_u32x4();
         transmute::<f64x4, _>(simd_cast(a))
@@ -12792,7 +13158,8 @@ pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
     unsafe {
         let convert = _mm256_cvtepu32_pd(a).as_f64x4();
         transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
@@ -12806,7 +13173,8 @@ pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
     unsafe {
         let convert = _mm256_cvtepu32_pd(a).as_f64x4();
         transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
@@ -12820,7 +13188,8 @@ pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
     unsafe {
         let a = a.as_u32x4();
         let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
@@ -12835,7 +13204,8 @@ pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
     unsafe {
         let convert = _mm_cvtepu32_pd(a).as_f64x2();
         transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
@@ -12849,7 +13219,8 @@ pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
     unsafe {
         let convert = _mm_cvtepu32_pd(a).as_f64x2();
         transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
@@ -12863,7 +13234,8 @@ pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
-pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
     unsafe {
         let v2 = v2.as_i32x16();
         let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -12878,7 +13250,8 @@ pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtdq2pd))]
-pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
     unsafe {
         let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
         transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
@@ -12892,7 +13265,8 @@ pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
     unsafe {
         let v2 = v2.as_u32x16();
         let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -12907,7 +13281,8 @@ pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtudq2pd))]
-pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
     unsafe {
         let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
         transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
@@ -12921,7 +13296,8 @@ pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovdw))]
-pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
     unsafe {
         let a = a.as_i32x16();
         transmute::<i16x16, _>(simd_cast(a))
@@ -12935,7 +13311,8 @@ pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovdw))]
-pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
     unsafe {
         let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
@@ -12949,7 +13326,8 @@ pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovdw))]
-pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
     unsafe {
         let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
         transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
@@ -12963,7 +13341,8 @@ pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovdw))]
-pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
     unsafe {
         let a = a.as_i32x8();
         transmute::<i16x8, _>(simd_cast(a))
@@ -12977,7 +13356,8 @@ pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovdw))]
-pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
     unsafe {
         let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
@@ -12991,7 +13371,8 @@ pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m1
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovdw))]
-pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
     unsafe {
         let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
@@ -13038,7 +13419,8 @@ pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovdb))]
-pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
     unsafe {
         let a = a.as_i32x16();
         transmute::<i8x16, _>(simd_cast(a))
@@ -13052,7 +13434,8 @@ pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovdb))]
-pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
     unsafe {
         let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
@@ -13066,7 +13449,8 @@ pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovdb))]
-pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
     unsafe {
         let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
         transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
@@ -13146,7 +13530,8 @@ pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovqd))]
-pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
     unsafe {
         let a = a.as_i64x8();
         transmute::<i32x8, _>(simd_cast(a))
@@ -13160,7 +13545,8 @@ pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovqd))]
-pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
     unsafe {
         let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
@@ -13174,7 +13560,8 @@ pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m2
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovqd))]
-pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
     unsafe {
         let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
@@ -13188,7 +13575,8 @@ pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovqd))]
-pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
     unsafe {
         let a = a.as_i64x4();
         transmute::<i32x4, _>(simd_cast(a))
@@ -13202,7 +13590,8 @@ pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovqd))]
-pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
     unsafe {
         let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
@@ -13216,7 +13605,8 @@ pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m1
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovqd))]
-pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
     unsafe {
         let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
@@ -13263,7 +13653,8 @@ pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovqw))]
-pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
     unsafe {
         let a = a.as_i64x8();
         transmute::<i16x8, _>(simd_cast(a))
@@ -13277,7 +13668,8 @@ pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovqw))]
-pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
     unsafe {
         let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
@@ -13291,7 +13683,8 @@ pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpmovqw))]
-pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
     unsafe {
         let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
         transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
@@ -15849,7 +16242,7 @@ pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
-    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
+    unsafe { simd_select_bitmask(k, _mm256_cvttps_epi32(a).as_i32x8(), src.as_i32x8()).as_m256i() }
 }
 
 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -15860,7 +16253,7 @@ pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
-    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
+    _mm256_mask_cvttps_epi32(_mm256_setzero_si256(), k, a)
 }
 
 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -15871,7 +16264,7 @@ pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
-    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
+    unsafe { simd_select_bitmask(k, _mm_cvttps_epi32(a).as_i32x4(), src.as_i32x4()).as_m128i() }
 }
 
 /// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -15882,7 +16275,7 @@ pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttps2dq))]
 pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
-    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
+    _mm_mask_cvttps_epi32(_mm_setzero_si128(), k, a)
 }
 
 /// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
@@ -16085,7 +16478,7 @@ pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttpd2dq))]
 pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
-    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
+    unsafe { simd_select_bitmask(k, _mm256_cvttpd_epi32(a).as_i32x4(), src.as_i32x4()).as_m128i() }
 }
 
 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
@@ -16096,7 +16489,7 @@ pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvttpd2dq))]
 pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
-    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
+    _mm256_mask_cvttpd_epi32(_mm_setzero_si128(), k, a)
 }
 
 /// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -16248,7 +16641,8 @@ pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vxorps))]
-pub fn _mm512_setzero_pd() -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setzero_pd() -> __m512d {
     // All-0 is a properly initialized __m512d
     unsafe { const { mem::zeroed() } }
 }
@@ -16260,7 +16654,8 @@ pub fn _mm512_setzero_pd() -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vxorps))]
-pub fn _mm512_setzero_ps() -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setzero_ps() -> __m512 {
     // All-0 is a properly initialized __m512
     unsafe { const { mem::zeroed() } }
 }
@@ -16272,7 +16667,8 @@ pub fn _mm512_setzero_ps() -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vxorps))]
-pub fn _mm512_setzero() -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setzero() -> __m512 {
     // All-0 is a properly initialized __m512
     unsafe { const { mem::zeroed() } }
 }
@@ -16284,7 +16680,8 @@ pub fn _mm512_setzero() -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vxorps))]
-pub fn _mm512_setzero_si512() -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setzero_si512() -> __m512i {
     // All-0 is a properly initialized __m512i
     unsafe { const { mem::zeroed() } }
 }
@@ -16296,7 +16693,8 @@ pub fn _mm512_setzero_si512() -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vxorps))]
-pub fn _mm512_setzero_epi32() -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setzero_epi32() -> __m512i {
     // All-0 is a properly initialized __m512i
     unsafe { const { mem::zeroed() } }
 }
@@ -16308,7 +16706,8 @@ pub fn _mm512_setzero_epi32() -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_setr_epi32(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setr_epi32(
     e15: i32,
     e14: i32,
     e13: i32,
@@ -16340,7 +16739,8 @@ pub fn _mm512_setr_epi32(
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set_epi8(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set_epi8(
     e63: i8,
     e62: i8,
     e61: i8,
@@ -16407,12 +16807,12 @@ pub fn _mm512_set_epi8(
     e0: i8,
 ) -> __m512i {
     unsafe {
-        let r = i8x64::new(
+        let r = i8x64::from_array([
             e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
             e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
             e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
             e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
-        );
+        ]);
         transmute(r)
     }
 }
@@ -16423,7 +16823,8 @@ pub fn _mm512_set_epi8(
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set_epi16(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set_epi16(
     e31: i16,
     e30: i16,
     e29: i16,
@@ -16472,7 +16873,8 @@ pub fn _mm512_set_epi16(
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
     _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
 }
 
@@ -16482,7 +16884,8 @@ pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
     _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
 }
 
@@ -16492,7 +16895,8 @@ pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
     _mm512_set_pd(d, c, b, a, d, c, b, a)
 }
 
@@ -16502,7 +16906,8 @@ pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
     _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
 }
 
@@ -16512,7 +16917,8 @@ pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
     _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
 }
 
@@ -16522,7 +16928,8 @@ pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
     _mm512_set_pd(a, b, c, d, a, b, c, d)
 }
 
@@ -16532,7 +16939,8 @@ pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set_epi64(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set_epi64(
     e0: i64,
     e1: i64,
     e2: i64,
@@ -16551,7 +16959,8 @@ pub fn _mm512_set_epi64(
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_setr_epi64(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setr_epi64(
     e0: i64,
     e1: i64,
     e2: i64,
@@ -19076,7 +19485,8 @@ pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_rolv_epi32(a, _mm512_set1_epi32(IMM8))
 }
@@ -19089,7 +19499,12 @@ pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_rol_epi32<const IMM8: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_mask_rolv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
 }
@@ -19102,7 +19517,8 @@ pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_maskz_rolv_epi32(k, a, _mm512_set1_epi32(IMM8))
 }
@@ -19115,7 +19531,8 @@ pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_rolv_epi32(a, _mm256_set1_epi32(IMM8))
 }
@@ -19128,7 +19545,12 @@ pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_rol_epi32<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_mask_rolv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
 }
@@ -19141,7 +19563,8 @@ pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_maskz_rolv_epi32(k, a, _mm256_set1_epi32(IMM8))
 }
@@ -19154,7 +19577,8 @@ pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_rolv_epi32(a, _mm_set1_epi32(IMM8))
 }
@@ -19167,7 +19591,8 @@ pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_rolv_epi32(src, k, a, _mm_set1_epi32(IMM8))
 }
@@ -19180,7 +19605,8 @@ pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_maskz_rolv_epi32(k, a, _mm_set1_epi32(IMM8))
 }
@@ -19193,7 +19619,8 @@ pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_rorv_epi32(a, _mm512_set1_epi32(IMM8))
 }
@@ -19206,7 +19633,12 @@ pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_ror_epi32<const IMM8: i32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_mask_rorv_epi32(src, k, a, _mm512_set1_epi32(IMM8))
 }
@@ -19219,7 +19651,8 @@ pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_maskz_rorv_epi32(k, a, _mm512_set1_epi32(IMM8))
 }
@@ -19232,7 +19665,8 @@ pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_rorv_epi32(a, _mm256_set1_epi32(IMM8))
 }
@@ -19245,7 +19679,12 @@ pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_ror_epi32<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_mask_rorv_epi32(src, k, a, _mm256_set1_epi32(IMM8))
 }
@@ -19258,7 +19697,8 @@ pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_maskz_rorv_epi32(k, a, _mm256_set1_epi32(IMM8))
 }
@@ -19271,7 +19711,8 @@ pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_rorv_epi32(a, _mm_set1_epi32(IMM8))
 }
@@ -19284,7 +19725,8 @@ pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_rorv_epi32(src, k, a, _mm_set1_epi32(IMM8))
 }
@@ -19297,7 +19739,8 @@ pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_maskz_rorv_epi32(k, a, _mm_set1_epi32(IMM8))
 }
@@ -19310,7 +19753,8 @@ pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_rolv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
 }
@@ -19323,7 +19767,12 @@ pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_rol_epi64<const IMM8: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_mask_rolv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
 }
@@ -19336,7 +19785,8 @@ pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_maskz_rolv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
 }
@@ -19349,7 +19799,8 @@ pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m51
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_rolv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
 }
@@ -19362,7 +19813,12 @@ pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_rol_epi64<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_mask_rolv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
 }
@@ -19375,7 +19831,8 @@ pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_maskz_rolv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
 }
@@ -19388,7 +19845,8 @@ pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_rolv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
 }
@@ -19401,7 +19859,8 @@ pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_rolv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
 }
@@ -19414,7 +19873,8 @@ pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_maskz_rolv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
 }
@@ -19427,7 +19887,8 @@ pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_rorv_epi64(a, _mm512_set1_epi64(IMM8 as i64))
 }
@@ -19440,7 +19901,12 @@ pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_ror_epi64<const IMM8: i32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_mask_rorv_epi64(src, k, a, _mm512_set1_epi64(IMM8 as i64))
 }
@@ -19453,7 +19919,8 @@ pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_maskz_rorv_epi64(k, a, _mm512_set1_epi64(IMM8 as i64))
 }
@@ -19466,7 +19933,8 @@ pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m51
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_rorv_epi64(a, _mm256_set1_epi64x(IMM8 as i64))
 }
@@ -19479,7 +19947,12 @@ pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_ror_epi64<const IMM8: i32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_mask_rorv_epi64(src, k, a, _mm256_set1_epi64x(IMM8 as i64))
 }
@@ -19492,7 +19965,8 @@ pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_maskz_rorv_epi64(k, a, _mm256_set1_epi64x(IMM8 as i64))
 }
@@ -19505,7 +19979,8 @@ pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_rorv_epi64(a, _mm_set1_epi64x(IMM8 as i64))
 }
@@ -19518,7 +19993,8 @@ pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_rorv_epi64(src, k, a, _mm_set1_epi64x(IMM8 as i64))
 }
@@ -19531,7 +20007,8 @@ pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_maskz_rorv_epi64(k, a, _mm_set1_epi64x(IMM8 as i64))
 }
@@ -19544,7 +20021,8 @@ pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 32 {
@@ -19563,7 +20041,12 @@ pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_slli_epi32<const IMM8: u32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = if IMM8 >= 32 {
@@ -19583,7 +20066,8 @@ pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 32 {
@@ -19603,7 +20087,12 @@ pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_slli_epi32<const IMM8: u32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = if IMM8 >= 32 {
@@ -19623,7 +20112,8 @@ pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 32 {
@@ -19643,7 +20133,12 @@ pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_slli_epi32<const IMM8: u32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = if IMM8 >= 32 {
@@ -19663,7 +20158,8 @@ pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 32 {
@@ -19683,7 +20179,8 @@ pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 32 {
@@ -19702,7 +20199,12 @@ pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srli_epi32<const IMM8: u32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = if IMM8 >= 32 {
@@ -19722,7 +20224,8 @@ pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 32 {
@@ -19742,7 +20245,12 @@ pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srli_epi32<const IMM8: u32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = if IMM8 >= 32 {
@@ -19762,7 +20270,8 @@ pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 32 {
@@ -19782,7 +20291,12 @@ pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srli_epi32<const IMM8: u32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = if IMM8 >= 32 {
@@ -19802,7 +20316,8 @@ pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 32 {
@@ -19822,7 +20337,8 @@ pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 64 {
@@ -19841,7 +20357,12 @@ pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_slli_epi64<const IMM8: u32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = if IMM8 >= 64 {
@@ -19861,7 +20382,8 @@ pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 64 {
@@ -19881,7 +20403,12 @@ pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_slli_epi64<const IMM8: u32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = if IMM8 >= 64 {
@@ -19901,7 +20428,8 @@ pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 64 {
@@ -19921,7 +20449,12 @@ pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_slli_epi64<const IMM8: u32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = if IMM8 >= 64 {
@@ -19941,7 +20474,8 @@ pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 64 {
@@ -19961,7 +20495,8 @@ pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 64 {
@@ -19980,7 +20515,12 @@ pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srli_epi64<const IMM8: u32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = if IMM8 >= 64 {
@@ -20000,7 +20540,8 @@ pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 64 {
@@ -20020,7 +20561,12 @@ pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srli_epi64<const IMM8: u32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = if IMM8 >= 64 {
@@ -20040,7 +20586,8 @@ pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 64 {
@@ -20060,7 +20607,12 @@ pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srli_epi64<const IMM8: u32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = if IMM8 >= 64 {
@@ -20080,7 +20632,8 @@ pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         if IMM8 >= 64 {
@@ -20692,7 +21245,8 @@ pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
@@ -20707,7 +21261,12 @@ pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srai_epi32<const IMM8: u32>(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
@@ -20723,7 +21282,8 @@ pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
@@ -20739,7 +21299,12 @@ pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srai_epi32<const IMM8: u32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
         transmute(simd_select_bitmask(k, r, src.as_i32x8()))
@@ -20754,7 +21319,8 @@ pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         let r = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
         transmute(simd_select_bitmask(k, r, i32x8::ZERO))
@@ -20769,7 +21335,12 @@ pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srai_epi32<const IMM8: u32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
         transmute(simd_select_bitmask(k, r, src.as_i32x4()))
@@ -20784,7 +21355,8 @@ pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let r = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
         transmute(simd_select_bitmask(k, r, i32x4::ZERO))
@@ -20799,7 +21371,8 @@ pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
@@ -20814,7 +21387,12 @@ pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srai_epi64<const IMM8: u32>(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
@@ -20830,7 +21408,8 @@ pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
@@ -20846,7 +21425,8 @@ pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
@@ -20861,7 +21441,12 @@ pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srai_epi64<const IMM8: u32>(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
@@ -20877,7 +21462,8 @@ pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
@@ -20893,7 +21479,8 @@ pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
@@ -20908,7 +21495,12 @@ pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srai_epi64<const IMM8: u32>(
+    src: __m128i,
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
@@ -20924,7 +21516,8 @@ pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
@@ -20939,8 +21532,14 @@ pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravd))]
-pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
-    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let count = count.as_u32x16();
+        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
+        let count = simd_select(no_overflow, transmute(count), i32x16::splat(31));
+        simd_shr(a.as_i32x16(), count).as_m512i()
+    }
 }
 
 /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -20950,7 +21549,13 @@ pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravd))]
-pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srav_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_srav_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -20964,7 +21569,8 @@ pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravd))]
-pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_srav_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
@@ -20978,7 +21584,13 @@ pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m5
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravd))]
-pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srav_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m256i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_srav_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
@@ -20992,7 +21604,8 @@ pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravd))]
-pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_srav_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
@@ -21006,7 +21619,8 @@ pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravd))]
-pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srav_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
@@ -21020,7 +21634,8 @@ pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravd))]
-pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srav_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
@@ -21034,8 +21649,14 @@ pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravq))]
-pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
-    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let count = count.as_u64x8();
+        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
+        let count = simd_select(no_overflow, transmute(count), i64x8::splat(63));
+        simd_shr(a.as_i64x8(), count).as_m512i()
+    }
 }
 
 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21045,7 +21666,13 @@ pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravq))]
-pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srav_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_srav_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -21059,7 +21686,8 @@ pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravq))]
-pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_srav_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
@@ -21073,8 +21701,14 @@ pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m51
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravq))]
-pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
-    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
+    unsafe {
+        let count = count.as_u64x4();
+        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
+        let count = simd_select(no_overflow, transmute(count), i64x4::splat(63));
+        simd_shr(a.as_i64x4(), count).as_m256i()
+    }
 }
 
 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21084,7 +21718,13 @@ pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravq))]
-pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srav_epi64(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m256i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_srav_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
@@ -21098,7 +21738,8 @@ pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravq))]
-pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_srav_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
@@ -21112,8 +21753,14 @@ pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravq))]
-pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
-    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
+    unsafe {
+        let count = count.as_u64x2();
+        let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
+        let count = simd_select(no_overflow, transmute(count), i64x2::splat(63));
+        simd_shr(a.as_i64x2(), count).as_m128i()
+    }
 }
 
 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21123,7 +21770,8 @@ pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravq))]
-pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srav_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
@@ -21137,7 +21785,8 @@ pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsravq))]
-pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srav_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
@@ -21151,7 +21800,8 @@ pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvd))]
-pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_u32x16(),
@@ -21168,7 +21818,8 @@ pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvd))]
-pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let rol = _mm512_rolv_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
@@ -21182,7 +21833,8 @@ pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvd))]
-pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let rol = _mm512_rolv_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
@@ -21196,7 +21848,8 @@ pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvd))]
-pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_u32x8(),
@@ -21213,7 +21866,8 @@ pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvd))]
-pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let rol = _mm256_rolv_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
@@ -21227,7 +21881,8 @@ pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvd))]
-pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let rol = _mm256_rolv_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
@@ -21241,7 +21896,8 @@ pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvd))]
-pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_u32x4(),
@@ -21258,7 +21914,8 @@ pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvd))]
-pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let rol = _mm_rolv_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
@@ -21272,7 +21929,8 @@ pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvd))]
-pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let rol = _mm_rolv_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
@@ -21286,7 +21944,8 @@ pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvd))]
-pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shr(
             a.as_u32x16(),
@@ -21303,7 +21962,8 @@ pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvd))]
-pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let ror = _mm512_rorv_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
@@ -21317,7 +21977,8 @@ pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvd))]
-pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let ror = _mm512_rorv_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
@@ -21331,7 +21992,8 @@ pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvd))]
-pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shr(
             a.as_u32x8(),
@@ -21348,7 +22010,8 @@ pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvd))]
-pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let ror = _mm256_rorv_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
@@ -21362,7 +22025,8 @@ pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvd))]
-pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let ror = _mm256_rorv_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
@@ -21376,7 +22040,8 @@ pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvd))]
-pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shr(
             a.as_u32x4(),
@@ -21393,7 +22058,8 @@ pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvd))]
-pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let ror = _mm_rorv_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
@@ -21407,7 +22073,8 @@ pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvd))]
-pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let ror = _mm_rorv_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
@@ -21421,7 +22088,8 @@ pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvq))]
-pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_u64x8(),
@@ -21438,7 +22106,8 @@ pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvq))]
-pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let rol = _mm512_rolv_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
@@ -21452,7 +22121,8 @@ pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvq))]
-pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let rol = _mm512_rolv_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
@@ -21466,7 +22136,8 @@ pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvq))]
-pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_u64x4(),
@@ -21483,7 +22154,8 @@ pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvq))]
-pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let rol = _mm256_rolv_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
@@ -21497,7 +22169,8 @@ pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvq))]
-pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let rol = _mm256_rolv_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
@@ -21511,7 +22184,8 @@ pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvq))]
-pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_u64x2(),
@@ -21528,7 +22202,8 @@ pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvq))]
-pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let rol = _mm_rolv_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
@@ -21542,7 +22217,8 @@ pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprolvq))]
-pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let rol = _mm_rolv_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
@@ -21556,7 +22232,8 @@ pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvq))]
-pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shr(
             a.as_u64x8(),
@@ -21573,7 +22250,8 @@ pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvq))]
-pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let ror = _mm512_rorv_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
@@ -21587,7 +22265,8 @@ pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvq))]
-pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let ror = _mm512_rorv_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
@@ -21601,7 +22280,8 @@ pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvq))]
-pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shr(
             a.as_u64x4(),
@@ -21618,7 +22298,8 @@ pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvq))]
-pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let ror = _mm256_rorv_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
@@ -21632,7 +22313,8 @@ pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvq))]
-pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let ror = _mm256_rorv_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
@@ -21646,7 +22328,8 @@ pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvq))]
-pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shr(
             a.as_u64x2(),
@@ -21663,7 +22346,8 @@ pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvq))]
-pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let ror = _mm_rorv_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
@@ -21677,7 +22361,8 @@ pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vprorvq))]
-pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let ror = _mm_rorv_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
@@ -21691,8 +22376,14 @@ pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
-pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
-    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let count = count.as_u32x16();
+        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
+        let count = simd_select(no_overflow, count, u32x16::ZERO);
+        simd_select(no_overflow, simd_shl(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
+    }
 }
 
 /// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21702,7 +22393,13 @@ pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
-pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sllv_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_sllv_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -21716,7 +22413,8 @@ pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
-pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_sllv_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
@@ -21730,7 +22428,13 @@ pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m5
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
-pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sllv_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m256i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_sllv_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
@@ -21744,7 +22448,8 @@ pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
-pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_sllv_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
@@ -21758,7 +22463,8 @@ pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
-pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sllv_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
@@ -21772,7 +22478,8 @@ pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvd))]
-pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sllv_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
@@ -21786,8 +22493,14 @@ pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
-pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
-    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let count = count.as_u32x16();
+        let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
+        let count = simd_select(no_overflow, count, u32x16::ZERO);
+        simd_select(no_overflow, simd_shr(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
+    }
 }
 
 /// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21797,7 +22510,13 @@ pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
-pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srlv_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_srlv_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
@@ -21811,7 +22530,8 @@ pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
-pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_srlv_epi32(a, count).as_i32x16();
         transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
@@ -21825,7 +22545,13 @@ pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m5
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
-pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srlv_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m256i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_srlv_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
@@ -21839,7 +22565,8 @@ pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
-pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_srlv_epi32(a, count).as_i32x8();
         transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
@@ -21853,7 +22580,8 @@ pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
-pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srlv_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
@@ -21867,7 +22595,8 @@ pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvd))]
-pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srlv_epi32(a, count).as_i32x4();
         transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
@@ -21881,8 +22610,14 @@ pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
-pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
-    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let count = count.as_u64x8();
+        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
+        let count = simd_select(no_overflow, count, u64x8::ZERO);
+        simd_select(no_overflow, simd_shl(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
+    }
 }
 
 /// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21892,7 +22627,13 @@ pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
-pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sllv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_sllv_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -21906,7 +22647,8 @@ pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
-pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_sllv_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
@@ -21920,7 +22662,13 @@ pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m51
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
-pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sllv_epi64(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m256i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_sllv_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
@@ -21934,7 +22682,8 @@ pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
-pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_sllv_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
@@ -21948,7 +22697,8 @@ pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
-pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sllv_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
@@ -21962,7 +22712,8 @@ pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsllvq))]
-pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_sllv_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
@@ -21976,8 +22727,14 @@ pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
-pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
-    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
+    unsafe {
+        let count = count.as_u64x8();
+        let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
+        let count = simd_select(no_overflow, count, u64x8::ZERO);
+        simd_select(no_overflow, simd_shr(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
+    }
 }
 
 /// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -21987,7 +22744,13 @@ pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
-pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_srlv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
     unsafe {
         let shf = _mm512_srlv_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
@@ -22001,7 +22764,8 @@ pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
-pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_srlv_epi64(a, count).as_i64x8();
         transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
@@ -22015,7 +22779,13 @@ pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m51
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
-pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_srlv_epi64(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    count: __m256i,
+) -> __m256i {
     unsafe {
         let shf = _mm256_srlv_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
@@ -22029,7 +22799,8 @@ pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
-pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_srlv_epi64(a, count).as_i64x4();
         transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
@@ -22043,7 +22814,8 @@ pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
-pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srlv_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
@@ -22057,7 +22829,8 @@ pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpsrlvq))]
-pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_srlv_epi64(a, count).as_i64x2();
         transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
@@ -22072,7 +22845,8 @@ pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         simd_shuffle!(
@@ -22108,7 +22882,12 @@ pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_permute_ps<const MASK: i32>(
+    src: __m512,
+    k: __mmask16,
+    a: __m512,
+) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_permute_ps::<MASK>(a);
@@ -22124,7 +22903,8 @@ pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_permute_ps::<MASK>(a);
@@ -22140,7 +22920,12 @@ pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_permute_ps<const MASK: i32>(
+    src: __m256,
+    k: __mmask8,
+    a: __m256,
+) -> __m256 {
     unsafe {
         let r = _mm256_permute_ps::<MASK>(a);
         transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
@@ -22155,7 +22940,8 @@ pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
     unsafe {
         let r = _mm256_permute_ps::<MASK>(a);
         transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
@@ -22170,7 +22956,8 @@ pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let r = _mm_permute_ps::<MASK>(a);
         transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
@@ -22185,7 +22972,8 @@ pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let r = _mm_permute_ps::<MASK>(a);
         transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
@@ -22200,7 +22988,8 @@ pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         simd_shuffle!(
@@ -22228,7 +23017,12 @@ pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_permute_pd<const MASK: i32>(
+    src: __m512d,
+    k: __mmask8,
+    a: __m512d,
+) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_permute_pd::<MASK>(a);
@@ -22244,7 +23038,8 @@ pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_permute_pd::<MASK>(a);
@@ -22260,7 +23055,12 @@ pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_permute_pd<const MASK: i32>(
+    src: __m256d,
+    k: __mmask8,
+    a: __m256d,
+) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(MASK, 4);
         let r = _mm256_permute_pd::<MASK>(a);
@@ -22276,7 +23076,8 @@ pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(MASK, 4);
         let r = _mm256_permute_pd::<MASK>(a);
@@ -22292,7 +23093,12 @@ pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_permute_pd<const IMM2: i32>(
+    src: __m128d,
+    k: __mmask8,
+    a: __m128d,
+) -> __m128d {
     unsafe {
         static_assert_uimm_bits!(IMM2, 2);
         let r = _mm_permute_pd::<IMM2>(a);
@@ -22308,7 +23114,8 @@ pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
     unsafe {
         static_assert_uimm_bits!(IMM2, 2);
         let r = _mm_permute_pd::<IMM2>(a);
@@ -22324,7 +23131,8 @@ pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         simd_shuffle!(
@@ -22352,7 +23160,8 @@ pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_permutex_epi64<const MASK: i32>(
     src: __m512i,
     k: __mmask8,
     a: __m512i,
@@ -22372,7 +23181,8 @@ pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_permutex_epi64::<MASK>(a);
@@ -22388,7 +23198,8 @@ pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
 #[rustc_legacy_const_generics(1)]
-pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         simd_shuffle!(
@@ -22412,7 +23223,8 @@ pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_permutex_epi64<const MASK: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -22432,7 +23244,8 @@ pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_permutex_epi64::<MASK>(a);
@@ -22448,7 +23261,8 @@ pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         simd_shuffle!(
@@ -22476,7 +23290,12 @@ pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_permutex_pd<const MASK: i32>(
+    src: __m512d,
+    k: __mmask8,
+    a: __m512d,
+) -> __m512d {
     unsafe {
         let r = _mm512_permutex_pd::<MASK>(a);
         transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
@@ -22491,7 +23310,8 @@ pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
     unsafe {
         let r = _mm512_permutex_pd::<MASK>(a);
         transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
@@ -22506,7 +23326,8 @@ pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
 #[rustc_legacy_const_generics(1)]
-pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         simd_shuffle!(
@@ -22530,7 +23351,12 @@ pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_permutex_pd<const MASK: i32>(
+    src: __m256d,
+    k: __mmask8,
+    a: __m256d,
+) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_permutex_pd::<MASK>(a);
@@ -22546,7 +23372,8 @@ pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_permutex_pd::<MASK>(a);
@@ -23816,7 +24643,8 @@ pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m12
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r: i32x16 = simd_shuffle!(
@@ -23853,7 +24681,8 @@ pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
     src: __m512i,
     k: __mmask16,
     a: __m512i,
@@ -23873,7 +24702,11 @@ pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
+    k: __mmask16,
+    a: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_shuffle_epi32::<MASK>(a);
@@ -23889,7 +24722,8 @@ pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -23909,7 +24743,11 @@ pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
+    k: __mmask8,
+    a: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_shuffle_epi32::<MASK>(a);
@@ -23925,7 +24763,8 @@ pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
     src: __m128i,
     k: __mmask8,
     a: __m128i,
@@ -23945,7 +24784,11 @@ pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
+    k: __mmask8,
+    a: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm_shuffle_epi32::<MASK>(a);
@@ -23961,7 +24804,8 @@ pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         simd_shuffle!(
@@ -23997,7 +24841,8 @@ pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shuffle_ps<const MASK: i32>(
     src: __m512,
     k: __mmask16,
     a: __m512,
@@ -24018,7 +24863,12 @@ pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shuffle_ps<const MASK: i32>(
+    k: __mmask16,
+    a: __m512,
+    b: __m512,
+) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_shuffle_ps::<MASK>(a, b);
@@ -24034,7 +24884,8 @@ pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shuffle_ps<const MASK: i32>(
     src: __m256,
     k: __mmask8,
     a: __m256,
@@ -24055,7 +24906,8 @@ pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_shuffle_ps::<MASK>(a, b);
@@ -24071,7 +24923,8 @@ pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_shuffle_ps<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shuffle_ps<const MASK: i32>(
     src: __m128,
     k: __mmask8,
     a: __m128,
@@ -24092,7 +24945,8 @@ pub fn _mm_mask_shuffle_ps<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm_shuffle_ps::<MASK>(a, b);
@@ -24108,7 +24962,8 @@ pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         simd_shuffle!(
@@ -24136,7 +24991,8 @@ pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shuffle_pd<const MASK: i32>(
     src: __m512d,
     k: __mmask8,
     a: __m512d,
@@ -24157,7 +25013,12 @@ pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shuffle_pd<const MASK: i32>(
+    k: __mmask8,
+    a: __m512d,
+    b: __m512d,
+) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_shuffle_pd::<MASK>(a, b);
@@ -24173,7 +25034,8 @@ pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shuffle_pd<const MASK: i32>(
     src: __m256d,
     k: __mmask8,
     a: __m256d,
@@ -24194,7 +25056,12 @@ pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shuffle_pd<const MASK: i32>(
+    k: __mmask8,
+    a: __m256d,
+    b: __m256d,
+) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_shuffle_pd::<MASK>(a, b);
@@ -24210,7 +25077,8 @@ pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_shuffle_pd<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shuffle_pd<const MASK: i32>(
     src: __m128d,
     k: __mmask8,
     a: __m128d,
@@ -24231,7 +25099,8 @@ pub fn _mm_mask_shuffle_pd<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm_shuffle_pd::<MASK>(a, b);
@@ -24247,7 +25116,8 @@ pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let a = a.as_i32x16();
@@ -24286,7 +25156,8 @@ pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
     src: __m512i,
     k: __mmask16,
     a: __m512i,
@@ -24307,7 +25178,8 @@ pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
     k: __mmask16,
     a: __m512i,
     b: __m512i,
@@ -24327,7 +25199,8 @@ pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let a = a.as_i32x8();
@@ -24358,7 +25231,8 @@ pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -24379,7 +25253,12 @@ pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_shuffle_i32x4::<MASK>(a, b);
@@ -24395,7 +25274,8 @@ pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: _
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let a = a.as_i64x8();
@@ -24426,7 +25306,8 @@ pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
     src: __m512i,
     k: __mmask8,
     a: __m512i,
@@ -24447,7 +25328,12 @@ pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_shuffle_i64x2::<MASK>(a, b);
@@ -24463,7 +25349,8 @@ pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: _
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let a = a.as_i64x4();
@@ -24490,7 +25377,8 @@ pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -24511,7 +25399,12 @@ pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_shuffle_i64x2::<MASK>(a, b);
@@ -24527,7 +25420,8 @@ pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: _
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let a = a.as_f32x16();
@@ -24566,7 +25460,8 @@ pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
     src: __m512,
     k: __mmask16,
     a: __m512,
@@ -24587,7 +25482,12 @@ pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(
+    k: __mmask16,
+    a: __m512,
+    b: __m512,
+) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_shuffle_f32x4::<MASK>(a, b);
@@ -24603,7 +25503,8 @@ pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: _
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let a = a.as_f32x8();
@@ -24634,7 +25535,8 @@ pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
     src: __m256,
     k: __mmask8,
     a: __m256,
@@ -24655,7 +25557,12 @@ pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(
+    k: __mmask8,
+    a: __m256,
+    b: __m256,
+) -> __m256 {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_shuffle_f32x4::<MASK>(a, b);
@@ -24671,7 +25578,8 @@ pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let a = a.as_f64x8();
@@ -24702,7 +25610,8 @@ pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
     src: __m512d,
     k: __mmask8,
     a: __m512d,
@@ -24723,7 +25632,12 @@ pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(
+    k: __mmask8,
+    a: __m512d,
+    b: __m512d,
+) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm512_shuffle_f64x2::<MASK>(a, b);
@@ -24739,7 +25653,8 @@ pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: _
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let a = a.as_f64x4();
@@ -24766,7 +25681,8 @@ pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
     src: __m256d,
     k: __mmask8,
     a: __m256d,
@@ -24787,7 +25703,12 @@ pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(
+    k: __mmask8,
+    a: __m256d,
+    b: __m256d,
+) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(MASK, 8);
         let r = _mm256_shuffle_f64x2::<MASK>(a, b);
@@ -24803,7 +25724,8 @@ pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: _
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         match IMM8 & 0x3 {
@@ -24823,7 +25745,12 @@ pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(
+    src: __m128,
+    k: __mmask8,
+    a: __m512,
+) -> __m128 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let r = _mm512_extractf32x4_ps::<IMM8>(a);
@@ -24839,7 +25766,8 @@ pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a:
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let r = _mm512_extractf32x4_ps::<IMM8>(a);
@@ -24858,7 +25786,8 @@ pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) ->
     assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
 )]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         match IMM8 & 0x1 {
@@ -24876,7 +25805,12 @@ pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(
+    src: __m128,
+    k: __mmask8,
+    a: __m256,
+) -> __m128 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let r = _mm256_extractf32x4_ps::<IMM8>(a);
@@ -24892,7 +25826,8 @@ pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a:
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let r = _mm256_extractf32x4_ps::<IMM8>(a);
@@ -24911,7 +25846,8 @@ pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) ->
     assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
 )]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM1, 1);
         match IMM1 {
@@ -24929,7 +25865,8 @@ pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m512i,
@@ -24949,7 +25886,8 @@ pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM1, 1);
         let r = _mm512_extracti64x4_epi64::<IMM1>(a);
@@ -24965,7 +25903,8 @@ pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         match IMM8 & 0x1 {
@@ -24983,7 +25922,8 @@ pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
     src: __m256d,
     k: __mmask8,
     a: __m512d,
@@ -25003,7 +25943,8 @@ pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let r = _mm512_extractf64x4_pd::<IMM8>(a);
@@ -25022,7 +25963,8 @@ pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) ->
     assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
 )]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM2, 2);
         let a = a.as_i32x16();
@@ -25045,7 +25987,8 @@ pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m512i,
@@ -25065,7 +26008,8 @@ pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM2, 2);
         let r = _mm512_extracti32x4_epi32::<IMM2>(a);
@@ -25084,7 +26028,8 @@ pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i)
     assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
 )]
 #[rustc_legacy_const_generics(1)]
-pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM1, 1);
         let a = a.as_i32x8();
@@ -25105,7 +26050,8 @@ pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m256i,
@@ -25125,7 +26071,8 @@ pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM1, 1);
         let r = _mm256_extracti32x4_epi32::<IMM1>(a);
@@ -25140,7 +26087,8 @@ pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
-pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_moveldup_ps(a: __m512) -> __m512 {
     unsafe {
         let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
         transmute(r)
@@ -25154,7 +26102,8 @@ pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
-pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
     unsafe {
         let mov: f32x16 =
             simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
@@ -25169,7 +26118,8 @@ pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
-pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
     unsafe {
         let mov: f32x16 =
             simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
@@ -25184,7 +26134,8 @@ pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
-pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
     unsafe {
         let mov = _mm256_moveldup_ps(a);
         transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
@@ -25198,7 +26149,8 @@ pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
-pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
     unsafe {
         let mov = _mm256_moveldup_ps(a);
         transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
@@ -25212,7 +26164,8 @@ pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
-pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let mov = _mm_moveldup_ps(a);
         transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
@@ -25226,7 +26179,8 @@ pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovsldup))]
-pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let mov = _mm_moveldup_ps(a);
         transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
@@ -25240,7 +26194,8 @@ pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
-pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movehdup_ps(a: __m512) -> __m512 {
     unsafe {
         let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
         transmute(r)
@@ -25254,7 +26209,8 @@ pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
-pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
     unsafe {
         let mov: f32x16 =
             simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
@@ -25269,7 +26225,8 @@ pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
-pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
     unsafe {
         let mov: f32x16 =
             simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
@@ -25284,7 +26241,8 @@ pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
-pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
     unsafe {
         let mov = _mm256_movehdup_ps(a);
         transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
@@ -25298,7 +26256,8 @@ pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
-pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
     unsafe {
         let mov = _mm256_movehdup_ps(a);
         transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
@@ -25312,7 +26271,8 @@ pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
-pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let mov = _mm_movehdup_ps(a);
         transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
@@ -25326,7 +26286,8 @@ pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovshdup))]
-pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let mov = _mm_movehdup_ps(a);
         transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
@@ -25340,7 +26301,8 @@ pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovddup))]
-pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_movedup_pd(a: __m512d) -> __m512d {
     unsafe {
         let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
         transmute(r)
@@ -25354,7 +26316,8 @@ pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovddup))]
-pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
     unsafe {
         let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
         transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
@@ -25368,7 +26331,8 @@ pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovddup))]
-pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
     unsafe {
         let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
         transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
@@ -25382,7 +26346,8 @@ pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovddup))]
-pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
     unsafe {
         let mov = _mm256_movedup_pd(a);
         transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
@@ -25396,7 +26361,8 @@ pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovddup))]
-pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
     unsafe {
         let mov = _mm256_movedup_pd(a);
         transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
@@ -25410,7 +26376,8 @@ pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovddup))]
-pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
     unsafe {
         let mov = _mm_movedup_pd(a);
         transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
@@ -25424,7 +26391,8 @@ pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovddup))]
-pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
     unsafe {
         let mov = _mm_movedup_pd(a);
         transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
@@ -25439,7 +26407,8 @@ pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let a = a.as_i32x16();
@@ -25482,7 +26451,8 @@ pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_inserti32x4<const IMM8: i32>(
     src: __m512i,
     k: __mmask16,
     a: __m512i,
@@ -25503,7 +26473,12 @@ pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_inserti32x4<const IMM8: i32>(
+    k: __mmask16,
+    a: __m512i,
+    b: __m128i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let r = _mm512_inserti32x4::<IMM8>(a, b);
@@ -25522,7 +26497,8 @@ pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __
     assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
 )]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let a = a.as_i32x8();
@@ -25543,7 +26519,8 @@ pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_inserti32x4<const IMM8: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -25564,7 +26541,12 @@ pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_inserti32x4<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m128i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let r = _mm256_inserti32x4::<IMM8>(a, b);
@@ -25580,7 +26562,8 @@ pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm512_castsi256_si512(b);
@@ -25599,7 +26582,8 @@ pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_inserti64x4<const IMM8: i32>(
     src: __m512i,
     k: __mmask8,
     a: __m512i,
@@ -25620,7 +26604,12 @@ pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_inserti64x4<const IMM8: i32>(
+    k: __mmask8,
+    a: __m512i,
+    b: __m256i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let r = _mm512_inserti64x4::<IMM8>(a, b);
@@ -25636,7 +26625,8 @@ pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let b = _mm512_castps128_ps512(b);
@@ -25677,7 +26667,8 @@ pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_insertf32x4<const IMM8: i32>(
     src: __m512,
     k: __mmask16,
     a: __m512,
@@ -25698,7 +26689,12 @@ pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_insertf32x4<const IMM8: i32>(
+    k: __mmask16,
+    a: __m512,
+    b: __m128,
+) -> __m512 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 2);
         let r = _mm512_insertf32x4::<IMM8>(a, b);
@@ -25717,7 +26713,8 @@ pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m
     assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
 )]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm256_castps128_ps256(b);
@@ -25736,7 +26733,8 @@ pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_insertf32x4<const IMM8: i32>(
     src: __m256,
     k: __mmask8,
     a: __m256,
@@ -25757,7 +26755,12 @@ pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_insertf32x4<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256,
+    b: __m128,
+) -> __m256 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let r = _mm256_insertf32x4::<IMM8>(a, b);
@@ -25773,7 +26776,8 @@ pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m1
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let b = _mm512_castpd256_pd512(b);
@@ -25792,7 +26796,8 @@ pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_insertf64x4<const IMM8: i32>(
     src: __m512d,
     k: __mmask8,
     a: __m512d,
@@ -25813,7 +26818,12 @@ pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_insertf64x4<const IMM8: i32>(
+    k: __mmask8,
+    a: __m512d,
+    b: __m256d,
+) -> __m512d {
     unsafe {
         static_assert_uimm_bits!(IMM8, 1);
         let r = _mm512_insertf64x4::<IMM8>(a, b);
@@ -25828,7 +26838,8 @@ pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
-pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i32x16();
         let b = b.as_i32x16();
@@ -25851,7 +26862,13 @@ pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhdq))]
-pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpackhi_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
@@ -25865,7 +26882,8 @@ pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhdq))]
-pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
@@ -25879,7 +26897,13 @@ pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m5
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhdq))]
-pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpackhi_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
@@ -25893,7 +26917,8 @@ pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhdq))]
-pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
@@ -25907,7 +26932,8 @@ pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhdq))]
-pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
@@ -25921,7 +26947,8 @@ pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhdq))]
-pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
@@ -25935,7 +26962,8 @@ pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
-pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
 }
 
@@ -25946,7 +26974,13 @@ pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhqdq))]
-pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpackhi_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
@@ -25960,7 +26994,8 @@ pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhqdq))]
-pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
@@ -25974,7 +27009,13 @@ pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m51
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhqdq))]
-pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpackhi_epi64(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
@@ -25988,7 +27029,8 @@ pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhqdq))]
-pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
@@ -26002,7 +27044,8 @@ pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhqdq))]
-pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
@@ -26016,7 +27059,8 @@ pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckhqdq))]
-pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
@@ -26030,7 +27074,8 @@ pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
-pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
     unsafe {
         #[rustfmt::skip]
         simd_shuffle!(
@@ -26050,7 +27095,8 @@ pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
-pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
@@ -26064,7 +27110,8 @@ pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
-pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
@@ -26078,7 +27125,8 @@ pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
-pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
@@ -26092,7 +27140,8 @@ pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
-pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
@@ -26106,7 +27155,8 @@ pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
-pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
@@ -26120,7 +27170,8 @@ pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhps))]
-pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
@@ -26134,7 +27185,8 @@ pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
-pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
 }
 
@@ -26145,7 +27197,8 @@ pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
-pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
@@ -26159,7 +27212,8 @@ pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
-pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
@@ -26173,7 +27227,8 @@ pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
-pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
@@ -26187,7 +27242,8 @@ pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
-pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
@@ -26201,7 +27257,8 @@ pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
-pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
@@ -26215,7 +27272,8 @@ pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpckhpd))]
-pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
@@ -26229,7 +27287,8 @@ pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
-pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let a = a.as_i32x16();
         let b = b.as_i32x16();
@@ -26252,7 +27311,13 @@ pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckldq))]
-pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpacklo_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
@@ -26266,7 +27331,8 @@ pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckldq))]
-pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
@@ -26280,7 +27346,13 @@ pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m5
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckldq))]
-pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpacklo_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
@@ -26294,7 +27366,8 @@ pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckldq))]
-pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
@@ -26308,7 +27381,8 @@ pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckldq))]
-pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
@@ -26322,7 +27396,8 @@ pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpckldq))]
-pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
@@ -26336,7 +27411,8 @@ pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
-pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
 }
 
@@ -26347,7 +27423,13 @@ pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklqdq))]
-pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpacklo_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
@@ -26361,7 +27443,8 @@ pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklqdq))]
-pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
@@ -26375,7 +27458,13 @@ pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m51
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklqdq))]
-pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpacklo_epi64(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
@@ -26389,7 +27478,8 @@ pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklqdq))]
-pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
@@ -26403,7 +27493,8 @@ pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklqdq))]
-pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
@@ -26417,7 +27508,8 @@ pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpunpcklqdq))]
-pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
@@ -26431,7 +27523,8 @@ pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
-pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
     unsafe {
         #[rustfmt::skip]
         simd_shuffle!(a, b,
@@ -26450,7 +27543,8 @@ pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
-pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
@@ -26464,7 +27558,8 @@ pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
-pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe {
         let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
         transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
@@ -26478,7 +27573,8 @@ pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
-pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
@@ -26492,7 +27588,8 @@ pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
-pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe {
         let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
         transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
@@ -26506,7 +27603,8 @@ pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
-pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
@@ -26520,7 +27618,8 @@ pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklps))]
-pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
         transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
@@ -26534,7 +27633,8 @@ pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
-pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
     unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
 }
 
@@ -26545,7 +27645,8 @@ pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
-pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
@@ -26559,7 +27660,8 @@ pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
-pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe {
         let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
         transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
@@ -26573,7 +27675,8 @@ pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
-pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
@@ -26587,7 +27690,8 @@ pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
-pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe {
         let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
         transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
@@ -26601,7 +27705,8 @@ pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
-pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
@@ -26615,20 +27720,28 @@ pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vunpcklpd))]
-pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
         transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
     }
 }
 
-/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
+/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are indeterminate.
+///
+/// In the Intel documentation, the upper bits are declared to be "undefined".
+/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
+/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
+///
+/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castps128_ps512(a: __m128) -> __m512 {
     unsafe {
         simd_shuffle!(
             a,
@@ -26638,13 +27751,20 @@ pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
     }
 }
 
-/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
+/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are indeterminate.
+///
+/// In the Intel documentation, the upper bits are declared to be "undefined".
+/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
+/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
+///
+/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castps256_ps512(a: __m256) -> __m512 {
     unsafe {
         simd_shuffle!(
             a,
@@ -26660,7 +27780,8 @@ pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
     unsafe {
         simd_shuffle!(
             a,
@@ -26676,7 +27797,8 @@ pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
     unsafe {
         simd_shuffle!(
             a,
@@ -26692,7 +27814,8 @@ pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castps512_ps128(a: __m512) -> __m128 {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
 }
 
@@ -26702,7 +27825,8 @@ pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castps512_ps256(a: __m512) -> __m256 {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 
@@ -26712,7 +27836,8 @@ pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castps_pd(a: __m512) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castps_pd(a: __m512) -> __m512d {
     unsafe { transmute(a) }
 }
 
@@ -26722,27 +27847,42 @@ pub fn _mm512_castps_pd(a: __m512) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castps_si512(a: __m512) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castps_si512(a: __m512) -> __m512i {
     unsafe { transmute(a) }
 }
 
-/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
+/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are indeterminate.
+///
+/// In the Intel documentation, the upper bits are declared to be "undefined".
+/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
+/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
+///
+/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
     unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
 }
 
-/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
+/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are indeterminate.
+///
+/// In the Intel documentation, the upper bits are declared to be "undefined".
+/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
+/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
+///
+/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
     unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
 }
 
@@ -26752,7 +27892,8 @@ pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
     unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
 }
 
@@ -26762,7 +27903,8 @@ pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
     unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
 }
 
@@ -26772,7 +27914,8 @@ pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
     unsafe { simd_shuffle!(a, a, [0, 1]) }
 }
 
@@ -26782,7 +27925,8 @@ pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
 }
 
@@ -26792,7 +27936,8 @@ pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castpd_ps(a: __m512d) -> __m512 {
     unsafe { transmute(a) }
 }
 
@@ -26802,27 +27947,42 @@ pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castpd_si512(a: __m512d) -> __m512i {
     unsafe { transmute(a) }
 }
 
-/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
+/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are indeterminate.
+///
+/// In the Intel documentation, the upper bits are declared to be "undefined".
+/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
+/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
+///
+/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
     unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
 }
 
-/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
+/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are indeterminate.
+///
+/// In the Intel documentation, the upper bits are declared to be "undefined".
+/// This is not equivalent to [`mem::MaybeUninit`]; instead, these bits are non-deterministically
+/// set to some valid value. In practice, this is typically equivalent to [`mem::zeroed`].
+///
+/// This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
     unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
 }
 
@@ -26832,7 +27992,8 @@ pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
     unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
 }
 
@@ -26842,7 +28003,8 @@ pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
     unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
 }
 
@@ -26852,7 +28014,8 @@ pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
     unsafe { simd_shuffle!(a, a, [0, 1]) }
 }
 
@@ -26862,7 +28025,8 @@ pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
 }
 
@@ -26872,7 +28036,8 @@ pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
     unsafe { transmute(a) }
 }
 
@@ -26882,7 +28047,8 @@ pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
     unsafe { transmute(a) }
 }
 
@@ -26893,7 +28059,8 @@ pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovd))]
-pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
     unsafe { simd_extract!(a.as_i32x16(), 0) }
 }
 
@@ -26903,7 +28070,8 @@ pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtss_f32(a: __m512) -> f32 {
     unsafe { simd_extract!(a, 0) }
 }
 
@@ -26913,7 +28081,8 @@ pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
     unsafe { simd_extract!(a, 0) }
 }
 
@@ -26924,7 +28093,8 @@ pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
-pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
     unsafe {
         let a = _mm512_castsi128_si512(a).as_i32x16();
         let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
@@ -26939,7 +28109,8 @@ pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
-pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
@@ -26953,7 +28124,8 @@ pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
-pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
@@ -26967,7 +28139,8 @@ pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
-pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
@@ -26981,7 +28154,8 @@ pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
-pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
@@ -26995,7 +28169,8 @@ pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
-pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
@@ -27009,7 +28184,8 @@ pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
-pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
@@ -27023,7 +28199,8 @@ pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
-pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
     unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
 }
 
@@ -27034,7 +28211,8 @@ pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
-pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
@@ -27048,7 +28226,8 @@ pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
-pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
@@ -27062,7 +28241,8 @@ pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
-pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
@@ -27076,7 +28256,8 @@ pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
-pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
         transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
@@ -27090,7 +28271,8 @@ pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
-pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
@@ -27104,7 +28286,8 @@ pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
-pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
         transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
@@ -27118,7 +28301,8 @@ pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
-pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
     unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
 }
 
@@ -27129,7 +28313,8 @@ pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
-pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
     unsafe {
         let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
         transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
@@ -27143,7 +28328,8 @@ pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m51
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
-pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
     unsafe {
         let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
         transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
@@ -27157,7 +28343,8 @@ pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
-pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
     unsafe {
         let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
         transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
@@ -27171,7 +28358,8 @@ pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
-pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
     unsafe {
         let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
         transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
@@ -27185,7 +28373,8 @@ pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
-pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let broadcast = _mm_broadcastss_ps(a).as_f32x4();
         transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
@@ -27199,7 +28388,8 @@ pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastss))]
-pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
     unsafe {
         let broadcast = _mm_broadcastss_ps(a).as_f32x4();
         transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
@@ -27213,7 +28403,8 @@ pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
-pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
     unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
 }
 
@@ -27224,7 +28415,8 @@ pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
-pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
     unsafe {
         let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
         transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
@@ -27238,7 +28430,8 @@ pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m5
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
-pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
     unsafe {
         let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
         transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
@@ -27252,7 +28445,8 @@ pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
-pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
     unsafe {
         let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
         transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
@@ -27266,7 +28460,8 @@ pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vbroadcastsd))]
-pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
     unsafe {
         let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
         transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
@@ -27279,7 +28474,8 @@ pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
     unsafe {
         let a = a.as_i32x4();
         let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
@@ -27293,7 +28489,8 @@ pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
         transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
@@ -27306,7 +28503,8 @@ pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
         transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
@@ -27319,7 +28517,8 @@ pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
     unsafe {
         let a = a.as_i32x4();
         let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
@@ -27333,7 +28532,8 @@ pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
         transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
@@ -27346,7 +28546,8 @@ pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m
 #[inline]
 #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
     unsafe {
         let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
         transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
@@ -27359,7 +28560,8 @@ pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
 }
 
@@ -27369,7 +28571,8 @@ pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
         transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
@@ -27382,7 +28585,8 @@ pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
     unsafe {
         let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
         transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
@@ -27395,7 +28599,8 @@ pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
 }
 
@@ -27405,7 +28610,8 @@ pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
     unsafe {
         let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
         transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
@@ -27418,7 +28624,8 @@ pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m5
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshu
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
     unsafe {
         let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
         transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
@@ -27431,7 +28638,8 @@ pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
 }
 
@@ -27441,7 +28649,8 @@ pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
     unsafe {
         let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
         transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
@@ -27454,7 +28663,8 @@ pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m25
 #[inline]
 #[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshu
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
     unsafe {
         let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
         transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
@@ -27467,7 +28677,8 @@ pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
 }
 
@@ -27477,7 +28688,8 @@ pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
     unsafe {
         let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
         transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
@@ -27490,7 +28702,8 @@ pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m
 #[inline]
 #[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vper
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
     unsafe {
         let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
         transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
@@ -27504,7 +28717,8 @@ pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
-pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
 }
 
@@ -27515,7 +28729,8 @@ pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
-pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
 }
 
@@ -27526,7 +28741,8 @@ pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
-pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
 }
 
@@ -27537,7 +28753,8 @@ pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
-pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
 }
 
@@ -27548,7 +28765,8 @@ pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
-pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
 }
 
@@ -27559,7 +28777,8 @@ pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
-pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
 }
 
@@ -27570,7 +28789,8 @@ pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
-pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
     unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
 }
 
@@ -27581,7 +28801,8 @@ pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
-pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
     unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
 }
 
@@ -27592,7 +28813,8 @@ pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
-pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
 }
 
@@ -27603,7 +28825,8 @@ pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
-pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
     unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
 }
 
@@ -27614,7 +28837,8 @@ pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
-pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
     unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
 }
 
@@ -27625,7 +28849,8 @@ pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
-pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
 }
 
@@ -27639,7 +28864,8 @@ pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let a = a.as_i32x16();
@@ -27720,7 +28946,8 @@ pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_alignr_epi32<const IMM8: i32>(
     src: __m512i,
     k: __mmask16,
     a: __m512i,
@@ -27741,7 +28968,12 @@ pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_alignr_epi32<const IMM8: i32>(
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm512_alignr_epi32::<IMM8>(a, b);
@@ -27759,7 +28991,8 @@ pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: _
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let a = a.as_i32x8();
@@ -27788,7 +29021,8 @@ pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_alignr_epi32<const IMM8: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -27809,7 +29043,12 @@ pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_alignr_epi32<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm256_alignr_epi32::<IMM8>(a, b);
@@ -27827,7 +29066,8 @@ pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let a = a.as_i32x4();
@@ -27852,7 +29092,8 @@ pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_alignr_epi32<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m128i,
@@ -27873,7 +29114,12 @@ pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_alignr_epi32<const IMM8: i32>(
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm_alignr_epi32::<IMM8>(a, b);
@@ -27891,7 +29137,8 @@ pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m12
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let imm8: i32 = IMM8 % 8;
@@ -27918,7 +29165,8 @@ pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_alignr_epi64<const IMM8: i32>(
     src: __m512i,
     k: __mmask8,
     a: __m512i,
@@ -27939,7 +29187,12 @@ pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_alignr_epi64<const IMM8: i32>(
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm512_alignr_epi64::<IMM8>(a, b);
@@ -27957,7 +29210,8 @@ pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let imm8: i32 = IMM8 % 4;
@@ -27980,7 +29234,8 @@ pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_alignr_epi64<const IMM8: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -28001,7 +29256,12 @@ pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_alignr_epi64<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm256_alignr_epi64::<IMM8>(a, b);
@@ -28019,7 +29279,8 @@ pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let imm8: i32 = IMM8 % 2;
@@ -28040,7 +29301,8 @@ pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_alignr_epi64<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m128i,
@@ -28061,7 +29323,12 @@ pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_alignr_epi64<const IMM8: i32>(
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let r = _mm_alignr_epi64::<IMM8>(a, b);
@@ -28076,7 +29343,8 @@ pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
-pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -28087,7 +29355,8 @@ pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandd))]
-pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let and = _mm512_and_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, and, src.as_i32x16()))
@@ -28101,7 +29370,8 @@ pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandd))]
-pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let and = _mm512_and_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, and, i32x16::ZERO))
@@ -28115,7 +29385,8 @@ pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandd))]
-pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let and = simd_and(a.as_i32x8(), b.as_i32x8());
         transmute(simd_select_bitmask(k, and, src.as_i32x8()))
@@ -28129,7 +29400,8 @@ pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandd))]
-pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let and = simd_and(a.as_i32x8(), b.as_i32x8());
         transmute(simd_select_bitmask(k, and, i32x8::ZERO))
@@ -28143,7 +29415,8 @@ pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandd))]
-pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let and = simd_and(a.as_i32x4(), b.as_i32x4());
         transmute(simd_select_bitmask(k, and, src.as_i32x4()))
@@ -28157,7 +29430,8 @@ pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandd))]
-pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let and = simd_and(a.as_i32x4(), b.as_i32x4());
         transmute(simd_select_bitmask(k, and, i32x4::ZERO))
@@ -28171,7 +29445,8 @@ pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -28182,7 +29457,8 @@ pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let and = _mm512_and_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, and, src.as_i64x8()))
@@ -28196,7 +29472,8 @@ pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let and = _mm512_and_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, and, i64x8::ZERO))
@@ -28210,7 +29487,8 @@ pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let and = simd_and(a.as_i64x4(), b.as_i64x4());
         transmute(simd_select_bitmask(k, and, src.as_i64x4()))
@@ -28224,7 +29502,8 @@ pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let and = simd_and(a.as_i64x4(), b.as_i64x4());
         transmute(simd_select_bitmask(k, and, i64x4::ZERO))
@@ -28238,7 +29517,8 @@ pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let and = simd_and(a.as_i64x2(), b.as_i64x2());
         transmute(simd_select_bitmask(k, and, src.as_i64x2()))
@@ -28252,7 +29532,8 @@ pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let and = simd_and(a.as_i64x2(), b.as_i64x2());
         transmute(simd_select_bitmask(k, and, i64x2::ZERO))
@@ -28266,7 +29547,8 @@ pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandq))]
-pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -28277,7 +29559,8 @@ pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vporq))]
-pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -28288,7 +29571,8 @@ pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpord))]
-pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let or = _mm512_or_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, or, src.as_i32x16()))
@@ -28302,7 +29586,8 @@ pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpord))]
-pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let or = _mm512_or_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, or, i32x16::ZERO))
@@ -28316,7 +29601,8 @@ pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vor))] //should be vpord
-pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -28327,7 +29613,8 @@ pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpord))]
-pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let or = _mm256_or_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, or, src.as_i32x8()))
@@ -28341,7 +29628,8 @@ pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpord))]
-pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let or = _mm256_or_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, or, i32x8::ZERO))
@@ -28355,7 +29643,8 @@ pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vor))] //should be vpord
-pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -28366,7 +29655,8 @@ pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpord))]
-pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let or = _mm_or_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, or, src.as_i32x4()))
@@ -28380,7 +29670,8 @@ pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpord))]
-pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let or = _mm_or_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, or, i32x4::ZERO))
@@ -28394,7 +29685,8 @@ pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vporq))]
-pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -28405,7 +29697,8 @@ pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vporq))]
-pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let or = _mm512_or_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, or, src.as_i64x8()))
@@ -28419,7 +29712,8 @@ pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vporq))]
-pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let or = _mm512_or_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, or, i64x8::ZERO))
@@ -28433,7 +29727,8 @@ pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vor))] //should be vporq
-pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -28444,7 +29739,8 @@ pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vporq))]
-pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let or = _mm256_or_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, or, src.as_i64x4()))
@@ -28458,7 +29754,8 @@ pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vporq))]
-pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let or = _mm256_or_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, or, i64x4::ZERO))
@@ -28472,7 +29769,8 @@ pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vor))] //should be vporq
-pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -28483,7 +29781,8 @@ pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vporq))]
-pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let or = _mm_or_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, or, src.as_i64x2()))
@@ -28497,7 +29796,8 @@ pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vporq))]
-pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let or = _mm_or_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, or, i64x2::ZERO))
@@ -28511,7 +29811,8 @@ pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vporq))]
-pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -28522,7 +29823,8 @@ pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
-pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -28533,7 +29835,8 @@ pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxord))]
-pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let xor = _mm512_xor_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
@@ -28547,7 +29850,8 @@ pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxord))]
-pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let xor = _mm512_xor_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
@@ -28561,7 +29865,8 @@ pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vxor))] //should be vpxord
-pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -28572,7 +29877,8 @@ pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxord))]
-pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let xor = _mm256_xor_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
@@ -28586,7 +29892,8 @@ pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxord))]
-pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let xor = _mm256_xor_epi32(a, b).as_i32x8();
         transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
@@ -28600,7 +29907,8 @@ pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vxor))] //should be vpxord
-pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -28611,7 +29919,8 @@ pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxord))]
-pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let xor = _mm_xor_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
@@ -28625,7 +29934,8 @@ pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxord))]
-pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let xor = _mm_xor_epi32(a, b).as_i32x4();
         transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
@@ -28639,7 +29949,8 @@ pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxorq))]
-pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -28650,7 +29961,8 @@ pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxorq))]
-pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let xor = _mm512_xor_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
@@ -28664,7 +29976,8 @@ pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i)
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxorq))]
-pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let xor = _mm512_xor_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
@@ -28678,7 +29991,8 @@ pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
-pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
     unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -28689,7 +30003,8 @@ pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxorq))]
-pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let xor = _mm256_xor_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
@@ -28703,7 +30018,8 @@ pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxorq))]
-pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let xor = _mm256_xor_epi64(a, b).as_i64x4();
         transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
@@ -28717,7 +30033,8 @@ pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
-pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -28728,7 +30045,8 @@ pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxorq))]
-pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let xor = _mm_xor_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
@@ -28742,7 +30060,8 @@ pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxorq))]
-pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let xor = _mm_xor_epi64(a, b).as_i64x2();
         transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
@@ -28756,7 +30075,8 @@ pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpxorq))]
-pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
     unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -28767,7 +30087,8 @@ pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
-pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
     _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
 }
 
@@ -28778,7 +30099,13 @@ pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnd))]
-pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_andnot_epi32(
+    src: __m512i,
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
@@ -28792,7 +30119,8 @@ pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m51
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnd))]
-pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
         transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
@@ -28806,7 +30134,13 @@ pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnd))]
-pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_andnot_epi32(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
         let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
@@ -28821,7 +30155,8 @@ pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnd))]
-pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
         let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
@@ -28836,7 +30171,8 @@ pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnd))]
-pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
         let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
@@ -28851,7 +30187,8 @@ pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnd))]
-pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
         let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
@@ -28866,7 +30203,8 @@ pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
-pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
     _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
 }
 
@@ -28877,7 +30215,13 @@ pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnq))]
-pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_andnot_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
@@ -28891,7 +30235,8 @@ pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnq))]
-pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
     unsafe {
         let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
         transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
@@ -28905,7 +30250,13 @@ pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnq))]
-pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_andnot_epi64(
+    src: __m256i,
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
         let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
@@ -28920,7 +30271,8 @@ pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnq))]
-pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
     unsafe {
         let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
         let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
@@ -28935,7 +30287,8 @@ pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnq))]
-pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
         let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
@@ -28950,7 +30303,8 @@ pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnq))]
-pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
         let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
@@ -28965,7 +30319,8 @@ pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpandnq))]
-pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
     _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
 }
 
@@ -28975,7 +30330,8 @@ pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _cvtmask16_u32(a: __mmask16) -> u32 {
     a as u32
 }
 
@@ -28985,7 +30341,8 @@ pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _cvtu32_mask16(a: u32) -> __mmask16 {
     a as __mmask16
 }
 
@@ -28996,7 +30353,8 @@ pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
-pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
     a & b
 }
 
@@ -29007,7 +30365,8 @@ pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
-pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
     a & b
 }
 
@@ -29018,7 +30377,8 @@ pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
-pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
     a | b
 }
 
@@ -29029,7 +30389,8 @@ pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
-pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
     a | b
 }
 
@@ -29040,7 +30401,8 @@ pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
-pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
     a ^ b
 }
 
@@ -29051,7 +30413,8 @@ pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
-pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
     a ^ b
 }
 
@@ -29061,7 +30424,8 @@ pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _knot_mask16(a: __mmask16) -> __mmask16 {
     a ^ 0b11111111_11111111
 }
 
@@ -29071,7 +30435,8 @@ pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_knot(a: __mmask16) -> __mmask16 {
     a ^ 0b11111111_11111111
 }
 
@@ -29082,7 +30447,8 @@ pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
-pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
     _mm512_kand(_mm512_knot(a), b)
 }
 
@@ -29093,7 +30459,8 @@ pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(not))] // generate normal and code instead of kandw
-pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
     _mm512_kand(_mm512_knot(a), b)
 }
 
@@ -29104,7 +30471,8 @@ pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
-pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
     _mm512_knot(_mm512_kxor(a, b))
 }
 
@@ -29115,7 +30483,8 @@ pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kandw
-pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
     _mm512_knot(_mm512_kxor(a, b))
 }
 
@@ -29126,7 +30495,8 @@ pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
     let tmp = _kor_mask16(a, b);
     *all_ones = (tmp == 0xffff) as u8;
     (tmp == 0) as u8
@@ -29139,7 +30509,8 @@ pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8)
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
     (_kor_mask16(a, b) == 0xffff) as u8
 }
 
@@ -29150,7 +30521,8 @@ pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
     (_kor_mask16(a, b) == 0) as u8
 }
 
@@ -29161,7 +30533,8 @@ pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
 #[target_feature(enable = "avx512f")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
     a.unbounded_shl(COUNT)
 }
 
@@ -29172,7 +30545,8 @@ pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
     a.unbounded_shr(COUNT)
 }
 
@@ -29182,7 +30556,8 @@ pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
     *mem_addr
 }
 
@@ -29192,7 +30567,8 @@ pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
     *mem_addr = a;
 }
 
@@ -29203,7 +30579,8 @@ pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kmovw
-pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kmov(a: __mmask16) -> __mmask16 {
     a
 }
 
@@ -29213,7 +30590,8 @@ pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
 #[inline]
 #[target_feature(enable = "avx512f")] // generate normal and code instead of kmovw
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_int2mask(mask: i32) -> __mmask16 {
     mask as u16
 }
 
@@ -29224,7 +30602,8 @@ pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kmovw
-pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask2int(k1: __mmask16) -> i32 {
     k1 as i32
 }
 
@@ -29235,7 +30614,8 @@ pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(mov))] // generate normal and code instead of kunpckbw
-pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
     ((a & 0xff) << 8) | (b & 0xff)
 }
 
@@ -29246,7 +30626,8 @@ pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(cmp))] // generate normal and code instead of kortestw
-pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
     let r = (a | b) == 0b11111111_11111111;
     r as i32
 }
@@ -29258,7 +30639,8 @@ pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(xor))] // generate normal and code instead of kortestw
-pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
     let r = (a | b) == 0;
     r as i32
 }
@@ -29270,7 +30652,8 @@ pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmd))]
-pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     let and = _mm512_and_epi32(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_cmpneq_epi32_mask(and, zero)
@@ -29283,7 +30666,8 @@ pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmd))]
-pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     let and = _mm512_and_epi32(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_mask_cmpneq_epi32_mask(k, and, zero)
@@ -29296,7 +30680,8 @@ pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mm
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmd))]
-pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_cmpneq_epi32_mask(and, zero)
@@ -29309,7 +30694,8 @@ pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmd))]
-pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_mask_cmpneq_epi32_mask(k, and, zero)
@@ -29322,7 +30708,8 @@ pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mma
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmd))]
-pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_cmpneq_epi32_mask(and, zero)
@@ -29335,7 +30722,8 @@ pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmd))]
-pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_mask_cmpneq_epi32_mask(k, and, zero)
@@ -29348,7 +30736,8 @@ pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmq))]
-pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     let and = _mm512_and_epi64(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_cmpneq_epi64_mask(and, zero)
@@ -29361,7 +30750,8 @@ pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmq))]
-pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     let and = _mm512_and_epi64(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_mask_cmpneq_epi64_mask(k, and, zero)
@@ -29374,7 +30764,8 @@ pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mma
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmq))]
-pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_cmpneq_epi64_mask(and, zero)
@@ -29387,7 +30778,8 @@ pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmq))]
-pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_mask_cmpneq_epi64_mask(k, and, zero)
@@ -29400,7 +30792,8 @@ pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mma
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmq))]
-pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_cmpneq_epi64_mask(and, zero)
@@ -29413,7 +30806,8 @@ pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestmq))]
-pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_mask_cmpneq_epi64_mask(k, and, zero)
@@ -29426,7 +30820,8 @@ pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmd))]
-pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     let and = _mm512_and_epi32(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_cmpeq_epi32_mask(and, zero)
@@ -29439,7 +30834,8 @@ pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmd))]
-pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     let and = _mm512_and_epi32(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_mask_cmpeq_epi32_mask(k, and, zero)
@@ -29452,7 +30848,8 @@ pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmd))]
-pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_cmpeq_epi32_mask(and, zero)
@@ -29465,7 +30862,8 @@ pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmd))]
-pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_mask_cmpeq_epi32_mask(k, and, zero)
@@ -29478,7 +30876,8 @@ pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mm
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmd))]
-pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_cmpeq_epi32_mask(and, zero)
@@ -29491,7 +30890,8 @@ pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmd))]
-pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_mask_cmpeq_epi32_mask(k, and, zero)
@@ -29504,7 +30904,8 @@ pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmq))]
-pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     let and = _mm512_and_epi64(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_cmpeq_epi64_mask(and, zero)
@@ -29517,7 +30918,8 @@ pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmq))]
-pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     let and = _mm512_and_epi64(a, b);
     let zero = _mm512_setzero_si512();
     _mm512_mask_cmpeq_epi64_mask(k, and, zero)
@@ -29530,7 +30932,8 @@ pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mm
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmq))]
-pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_cmpeq_epi64_mask(and, zero)
@@ -29543,7 +30946,8 @@ pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmq))]
-pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     let and = _mm256_and_si256(a, b);
     let zero = _mm256_setzero_si256();
     _mm256_mask_cmpeq_epi64_mask(k, and, zero)
@@ -29556,7 +30960,8 @@ pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mm
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmq))]
-pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_cmpeq_epi64_mask(and, zero)
@@ -29569,7 +30974,8 @@ pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vptestnmq))]
-pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     let and = _mm_and_si128(a, b);
     let zero = _mm_setzero_si128();
     _mm_mask_cmpeq_epi64_mask(k, and, zero)
@@ -29681,7 +31087,8 @@ pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set_ps(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set_ps(
     e0: f32,
     e1: f32,
     e2: f32,
@@ -29711,7 +31118,8 @@ pub fn _mm512_set_ps(
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_setr_ps(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setr_ps(
     e0: f32,
     e1: f32,
     e2: f32,
@@ -29743,7 +31151,8 @@ pub fn _mm512_setr_ps(
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set1_pd(a: f64) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set1_pd(a: f64) -> __m512d {
     unsafe { transmute(f64x8::splat(a)) }
 }
 
@@ -29753,7 +31162,8 @@ pub fn _mm512_set1_pd(a: f64) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set1_ps(a: f32) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set1_ps(a: f32) -> __m512 {
     unsafe { transmute(f32x16::splat(a)) }
 }
 
@@ -29763,7 +31173,8 @@ pub fn _mm512_set1_ps(a: f32) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set_epi32(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set_epi32(
     e15: i32,
     e14: i32,
     e13: i32,
@@ -29792,7 +31203,8 @@ pub fn _mm512_set_epi32(
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set1_epi8(a: i8) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set1_epi8(a: i8) -> __m512i {
     unsafe { transmute(i8x64::splat(a)) }
 }
 
@@ -29802,7 +31214,8 @@ pub fn _mm512_set1_epi8(a: i8) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set1_epi16(a: i16) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set1_epi16(a: i16) -> __m512i {
     unsafe { transmute(i16x32::splat(a)) }
 }
 
@@ -29812,7 +31225,8 @@ pub fn _mm512_set1_epi16(a: i16) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set1_epi32(a: i32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set1_epi32(a: i32) -> __m512i {
     unsafe { transmute(i32x16::splat(a)) }
 }
 
@@ -29823,7 +31237,8 @@ pub fn _mm512_set1_epi32(a: i32) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastd))]
-pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
     unsafe {
         let r = _mm512_set1_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, r, src.as_i32x16()))
@@ -29837,7 +31252,8 @@ pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastd))]
-pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
     unsafe {
         let r = _mm512_set1_epi32(a).as_i32x16();
         transmute(simd_select_bitmask(k, r, i32x16::ZERO))
@@ -29851,7 +31267,8 @@ pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastd))]
-pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
     unsafe {
         let r = _mm256_set1_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, r, src.as_i32x8()))
@@ -29865,7 +31282,8 @@ pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastd))]
-pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
     unsafe {
         let r = _mm256_set1_epi32(a).as_i32x8();
         transmute(simd_select_bitmask(k, r, i32x8::ZERO))
@@ -29879,7 +31297,8 @@ pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastd))]
-pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
     unsafe {
         let r = _mm_set1_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, r, src.as_i32x4()))
@@ -29893,7 +31312,8 @@ pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastd))]
-pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
     unsafe {
         let r = _mm_set1_epi32(a).as_i32x4();
         transmute(simd_select_bitmask(k, r, i32x4::ZERO))
@@ -29906,7 +31326,8 @@ pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set1_epi64(a: i64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set1_epi64(a: i64) -> __m512i {
     unsafe { transmute(i64x8::splat(a)) }
 }
 
@@ -29917,7 +31338,8 @@ pub fn _mm512_set1_epi64(a: i64) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastq))]
-pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
     unsafe {
         let r = _mm512_set1_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, r, src.as_i64x8()))
@@ -29931,7 +31353,8 @@ pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastq))]
-pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
     unsafe {
         let r = _mm512_set1_epi64(a).as_i64x8();
         transmute(simd_select_bitmask(k, r, i64x8::ZERO))
@@ -29945,7 +31368,8 @@ pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastq))]
-pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
     unsafe {
         let r = _mm256_set1_epi64x(a).as_i64x4();
         transmute(simd_select_bitmask(k, r, src.as_i64x4()))
@@ -29959,7 +31383,8 @@ pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastq))]
-pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
     unsafe {
         let r = _mm256_set1_epi64x(a).as_i64x4();
         transmute(simd_select_bitmask(k, r, i64x4::ZERO))
@@ -29973,7 +31398,8 @@ pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastq))]
-pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
     unsafe {
         let r = _mm_set1_epi64x(a).as_i64x2();
         transmute(simd_select_bitmask(k, r, src.as_i64x2()))
@@ -29987,7 +31413,8 @@ pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpbroadcastq))]
-pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
     unsafe {
         let r = _mm_set1_epi64x(a).as_i64x2();
         transmute(simd_select_bitmask(k, r, i64x2::ZERO))
@@ -30000,7 +31427,8 @@ pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
     _mm512_set_epi64(d, c, b, a, d, c, b, a)
 }
 
@@ -30010,7 +31438,8 @@ pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
     _mm512_set_epi64(a, b, c, d, a, b, c, d)
 }
 
@@ -30839,7 +32268,8 @@ pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
 }
 
@@ -30850,7 +32280,8 @@ pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -30861,7 +32292,8 @@ pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
 }
 
@@ -30872,7 +32304,8 @@ pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -30883,7 +32316,8 @@ pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
 }
 
@@ -30894,7 +32328,8 @@ pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -30905,7 +32340,8 @@ pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
 }
 
@@ -30916,7 +32352,8 @@ pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -30927,7 +32364,8 @@ pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
 }
 
@@ -30938,7 +32376,8 @@ pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -30949,7 +32388,8 @@ pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
 }
 
@@ -30960,7 +32400,8 @@ pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -30971,7 +32412,8 @@ pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
 }
 
@@ -30982,7 +32424,8 @@ pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -30993,7 +32436,8 @@ pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
 }
 
@@ -31004,7 +32448,8 @@ pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -31015,7 +32460,8 @@ pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
 }
 
@@ -31026,7 +32472,8 @@ pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -31037,7 +32484,8 @@ pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
 }
 
@@ -31048,7 +32496,8 @@ pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -31059,7 +32508,8 @@ pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
 }
 
@@ -31070,7 +32520,8 @@ pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -31081,7 +32532,8 @@ pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
 }
 
@@ -31092,7 +32544,8 @@ pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -31103,7 +32556,8 @@ pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
 }
 
@@ -31114,7 +32568,8 @@ pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -31125,7 +32580,8 @@ pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
 }
 
@@ -31136,7 +32592,8 @@ pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -31147,7 +32604,8 @@ pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
 }
 
@@ -31158,7 +32616,8 @@ pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -31169,7 +32628,8 @@ pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
 }
 
@@ -31180,7 +32640,8 @@ pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -31191,7 +32652,8 @@ pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
 }
 
@@ -31202,7 +32664,8 @@ pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -31213,7 +32676,8 @@ pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
 }
 
@@ -31224,7 +32688,8 @@ pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
-pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -31236,7 +32701,11 @@ pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mma
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
+    a: __m512i,
+    b: __m512i,
+) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_u32x16();
@@ -31263,7 +32732,8 @@ pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask16,
     a: __m512i,
     b: __m512i,
@@ -31295,7 +32765,11 @@ pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
+    a: __m256i,
+    b: __m256i,
+) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_u32x8();
@@ -31322,7 +32796,8 @@ pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m256i,
     b: __m256i,
@@ -31354,7 +32829,8 @@ pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_u32x4();
@@ -31381,7 +32857,8 @@ pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m128i,
     b: __m128i,
@@ -31412,7 +32889,8 @@ pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -31423,7 +32901,8 @@ pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -31434,7 +32913,8 @@ pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -31445,7 +32925,8 @@ pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -31456,7 +32937,8 @@ pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -31467,7 +32949,8 @@ pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -31478,7 +32961,8 @@ pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -31489,7 +32973,8 @@ pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -31500,7 +32985,8 @@ pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -31511,7 +32997,8 @@ pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -31522,7 +33009,8 @@ pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -31533,7 +33021,8 @@ pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -31544,7 +33033,8 @@ pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -31555,7 +33045,8 @@ pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -31566,7 +33057,8 @@ pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -31577,7 +33069,8 @@ pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -31588,7 +33081,8 @@ pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -31599,7 +33093,8 @@ pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -31610,7 +33105,8 @@ pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -31621,7 +33117,8 @@ pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -31632,7 +33129,8 @@ pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -31643,7 +33141,8 @@ pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -31654,7 +33153,8 @@ pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -31665,7 +33165,8 @@ pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -31676,7 +33177,8 @@ pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -31687,7 +33189,8 @@ pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -31698,7 +33201,8 @@ pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -31709,7 +33213,8 @@ pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -31720,7 +33225,8 @@ pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -31731,7 +33237,8 @@ pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -31742,7 +33249,8 @@ pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
     unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
 }
 
@@ -31753,7 +33261,8 @@ pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
     _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -31764,7 +33273,8 @@ pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> _
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
 }
 
@@ -31775,7 +33285,8 @@ pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -31786,7 +33297,8 @@ pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -31797,7 +33309,8 @@ pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
-pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -31809,7 +33322,11 @@ pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mma
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
+    a: __m512i,
+    b: __m512i,
+) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_i32x16();
@@ -31836,7 +33353,8 @@ pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask16,
     a: __m512i,
     b: __m512i,
@@ -31868,7 +33386,11 @@ pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
+    a: __m256i,
+    b: __m256i,
+) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_i32x8();
@@ -31895,7 +33417,8 @@ pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m256i,
     b: __m256i,
@@ -31927,7 +33450,8 @@ pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_i32x4();
@@ -31954,7 +33478,8 @@ pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m128i,
     b: __m128i,
@@ -31985,7 +33510,8 @@ pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
 }
 
@@ -31996,7 +33522,8 @@ pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -32007,7 +33534,8 @@ pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
 }
 
@@ -32018,7 +33546,8 @@ pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -32029,7 +33558,8 @@ pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
 }
 
@@ -32040,7 +33570,8 @@ pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -32051,7 +33582,8 @@ pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
 }
 
@@ -32062,7 +33594,8 @@ pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -32073,7 +33606,8 @@ pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
 }
 
@@ -32084,7 +33618,8 @@ pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -32095,7 +33630,8 @@ pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
 }
 
@@ -32106,7 +33642,8 @@ pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -32117,7 +33654,8 @@ pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
 }
 
@@ -32128,7 +33666,8 @@ pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -32139,7 +33678,8 @@ pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
 }
 
@@ -32150,7 +33690,8 @@ pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -32161,7 +33702,8 @@ pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
 }
 
@@ -32172,7 +33714,8 @@ pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -32183,7 +33726,8 @@ pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
 }
 
@@ -32194,7 +33738,8 @@ pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -32205,7 +33750,8 @@ pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
 }
 
@@ -32216,7 +33762,8 @@ pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -32227,7 +33774,8 @@ pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
 }
 
@@ -32238,7 +33786,8 @@ pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -32249,7 +33798,8 @@ pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
 }
 
@@ -32260,7 +33810,8 @@ pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -32271,7 +33822,8 @@ pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
 }
 
@@ -32282,7 +33834,8 @@ pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -32293,7 +33846,8 @@ pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
 }
 
@@ -32304,7 +33858,8 @@ pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -32315,7 +33870,8 @@ pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
 }
 
@@ -32326,7 +33882,8 @@ pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -32337,7 +33894,8 @@ pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
 }
 
@@ -32348,7 +33906,8 @@ pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -32359,7 +33918,8 @@ pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
 }
 
@@ -32370,7 +33930,8 @@ pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
-pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -32382,7 +33943,11 @@ pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mma
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
+    a: __m512i,
+    b: __m512i,
+) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_u64x8();
@@ -32409,7 +33974,8 @@ pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m512i,
     b: __m512i,
@@ -32441,7 +34007,11 @@ pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
+    a: __m256i,
+    b: __m256i,
+) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_u64x4();
@@ -32468,7 +34038,8 @@ pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m256i,
     b: __m256i,
@@ -32500,7 +34071,8 @@ pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_u64x2();
@@ -32527,7 +34099,8 @@ pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m128i,
     b: __m128i,
@@ -32558,7 +34131,8 @@ pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -32569,7 +34143,8 @@ pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -32580,7 +34155,8 @@ pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -32591,7 +34167,8 @@ pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -32602,7 +34179,8 @@ pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -32613,7 +34191,8 @@ pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
 }
 
@@ -32624,7 +34203,8 @@ pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -32635,7 +34215,8 @@ pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -32646,7 +34227,8 @@ pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -32657,7 +34239,8 @@ pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -32668,7 +34251,8 @@ pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -32679,7 +34263,8 @@ pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
 }
 
@@ -32690,7 +34275,8 @@ pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -32701,7 +34287,8 @@ pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -32712,7 +34299,8 @@ pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -32723,7 +34311,8 @@ pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -32734,7 +34323,8 @@ pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -32745,7 +34335,8 @@ pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
 }
 
@@ -32756,7 +34347,8 @@ pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -32767,7 +34359,8 @@ pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -32778,7 +34371,8 @@ pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -32789,7 +34383,8 @@ pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -32800,7 +34395,8 @@ pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -32811,7 +34407,8 @@ pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
 }
 
@@ -32822,7 +34419,8 @@ pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -32833,7 +34431,8 @@ pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -32844,7 +34443,8 @@ pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -32855,7 +34455,8 @@ pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -32866,7 +34467,8 @@ pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -32877,7 +34479,8 @@ pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
 }
 
@@ -32888,7 +34491,8 @@ pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmas
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
     unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
 }
 
@@ -32899,7 +34503,8 @@ pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
     _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -32910,7 +34515,8 @@ pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
     unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
 }
 
@@ -32921,7 +34527,8 @@ pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
     _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -32932,7 +34539,8 @@ pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -32943,7 +34551,8 @@ pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
-pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
     _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
 }
 
@@ -32955,7 +34564,11 @@ pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mma
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
+    a: __m512i,
+    b: __m512i,
+) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_i64x8();
@@ -32982,7 +34595,8 @@ pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m512i,
     b: __m512i,
@@ -33014,7 +34628,11 @@ pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
+    a: __m256i,
+    b: __m256i,
+) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_i64x4();
@@ -33041,7 +34659,8 @@ pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m256i,
     b: __m256i,
@@ -33073,7 +34692,8 @@ pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(2)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM3, 3);
         let a = a.as_i64x2();
@@ -33100,7 +34720,8 @@ pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[rustc_legacy_const_generics(3)]
 #[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
-pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
     k1: __mmask8,
     a: __m128i,
     b: __m128i,
@@ -33130,8 +34751,9 @@ pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
-    unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
+    unsafe { simd_reduce_add_ordered(a.as_i32x16(), 0) }
 }
 
 /// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -33140,8 +34762,9 @@ pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
-    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
+    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO), 0) }
 }
 
 /// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
@@ -33150,8 +34773,9 @@ pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
-    unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
+    unsafe { simd_reduce_add_ordered(a.as_i64x8(), 0) }
 }
 
 /// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -33160,8 +34784,9 @@ pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
-    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
+    unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO), 0) }
 }
 
 /// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
@@ -33170,7 +34795,8 @@ pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_add_ps(a: __m512) -> f32 {
     unsafe {
         // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
         let a = _mm256_add_ps(
@@ -33189,7 +34815,8 @@ pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
     unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
 }
 
@@ -33199,7 +34826,8 @@ pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
     unsafe {
         let a = _mm256_add_pd(
             _mm512_extractf64x4_pd::<0>(a),
@@ -33216,7 +34844,8 @@ pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
     unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
 }
 
@@ -33226,8 +34855,9 @@ pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
-    unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
+    unsafe { simd_reduce_mul_ordered(a.as_i32x16(), 1) }
 }
 
 /// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -33236,13 +34866,13 @@ pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
     unsafe {
-        simd_reduce_mul_unordered(simd_select_bitmask(
-            k,
-            a.as_i32x16(),
-            _mm512_set1_epi32(1).as_i32x16(),
-        ))
+        simd_reduce_mul_ordered(
+            simd_select_bitmask(k, a.as_i32x16(), _mm512_set1_epi32(1).as_i32x16()),
+            1,
+        )
     }
 }
 
@@ -33252,8 +34882,9 @@ pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
-    unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
+    unsafe { simd_reduce_mul_ordered(a.as_i64x8(), 1) }
 }
 
 /// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -33262,13 +34893,13 @@ pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
     unsafe {
-        simd_reduce_mul_unordered(simd_select_bitmask(
-            k,
-            a.as_i64x8(),
-            _mm512_set1_epi64(1).as_i64x8(),
-        ))
+        simd_reduce_mul_ordered(
+            simd_select_bitmask(k, a.as_i64x8(), _mm512_set1_epi64(1).as_i64x8()),
+            1,
+        )
     }
 }
 
@@ -33278,7 +34909,8 @@ pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
     unsafe {
         // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
         let a = _mm256_mul_ps(
@@ -33297,7 +34929,8 @@ pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
     unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
 }
 
@@ -33307,7 +34940,8 @@ pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
     unsafe {
         let a = _mm256_mul_pd(
             _mm512_extractf64x4_pd::<0>(a),
@@ -33324,7 +34958,8 @@ pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
     unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
 }
 
@@ -33334,7 +34969,8 @@ pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
     unsafe { simd_reduce_max(a.as_i32x16()) }
 }
 
@@ -33344,7 +34980,8 @@ pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
     unsafe {
         simd_reduce_max(simd_select_bitmask(
             k,
@@ -33360,7 +34997,8 @@ pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
     unsafe { simd_reduce_max(a.as_i64x8()) }
 }
 
@@ -33370,7 +35008,8 @@ pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
 }
 
@@ -33380,7 +35019,8 @@ pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
     unsafe { simd_reduce_max(a.as_u32x16()) }
 }
 
@@ -33390,7 +35030,8 @@ pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
 }
 
@@ -33400,7 +35041,8 @@ pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
     unsafe { simd_reduce_max(a.as_u64x8()) }
 }
 
@@ -33410,7 +35052,8 @@ pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
     unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
 }
 
@@ -33475,7 +35118,8 @@ pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
     unsafe { simd_reduce_min(a.as_i32x16()) }
 }
 
@@ -33485,7 +35129,8 @@ pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
     unsafe {
         simd_reduce_min(simd_select_bitmask(
             k,
@@ -33501,7 +35146,8 @@ pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
     unsafe { simd_reduce_min(a.as_i64x8()) }
 }
 
@@ -33511,7 +35157,8 @@ pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
 }
 
@@ -33521,7 +35168,8 @@ pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
     unsafe { simd_reduce_min(a.as_u32x16()) }
 }
 
@@ -33531,7 +35179,8 @@ pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
     unsafe {
         simd_reduce_min(simd_select_bitmask(
             k,
@@ -33547,7 +35196,8 @@ pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
     unsafe { simd_reduce_min(a.as_u64x8()) }
 }
 
@@ -33557,7 +35207,8 @@ pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
     unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
 }
 
@@ -33622,7 +35273,8 @@ pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
     unsafe { simd_reduce_and(a.as_i32x16()) }
 }
 
@@ -33632,7 +35284,8 @@ pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
     unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
 }
 
@@ -33642,7 +35295,8 @@ pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
     unsafe { simd_reduce_and(a.as_i64x8()) }
 }
 
@@ -33652,7 +35306,8 @@ pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
     unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
 }
 
@@ -33662,7 +35317,8 @@ pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
     unsafe { simd_reduce_or(a.as_i32x16()) }
 }
 
@@ -33672,7 +35328,8 @@ pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
     unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
 }
 
@@ -33682,7 +35339,8 @@ pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
     unsafe { simd_reduce_or(a.as_i64x8()) }
 }
 
@@ -33692,7 +35350,8 @@ pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
     unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
 }
 
@@ -33706,7 +35365,8 @@ pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 // This intrinsic has no corresponding instruction.
-pub fn _mm512_undefined_pd() -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_undefined_pd() -> __m512d {
     unsafe { const { mem::zeroed() } }
 }
 
@@ -33720,7 +35380,8 @@ pub fn _mm512_undefined_pd() -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 // This intrinsic has no corresponding instruction.
-pub fn _mm512_undefined_ps() -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_undefined_ps() -> __m512 {
     unsafe { const { mem::zeroed() } }
 }
 
@@ -33734,7 +35395,8 @@ pub fn _mm512_undefined_ps() -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 // This intrinsic has no corresponding instruction.
-pub fn _mm512_undefined_epi32() -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_undefined_epi32() -> __m512i {
     unsafe { const { mem::zeroed() } }
 }
 
@@ -33748,7 +35410,8 @@ pub fn _mm512_undefined_epi32() -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 // This intrinsic has no corresponding instruction.
-pub fn _mm512_undefined() -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_undefined() -> __m512 {
     unsafe { const { mem::zeroed() } }
 }
 
@@ -33759,7 +35422,8 @@ pub fn _mm512_undefined() -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
-pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
     ptr::read_unaligned(mem_addr as *const __m512i)
 }
 
@@ -33770,7 +35434,8 @@ pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
-pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
     ptr::read_unaligned(mem_addr as *const __m256i)
 }
 
@@ -33781,7 +35446,8 @@ pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
-pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
     ptr::read_unaligned(mem_addr as *const __m128i)
 }
 
@@ -34287,7 +35953,8 @@ pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8,
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
-pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
     ptr::write_unaligned(mem_addr as *mut __m512i, a);
 }
 
@@ -34298,7 +35965,8 @@ pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
-pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
     ptr::write_unaligned(mem_addr as *mut __m256i, a);
 }
 
@@ -34309,7 +35977,8 @@ pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
-pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
     ptr::write_unaligned(mem_addr as *mut __m128i, a);
 }
 
@@ -34320,7 +35989,8 @@ pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
-pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
     ptr::read_unaligned(mem_addr as *const __m512i)
 }
 
@@ -34331,7 +36001,8 @@ pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
-pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
     ptr::read_unaligned(mem_addr as *const __m256i)
 }
 
@@ -34342,7 +36013,8 @@ pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
-pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
     ptr::read_unaligned(mem_addr as *const __m128i)
 }
 
@@ -34353,7 +36025,8 @@ pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
-pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
     ptr::write_unaligned(mem_addr as *mut __m512i, a);
 }
 
@@ -34364,7 +36037,8 @@ pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
-pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
     ptr::write_unaligned(mem_addr as *mut __m256i, a);
 }
 
@@ -34375,7 +36049,8 @@ pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
-pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
     ptr::write_unaligned(mem_addr as *mut __m128i, a);
 }
 
@@ -34386,7 +36061,8 @@ pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
-pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
     ptr::read_unaligned(mem_addr)
 }
 
@@ -34397,7 +36073,8 @@ pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
-pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
     ptr::write_unaligned(mem_addr, a);
 }
 
@@ -34410,7 +36087,8 @@ pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
     ptr::read_unaligned(mem_addr as *const __m512d)
 }
 
@@ -34423,7 +36101,8 @@ pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
     ptr::write_unaligned(mem_addr as *mut __m512d, a);
 }
 
@@ -34436,7 +36115,8 @@ pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
     ptr::read_unaligned(mem_addr as *const __m512)
 }
 
@@ -34449,7 +36129,8 @@ pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovups))]
-pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
     ptr::write_unaligned(mem_addr as *mut __m512, a);
 }
 
@@ -34463,7 +36144,8 @@ pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa32
-pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
     ptr::read(mem_addr)
 }
 
@@ -34477,7 +36159,8 @@ pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa32
-pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
     ptr::write(mem_addr, a);
 }
 
@@ -34491,7 +36174,8 @@ pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa32
-pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
     ptr::read(mem_addr as *const __m512i)
 }
 
@@ -34505,7 +36189,8 @@ pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa32
-pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
     ptr::read(mem_addr as *const __m256i)
 }
 
@@ -34519,7 +36204,8 @@ pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa32
-pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
     ptr::read(mem_addr as *const __m128i)
 }
 
@@ -34533,7 +36219,8 @@ pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa32
-pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
     ptr::write(mem_addr as *mut __m512i, a);
 }
 
@@ -34547,7 +36234,8 @@ pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa32
-pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
     ptr::write(mem_addr as *mut __m256i, a);
 }
 
@@ -34561,7 +36249,8 @@ pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa32
-pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
     ptr::write(mem_addr as *mut __m128i, a);
 }
 
@@ -34575,7 +36264,8 @@ pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa64
-pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
     ptr::read(mem_addr as *const __m512i)
 }
 
@@ -34589,7 +36279,8 @@ pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa64
-pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
     ptr::read(mem_addr as *const __m256i)
 }
 
@@ -34603,7 +36294,8 @@ pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa64
-pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
     ptr::read(mem_addr as *const __m128i)
 }
 
@@ -34617,7 +36309,8 @@ pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa64
-pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
     ptr::write(mem_addr as *mut __m512i, a);
 }
 
@@ -34631,7 +36324,8 @@ pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa64
-pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
     ptr::write(mem_addr as *mut __m256i, a);
 }
 
@@ -34645,7 +36339,8 @@ pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovdqa64
-pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
     ptr::write(mem_addr as *mut __m128i, a);
 }
 
@@ -34659,7 +36354,8 @@ pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )]
-pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
     ptr::read(mem_addr as *const __m512)
 }
 
@@ -34673,7 +36369,8 @@ pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )]
-pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
     ptr::write(mem_addr as *mut __m512, a);
 }
 
@@ -34687,7 +36384,8 @@ pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovapd
-pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
     ptr::read(mem_addr as *const __m512d)
 }
 
@@ -34701,7 +36399,8 @@ pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
     all(test, not(all(target_arch = "x86", target_env = "msvc"))),
     assert_instr(vmovaps)
 )] //should be vmovapd
-pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
     ptr::write(mem_addr as *mut __m512d, a);
 }
 
@@ -34714,8 +36413,14 @@ pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqu32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
-    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_loadu_epi32(
+    src: __m512i,
+    k: __mmask16,
+    mem_addr: *const i32,
+) -> __m512i {
+    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x16()).as_m512i()
 }
 
 /// Load packed 32-bit integers from memory into dst using zeromask k
@@ -34727,7 +36432,8 @@ pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *con
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqu32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
     _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
 }
 
@@ -34740,8 +36446,14 @@ pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqu64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
-    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_loadu_epi64(
+    src: __m512i,
+    k: __mmask8,
+    mem_addr: *const i64,
+) -> __m512i {
+    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x8()).as_m512i()
 }
 
 /// Load packed 64-bit integers from memory into dst using zeromask k
@@ -34753,7 +36465,8 @@ pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *cons
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqu64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
     _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
 }
 
@@ -34766,8 +36479,14 @@ pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
-    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_loadu_ps(
+    src: __m512,
+    k: __mmask16,
+    mem_addr: *const f32,
+) -> __m512 {
+    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x16()).as_m512()
 }
 
 /// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@@ -34779,7 +36498,8 @@ pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
     _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
 }
 
@@ -34792,8 +36512,14 @@ pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m51
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovupd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
-    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_loadu_pd(
+    src: __m512d,
+    k: __mmask8,
+    mem_addr: *const f64,
+) -> __m512d {
+    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x8()).as_m512d()
 }
 
 /// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@@ -34805,7 +36531,8 @@ pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovupd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
     _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
 }
 
@@ -34818,8 +36545,14 @@ pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
-    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_loadu_epi32(
+    src: __m256i,
+    k: __mmask8,
+    mem_addr: *const i32,
+) -> __m256i {
+    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x8()).as_m256i()
 }
 
 /// Load packed 32-bit integers from memory into dst using zeromask k
@@ -34831,7 +36564,8 @@ pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *cons
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
     _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
 }
 
@@ -34844,8 +36578,14 @@ pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
-    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_loadu_epi64(
+    src: __m256i,
+    k: __mmask8,
+    mem_addr: *const i64,
+) -> __m256i {
+    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x4()).as_m256i()
 }
 
 /// Load packed 64-bit integers from memory into dst using zeromask k
@@ -34857,7 +36597,8 @@ pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *cons
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
     _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
 }
 
@@ -34870,8 +36611,10 @@ pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
-    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
+    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x8()).as_m256()
 }
 
 /// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@@ -34883,7 +36626,8 @@ pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f3
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
     _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
 }
 
@@ -34896,8 +36640,14 @@ pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovupd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
-    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_loadu_pd(
+    src: __m256d,
+    k: __mmask8,
+    mem_addr: *const f64,
+) -> __m256d {
+    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x4()).as_m256d()
 }
 
 /// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@@ -34909,7 +36659,8 @@ pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovupd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
     _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
 }
 
@@ -34922,8 +36673,14 @@ pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
-    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_loadu_epi32(
+    src: __m128i,
+    k: __mmask8,
+    mem_addr: *const i32,
+) -> __m128i {
+    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x4()).as_m128i()
 }
 
 /// Load packed 32-bit integers from memory into dst using zeromask k
@@ -34935,7 +36692,8 @@ pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
     _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
 }
 
@@ -34948,8 +36706,14 @@ pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
-    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_loadu_epi64(
+    src: __m128i,
+    k: __mmask8,
+    mem_addr: *const i64,
+) -> __m128i {
+    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x2()).as_m128i()
 }
 
 /// Load packed 64-bit integers from memory into dst using zeromask k
@@ -34961,7 +36725,8 @@ pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
     _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
 }
 
@@ -34974,8 +36739,10 @@ pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
-    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
+    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x4()).as_m128()
 }
 
 /// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@@ -34987,7 +36754,8 @@ pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
     _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
 }
 
@@ -35000,8 +36768,10 @@ pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovupd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
-    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
+    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
+    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x2()).as_m128d()
 }
 
 /// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@@ -35013,7 +36783,8 @@ pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovupd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
     _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
 }
 
@@ -35026,8 +36797,14 @@ pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
-    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_load_epi32(
+    src: __m512i,
+    k: __mmask16,
+    mem_addr: *const i32,
+) -> __m512i {
+    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x16()).as_m512i()
 }
 
 /// Load packed 32-bit integers from memory into dst using zeromask k
@@ -35039,7 +36816,8 @@ pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *cons
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
     _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
 }
 
@@ -35052,8 +36830,14 @@ pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
-    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_load_epi64(
+    src: __m512i,
+    k: __mmask8,
+    mem_addr: *const i64,
+) -> __m512i {
+    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x8()).as_m512i()
 }
 
 /// Load packed 64-bit integers from memory into dst using zeromask k
@@ -35065,7 +36849,8 @@ pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
     _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
 }
 
@@ -35078,8 +36863,10 @@ pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m5
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
-    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
+    let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x16()).as_m512()
 }
 
 /// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@@ -35091,7 +36878,8 @@ pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f3
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
     _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
 }
 
@@ -35104,8 +36892,14 @@ pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovapd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
-    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_load_pd(
+    src: __m512d,
+    k: __mmask8,
+    mem_addr: *const f64,
+) -> __m512d {
+    let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x8()).as_m512d()
 }
 
 /// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@@ -35117,7 +36911,8 @@ pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f6
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovapd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
     _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
 }
 
@@ -35130,8 +36925,14 @@ pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
-    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_load_epi32(
+    src: __m256i,
+    k: __mmask8,
+    mem_addr: *const i32,
+) -> __m256i {
+    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x8()).as_m256i()
 }
 
 /// Load packed 32-bit integers from memory into dst using zeromask k
@@ -35143,7 +36944,8 @@ pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
     _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
 }
 
@@ -35156,8 +36958,14 @@ pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
-    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_load_epi64(
+    src: __m256i,
+    k: __mmask8,
+    mem_addr: *const i64,
+) -> __m256i {
+    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x4()).as_m256i()
 }
 
 /// Load packed 64-bit integers from memory into dst using zeromask k
@@ -35169,7 +36977,8 @@ pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
     _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
 }
 
@@ -35182,8 +36991,10 @@ pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m2
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
-    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
+    let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x8()).as_m256()
 }
 
 /// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@@ -35195,7 +37006,8 @@ pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
     _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
 }
 
@@ -35208,8 +37020,14 @@ pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovapd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
-    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_load_pd(
+    src: __m256d,
+    k: __mmask8,
+    mem_addr: *const f64,
+) -> __m256d {
+    let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x4()).as_m256d()
 }
 
 /// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@@ -35221,7 +37039,8 @@ pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f6
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovapd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
     _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
 }
 
@@ -35234,8 +37053,14 @@ pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
-    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_load_epi32(
+    src: __m128i,
+    k: __mmask8,
+    mem_addr: *const i32,
+) -> __m128i {
+    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x4()).as_m128i()
 }
 
 /// Load packed 32-bit integers from memory into dst using zeromask k
@@ -35247,7 +37072,8 @@ pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i3
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
     _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
 }
 
@@ -35260,8 +37086,14 @@ pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
-    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_load_epi64(
+    src: __m128i,
+    k: __mmask8,
+    mem_addr: *const i64,
+) -> __m128i {
+    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x2()).as_m128i()
 }
 
 /// Load packed 64-bit integers from memory into dst using zeromask k
@@ -35273,7 +37105,8 @@ pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i6
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
     _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
 }
 
@@ -35286,8 +37119,10 @@ pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
-    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
+    let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x4()).as_m128()
 }
 
 /// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@@ -35299,7 +37134,8 @@ pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
     _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
 }
 
@@ -35312,8 +37148,10 @@ pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovapd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
-    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
+    let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
+    simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x2()).as_m128d()
 }
 
 /// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@@ -35325,7 +37163,8 @@ pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovapd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
     _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
 }
 
@@ -35425,8 +37264,10 @@ pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqu32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
-    storedqu32_512(mem_addr, a.as_i32x16(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
+    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x16());
 }
 
 /// Store packed 64-bit integers from a into memory using writemask k.
@@ -35437,8 +37278,10 @@ pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: _
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqu64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
-    storedqu64_512(mem_addr, a.as_i64x8(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
+    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x8());
 }
 
 /// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@@ -35449,8 +37292,10 @@ pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
-    storeups_512(mem_addr, a.as_f32x16(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
+    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x16());
 }
 
 /// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@@ -35461,8 +37306,10 @@ pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m5
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovupd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
-    storeupd_512(mem_addr, a.as_f64x8(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
+    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x8());
 }
 
 /// Store packed 32-bit integers from a into memory using writemask k.
@@ -35473,8 +37320,10 @@ pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m51
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
-    storedqu32_256(mem_addr, a.as_i32x8(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
+    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8());
 }
 
 /// Store packed 64-bit integers from a into memory using writemask k.
@@ -35485,8 +37334,10 @@ pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
-    storedqu64_256(mem_addr, a.as_i64x4(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
+    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4());
 }
 
 /// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@@ -35497,8 +37348,10 @@ pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
-    storeups_256(mem_addr, a.as_f32x8(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
+    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x8());
 }
 
 /// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@@ -35509,8 +37362,10 @@ pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovupd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
-    storeupd_256(mem_addr, a.as_f64x4(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
+    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x4());
 }
 
 /// Store packed 32-bit integers from a into memory using writemask k.
@@ -35521,8 +37376,10 @@ pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m25
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
-    storedqu32_128(mem_addr, a.as_i32x4(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
+    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4());
 }
 
 /// Store packed 64-bit integers from a into memory using writemask k.
@@ -35533,8 +37390,10 @@ pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqu64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
-    storedqu64_128(mem_addr, a.as_i64x2(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
+    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2());
 }
 
 /// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@@ -35545,8 +37404,10 @@ pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m12
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
-    storeups_128(mem_addr, a.as_f32x4(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
+    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x4());
 }
 
 /// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@@ -35557,8 +37418,10 @@ pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128)
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovupd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
-    storeupd_128(mem_addr, a.as_f64x2(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
+    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
+    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x2());
 }
 
 /// Store packed 32-bit integers from a into memory using writemask k.
@@ -35569,8 +37432,10 @@ pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d)
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
-    storedqa32_512(mem_addr, a.as_i32x16(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
+    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x16());
 }
 
 /// Store packed 64-bit integers from a into memory using writemask k.
@@ -35581,8 +37446,10 @@ pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
-    storedqa64_512(mem_addr, a.as_i64x8(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
+    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x8());
 }
 
 /// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@@ -35593,8 +37460,10 @@ pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
-    storeaps_512(mem_addr, a.as_f32x16(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
+    let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x16());
 }
 
 /// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@@ -35605,8 +37474,10 @@ pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m51
 #[target_feature(enable = "avx512f")]
 #[cfg_attr(test, assert_instr(vmovapd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
-    storeapd_512(mem_addr, a.as_f64x8(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
+    let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x8());
 }
 
 /// Store packed 32-bit integers from a into memory using writemask k.
@@ -35617,8 +37488,10 @@ pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
-    storedqa32_256(mem_addr, a.as_i32x8(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
+    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x8());
 }
 
 /// Store packed 64-bit integers from a into memory using writemask k.
@@ -35629,8 +37502,10 @@ pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
-    storedqa64_256(mem_addr, a.as_i64x4(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
+    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x4());
 }
 
 /// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@@ -35641,8 +37516,10 @@ pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
-    storeaps_256(mem_addr, a.as_f32x8(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
+    let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x8());
 }
 
 /// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@@ -35653,8 +37530,10 @@ pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovapd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
-    storeapd_256(mem_addr, a.as_f64x4(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
+    let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x4());
 }
 
 /// Store packed 32-bit integers from a into memory using writemask k.
@@ -35665,8 +37544,10 @@ pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa32))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
-    storedqa32_128(mem_addr, a.as_i32x4(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
+    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x4());
 }
 
 /// Store packed 64-bit integers from a into memory using writemask k.
@@ -35677,8 +37558,10 @@ pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovdqa64))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
-    storedqa64_128(mem_addr, a.as_i64x2(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
+    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x2());
 }
 
 /// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@@ -35689,8 +37572,10 @@ pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovaps))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
-    storeaps_128(mem_addr, a.as_f32x4(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
+    let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x4());
 }
 
 /// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@@ -35701,8 +37586,10 @@ pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
 #[target_feature(enable = "avx512f,avx512vl")]
 #[cfg_attr(test, assert_instr(vmovapd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
-    storeapd_128(mem_addr, a.as_f64x2(), mask)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
+    let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
+    simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x2());
 }
 
 /// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
@@ -36047,7 +37934,8 @@ pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_setr_pd(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setr_pd(
     e0: f64,
     e1: f64,
     e2: f64,
@@ -36069,7 +37957,8 @@ pub fn _mm512_setr_pd(
 #[inline]
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _mm512_set_pd(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set_pd(
     e0: f64,
     e1: f64,
     e2: f64,
@@ -36089,7 +37978,8 @@ pub fn _mm512_set_pd(
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovss))]
-pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let extractsrc: f32 = simd_extract!(src, 0);
         let mut mov: f32 = extractsrc;
@@ -36107,7 +37997,8 @@ pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovss))]
-pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let mut mov: f32 = 0.;
         if (k & 0b00000001) != 0 {
@@ -36124,7 +38015,8 @@ pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovsd))]
-pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let extractsrc: f64 = simd_extract!(src, 0);
         let mut mov: f64 = extractsrc;
@@ -36142,7 +38034,8 @@ pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmovsd))]
-pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let mut mov: f64 = 0.;
         if (k & 0b00000001) != 0 {
@@ -36159,7 +38052,8 @@ pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddss))]
-pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let extractsrc: f32 = simd_extract!(src, 0);
         let mut add: f32 = extractsrc;
@@ -36179,7 +38073,8 @@ pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddss))]
-pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let mut add: f32 = 0.;
         if (k & 0b00000001) != 0 {
@@ -36198,7 +38093,8 @@ pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddsd))]
-pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let extractsrc: f64 = simd_extract!(src, 0);
         let mut add: f64 = extractsrc;
@@ -36218,7 +38114,8 @@ pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vaddsd))]
-pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let mut add: f64 = 0.;
         if (k & 0b00000001) != 0 {
@@ -36237,7 +38134,8 @@ pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubss))]
-pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let extractsrc: f32 = simd_extract!(src, 0);
         let mut add: f32 = extractsrc;
@@ -36257,7 +38155,8 @@ pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubss))]
-pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let mut add: f32 = 0.;
         if (k & 0b00000001) != 0 {
@@ -36276,7 +38175,8 @@ pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubsd))]
-pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let extractsrc: f64 = simd_extract!(src, 0);
         let mut add: f64 = extractsrc;
@@ -36296,7 +38196,8 @@ pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vsubsd))]
-pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let mut add: f64 = 0.;
         if (k & 0b00000001) != 0 {
@@ -36315,7 +38216,8 @@ pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulss))]
-pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let extractsrc: f32 = simd_extract!(src, 0);
         let mut add: f32 = extractsrc;
@@ -36335,7 +38237,8 @@ pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulss))]
-pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let mut add: f32 = 0.;
         if (k & 0b00000001) != 0 {
@@ -36354,7 +38257,8 @@ pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulsd))]
-pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let extractsrc: f64 = simd_extract!(src, 0);
         let mut add: f64 = extractsrc;
@@ -36374,7 +38278,8 @@ pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vmulsd))]
-pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let mut add: f64 = 0.;
         if (k & 0b00000001) != 0 {
@@ -36393,7 +38298,8 @@ pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivss))]
-pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let extractsrc: f32 = simd_extract!(src, 0);
         let mut add: f32 = extractsrc;
@@ -36413,7 +38319,8 @@ pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivss))]
-pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
     unsafe {
         let mut add: f32 = 0.;
         if (k & 0b00000001) != 0 {
@@ -36432,7 +38339,8 @@ pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivsd))]
-pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let extractsrc: f64 = simd_extract!(src, 0);
         let mut add: f64 = extractsrc;
@@ -36452,7 +38360,8 @@ pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vdivsd))]
-pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let mut add: f64 = 0.;
         if (k & 0b00000001) != 0 {
@@ -37436,7 +39345,8 @@ pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let mut fmadd: f32 = simd_extract!(a, 0);
         if (k & 0b00000001) != 0 {
@@ -37455,7 +39365,8 @@ pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let mut fmadd: f32 = 0.;
         if (k & 0b00000001) != 0 {
@@ -37475,7 +39386,8 @@ pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe {
         let mut fmadd: f32 = simd_extract!(c, 0);
         if (k & 0b00000001) != 0 {
@@ -37494,7 +39406,8 @@ pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let mut fmadd: f64 = simd_extract!(a, 0);
         if (k & 0b00000001) != 0 {
@@ -37513,7 +39426,8 @@ pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let mut fmadd: f64 = 0.;
         if (k & 0b00000001) != 0 {
@@ -37533,7 +39447,8 @@ pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe {
         let mut fmadd: f64 = simd_extract!(c, 0);
         if (k & 0b00000001) != 0 {
@@ -37552,7 +39467,8 @@ pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let mut fmsub: f32 = simd_extract!(a, 0);
         if (k & 0b00000001) != 0 {
@@ -37572,7 +39488,8 @@ pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let mut fmsub: f32 = 0.;
         if (k & 0b00000001) != 0 {
@@ -37593,7 +39510,8 @@ pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe {
         let mut fmsub: f32 = simd_extract!(c, 0);
         if (k & 0b00000001) != 0 {
@@ -37613,7 +39531,8 @@ pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let mut fmsub: f64 = simd_extract!(a, 0);
         if (k & 0b00000001) != 0 {
@@ -37633,7 +39552,8 @@ pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let mut fmsub: f64 = 0.;
         if (k & 0b00000001) != 0 {
@@ -37654,7 +39574,8 @@ pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe {
         let mut fmsub: f64 = simd_extract!(c, 0);
         if (k & 0b00000001) != 0 {
@@ -37674,7 +39595,8 @@ pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let mut fnmadd: f32 = simd_extract!(a, 0);
         if (k & 0b00000001) != 0 {
@@ -37694,7 +39616,8 @@ pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let mut fnmadd: f32 = 0.;
         if (k & 0b00000001) != 0 {
@@ -37715,7 +39638,8 @@ pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe {
         let mut fnmadd: f32 = simd_extract!(c, 0);
         if (k & 0b00000001) != 0 {
@@ -37735,7 +39659,8 @@ pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let mut fnmadd: f64 = simd_extract!(a, 0);
         if (k & 0b00000001) != 0 {
@@ -37755,7 +39680,8 @@ pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let mut fnmadd: f64 = 0.;
         if (k & 0b00000001) != 0 {
@@ -37776,7 +39702,8 @@ pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe {
         let mut fnmadd: f64 = simd_extract!(c, 0);
         if (k & 0b00000001) != 0 {
@@ -37796,7 +39723,8 @@ pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let mut fnmsub: f32 = simd_extract!(a, 0);
         if (k & 0b00000001) != 0 {
@@ -37817,7 +39745,8 @@ pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m12
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let mut fnmsub: f32 = 0.;
         if (k & 0b00000001) != 0 {
@@ -37839,7 +39768,8 @@ pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
     unsafe {
         let mut fnmsub: f32 = simd_extract!(c, 0);
         if (k & 0b00000001) != 0 {
@@ -37860,7 +39790,8 @@ pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m1
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let mut fnmsub: f64 = simd_extract!(a, 0);
         if (k & 0b00000001) != 0 {
@@ -37881,7 +39812,8 @@ pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let mut fnmsub: f64 = 0.;
         if (k & 0b00000001) != 0 {
@@ -37903,7 +39835,8 @@ pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> _
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
     unsafe {
         let mut fnmsub: f64 = simd_extract!(c, 0);
         if (k & 0b00000001) != 0 {
@@ -40700,7 +42633,7 @@ pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
     }
 }
 
-/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
+/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from b to the upper elements of dst. imm8 is used to set the required flags reporting.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
 #[inline]
@@ -40716,12 +42649,12 @@ pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m
         let c = c.as_i32x4();
         let r = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
         let fixupimm: f32 = simd_extract!(r, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
+/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from b to the upper elements of dst. imm8 is used to set the required flags reporting.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
 #[inline]
@@ -40742,12 +42675,12 @@ pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
         let c = c.as_i32x4();
         let fixupimm = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
         let fixupimm: f32 = simd_extract!(fixupimm, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
+/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from b to the upper elements of dst. imm8 is used to set the required flags reporting.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
 #[inline]
@@ -40768,12 +42701,12 @@ pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
         let c = c.as_i32x4();
         let fixupimm = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
         let fixupimm: f32 = simd_extract!(fixupimm, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
+/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from b to the upper element of dst. imm8 is used to set the required flags reporting.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
 #[inline]
@@ -40789,12 +42722,12 @@ pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> _
         let c = c.as_i64x2();
         let fixupimm = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
         let fixupimm: f64 = simd_extract!(fixupimm, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
+/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from b to the upper element of dst. imm8 is used to set the required flags reporting.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
 #[inline]
@@ -40815,12 +42748,12 @@ pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
         let c = c.as_i64x2();
         let fixupimm = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
         let fixupimm: f64 = simd_extract!(fixupimm, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
+/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from b to the upper element of dst. imm8 is used to set the required flags reporting.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
 #[inline]
@@ -40841,12 +42774,12 @@ pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
         let c = c.as_i64x2();
         let fixupimm = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
         let fixupimm: f64 = simd_extract!(fixupimm, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
+/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from b to the upper elements of dst. imm8 is used to set the required flags reporting.\
 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
@@ -40868,12 +42801,12 @@ pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
         let c = c.as_i32x4();
         let r = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
         let fixupimm: f32 = simd_extract!(r, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
+/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from b to the upper elements of dst. imm8 is used to set the required flags reporting.\
 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
@@ -40896,12 +42829,12 @@ pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
         let c = c.as_i32x4();
         let r = vfixupimmss(a, b, c, IMM8, k, SAE);
         let fixupimm: f32 = simd_extract!(r, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
+/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from b to the upper elements of dst. imm8 is used to set the required flags reporting.\
 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
@@ -40924,12 +42857,12 @@ pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
         let c = c.as_i32x4();
         let r = vfixupimmssz(a, b, c, IMM8, k, SAE);
         let fixupimm: f32 = simd_extract!(r, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
+/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from b to the upper element of dst. imm8 is used to set the required flags reporting.\
 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
@@ -40951,12 +42884,12 @@ pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
         let c = c.as_i64x2();
         let r = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
         let fixupimm: f64 = simd_extract!(r, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
+/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from b to the upper element of dst. imm8 is used to set the required flags reporting.\
 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
@@ -40979,12 +42912,12 @@ pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
         let c = c.as_i64x2();
         let r = vfixupimmsd(a, b, c, IMM8, k, SAE);
         let fixupimm: f64 = simd_extract!(r, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
 
-/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
+/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from b to the upper element of dst. imm8 is used to set the required flags reporting.\
 /// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
@@ -41007,7 +42940,7 @@ pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
         let c = c.as_i64x2();
         let r = vfixupimmsdz(a, b, c, IMM8, k, SAE);
         let fixupimm: f64 = simd_extract!(r, 0);
-        let r = simd_insert!(a, 0, fixupimm);
+        let r = simd_insert!(b, 0, fixupimm);
         transmute(r)
     }
 }
@@ -41483,7 +43416,8 @@ pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtsi2ss))]
-pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
     unsafe {
         let b = b as f32;
         simd_insert!(a, 0, b)
@@ -41497,7 +43431,8 @@ pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtsi2sd))]
-pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
     unsafe {
         let b = b as f64;
         simd_insert!(a, 0, b)
@@ -41657,7 +43592,8 @@ pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtusi2ss))]
-pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
     unsafe {
         let b = b as f32;
         simd_insert!(a, 0, b)
@@ -41671,7 +43607,8 @@ pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtusi2sd))]
-pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
     unsafe {
         let b = b as f64;
         simd_insert!(a, 0, b)
@@ -42459,9 +44396,13 @@ unsafe extern "C" {
 
     #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
     fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
+    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps"]
+    fn vcvtpd2ps128(a: f64x2, src: f32x4, mask: u8) -> f32x4;
     #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
     fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
 
+    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.128"]
+    fn vcvtpd2dq128(a: f64x2, src: i32x4, k: u8) -> i32x4;
     #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
     fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
 
@@ -42489,10 +44430,6 @@ unsafe extern "C" {
 
     #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
     fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
-    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
-    fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
-    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
-    fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
 
     #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
     fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
@@ -42503,8 +44440,6 @@ unsafe extern "C" {
 
     #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
     fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
-    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
-    fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
     #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
     fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
 
@@ -42833,15 +44768,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
     fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
 
-    #[link_name = "llvm.x86.avx512.psllv.d.512"]
-    fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
-    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
-    fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
-    #[link_name = "llvm.x86.avx512.psllv.q.512"]
-    fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
-    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
-    fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
-
     #[link_name = "llvm.x86.avx512.psll.d.512"]
     fn vpslld(a: i32x16, count: i32x4) -> i32x16;
     #[link_name = "llvm.x86.avx512.psrl.d.512"]
@@ -42861,16 +44787,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.psra.q.128"]
     fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
 
-    #[link_name = "llvm.x86.avx512.psrav.d.512"]
-    fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
-
-    #[link_name = "llvm.x86.avx512.psrav.q.512"]
-    fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
-    #[link_name = "llvm.x86.avx512.psrav.q.256"]
-    fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
-    #[link_name = "llvm.x86.avx512.psrav.q.128"]
-    fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
-
     #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
     fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
     #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
@@ -43109,106 +45025,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.vcomi.sd"]
     fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
 
-    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
-    fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
-    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
-    fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
-    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
-    fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
-    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
-    fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
-    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
-    fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
-    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
-    fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
-    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
-    fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
-    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
-    fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
-    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
-    fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
-    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
-    fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
-    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
-    fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
-    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
-    fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
-
-    #[link_name = "llvm.x86.avx512.mask.load.d.128"]
-    fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
-    #[link_name = "llvm.x86.avx512.mask.load.q.128"]
-    fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
-    #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
-    fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
-    #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
-    fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
-    #[link_name = "llvm.x86.avx512.mask.load.d.256"]
-    fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
-    #[link_name = "llvm.x86.avx512.mask.load.q.256"]
-    fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
-    #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
-    fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
-    #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
-    fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
-    #[link_name = "llvm.x86.avx512.mask.load.d.512"]
-    fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
-    #[link_name = "llvm.x86.avx512.mask.load.q.512"]
-    fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
-    #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
-    fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
-    #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
-    fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
-
-    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
-    fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
-    fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
-    fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
-    fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
-    fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
-    fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
-    fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
-    fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
-    fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
-    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
-    fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
-    fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
-    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
-    fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
-
-    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
-    fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
-    fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
-    fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
-    fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
-    fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
-    fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
-    fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
-    fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
-    fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
-    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
-    fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
-    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
-    fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
-    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
-    fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
-
     #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
     fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
     #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
@@ -43238,6 +45054,7 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
 
     use stdarch_test::simd_test;
 
@@ -43246,7 +45063,7 @@ mod tests {
     use crate::mem::{self};
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_abs_epi32() {
+    const fn test_mm512_abs_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43266,7 +45083,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_abs_epi32() {
+    const fn test_mm512_mask_abs_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43288,7 +45105,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_abs_epi32() {
+    const fn test_mm512_maskz_abs_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43310,7 +45127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_abs_epi32() {
+    const fn test_mm256_mask_abs_epi32() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43328,7 +45145,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_abs_epi32() {
+    const fn test_mm256_maskz_abs_epi32() {
         #[rustfmt::skip]
         let a = _mm256_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43346,7 +45163,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_abs_epi32() {
+    const fn test_mm_mask_abs_epi32() {
         let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
         let r = _mm_mask_abs_epi32(a, 0, a);
         assert_eq_m128i(r, a);
@@ -43356,7 +45173,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_abs_epi32() {
+    const fn test_mm_maskz_abs_epi32() {
         let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
         let r = _mm_maskz_abs_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -43366,7 +45183,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_abs_ps() {
+    const fn test_mm512_abs_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -43386,7 +45203,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_abs_ps() {
+    const fn test_mm512_mask_abs_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -43408,7 +45225,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mov_epi32() {
+    const fn test_mm512_mask_mov_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(2);
         let r = _mm512_mask_mov_epi32(src, 0, a);
@@ -43418,7 +45235,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mov_epi32() {
+    const fn test_mm512_maskz_mov_epi32() {
         let a = _mm512_set1_epi32(2);
         let r = _mm512_maskz_mov_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -43427,7 +45244,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_mov_epi32() {
+    const fn test_mm256_mask_mov_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(2);
         let r = _mm256_mask_mov_epi32(src, 0, a);
@@ -43437,7 +45254,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_mov_epi32() {
+    const fn test_mm256_maskz_mov_epi32() {
         let a = _mm256_set1_epi32(2);
         let r = _mm256_maskz_mov_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -43446,7 +45263,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_mov_epi32() {
+    const fn test_mm_mask_mov_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(2);
         let r = _mm_mask_mov_epi32(src, 0, a);
@@ -43456,7 +45273,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_mov_epi32() {
+    const fn test_mm_maskz_mov_epi32() {
         let a = _mm_set1_epi32(2);
         let r = _mm_maskz_mov_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -43465,7 +45282,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mov_ps() {
+    const fn test_mm512_mask_mov_ps() {
         let src = _mm512_set1_ps(1.);
         let a = _mm512_set1_ps(2.);
         let r = _mm512_mask_mov_ps(src, 0, a);
@@ -43475,7 +45292,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mov_ps() {
+    const fn test_mm512_maskz_mov_ps() {
         let a = _mm512_set1_ps(2.);
         let r = _mm512_maskz_mov_ps(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -43484,7 +45301,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_mov_ps() {
+    const fn test_mm256_mask_mov_ps() {
         let src = _mm256_set1_ps(1.);
         let a = _mm256_set1_ps(2.);
         let r = _mm256_mask_mov_ps(src, 0, a);
@@ -43494,7 +45311,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_mov_ps() {
+    const fn test_mm256_maskz_mov_ps() {
         let a = _mm256_set1_ps(2.);
         let r = _mm256_maskz_mov_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -43503,7 +45320,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_mov_ps() {
+    const fn test_mm_mask_mov_ps() {
         let src = _mm_set1_ps(1.);
         let a = _mm_set1_ps(2.);
         let r = _mm_mask_mov_ps(src, 0, a);
@@ -43513,7 +45330,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_mov_ps() {
+    const fn test_mm_maskz_mov_ps() {
         let a = _mm_set1_ps(2.);
         let r = _mm_maskz_mov_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -43522,7 +45339,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_add_epi32() {
+    const fn test_mm512_add_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43543,7 +45360,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_add_epi32() {
+    const fn test_mm512_mask_add_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43566,7 +45383,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_add_epi32() {
+    const fn test_mm512_maskz_add_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43589,7 +45406,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_add_epi32() {
+    const fn test_mm256_mask_add_epi32() {
         let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_mask_add_epi32(a, 0, a, b);
@@ -43600,7 +45417,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_add_epi32() {
+    const fn test_mm256_maskz_add_epi32() {
         let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_maskz_add_epi32(0, a, b);
@@ -43611,7 +45428,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_add_epi32() {
+    const fn test_mm_mask_add_epi32() {
         let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
         let b = _mm_set1_epi32(1);
         let r = _mm_mask_add_epi32(a, 0, a, b);
@@ -43622,7 +45439,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_add_epi32() {
+    const fn test_mm_maskz_add_epi32() {
         let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
         let b = _mm_set1_epi32(1);
         let r = _mm_maskz_add_epi32(0, a, b);
@@ -43633,7 +45450,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_add_ps() {
+    const fn test_mm512_add_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -43654,7 +45471,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_add_ps() {
+    const fn test_mm512_mask_add_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -43677,7 +45494,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_add_ps() {
+    const fn test_mm512_maskz_add_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -43700,7 +45517,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_add_ps() {
+    const fn test_mm256_mask_add_ps() {
         let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
         let b = _mm256_set1_ps(1.);
         let r = _mm256_mask_add_ps(a, 0, a, b);
@@ -43711,7 +45528,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_add_ps() {
+    const fn test_mm256_maskz_add_ps() {
         let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
         let b = _mm256_set1_ps(1.);
         let r = _mm256_maskz_add_ps(0, a, b);
@@ -43722,7 +45539,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_add_ps() {
+    const fn test_mm_mask_add_ps() {
         let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
         let b = _mm_set1_ps(1.);
         let r = _mm_mask_add_ps(a, 0, a, b);
@@ -43733,7 +45550,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_add_ps() {
+    const fn test_mm_maskz_add_ps() {
         let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
         let b = _mm_set1_ps(1.);
         let r = _mm_maskz_add_ps(0, a, b);
@@ -43744,7 +45561,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sub_epi32() {
+    const fn test_mm512_sub_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43765,7 +45582,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sub_epi32() {
+    const fn test_mm512_mask_sub_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43788,7 +45605,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sub_epi32() {
+    const fn test_mm512_maskz_sub_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43811,7 +45628,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sub_epi32() {
+    const fn test_mm256_mask_sub_epi32() {
         let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_mask_sub_epi32(a, 0, a, b);
@@ -43822,7 +45639,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sub_epi32() {
+    const fn test_mm256_maskz_sub_epi32() {
         let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_maskz_sub_epi32(0, a, b);
@@ -43833,7 +45650,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sub_epi32() {
+    const fn test_mm_mask_sub_epi32() {
         let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
         let b = _mm_set1_epi32(1);
         let r = _mm_mask_sub_epi32(a, 0, a, b);
@@ -43844,7 +45661,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sub_epi32() {
+    const fn test_mm_maskz_sub_epi32() {
         let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
         let b = _mm_set1_epi32(1);
         let r = _mm_maskz_sub_epi32(0, a, b);
@@ -43855,7 +45672,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sub_ps() {
+    const fn test_mm512_sub_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -43876,7 +45693,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sub_ps() {
+    const fn test_mm512_mask_sub_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -43899,7 +45716,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sub_ps() {
+    const fn test_mm512_maskz_sub_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -43922,7 +45739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sub_ps() {
+    const fn test_mm256_mask_sub_ps() {
         let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
         let b = _mm256_set1_ps(1.);
         let r = _mm256_mask_sub_ps(a, 0, a, b);
@@ -43933,7 +45750,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sub_ps() {
+    const fn test_mm256_maskz_sub_ps() {
         let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
         let b = _mm256_set1_ps(1.);
         let r = _mm256_maskz_sub_ps(0, a, b);
@@ -43944,7 +45761,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sub_ps() {
+    const fn test_mm_mask_sub_ps() {
         let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
         let b = _mm_set1_ps(1.);
         let r = _mm_mask_sub_ps(a, 0, a, b);
@@ -43955,7 +45772,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sub_ps() {
+    const fn test_mm_maskz_sub_ps() {
         let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
         let b = _mm_set1_ps(1.);
         let r = _mm_maskz_sub_ps(0, a, b);
@@ -43966,7 +45783,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mullo_epi32() {
+    const fn test_mm512_mullo_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -43983,7 +45800,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mullo_epi32() {
+    const fn test_mm512_mask_mullo_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -44006,7 +45823,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mullo_epi32() {
+    const fn test_mm512_maskz_mullo_epi32() {
         #[rustfmt::skip]
         let a = _mm512_setr_epi32(
             0, 1, -1, i32::MAX,
@@ -44023,7 +45840,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_mullo_epi32() {
+    const fn test_mm256_mask_mullo_epi32() {
         let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
         let b = _mm256_set1_epi32(2);
         let r = _mm256_mask_mullo_epi32(a, 0, a, b);
@@ -44034,7 +45851,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_mullo_epi32() {
+    const fn test_mm256_maskz_mullo_epi32() {
         let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
         let b = _mm256_set1_epi32(2);
         let r = _mm256_maskz_mullo_epi32(0, a, b);
@@ -44045,7 +45862,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_mullo_epi32() {
+    const fn test_mm_mask_mullo_epi32() {
         let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
         let b = _mm_set1_epi32(2);
         let r = _mm_mask_mullo_epi32(a, 0, a, b);
@@ -44056,7 +45873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_mullo_epi32() {
+    const fn test_mm_maskz_mullo_epi32() {
         let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
         let b = _mm_set1_epi32(2);
         let r = _mm_maskz_mullo_epi32(0, a, b);
@@ -44067,7 +45884,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mul_ps() {
+    const fn test_mm512_mul_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -44089,7 +45906,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mul_ps() {
+    const fn test_mm512_mask_mul_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -44112,7 +45929,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mul_ps() {
+    const fn test_mm512_maskz_mul_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1., -1., f32::MAX,
@@ -44135,7 +45952,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_mul_ps() {
+    const fn test_mm256_mask_mul_ps() {
         let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
         let b = _mm256_set1_ps(2.);
         let r = _mm256_mask_mul_ps(a, 0, a, b);
@@ -44150,7 +45967,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_mul_ps() {
+    const fn test_mm256_maskz_mul_ps() {
         let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
         let b = _mm256_set1_ps(2.);
         let r = _mm256_maskz_mul_ps(0, a, b);
@@ -44165,7 +45982,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_mul_ps() {
+    const fn test_mm_mask_mul_ps() {
         let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
         let b = _mm_set1_ps(2.);
         let r = _mm_mask_mul_ps(a, 0, a, b);
@@ -44176,7 +45993,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_mul_ps() {
+    const fn test_mm_maskz_mul_ps() {
         let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
         let b = _mm_set1_ps(2.);
         let r = _mm_maskz_mul_ps(0, a, b);
@@ -44187,7 +46004,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_div_ps() {
+    const fn test_mm512_div_ps() {
         let a = _mm512_setr_ps(
             0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
         );
@@ -44206,7 +46023,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_div_ps() {
+    const fn test_mm512_mask_div_ps() {
         let a = _mm512_setr_ps(
             0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
         );
@@ -44227,7 +46044,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_div_ps() {
+    const fn test_mm512_maskz_div_ps() {
         let a = _mm512_setr_ps(
             0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
         );
@@ -44248,7 +46065,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_div_ps() {
+    const fn test_mm256_mask_div_ps() {
         let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
         let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
         let r = _mm256_mask_div_ps(a, 0, a, b);
@@ -44259,7 +46076,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_div_ps() {
+    const fn test_mm256_maskz_div_ps() {
         let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
         let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
         let r = _mm256_maskz_div_ps(0, a, b);
@@ -44270,7 +46087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_div_ps() {
+    const fn test_mm_mask_div_ps() {
         let a = _mm_set_ps(100., 100., -100., -32.);
         let b = _mm_set_ps(2., 0., 2., 2.);
         let r = _mm_mask_div_ps(a, 0, a, b);
@@ -44281,7 +46098,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_div_ps() {
+    const fn test_mm_maskz_div_ps() {
         let a = _mm_set_ps(100., 100., -100., -32.);
         let b = _mm_set_ps(2., 0., 2., 2.);
         let r = _mm_maskz_div_ps(0, a, b);
@@ -44292,7 +46109,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_max_epi32() {
+    const fn test_mm512_max_epi32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_max_epi32(a, b);
@@ -44301,7 +46118,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_epi32() {
+    const fn test_mm512_mask_max_epi32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_mask_max_epi32(a, 0, a, b);
@@ -44312,7 +46129,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_epi32() {
+    const fn test_mm512_maskz_max_epi32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_maskz_max_epi32(0, a, b);
@@ -44323,7 +46140,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_epi32() {
+    const fn test_mm256_mask_max_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_mask_max_epi32(a, 0, a, b);
@@ -44334,7 +46151,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_epi32() {
+    const fn test_mm256_maskz_max_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_maskz_max_epi32(0, a, b);
@@ -44345,7 +46162,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_epi32() {
+    const fn test_mm_mask_max_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let b = _mm_set_epi32(3, 2, 1, 0);
         let r = _mm_mask_max_epi32(a, 0, a, b);
@@ -44356,7 +46173,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_epi32() {
+    const fn test_mm_maskz_max_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let b = _mm_set_epi32(3, 2, 1, 0);
         let r = _mm_maskz_max_epi32(0, a, b);
@@ -44367,7 +46184,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_max_ps() {
+    fn test_mm512_max_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -44382,7 +46199,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_ps() {
+    fn test_mm512_mask_max_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -44399,7 +46216,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_ps() {
+    fn test_mm512_maskz_max_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -44416,7 +46233,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_ps() {
+    fn test_mm256_mask_max_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm256_mask_max_ps(a, 0, a, b);
@@ -44427,7 +46244,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_ps() {
+    fn test_mm256_maskz_max_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm256_maskz_max_ps(0, a, b);
@@ -44438,7 +46255,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_ps() {
+    fn test_mm_mask_max_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(3., 2., 1., 0.);
         let r = _mm_mask_max_ps(a, 0, a, b);
@@ -44449,7 +46266,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_ps() {
+    fn test_mm_maskz_max_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(3., 2., 1., 0.);
         let r = _mm_maskz_max_ps(0, a, b);
@@ -44460,7 +46277,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_max_epu32() {
+    const fn test_mm512_max_epu32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_max_epu32(a, b);
@@ -44469,7 +46286,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_epu32() {
+    const fn test_mm512_mask_max_epu32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_mask_max_epu32(a, 0, a, b);
@@ -44480,7 +46297,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_epu32() {
+    const fn test_mm512_maskz_max_epu32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_maskz_max_epu32(0, a, b);
@@ -44491,7 +46308,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_epu32() {
+    const fn test_mm256_mask_max_epu32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_mask_max_epu32(a, 0, a, b);
@@ -44502,7 +46319,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_epu32() {
+    const fn test_mm256_maskz_max_epu32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_maskz_max_epu32(0, a, b);
@@ -44513,7 +46330,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_epu32() {
+    const fn test_mm_mask_max_epu32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let b = _mm_set_epi32(3, 2, 1, 0);
         let r = _mm_mask_max_epu32(a, 0, a, b);
@@ -44524,7 +46341,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_epu32() {
+    const fn test_mm_maskz_max_epu32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let b = _mm_set_epi32(3, 2, 1, 0);
         let r = _mm_maskz_max_epu32(0, a, b);
@@ -44535,7 +46352,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_min_epi32() {
+    const fn test_mm512_min_epi32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_min_epi32(a, b);
@@ -44544,7 +46361,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_epi32() {
+    const fn test_mm512_mask_min_epi32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_mask_min_epi32(a, 0, a, b);
@@ -44555,7 +46372,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_epi32() {
+    const fn test_mm512_maskz_min_epi32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_maskz_min_epi32(0, a, b);
@@ -44566,7 +46383,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_epi32() {
+    const fn test_mm256_mask_min_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_mask_min_epi32(a, 0, a, b);
@@ -44577,7 +46394,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_epi32() {
+    const fn test_mm256_maskz_min_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_maskz_min_epi32(0, a, b);
@@ -44588,7 +46405,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_epi32() {
+    const fn test_mm_mask_min_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let b = _mm_set_epi32(3, 2, 1, 0);
         let r = _mm_mask_min_epi32(a, 0, a, b);
@@ -44599,7 +46416,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_epi32() {
+    const fn test_mm_maskz_min_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let b = _mm_set_epi32(3, 2, 1, 0);
         let r = _mm_maskz_min_epi32(0, a, b);
@@ -44610,7 +46427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_min_ps() {
+    fn test_mm512_min_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -44625,7 +46442,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_ps() {
+    fn test_mm512_mask_min_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -44642,7 +46459,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_ps() {
+    fn test_mm512_maskz_min_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -44659,7 +46476,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_ps() {
+    fn test_mm256_mask_min_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm256_mask_min_ps(a, 0, a, b);
@@ -44670,7 +46487,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_ps() {
+    fn test_mm256_maskz_min_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm256_maskz_min_ps(0, a, b);
@@ -44681,7 +46498,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_ps() {
+    fn test_mm_mask_min_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(3., 2., 1., 0.);
         let r = _mm_mask_min_ps(a, 0, a, b);
@@ -44692,7 +46509,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_ps() {
+    fn test_mm_maskz_min_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(3., 2., 1., 0.);
         let r = _mm_maskz_min_ps(0, a, b);
@@ -44703,7 +46520,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_min_epu32() {
+    const fn test_mm512_min_epu32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_min_epu32(a, b);
@@ -44712,7 +46529,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_epu32() {
+    const fn test_mm512_mask_min_epu32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_mask_min_epu32(a, 0, a, b);
@@ -44723,7 +46540,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_epu32() {
+    const fn test_mm512_maskz_min_epu32() {
         let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_maskz_min_epu32(0, a, b);
@@ -44734,7 +46551,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_epu32() {
+    const fn test_mm256_mask_min_epu32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_mask_min_epu32(a, 0, a, b);
@@ -44745,7 +46562,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_epu32() {
+    const fn test_mm256_maskz_min_epu32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm256_maskz_min_epu32(0, a, b);
@@ -44756,7 +46573,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_epu32() {
+    const fn test_mm_mask_min_epu32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let b = _mm_set_epi32(3, 2, 1, 0);
         let r = _mm_mask_min_epu32(a, 0, a, b);
@@ -44767,7 +46584,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_epu32() {
+    const fn test_mm_maskz_min_epu32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let b = _mm_set_epi32(3, 2, 1, 0);
         let r = _mm_maskz_min_epu32(0, a, b);
@@ -44778,7 +46595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sqrt_ps() {
+    fn test_mm512_sqrt_ps() {
         let a = _mm512_setr_ps(
             0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
         );
@@ -44790,7 +46607,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sqrt_ps() {
+    fn test_mm512_mask_sqrt_ps() {
         let a = _mm512_setr_ps(
             0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
         );
@@ -44804,7 +46621,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sqrt_ps() {
+    fn test_mm512_maskz_sqrt_ps() {
         let a = _mm512_setr_ps(
             0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
         );
@@ -44818,7 +46635,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sqrt_ps() {
+    fn test_mm256_mask_sqrt_ps() {
         let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
         let r = _mm256_mask_sqrt_ps(a, 0, a);
         assert_eq_m256(r, a);
@@ -44828,7 +46645,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sqrt_ps() {
+    fn test_mm256_maskz_sqrt_ps() {
         let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
         let r = _mm256_maskz_sqrt_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -44838,7 +46655,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sqrt_ps() {
+    fn test_mm_mask_sqrt_ps() {
         let a = _mm_set_ps(0., 1., 4., 9.);
         let r = _mm_mask_sqrt_ps(a, 0, a);
         assert_eq_m128(r, a);
@@ -44848,7 +46665,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sqrt_ps() {
+    fn test_mm_maskz_sqrt_ps() {
         let a = _mm_set_ps(0., 1., 4., 9.);
         let r = _mm_maskz_sqrt_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -44858,7 +46675,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmadd_ps() {
+    const fn test_mm512_fmadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -44872,7 +46689,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmadd_ps() {
+    const fn test_mm512_mask_fmadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -44888,7 +46705,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmadd_ps() {
+    const fn test_mm512_maskz_fmadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -44904,7 +46721,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmadd_ps() {
+    const fn test_mm512_mask3_fmadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -44920,7 +46737,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fmadd_ps() {
+    const fn test_mm256_mask_fmadd_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -44932,7 +46749,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fmadd_ps() {
+    const fn test_mm256_maskz_fmadd_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -44944,7 +46761,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fmadd_ps() {
+    const fn test_mm256_mask3_fmadd_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -44956,7 +46773,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fmadd_ps() {
+    const fn test_mm_mask_fmadd_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -44968,7 +46785,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fmadd_ps() {
+    const fn test_mm_maskz_fmadd_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -44980,7 +46797,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fmadd_ps() {
+    const fn test_mm_mask3_fmadd_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -44992,7 +46809,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmsub_ps() {
+    const fn test_mm512_fmsub_ps() {
         let a = _mm512_setr_ps(
             1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         );
@@ -45010,7 +46827,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmsub_ps() {
+    const fn test_mm512_mask_fmsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45026,7 +46843,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmsub_ps() {
+    const fn test_mm512_maskz_fmsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45042,7 +46859,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmsub_ps() {
+    const fn test_mm512_mask3_fmsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45060,7 +46877,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fmsub_ps() {
+    const fn test_mm256_mask_fmsub_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45072,7 +46889,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fmsub_ps() {
+    const fn test_mm256_maskz_fmsub_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45084,7 +46901,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fmsub_ps() {
+    const fn test_mm256_mask3_fmsub_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45096,7 +46913,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fmsub_ps() {
+    const fn test_mm_mask_fmsub_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45108,7 +46925,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fmsub_ps() {
+    const fn test_mm_maskz_fmsub_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45120,7 +46937,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fmsub_ps() {
+    const fn test_mm_mask3_fmsub_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45132,7 +46949,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmaddsub_ps() {
+    const fn test_mm512_fmaddsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45146,7 +46963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmaddsub_ps() {
+    const fn test_mm512_mask_fmaddsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45162,7 +46979,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmaddsub_ps() {
+    const fn test_mm512_maskz_fmaddsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45178,7 +46995,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmaddsub_ps() {
+    const fn test_mm512_mask3_fmaddsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45196,7 +47013,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fmaddsub_ps() {
+    const fn test_mm256_mask_fmaddsub_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45208,7 +47025,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fmaddsub_ps() {
+    const fn test_mm256_maskz_fmaddsub_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45220,7 +47037,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fmaddsub_ps() {
+    const fn test_mm256_mask3_fmaddsub_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45232,7 +47049,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fmaddsub_ps() {
+    const fn test_mm_mask_fmaddsub_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45244,7 +47061,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fmaddsub_ps() {
+    const fn test_mm_maskz_fmaddsub_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45256,7 +47073,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fmaddsub_ps() {
+    const fn test_mm_mask3_fmaddsub_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45268,7 +47085,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmsubadd_ps() {
+    const fn test_mm512_fmsubadd_ps() {
         let a = _mm512_setr_ps(
             1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
         );
@@ -45286,7 +47103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmsubadd_ps() {
+    const fn test_mm512_mask_fmsubadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45302,7 +47119,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmsubadd_ps() {
+    const fn test_mm512_maskz_fmsubadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45318,7 +47135,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmsubadd_ps() {
+    const fn test_mm512_mask3_fmsubadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45336,7 +47153,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fmsubadd_ps() {
+    const fn test_mm256_mask_fmsubadd_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45348,7 +47165,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fmsubadd_ps() {
+    const fn test_mm256_maskz_fmsubadd_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45360,7 +47177,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fmsubadd_ps() {
+    const fn test_mm256_mask3_fmsubadd_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45372,7 +47189,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fmsubadd_ps() {
+    const fn test_mm_mask_fmsubadd_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45384,7 +47201,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fmsubadd_ps() {
+    const fn test_mm_maskz_fmsubadd_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45396,7 +47213,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fmsubadd_ps() {
+    const fn test_mm_mask3_fmsubadd_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45408,7 +47225,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fnmadd_ps() {
+    const fn test_mm512_fnmadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45422,7 +47239,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fnmadd_ps() {
+    const fn test_mm512_mask_fnmadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45438,7 +47255,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fnmadd_ps() {
+    const fn test_mm512_maskz_fnmadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45454,7 +47271,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fnmadd_ps() {
+    const fn test_mm512_mask3_fnmadd_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45472,7 +47289,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fnmadd_ps() {
+    const fn test_mm256_mask_fnmadd_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45484,7 +47301,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fnmadd_ps() {
+    const fn test_mm256_maskz_fnmadd_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45496,7 +47313,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fnmadd_ps() {
+    const fn test_mm256_mask3_fnmadd_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45508,7 +47325,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fnmadd_ps() {
+    const fn test_mm_mask_fnmadd_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45520,7 +47337,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fnmadd_ps() {
+    const fn test_mm_maskz_fnmadd_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45532,7 +47349,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fnmadd_ps() {
+    const fn test_mm_mask3_fnmadd_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45544,7 +47361,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fnmsub_ps() {
+    const fn test_mm512_fnmsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45558,7 +47375,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fnmsub_ps() {
+    const fn test_mm512_mask_fnmsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45574,7 +47391,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fnmsub_ps() {
+    const fn test_mm512_maskz_fnmsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45590,7 +47407,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fnmsub_ps() {
+    const fn test_mm512_mask3_fnmsub_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -45608,7 +47425,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fnmsub_ps() {
+    const fn test_mm256_mask_fnmsub_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45620,7 +47437,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fnmsub_ps() {
+    const fn test_mm256_maskz_fnmsub_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45632,7 +47449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fnmsub_ps() {
+    const fn test_mm256_mask3_fnmsub_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm256_set1_ps(1.);
@@ -45644,7 +47461,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fnmsub_ps() {
+    const fn test_mm_mask_fnmsub_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45656,7 +47473,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fnmsub_ps() {
+    const fn test_mm_maskz_fnmsub_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45668,7 +47485,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fnmsub_ps() {
+    const fn test_mm_mask3_fnmsub_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set_ps(0., 1., 2., 3.);
         let c = _mm_set1_ps(1.);
@@ -45680,7 +47497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rcp14_ps() {
+    fn test_mm512_rcp14_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_rcp14_ps(a);
         let e = _mm512_set1_ps(0.33333206);
@@ -45688,7 +47505,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rcp14_ps() {
+    fn test_mm512_mask_rcp14_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_mask_rcp14_ps(a, 0, a);
         assert_eq_m512(r, a);
@@ -45701,7 +47518,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rcp14_ps() {
+    fn test_mm512_maskz_rcp14_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_maskz_rcp14_ps(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -45714,7 +47531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rcp14_ps() {
+    fn test_mm256_rcp14_ps() {
         let a = _mm256_set1_ps(3.);
         let r = _mm256_rcp14_ps(a);
         let e = _mm256_set1_ps(0.33333206);
@@ -45722,7 +47539,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rcp14_ps() {
+    fn test_mm256_mask_rcp14_ps() {
         let a = _mm256_set1_ps(3.);
         let r = _mm256_mask_rcp14_ps(a, 0, a);
         assert_eq_m256(r, a);
@@ -45732,7 +47549,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rcp14_ps() {
+    fn test_mm256_maskz_rcp14_ps() {
         let a = _mm256_set1_ps(3.);
         let r = _mm256_maskz_rcp14_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -45742,7 +47559,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rcp14_ps() {
+    fn test_mm_rcp14_ps() {
         let a = _mm_set1_ps(3.);
         let r = _mm_rcp14_ps(a);
         let e = _mm_set1_ps(0.33333206);
@@ -45750,7 +47567,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rcp14_ps() {
+    fn test_mm_mask_rcp14_ps() {
         let a = _mm_set1_ps(3.);
         let r = _mm_mask_rcp14_ps(a, 0, a);
         assert_eq_m128(r, a);
@@ -45760,7 +47577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rcp14_ps() {
+    fn test_mm_maskz_rcp14_ps() {
         let a = _mm_set1_ps(3.);
         let r = _mm_maskz_rcp14_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -45770,7 +47587,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rsqrt14_ps() {
+    fn test_mm512_rsqrt14_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_rsqrt14_ps(a);
         let e = _mm512_set1_ps(0.5773392);
@@ -45778,7 +47595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rsqrt14_ps() {
+    fn test_mm512_mask_rsqrt14_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_mask_rsqrt14_ps(a, 0, a);
         assert_eq_m512(r, a);
@@ -45791,7 +47608,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rsqrt14_ps() {
+    fn test_mm512_maskz_rsqrt14_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_maskz_rsqrt14_ps(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -45804,7 +47621,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rsqrt14_ps() {
+    fn test_mm256_rsqrt14_ps() {
         let a = _mm256_set1_ps(3.);
         let r = _mm256_rsqrt14_ps(a);
         let e = _mm256_set1_ps(0.5773392);
@@ -45812,7 +47629,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rsqrt14_ps() {
+    fn test_mm256_mask_rsqrt14_ps() {
         let a = _mm256_set1_ps(3.);
         let r = _mm256_mask_rsqrt14_ps(a, 0, a);
         assert_eq_m256(r, a);
@@ -45822,7 +47639,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rsqrt14_ps() {
+    fn test_mm256_maskz_rsqrt14_ps() {
         let a = _mm256_set1_ps(3.);
         let r = _mm256_maskz_rsqrt14_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -45832,7 +47649,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rsqrt14_ps() {
+    fn test_mm_rsqrt14_ps() {
         let a = _mm_set1_ps(3.);
         let r = _mm_rsqrt14_ps(a);
         let e = _mm_set1_ps(0.5773392);
@@ -45840,7 +47657,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rsqrt14_ps() {
+    fn test_mm_mask_rsqrt14_ps() {
         let a = _mm_set1_ps(3.);
         let r = _mm_mask_rsqrt14_ps(a, 0, a);
         assert_eq_m128(r, a);
@@ -45850,7 +47667,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rsqrt14_ps() {
+    fn test_mm_maskz_rsqrt14_ps() {
         let a = _mm_set1_ps(3.);
         let r = _mm_maskz_rsqrt14_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -45860,7 +47677,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_getexp_ps() {
+    fn test_mm512_getexp_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_getexp_ps(a);
         let e = _mm512_set1_ps(1.);
@@ -45868,7 +47685,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_getexp_ps() {
+    fn test_mm512_mask_getexp_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_mask_getexp_ps(a, 0, a);
         assert_eq_m512(r, a);
@@ -45880,7 +47697,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_getexp_ps() {
+    fn test_mm512_maskz_getexp_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_maskz_getexp_ps(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -45892,7 +47709,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_getexp_ps() {
+    fn test_mm256_getexp_ps() {
         let a = _mm256_set1_ps(3.);
         let r = _mm256_getexp_ps(a);
         let e = _mm256_set1_ps(1.);
@@ -45900,7 +47717,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_getexp_ps() {
+    fn test_mm256_mask_getexp_ps() {
         let a = _mm256_set1_ps(3.);
         let r = _mm256_mask_getexp_ps(a, 0, a);
         assert_eq_m256(r, a);
@@ -45910,7 +47727,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_getexp_ps() {
+    fn test_mm256_maskz_getexp_ps() {
         let a = _mm256_set1_ps(3.);
         let r = _mm256_maskz_getexp_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -45920,7 +47737,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_getexp_ps() {
+    fn test_mm_getexp_ps() {
         let a = _mm_set1_ps(3.);
         let r = _mm_getexp_ps(a);
         let e = _mm_set1_ps(1.);
@@ -45928,7 +47745,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_getexp_ps() {
+    fn test_mm_mask_getexp_ps() {
         let a = _mm_set1_ps(3.);
         let r = _mm_mask_getexp_ps(a, 0, a);
         assert_eq_m128(r, a);
@@ -45938,7 +47755,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_getexp_ps() {
+    fn test_mm_maskz_getexp_ps() {
         let a = _mm_set1_ps(3.);
         let r = _mm_maskz_getexp_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -45948,7 +47765,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_roundscale_ps() {
+    fn test_mm512_roundscale_ps() {
         let a = _mm512_set1_ps(1.1);
         let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
         let e = _mm512_set1_ps(1.0);
@@ -45956,7 +47773,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_roundscale_ps() {
+    fn test_mm512_mask_roundscale_ps() {
         let a = _mm512_set1_ps(1.1);
         let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
         let e = _mm512_set1_ps(1.1);
@@ -45967,7 +47784,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_roundscale_ps() {
+    fn test_mm512_maskz_roundscale_ps() {
         let a = _mm512_set1_ps(1.1);
         let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -45977,7 +47794,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_roundscale_ps() {
+    fn test_mm256_roundscale_ps() {
         let a = _mm256_set1_ps(1.1);
         let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
         let e = _mm256_set1_ps(1.0);
@@ -45985,7 +47802,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_roundscale_ps() {
+    fn test_mm256_mask_roundscale_ps() {
         let a = _mm256_set1_ps(1.1);
         let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
         let e = _mm256_set1_ps(1.1);
@@ -45996,7 +47813,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_roundscale_ps() {
+    fn test_mm256_maskz_roundscale_ps() {
         let a = _mm256_set1_ps(1.1);
         let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -46006,7 +47823,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_roundscale_ps() {
+    fn test_mm_roundscale_ps() {
         let a = _mm_set1_ps(1.1);
         let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
         let e = _mm_set1_ps(1.0);
@@ -46014,7 +47831,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_roundscale_ps() {
+    fn test_mm_mask_roundscale_ps() {
         let a = _mm_set1_ps(1.1);
         let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
         let e = _mm_set1_ps(1.1);
@@ -46025,7 +47842,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_roundscale_ps() {
+    fn test_mm_maskz_roundscale_ps() {
         let a = _mm_set1_ps(1.1);
         let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -46035,7 +47852,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_scalef_ps() {
+    fn test_mm512_scalef_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(3.);
         let r = _mm512_scalef_ps(a, b);
@@ -46044,7 +47861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_scalef_ps() {
+    fn test_mm512_mask_scalef_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(3.);
         let r = _mm512_mask_scalef_ps(a, 0, a, b);
@@ -46057,7 +47874,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_scalef_ps() {
+    fn test_mm512_maskz_scalef_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(3.);
         let r = _mm512_maskz_scalef_ps(0, a, b);
@@ -46070,7 +47887,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_scalef_ps() {
+    fn test_mm256_scalef_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set1_ps(3.);
         let r = _mm256_scalef_ps(a, b);
@@ -46079,7 +47896,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_scalef_ps() {
+    fn test_mm256_mask_scalef_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set1_ps(3.);
         let r = _mm256_mask_scalef_ps(a, 0, a, b);
@@ -46090,7 +47907,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_scalef_ps() {
+    fn test_mm256_maskz_scalef_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set1_ps(3.);
         let r = _mm256_maskz_scalef_ps(0, a, b);
@@ -46101,7 +47918,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_scalef_ps() {
+    fn test_mm_scalef_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(3.);
         let r = _mm_scalef_ps(a, b);
@@ -46110,7 +47927,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_scalef_ps() {
+    fn test_mm_mask_scalef_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(3.);
         let r = _mm_mask_scalef_ps(a, 0, a, b);
@@ -46121,7 +47938,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_scalef_ps() {
+    fn test_mm_maskz_scalef_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(3.);
         let r = _mm_maskz_scalef_ps(0, a, b);
@@ -46132,7 +47949,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fixupimm_ps() {
+    fn test_mm512_fixupimm_ps() {
         let a = _mm512_set1_ps(f32::NAN);
         let b = _mm512_set1_ps(f32::MAX);
         let c = _mm512_set1_epi32(i32::MAX);
@@ -46143,7 +47960,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fixupimm_ps() {
+    fn test_mm512_mask_fixupimm_ps() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(
             f32::NAN, f32::NAN, f32::NAN, f32::NAN,
@@ -46161,7 +47978,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fixupimm_ps() {
+    fn test_mm512_maskz_fixupimm_ps() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(
             f32::NAN, f32::NAN, f32::NAN, f32::NAN,
@@ -46179,7 +47996,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_fixupimm_ps() {
+    fn test_mm256_fixupimm_ps() {
         let a = _mm256_set1_ps(f32::NAN);
         let b = _mm256_set1_ps(f32::MAX);
         let c = _mm256_set1_epi32(i32::MAX);
@@ -46189,7 +48006,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fixupimm_ps() {
+    fn test_mm256_mask_fixupimm_ps() {
         let a = _mm256_set1_ps(f32::NAN);
         let b = _mm256_set1_ps(f32::MAX);
         let c = _mm256_set1_epi32(i32::MAX);
@@ -46199,7 +48016,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fixupimm_ps() {
+    fn test_mm256_maskz_fixupimm_ps() {
         let a = _mm256_set1_ps(f32::NAN);
         let b = _mm256_set1_ps(f32::MAX);
         let c = _mm256_set1_epi32(i32::MAX);
@@ -46209,7 +48026,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_fixupimm_ps() {
+    fn test_mm_fixupimm_ps() {
         let a = _mm_set1_ps(f32::NAN);
         let b = _mm_set1_ps(f32::MAX);
         let c = _mm_set1_epi32(i32::MAX);
@@ -46219,7 +48036,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fixupimm_ps() {
+    fn test_mm_mask_fixupimm_ps() {
         let a = _mm_set1_ps(f32::NAN);
         let b = _mm_set1_ps(f32::MAX);
         let c = _mm_set1_epi32(i32::MAX);
@@ -46229,7 +48046,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fixupimm_ps() {
+    fn test_mm_maskz_fixupimm_ps() {
         let a = _mm_set1_ps(f32::NAN);
         let b = _mm_set1_ps(f32::MAX);
         let c = _mm_set1_epi32(i32::MAX);
@@ -46239,17 +48056,29 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_ternarylogic_epi32() {
-        let a = _mm512_set1_epi32(1 << 2);
-        let b = _mm512_set1_epi32(1 << 1);
-        let c = _mm512_set1_epi32(1 << 0);
-        let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
-        let e = _mm512_set1_epi32(0);
+    fn test_mm512_ternarylogic_epi32() {
+        let a = _mm512_set4_epi32(0b100, 0b110, 0b001, 0b101);
+        let b = _mm512_set4_epi32(0b010, 0b011, 0b001, 0b110);
+        let c = _mm512_set4_epi32(0b001, 0b000, 0b001, 0b111);
+
+        // Identity of A.
+        let r = _mm512_ternarylogic_epi32::<0b1111_0000>(a, b, c);
+        assert_eq_m512i(r, a);
+
+        // Bitwise xor.
+        let r = _mm512_ternarylogic_epi32::<0b10010110>(a, b, c);
+        let e = _mm512_set4_epi32(0b111, 0b101, 0b001, 0b100);
+        assert_eq_m512i(r, e);
+        assert_eq_m512i(r, _mm512_xor_si512(_mm512_xor_si512(a, b), c));
+
+        // Majority (2 or more bits set).
+        let r = _mm512_ternarylogic_epi32::<0b1110_1000>(a, b, c);
+        let e = _mm512_set4_epi32(0b000, 0b010, 0b001, 0b111);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_ternarylogic_epi32() {
+    fn test_mm512_mask_ternarylogic_epi32() {
         let src = _mm512_set1_epi32(1 << 2);
         let a = _mm512_set1_epi32(1 << 1);
         let b = _mm512_set1_epi32(1 << 0);
@@ -46261,7 +48090,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_ternarylogic_epi32() {
+    fn test_mm512_maskz_ternarylogic_epi32() {
         let a = _mm512_set1_epi32(1 << 2);
         let b = _mm512_set1_epi32(1 << 1);
         let c = _mm512_set1_epi32(1 << 0);
@@ -46273,17 +48102,31 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_ternarylogic_epi32() {
-        let a = _mm256_set1_epi32(1 << 2);
-        let b = _mm256_set1_epi32(1 << 1);
-        let c = _mm256_set1_epi32(1 << 0);
-        let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
-        let e = _mm256_set1_epi32(0);
+    fn test_mm256_ternarylogic_epi32() {
+        let _mm256_set4_epi32 = |a, b, c, d| _mm256_setr_epi32(a, b, c, d, a, b, c, d);
+
+        let a = _mm256_set4_epi32(0b100, 0b110, 0b001, 0b101);
+        let b = _mm256_set4_epi32(0b010, 0b011, 0b001, 0b110);
+        let c = _mm256_set4_epi32(0b001, 0b000, 0b001, 0b111);
+
+        // Identity of A.
+        let r = _mm256_ternarylogic_epi32::<0b1111_0000>(a, b, c);
+        assert_eq_m256i(r, a);
+
+        // Bitwise xor.
+        let r = _mm256_ternarylogic_epi32::<0b10010110>(a, b, c);
+        let e = _mm256_set4_epi32(0b111, 0b101, 0b001, 0b100);
+        assert_eq_m256i(r, e);
+        assert_eq_m256i(r, _mm256_xor_si256(_mm256_xor_si256(a, b), c));
+
+        // Majority (2 or more bits set).
+        let r = _mm256_ternarylogic_epi32::<0b1110_1000>(a, b, c);
+        let e = _mm256_set4_epi32(0b000, 0b010, 0b001, 0b111);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_ternarylogic_epi32() {
+    fn test_mm256_mask_ternarylogic_epi32() {
         let src = _mm256_set1_epi32(1 << 2);
         let a = _mm256_set1_epi32(1 << 1);
         let b = _mm256_set1_epi32(1 << 0);
@@ -46295,7 +48138,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_ternarylogic_epi32() {
+    fn test_mm256_maskz_ternarylogic_epi32() {
         let a = _mm256_set1_epi32(1 << 2);
         let b = _mm256_set1_epi32(1 << 1);
         let c = _mm256_set1_epi32(1 << 0);
@@ -46307,17 +48150,29 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_ternarylogic_epi32() {
-        let a = _mm_set1_epi32(1 << 2);
-        let b = _mm_set1_epi32(1 << 1);
-        let c = _mm_set1_epi32(1 << 0);
-        let r = _mm_ternarylogic_epi32::<8>(a, b, c);
-        let e = _mm_set1_epi32(0);
+    fn test_mm_ternarylogic_epi32() {
+        let a = _mm_setr_epi32(0b100, 0b110, 0b001, 0b101);
+        let b = _mm_setr_epi32(0b010, 0b011, 0b001, 0b110);
+        let c = _mm_setr_epi32(0b001, 0b000, 0b001, 0b111);
+
+        // Identity of A.
+        let r = _mm_ternarylogic_epi32::<0b1111_0000>(a, b, c);
+        assert_eq_m128i(r, a);
+
+        // Bitwise xor.
+        let r = _mm_ternarylogic_epi32::<0b10010110>(a, b, c);
+        let e = _mm_setr_epi32(0b111, 0b101, 0b001, 0b100);
+        assert_eq_m128i(r, e);
+        assert_eq_m128i(r, _mm_xor_si128(_mm_xor_si128(a, b), c));
+
+        // Majority (2 or more bits set).
+        let r = _mm_ternarylogic_epi32::<0b1110_1000>(a, b, c);
+        let e = _mm_setr_epi32(0b000, 0b010, 0b001, 0b111);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_ternarylogic_epi32() {
+    fn test_mm_mask_ternarylogic_epi32() {
         let src = _mm_set1_epi32(1 << 2);
         let a = _mm_set1_epi32(1 << 1);
         let b = _mm_set1_epi32(1 << 0);
@@ -46329,7 +48184,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_ternarylogic_epi32() {
+    fn test_mm_maskz_ternarylogic_epi32() {
         let a = _mm_set1_epi32(1 << 2);
         let b = _mm_set1_epi32(1 << 1);
         let c = _mm_set1_epi32(1 << 0);
@@ -46341,7 +48196,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_getmant_ps() {
+    fn test_mm512_getmant_ps() {
         let a = _mm512_set1_ps(10.);
         let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
         let e = _mm512_set1_ps(1.25);
@@ -46349,7 +48204,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_getmant_ps() {
+    fn test_mm512_mask_getmant_ps() {
         let a = _mm512_set1_ps(10.);
         let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
         assert_eq_m512(r, a);
@@ -46365,7 +48220,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_getmant_ps() {
+    fn test_mm512_maskz_getmant_ps() {
         let a = _mm512_set1_ps(10.);
         let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -46378,7 +48233,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_getmant_ps() {
+    fn test_mm256_getmant_ps() {
         let a = _mm256_set1_ps(10.);
         let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
         let e = _mm256_set1_ps(1.25);
@@ -46386,7 +48241,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_getmant_ps() {
+    fn test_mm256_mask_getmant_ps() {
         let a = _mm256_set1_ps(10.);
         let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
         assert_eq_m256(r, a);
@@ -46396,7 +48251,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_getmant_ps() {
+    fn test_mm256_maskz_getmant_ps() {
         let a = _mm256_set1_ps(10.);
         let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -46406,7 +48261,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_getmant_ps() {
+    fn test_mm_getmant_ps() {
         let a = _mm_set1_ps(10.);
         let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
         let e = _mm_set1_ps(1.25);
@@ -46414,7 +48269,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_getmant_ps() {
+    fn test_mm_mask_getmant_ps() {
         let a = _mm_set1_ps(10.);
         let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
         assert_eq_m128(r, a);
@@ -46424,7 +48279,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_getmant_ps() {
+    fn test_mm_maskz_getmant_ps() {
         let a = _mm_set1_ps(10.);
         let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -46434,7 +48289,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_add_round_ps() {
+    fn test_mm512_add_round_ps() {
         let a = _mm512_setr_ps(
             0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
         );
@@ -46456,7 +48311,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_add_round_ps() {
+    fn test_mm512_mask_add_round_ps() {
         let a = _mm512_setr_ps(
             0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
         );
@@ -46480,7 +48335,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_add_round_ps() {
+    fn test_mm512_maskz_add_round_ps() {
         let a = _mm512_setr_ps(
             0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
         );
@@ -46503,7 +48358,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sub_round_ps() {
+    fn test_mm512_sub_round_ps() {
         let a = _mm512_setr_ps(
             0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
         );
@@ -46525,7 +48380,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sub_round_ps() {
+    fn test_mm512_mask_sub_round_ps() {
         let a = _mm512_setr_ps(
             0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
         );
@@ -46551,7 +48406,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sub_round_ps() {
+    fn test_mm512_maskz_sub_round_ps() {
         let a = _mm512_setr_ps(
             0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
         );
@@ -46575,7 +48430,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mul_round_ps() {
+    fn test_mm512_mul_round_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1.5, 2., 3.5,
@@ -46605,7 +48460,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mul_round_ps() {
+    fn test_mm512_mask_mul_round_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1.5, 2., 3.5,
@@ -46635,7 +48490,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mul_round_ps() {
+    fn test_mm512_maskz_mul_round_ps() {
         #[rustfmt::skip]
         let a = _mm512_setr_ps(
             0., 1.5, 2., 3.5,
@@ -46663,7 +48518,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_div_round_ps() {
+    fn test_mm512_div_round_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(3.);
         let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -46675,7 +48530,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_div_round_ps() {
+    fn test_mm512_mask_div_round_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(3.);
         let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -46696,7 +48551,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_div_round_ps() {
+    fn test_mm512_maskz_div_round_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(3.);
         let r =
@@ -46715,7 +48570,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sqrt_round_ps() {
+    fn test_mm512_sqrt_round_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set1_ps(1.7320508);
@@ -46726,7 +48581,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sqrt_round_ps() {
+    fn test_mm512_mask_sqrt_round_ps() {
         let a = _mm512_set1_ps(3.);
         let r =
             _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
@@ -46744,7 +48599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sqrt_round_ps() {
+    fn test_mm512_maskz_sqrt_round_ps() {
         let a = _mm512_set1_ps(3.);
         let r =
             _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
@@ -46761,7 +48616,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmadd_round_ps() {
+    fn test_mm512_fmadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -46774,7 +48629,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmadd_round_ps() {
+    fn test_mm512_mask_fmadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -46799,7 +48654,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmadd_round_ps() {
+    fn test_mm512_maskz_fmadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -46825,7 +48680,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmadd_round_ps() {
+    fn test_mm512_mask3_fmadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -46850,7 +48705,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmsub_round_ps() {
+    fn test_mm512_fmsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(1.);
@@ -46863,7 +48718,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmsub_round_ps() {
+    fn test_mm512_mask_fmsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(1.);
@@ -46888,7 +48743,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmsub_round_ps() {
+    fn test_mm512_maskz_fmsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(1.);
@@ -46913,7 +48768,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmsub_round_ps() {
+    fn test_mm512_mask3_fmsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(1.);
@@ -46938,7 +48793,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmaddsub_round_ps() {
+    fn test_mm512_fmaddsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -46961,7 +48816,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmaddsub_round_ps() {
+    fn test_mm512_mask_fmaddsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -46986,7 +48841,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
+    fn test_mm512_maskz_fmaddsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47011,7 +48866,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
+    fn test_mm512_mask3_fmaddsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47036,7 +48891,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmsubadd_round_ps() {
+    fn test_mm512_fmsubadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47059,7 +48914,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmsubadd_round_ps() {
+    fn test_mm512_mask_fmsubadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47084,7 +48939,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
+    fn test_mm512_maskz_fmsubadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47109,7 +48964,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
+    fn test_mm512_mask3_fmsubadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47134,7 +48989,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fnmadd_round_ps() {
+    fn test_mm512_fnmadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(1.);
@@ -47148,7 +49003,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fnmadd_round_ps() {
+    fn test_mm512_mask_fnmadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(1.);
@@ -47171,7 +49026,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fnmadd_round_ps() {
+    fn test_mm512_maskz_fnmadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(1.);
@@ -47193,7 +49048,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fnmadd_round_ps() {
+    fn test_mm512_mask3_fnmadd_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(1.);
@@ -47215,7 +49070,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fnmsub_round_ps() {
+    fn test_mm512_fnmsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47229,7 +49084,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fnmsub_round_ps() {
+    fn test_mm512_mask_fnmsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47252,7 +49107,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fnmsub_round_ps() {
+    fn test_mm512_maskz_fnmsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47274,7 +49129,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fnmsub_round_ps() {
+    fn test_mm512_mask3_fnmsub_round_ps() {
         let a = _mm512_set1_ps(0.00000007);
         let b = _mm512_set1_ps(1.);
         let c = _mm512_set1_ps(-1.);
@@ -47296,7 +49151,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_max_round_ps() {
+    fn test_mm512_max_round_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -47311,7 +49166,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_round_ps() {
+    fn test_mm512_mask_max_round_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -47328,7 +49183,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_round_ps() {
+    fn test_mm512_maskz_max_round_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -47345,7 +49200,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_min_round_ps() {
+    fn test_mm512_min_round_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -47360,7 +49215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_round_ps() {
+    fn test_mm512_mask_min_round_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -47377,7 +49232,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_round_ps() {
+    fn test_mm512_maskz_min_round_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -47394,7 +49249,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_getexp_round_ps() {
+    fn test_mm512_getexp_round_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm512_set1_ps(1.);
@@ -47402,7 +49257,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_getexp_round_ps() {
+    fn test_mm512_mask_getexp_round_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
         assert_eq_m512(r, a);
@@ -47414,7 +49269,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_getexp_round_ps() {
+    fn test_mm512_maskz_getexp_round_ps() {
         let a = _mm512_set1_ps(3.);
         let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -47426,7 +49281,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_roundscale_round_ps() {
+    fn test_mm512_roundscale_round_ps() {
         let a = _mm512_set1_ps(1.1);
         let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm512_set1_ps(1.0);
@@ -47434,7 +49289,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_roundscale_round_ps() {
+    fn test_mm512_mask_roundscale_round_ps() {
         let a = _mm512_set1_ps(1.1);
         let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
         let e = _mm512_set1_ps(1.1);
@@ -47449,7 +49304,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_roundscale_round_ps() {
+    fn test_mm512_maskz_roundscale_round_ps() {
         let a = _mm512_set1_ps(1.1);
         let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -47460,7 +49315,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_scalef_round_ps() {
+    fn test_mm512_scalef_round_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(3.);
         let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -47469,7 +49324,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_scalef_round_ps() {
+    fn test_mm512_mask_scalef_round_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(3.);
         let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -47489,7 +49344,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_scalef_round_ps() {
+    fn test_mm512_maskz_scalef_round_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(3.);
         let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -47508,7 +49363,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fixupimm_round_ps() {
+    fn test_mm512_fixupimm_round_ps() {
         let a = _mm512_set1_ps(f32::NAN);
         let b = _mm512_set1_ps(f32::MAX);
         let c = _mm512_set1_epi32(i32::MAX);
@@ -47518,7 +49373,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fixupimm_round_ps() {
+    fn test_mm512_mask_fixupimm_round_ps() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(
             f32::NAN, f32::NAN, f32::NAN, f32::NAN,
@@ -47541,7 +49396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fixupimm_round_ps() {
+    fn test_mm512_maskz_fixupimm_round_ps() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(
             f32::NAN, f32::NAN, f32::NAN, f32::NAN,
@@ -47564,7 +49419,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_getmant_round_ps() {
+    fn test_mm512_getmant_round_ps() {
         let a = _mm512_set1_ps(10.);
         let r = _mm512_getmant_round_ps::<
             _MM_MANT_NORM_1_2,
@@ -47576,7 +49431,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_getmant_round_ps() {
+    fn test_mm512_mask_getmant_round_ps() {
         let a = _mm512_set1_ps(10.);
         let r = _mm512_mask_getmant_round_ps::<
             _MM_MANT_NORM_1_2,
@@ -47596,7 +49451,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_getmant_round_ps() {
+    fn test_mm512_maskz_getmant_round_ps() {
         let a = _mm512_set1_ps(10.);
         let r = _mm512_maskz_getmant_round_ps::<
             _MM_MANT_NORM_1_2,
@@ -47616,7 +49471,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtps_epi32() {
+    fn test_mm512_cvtps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -47626,7 +49481,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtps_epi32() {
+    fn test_mm512_mask_cvtps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -47639,7 +49494,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtps_epi32() {
+    fn test_mm512_maskz_cvtps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -47651,7 +49506,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtps_epi32() {
+    fn test_mm256_mask_cvtps_epi32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm256_mask_cvtps_epi32(src, 0, a);
@@ -47662,7 +49517,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtps_epi32() {
+    fn test_mm256_maskz_cvtps_epi32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let r = _mm256_maskz_cvtps_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -47672,7 +49527,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtps_epi32() {
+    fn test_mm_mask_cvtps_epi32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvtps_epi32(src, 0, a);
@@ -47683,7 +49538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtps_epi32() {
+    fn test_mm_maskz_cvtps_epi32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let r = _mm_maskz_cvtps_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -47693,7 +49548,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtps_epu32() {
+    fn test_mm512_cvtps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -47703,7 +49558,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtps_epu32() {
+    fn test_mm512_mask_cvtps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -47716,7 +49571,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtps_epu32() {
+    fn test_mm512_maskz_cvtps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -47728,7 +49583,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtps_epu32() {
+    fn test_mm256_cvtps_epu32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let r = _mm256_cvtps_epu32(a);
         let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
@@ -47736,7 +49591,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtps_epu32() {
+    fn test_mm256_mask_cvtps_epu32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm256_mask_cvtps_epu32(src, 0, a);
@@ -47747,7 +49602,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtps_epu32() {
+    fn test_mm256_maskz_cvtps_epu32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let r = _mm256_maskz_cvtps_epu32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -47757,7 +49612,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtps_epu32() {
+    fn test_mm_cvtps_epu32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let r = _mm_cvtps_epu32(a);
         let e = _mm_set_epi32(12, 14, 14, 16);
@@ -47765,7 +49620,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtps_epu32() {
+    fn test_mm_mask_cvtps_epu32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvtps_epu32(src, 0, a);
@@ -47776,7 +49631,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtps_epu32() {
+    fn test_mm_maskz_cvtps_epu32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let r = _mm_maskz_cvtps_epu32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -47786,7 +49641,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi8_epi32() {
+    const fn test_mm512_cvtepi8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi8_epi32(a);
         let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -47794,7 +49649,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi8_epi32() {
+    const fn test_mm512_mask_cvtepi8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi32(-1);
         let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
@@ -47805,7 +49660,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi8_epi32() {
+    const fn test_mm512_maskz_cvtepi8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi8_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -47815,7 +49670,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi8_epi32() {
+    const fn test_mm256_mask_cvtepi8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm256_set1_epi32(-1);
         let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
@@ -47826,7 +49681,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi8_epi32() {
+    const fn test_mm256_maskz_cvtepi8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_cvtepi8_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -47836,7 +49691,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi8_epi32() {
+    const fn test_mm_mask_cvtepi8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set1_epi32(-1);
         let r = _mm_mask_cvtepi8_epi32(src, 0, a);
@@ -47847,7 +49702,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi8_epi32() {
+    const fn test_mm_maskz_cvtepi8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_cvtepi8_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -47857,7 +49712,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepu8_epi32() {
+    const fn test_mm512_cvtepu8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepu8_epi32(a);
         let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -47865,7 +49720,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepu8_epi32() {
+    const fn test_mm512_mask_cvtepu8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi32(-1);
         let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
@@ -47876,7 +49731,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepu8_epi32() {
+    const fn test_mm512_maskz_cvtepu8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepu8_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -47886,7 +49741,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu8_epi32() {
+    const fn test_mm256_mask_cvtepu8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm256_set1_epi32(-1);
         let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
@@ -47897,7 +49752,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu8_epi32() {
+    const fn test_mm256_maskz_cvtepu8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_cvtepu8_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -47907,7 +49762,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu8_epi32() {
+    const fn test_mm_mask_cvtepu8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set1_epi32(-1);
         let r = _mm_mask_cvtepu8_epi32(src, 0, a);
@@ -47918,7 +49773,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu8_epi32() {
+    const fn test_mm_maskz_cvtepu8_epi32() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_cvtepu8_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -47928,7 +49783,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi16_epi32() {
+    const fn test_mm512_cvtepi16_epi32() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi16_epi32(a);
         let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -47936,7 +49791,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi16_epi32() {
+    const fn test_mm512_mask_cvtepi16_epi32() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi32(-1);
         let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
@@ -47947,7 +49802,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi16_epi32() {
+    const fn test_mm512_maskz_cvtepi16_epi32() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi16_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -47957,7 +49812,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi16_epi32() {
+    const fn test_mm256_mask_cvtepi16_epi32() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let src = _mm256_set1_epi32(-1);
         let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
@@ -47968,7 +49823,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi16_epi32() {
+    const fn test_mm256_maskz_cvtepi16_epi32() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_maskz_cvtepi16_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -47978,7 +49833,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi16_epi32() {
+    const fn test_mm_mask_cvtepi16_epi32() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let src = _mm_set1_epi32(-1);
         let r = _mm_mask_cvtepi16_epi32(src, 0, a);
@@ -47989,7 +49844,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi16_epi32() {
+    const fn test_mm_maskz_cvtepi16_epi32() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_maskz_cvtepi16_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -47999,7 +49854,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepu16_epi32() {
+    const fn test_mm512_cvtepu16_epi32() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepu16_epi32(a);
         let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -48007,7 +49862,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepu16_epi32() {
+    const fn test_mm512_mask_cvtepu16_epi32() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi32(-1);
         let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
@@ -48018,7 +49873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepu16_epi32() {
+    const fn test_mm512_maskz_cvtepu16_epi32() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepu16_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -48028,7 +49883,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu16_epi32() {
+    const fn test_mm256_mask_cvtepu16_epi32() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm256_set1_epi32(-1);
         let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
@@ -48039,7 +49894,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu16_epi32() {
+    const fn test_mm256_maskz_cvtepu16_epi32() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_cvtepu16_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -48049,7 +49904,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu16_epi32() {
+    const fn test_mm_mask_cvtepu16_epi32() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set1_epi32(-1);
         let r = _mm_mask_cvtepu16_epi32(src, 0, a);
@@ -48060,7 +49915,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu16_epi32() {
+    const fn test_mm_maskz_cvtepu16_epi32() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_cvtepu16_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48070,7 +49925,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi32_ps() {
+    const fn test_mm512_cvtepi32_ps() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi32_ps(a);
         let e = _mm512_set_ps(
@@ -48080,7 +49935,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi32_ps() {
+    const fn test_mm512_mask_cvtepi32_ps() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_ps(-1.);
         let r = _mm512_mask_cvtepi32_ps(src, 0, a);
@@ -48093,7 +49948,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi32_ps() {
+    const fn test_mm512_maskz_cvtepi32_ps() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi32_ps(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -48105,7 +49960,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi32_ps() {
+    const fn test_mm256_mask_cvtepi32_ps() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm256_set1_ps(-1.);
         let r = _mm256_mask_cvtepi32_ps(src, 0, a);
@@ -48116,7 +49971,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi32_ps() {
+    const fn test_mm256_maskz_cvtepi32_ps() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_maskz_cvtepi32_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -48126,7 +49981,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi32_ps() {
+    const fn test_mm_mask_cvtepi32_ps() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let src = _mm_set1_ps(-1.);
         let r = _mm_mask_cvtepi32_ps(src, 0, a);
@@ -48137,7 +49992,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi32_ps() {
+    const fn test_mm_maskz_cvtepi32_ps() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_maskz_cvtepi32_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -48147,7 +50002,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepu32_ps() {
+    const fn test_mm512_cvtepu32_ps() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepu32_ps(a);
         let e = _mm512_set_ps(
@@ -48157,7 +50012,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepu32_ps() {
+    const fn test_mm512_mask_cvtepu32_ps() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_ps(-1.);
         let r = _mm512_mask_cvtepu32_ps(src, 0, a);
@@ -48170,7 +50025,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepu32_ps() {
+    const fn test_mm512_maskz_cvtepu32_ps() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepu32_ps(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -48182,7 +50037,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi32_epi16() {
+    const fn test_mm512_cvtepi32_epi16() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi32_epi16(a);
         let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -48190,7 +50045,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi32_epi16() {
+    const fn test_mm512_mask_cvtepi32_epi16() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm256_set1_epi16(-1);
         let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
@@ -48201,7 +50056,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi32_epi16() {
+    const fn test_mm512_maskz_cvtepi32_epi16() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi32_epi16(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -48211,7 +50066,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtepi32_epi16() {
+    const fn test_mm256_cvtepi32_epi16() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_cvtepi32_epi16(a);
         let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
@@ -48219,7 +50074,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi32_epi16() {
+    const fn test_mm256_mask_cvtepi32_epi16() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let src = _mm_set1_epi16(-1);
         let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
@@ -48230,7 +50085,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi32_epi16() {
+    const fn test_mm256_maskz_cvtepi32_epi16() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_maskz_cvtepi32_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48240,7 +50095,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtepi32_epi16() {
+    fn test_mm_cvtepi32_epi16() {
         let a = _mm_set_epi32(4, 5, 6, 7);
         let r = _mm_cvtepi32_epi16(a);
         let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
@@ -48248,7 +50103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi32_epi16() {
+    fn test_mm_mask_cvtepi32_epi16() {
         let a = _mm_set_epi32(4, 5, 6, 7);
         let src = _mm_set1_epi16(0);
         let r = _mm_mask_cvtepi32_epi16(src, 0, a);
@@ -48259,7 +50114,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi32_epi16() {
+    fn test_mm_maskz_cvtepi32_epi16() {
         let a = _mm_set_epi32(4, 5, 6, 7);
         let r = _mm_maskz_cvtepi32_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48269,7 +50124,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi32_epi8() {
+    const fn test_mm512_cvtepi32_epi8() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi32_epi8(a);
         let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -48277,7 +50132,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi32_epi8() {
+    const fn test_mm512_mask_cvtepi32_epi8() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set1_epi8(-1);
         let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
@@ -48288,7 +50143,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi32_epi8() {
+    const fn test_mm512_maskz_cvtepi32_epi8() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi32_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48298,7 +50153,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtepi32_epi8() {
+    fn test_mm256_cvtepi32_epi8() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_cvtepi32_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
@@ -48306,7 +50161,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi32_epi8() {
+    fn test_mm256_mask_cvtepi32_epi8() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let src = _mm_set1_epi8(0);
         let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
@@ -48317,7 +50172,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi32_epi8() {
+    fn test_mm256_maskz_cvtepi32_epi8() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_maskz_cvtepi32_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48327,7 +50182,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtepi32_epi8() {
+    fn test_mm_cvtepi32_epi8() {
         let a = _mm_set_epi32(4, 5, 6, 7);
         let r = _mm_cvtepi32_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
@@ -48335,7 +50190,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi32_epi8() {
+    fn test_mm_mask_cvtepi32_epi8() {
         let a = _mm_set_epi32(4, 5, 6, 7);
         let src = _mm_set1_epi8(0);
         let r = _mm_mask_cvtepi32_epi8(src, 0, a);
@@ -48346,7 +50201,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi32_epi8() {
+    fn test_mm_maskz_cvtepi32_epi8() {
         let a = _mm_set_epi32(4, 5, 6, 7);
         let r = _mm_maskz_cvtepi32_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48356,7 +50211,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtsepi32_epi16() {
+    fn test_mm512_cvtsepi32_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48376,7 +50231,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi32_epi16() {
+    fn test_mm512_mask_cvtsepi32_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48399,7 +50254,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
+    fn test_mm512_maskz_cvtsepi32_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48421,7 +50276,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtsepi32_epi16() {
+    fn test_mm256_cvtsepi32_epi16() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_cvtsepi32_epi16(a);
         let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
@@ -48429,7 +50284,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi32_epi16() {
+    fn test_mm256_mask_cvtsepi32_epi16() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let src = _mm_set1_epi16(-1);
         let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
@@ -48440,7 +50295,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
+    fn test_mm256_maskz_cvtsepi32_epi16() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_maskz_cvtsepi32_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48450,7 +50305,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtsepi32_epi16() {
+    fn test_mm_cvtsepi32_epi16() {
         let a = _mm_set_epi32(4, 5, 6, 7);
         let r = _mm_cvtsepi32_epi16(a);
         let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
@@ -48458,7 +50313,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi32_epi16() {
+    fn test_mm_mask_cvtsepi32_epi16() {
         let a = _mm_set_epi32(4, 5, 6, 7);
         let src = _mm_set1_epi16(0);
         let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
@@ -48469,7 +50324,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtsepi32_epi16() {
+    fn test_mm_maskz_cvtsepi32_epi16() {
         let a = _mm_set_epi32(4, 5, 6, 7);
         let r = _mm_maskz_cvtsepi32_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48479,7 +50334,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtsepi32_epi8() {
+    fn test_mm512_cvtsepi32_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48499,7 +50354,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi32_epi8() {
+    fn test_mm512_mask_cvtsepi32_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48522,7 +50377,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
+    fn test_mm512_maskz_cvtsepi32_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48544,7 +50399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtsepi32_epi8() {
+    fn test_mm256_cvtsepi32_epi8() {
         let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm256_cvtsepi32_epi8(a);
         #[rustfmt::skip]
@@ -48558,7 +50413,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi32_epi8() {
+    fn test_mm256_mask_cvtsepi32_epi8() {
         let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
         let src = _mm_set1_epi8(0);
         let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
@@ -48575,7 +50430,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
+    fn test_mm256_maskz_cvtsepi32_epi8() {
         let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm256_maskz_cvtsepi32_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48591,7 +50446,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtsepi32_epi8() {
+    fn test_mm_cvtsepi32_epi8() {
         let a = _mm_set_epi32(13, 14, 15, 16);
         let r = _mm_cvtsepi32_epi8(a);
         #[rustfmt::skip]
@@ -48605,7 +50460,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi32_epi8() {
+    fn test_mm_mask_cvtsepi32_epi8() {
         let a = _mm_set_epi32(13, 14, 15, 16);
         let src = _mm_set1_epi8(0);
         let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
@@ -48622,7 +50477,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtsepi32_epi8() {
+    fn test_mm_maskz_cvtsepi32_epi8() {
         let a = _mm_set_epi32(13, 14, 15, 16);
         let r = _mm_maskz_cvtsepi32_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48638,7 +50493,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtusepi32_epi16() {
+    fn test_mm512_cvtusepi32_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48652,7 +50507,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi32_epi16() {
+    fn test_mm512_mask_cvtusepi32_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48669,7 +50524,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
+    fn test_mm512_maskz_cvtusepi32_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48685,7 +50540,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtusepi32_epi16() {
+    fn test_mm256_cvtusepi32_epi16() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_cvtusepi32_epi16(a);
         let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
@@ -48693,7 +50548,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi32_epi16() {
+    fn test_mm256_mask_cvtusepi32_epi16() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set1_epi16(0);
         let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
@@ -48704,7 +50559,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
+    fn test_mm256_maskz_cvtusepi32_epi16() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_maskz_cvtusepi32_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48714,7 +50569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtusepi32_epi16() {
+    fn test_mm_cvtusepi32_epi16() {
         let a = _mm_set_epi32(5, 6, 7, 8);
         let r = _mm_cvtusepi32_epi16(a);
         let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
@@ -48722,7 +50577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi32_epi16() {
+    fn test_mm_mask_cvtusepi32_epi16() {
         let a = _mm_set_epi32(5, 6, 7, 8);
         let src = _mm_set1_epi16(0);
         let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
@@ -48733,7 +50588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtusepi32_epi16() {
+    fn test_mm_maskz_cvtusepi32_epi16() {
         let a = _mm_set_epi32(5, 6, 7, 8);
         let r = _mm_maskz_cvtusepi32_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48743,7 +50598,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtusepi32_epi8() {
+    fn test_mm512_cvtusepi32_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48757,7 +50612,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi32_epi8() {
+    fn test_mm512_mask_cvtusepi32_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48774,7 +50629,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
+    fn test_mm512_maskz_cvtusepi32_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             0, 1, 2, 3,
@@ -48790,7 +50645,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtusepi32_epi8() {
+    fn test_mm256_cvtusepi32_epi8() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
         let r = _mm256_cvtusepi32_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
@@ -48798,7 +50653,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi32_epi8() {
+    fn test_mm256_mask_cvtusepi32_epi8() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
         let src = _mm_set1_epi8(0);
         let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
@@ -48809,7 +50664,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
+    fn test_mm256_maskz_cvtusepi32_epi8() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
         let r = _mm256_maskz_cvtusepi32_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48819,7 +50674,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtusepi32_epi8() {
+    fn test_mm_cvtusepi32_epi8() {
         let a = _mm_set_epi32(5, 6, 7, i32::MAX);
         let r = _mm_cvtusepi32_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
@@ -48827,7 +50682,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi32_epi8() {
+    fn test_mm_mask_cvtusepi32_epi8() {
         let a = _mm_set_epi32(5, 6, 7, i32::MAX);
         let src = _mm_set1_epi8(0);
         let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
@@ -48838,7 +50693,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtusepi32_epi8() {
+    fn test_mm_maskz_cvtusepi32_epi8() {
         let a = _mm_set_epi32(5, 6, 7, i32::MAX);
         let r = _mm_maskz_cvtusepi32_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -48848,7 +50703,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundps_epi32() {
+    fn test_mm512_cvt_roundps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -48861,7 +50716,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundps_epi32() {
+    fn test_mm512_mask_cvt_roundps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -48880,7 +50735,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
+    fn test_mm512_maskz_cvt_roundps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -48897,7 +50752,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundps_epu32() {
+    fn test_mm512_cvt_roundps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -48910,7 +50765,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundps_epu32() {
+    fn test_mm512_mask_cvt_roundps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -48929,7 +50784,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
+    fn test_mm512_maskz_cvt_roundps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -48946,7 +50801,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundepi32_ps() {
+    fn test_mm512_cvt_roundepi32_ps() {
         let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
         let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_setr_ps(
@@ -48956,7 +50811,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
+    fn test_mm512_mask_cvt_roundepi32_ps() {
         let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
         let src = _mm512_set1_ps(0.);
         let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -48975,7 +50830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
+    fn test_mm512_maskz_cvt_roundepi32_ps() {
         let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
         let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0, a,
@@ -48992,7 +50847,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundepu32_ps() {
+    fn test_mm512_cvt_roundepu32_ps() {
         let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
         let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         #[rustfmt::skip]
@@ -49006,7 +50861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
+    fn test_mm512_mask_cvt_roundepu32_ps() {
         let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
         let src = _mm512_set1_ps(0.);
         let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -49029,7 +50884,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
+    fn test_mm512_maskz_cvt_roundepu32_ps() {
         let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
         let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0, a,
@@ -49050,7 +50905,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundps_ph() {
+    fn test_mm512_cvt_roundps_ph() {
         let a = _mm512_set1_ps(1.);
         let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
         let e = _mm256_setr_epi64x(
@@ -49063,7 +50918,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundps_ph() {
+    fn test_mm512_mask_cvt_roundps_ph() {
         let a = _mm512_set1_ps(1.);
         let src = _mm256_set1_epi16(0);
         let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
@@ -49074,7 +50929,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundps_ph() {
+    fn test_mm512_maskz_cvt_roundps_ph() {
         let a = _mm512_set1_ps(1.);
         let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -49084,7 +50939,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvt_roundps_ph() {
+    fn test_mm256_mask_cvt_roundps_ph() {
         let a = _mm256_set1_ps(1.);
         let src = _mm_set1_epi16(0);
         let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
@@ -49095,7 +50950,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvt_roundps_ph() {
+    fn test_mm256_maskz_cvt_roundps_ph() {
         let a = _mm256_set1_ps(1.);
         let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -49105,7 +50960,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvt_roundps_ph() {
+    fn test_mm_mask_cvt_roundps_ph() {
         let a = _mm_set1_ps(1.);
         let src = _mm_set1_epi16(0);
         let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
@@ -49116,7 +50971,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvt_roundps_ph() {
+    fn test_mm_maskz_cvt_roundps_ph() {
         let a = _mm_set1_ps(1.);
         let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -49126,7 +50981,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtps_ph() {
+    fn test_mm512_cvtps_ph() {
         let a = _mm512_set1_ps(1.);
         let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
         let e = _mm256_setr_epi64x(
@@ -49139,7 +50994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtps_ph() {
+    fn test_mm512_mask_cvtps_ph() {
         let a = _mm512_set1_ps(1.);
         let src = _mm256_set1_epi16(0);
         let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
@@ -49150,7 +51005,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtps_ph() {
+    fn test_mm512_maskz_cvtps_ph() {
         let a = _mm512_set1_ps(1.);
         let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -49160,7 +51015,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtps_ph() {
+    fn test_mm256_mask_cvtps_ph() {
         let a = _mm256_set1_ps(1.);
         let src = _mm_set1_epi16(0);
         let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
@@ -49171,7 +51026,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtps_ph() {
+    fn test_mm256_maskz_cvtps_ph() {
         let a = _mm256_set1_ps(1.);
         let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -49181,7 +51036,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtps_ph() {
+    fn test_mm_mask_cvtps_ph() {
         let a = _mm_set1_ps(1.);
         let src = _mm_set1_epi16(0);
         let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
@@ -49192,7 +51047,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtps_ph() {
+    fn test_mm_maskz_cvtps_ph() {
         let a = _mm_set1_ps(1.);
         let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -49202,7 +51057,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundph_ps() {
+    fn test_mm512_cvt_roundph_ps() {
         let a = _mm256_setr_epi64x(
             4323521613979991040,
             4323521613979991040,
@@ -49215,7 +51070,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundph_ps() {
+    fn test_mm512_mask_cvt_roundph_ps() {
         let a = _mm256_setr_epi64x(
             4323521613979991040,
             4323521613979991040,
@@ -49233,7 +51088,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundph_ps() {
+    fn test_mm512_maskz_cvt_roundph_ps() {
         let a = _mm256_setr_epi64x(
             4323521613979991040,
             4323521613979991040,
@@ -49250,7 +51105,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtph_ps() {
+    fn test_mm512_cvtph_ps() {
         let a = _mm256_setr_epi64x(
             4323521613979991040,
             4323521613979991040,
@@ -49263,7 +51118,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtph_ps() {
+    fn test_mm512_mask_cvtph_ps() {
         let a = _mm256_setr_epi64x(
             4323521613979991040,
             4323521613979991040,
@@ -49281,7 +51136,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtph_ps() {
+    fn test_mm512_maskz_cvtph_ps() {
         let a = _mm256_setr_epi64x(
             4323521613979991040,
             4323521613979991040,
@@ -49298,7 +51153,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtph_ps() {
+    fn test_mm256_mask_cvtph_ps() {
         let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
         let src = _mm256_set1_ps(0.);
         let r = _mm256_mask_cvtph_ps(src, 0, a);
@@ -49309,7 +51164,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtph_ps() {
+    fn test_mm256_maskz_cvtph_ps() {
         let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
         let r = _mm256_maskz_cvtph_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -49319,7 +51174,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtph_ps() {
+    fn test_mm_mask_cvtph_ps() {
         let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
         let src = _mm_set1_ps(0.);
         let r = _mm_mask_cvtph_ps(src, 0, a);
@@ -49330,7 +51185,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtph_ps() {
+    fn test_mm_maskz_cvtph_ps() {
         let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
         let r = _mm_maskz_cvtph_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -49340,7 +51195,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtt_roundps_epi32() {
+    fn test_mm512_cvtt_roundps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49350,7 +51205,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
+    fn test_mm512_mask_cvtt_roundps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49363,7 +51218,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
+    fn test_mm512_maskz_cvtt_roundps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49375,7 +51230,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtt_roundps_epu32() {
+    fn test_mm512_cvtt_roundps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49385,7 +51240,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
+    fn test_mm512_mask_cvtt_roundps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49398,7 +51253,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
+    fn test_mm512_maskz_cvtt_roundps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49410,7 +51265,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvttps_epi32() {
+    fn test_mm512_cvttps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49420,7 +51275,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvttps_epi32() {
+    fn test_mm512_mask_cvttps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49433,7 +51288,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvttps_epi32() {
+    fn test_mm512_maskz_cvttps_epi32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49445,7 +51300,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvttps_epi32() {
+    fn test_mm256_mask_cvttps_epi32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm256_mask_cvttps_epi32(src, 0, a);
@@ -49456,7 +51311,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttps_epi32() {
+    fn test_mm256_maskz_cvttps_epi32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let r = _mm256_maskz_cvttps_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -49466,7 +51321,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvttps_epi32() {
+    fn test_mm_mask_cvttps_epi32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvttps_epi32(src, 0, a);
@@ -49477,7 +51332,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvttps_epi32() {
+    fn test_mm_maskz_cvttps_epi32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let r = _mm_maskz_cvttps_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -49487,7 +51342,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvttps_epu32() {
+    fn test_mm512_cvttps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49497,7 +51352,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvttps_epu32() {
+    fn test_mm512_mask_cvttps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49510,7 +51365,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvttps_epu32() {
+    fn test_mm512_maskz_cvttps_epu32() {
         let a = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
         );
@@ -49522,7 +51377,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvttps_epu32() {
+    fn test_mm256_cvttps_epu32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let r = _mm256_cvttps_epu32(a);
         let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
@@ -49530,7 +51385,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvttps_epu32() {
+    fn test_mm256_mask_cvttps_epu32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm256_mask_cvttps_epu32(src, 0, a);
@@ -49541,7 +51396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttps_epu32() {
+    fn test_mm256_maskz_cvttps_epu32() {
         let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
         let r = _mm256_maskz_cvttps_epu32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -49551,7 +51406,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvttps_epu32() {
+    fn test_mm_cvttps_epu32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let r = _mm_cvttps_epu32(a);
         let e = _mm_set_epi32(12, 13, 14, 15);
@@ -49559,7 +51414,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvttps_epu32() {
+    fn test_mm_mask_cvttps_epu32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvttps_epu32(src, 0, a);
@@ -49570,7 +51425,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvttps_epu32() {
+    fn test_mm_maskz_cvttps_epu32() {
         let a = _mm_set_ps(12., 13.5, 14., 15.5);
         let r = _mm_maskz_cvttps_epu32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -49580,20 +51435,20 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32gather_ps() {
+    fn test_mm512_i32gather_ps() {
         let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing
         #[rustfmt::skip]
         let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                       120, 128, 136, 144, 152, 160, 168, 176);
-        let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr());
+        let r = unsafe { _mm512_i32gather_ps::<4>(index, arr.as_ptr()) };
         #[rustfmt::skip]
         assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
                                          120., 128., 136., 144., 152., 160., 168., 176.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32gather_ps() {
+    fn test_mm512_mask_i32gather_ps() {
         let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
         let src = _mm512_set1_ps(2.);
         let mask = 0b10101010_10101010;
@@ -49601,27 +51456,27 @@ mod tests {
         let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                       120, 128, 136, 144, 152, 160, 168, 176);
         // A multiplier of 4 is word-addressing
-        let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr());
+        let r = unsafe { _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr()) };
         #[rustfmt::skip]
         assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
                                          2., 128., 2., 144., 2., 160., 2., 176.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32gather_epi32() {
+    fn test_mm512_i32gather_epi32() {
         let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
         // A multiplier of 4 is word-addressing
         #[rustfmt::skip]
         let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                       120, 128, 136, 144, 152, 160, 168, 176);
-        let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr());
+        let r = unsafe { _mm512_i32gather_epi32::<4>(index, arr.as_ptr()) };
         #[rustfmt::skip]
         assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                              120, 128, 136, 144, 152, 160, 168, 176));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32gather_epi32() {
+    fn test_mm512_mask_i32gather_epi32() {
         let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
         let src = _mm512_set1_epi32(2);
         let mask = 0b10101010_10101010;
@@ -49629,7 +51484,7 @@ mod tests {
             0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
         );
         // A multiplier of 4 is word-addressing
-        let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr());
+        let r = unsafe { _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr()) };
         assert_eq_m512i(
             r,
             _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
@@ -49637,7 +51492,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32scatter_ps() {
+    fn test_mm512_i32scatter_ps() {
         let mut arr = [0f32; 256];
         #[rustfmt::skip]
         let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
@@ -49646,7 +51501,9 @@ mod tests {
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
         // A multiplier of 4 is word-addressing
-        _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
+        unsafe {
+            _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
+        }
         let mut expected = [0f32; 256];
         for i in 0..16 {
             expected[i * 16] = (i + 1) as f32;
@@ -49655,7 +51512,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32scatter_ps() {
+    fn test_mm512_mask_i32scatter_ps() {
         let mut arr = [0f32; 256];
         let mask = 0b10101010_10101010;
         #[rustfmt::skip]
@@ -49665,7 +51522,9 @@ mod tests {
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
         // A multiplier of 4 is word-addressing
-        _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
+        unsafe {
+            _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
+        }
         let mut expected = [0f32; 256];
         for i in 0..8 {
             expected[i * 32 + 16] = 2. * (i + 1) as f32;
@@ -49674,7 +51533,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32scatter_epi32() {
+    fn test_mm512_i32scatter_epi32() {
         let mut arr = [0i32; 256];
         #[rustfmt::skip]
 
@@ -49682,7 +51541,9 @@ mod tests {
                                       128, 144, 160, 176, 192, 208, 224, 240);
         let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         // A multiplier of 4 is word-addressing
-        _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
+        unsafe {
+            _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
+        }
         let mut expected = [0i32; 256];
         for i in 0..16 {
             expected[i * 16] = (i + 1) as i32;
@@ -49691,7 +51552,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32scatter_epi32() {
+    fn test_mm512_mask_i32scatter_epi32() {
         let mut arr = [0i32; 256];
         let mask = 0b10101010_10101010;
         #[rustfmt::skip]
@@ -49699,7 +51560,9 @@ mod tests {
                                       128, 144, 160, 176, 192, 208, 224, 240);
         let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         // A multiplier of 4 is word-addressing
-        _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
+        unsafe {
+            _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
+        }
         let mut expected = [0i32; 256];
         for i in 0..8 {
             expected[i * 32 + 16] = 2 * (i + 1) as i32;
@@ -49708,7 +51571,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmplt_ps_mask() {
+    fn test_mm512_cmplt_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                               0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
@@ -49718,7 +51581,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmplt_ps_mask() {
+    fn test_mm512_mask_cmplt_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                               0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
@@ -49729,7 +51592,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpnlt_ps_mask() {
+    fn test_mm512_cmpnlt_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                               0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
@@ -49738,7 +51601,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
+    fn test_mm512_mask_cmpnlt_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                               0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
@@ -49748,7 +51611,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpnle_ps_mask() {
+    fn test_mm512_cmpnle_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                               0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
@@ -49758,7 +51621,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpnle_ps_mask() {
+    fn test_mm512_mask_cmpnle_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                               0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
@@ -49769,7 +51632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmple_ps_mask() {
+    fn test_mm512_cmple_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                               0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
@@ -49778,7 +51641,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmple_ps_mask() {
+    fn test_mm512_mask_cmple_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
                               0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
@@ -49788,7 +51651,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpeq_ps_mask() {
+    fn test_mm512_cmpeq_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                               0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
@@ -49800,7 +51663,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpeq_ps_mask() {
+    fn test_mm512_mask_cmpeq_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                               0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
@@ -49813,7 +51676,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpneq_ps_mask() {
+    fn test_mm512_cmpneq_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                               0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
@@ -49825,7 +51688,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpneq_ps_mask() {
+    fn test_mm512_mask_cmpneq_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
                               0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
@@ -49838,7 +51701,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmp_ps_mask() {
+    fn test_mm512_cmp_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                               0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
@@ -49848,7 +51711,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmp_ps_mask() {
+    fn test_mm512_mask_cmp_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                               0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
@@ -49859,7 +51722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmp_ps_mask() {
+    fn test_mm256_cmp_ps_mask() {
         let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
         let b = _mm256_set1_ps(-1.);
         let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
@@ -49867,7 +51730,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_ps_mask() {
+    fn test_mm256_mask_cmp_ps_mask() {
         let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
         let b = _mm256_set1_ps(-1.);
         let mask = 0b01100110;
@@ -49876,7 +51739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmp_ps_mask() {
+    fn test_mm_cmp_ps_mask() {
         let a = _mm_set_ps(0., 1., -1., 13.);
         let b = _mm_set1_ps(1.);
         let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
@@ -49884,7 +51747,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmp_ps_mask() {
+    fn test_mm_mask_cmp_ps_mask() {
         let a = _mm_set_ps(0., 1., -1., 13.);
         let b = _mm_set1_ps(1.);
         let mask = 0b11111111;
@@ -49893,7 +51756,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmp_round_ps_mask() {
+    fn test_mm512_cmp_round_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                               0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
@@ -49903,7 +51766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmp_round_ps_mask() {
+    fn test_mm512_mask_cmp_round_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
                               0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
@@ -49914,7 +51777,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpord_ps_mask() {
+    fn test_mm512_cmpord_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                               f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
@@ -49926,7 +51789,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpord_ps_mask() {
+    fn test_mm512_mask_cmpord_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                               f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
@@ -49939,7 +51802,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpunord_ps_mask() {
+    fn test_mm512_cmpunord_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                               f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
@@ -49952,7 +51815,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpunord_ps_mask() {
+    fn test_mm512_mask_cmpunord_ps_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
                               f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
@@ -49965,7 +51828,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cmp_ss_mask() {
+    fn test_mm_cmp_ss_mask() {
         let a = _mm_setr_ps(2., 1., 1., 1.);
         let b = _mm_setr_ps(1., 2., 2., 2.);
         let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
@@ -49973,7 +51836,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_cmp_ss_mask() {
+    fn test_mm_mask_cmp_ss_mask() {
         let a = _mm_setr_ps(2., 1., 1., 1.);
         let b = _mm_setr_ps(1., 2., 2., 2.);
         let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
@@ -49983,7 +51846,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cmp_round_ss_mask() {
+    fn test_mm_cmp_round_ss_mask() {
         let a = _mm_setr_ps(2., 1., 1., 1.);
         let b = _mm_setr_ps(1., 2., 2., 2.);
         let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
@@ -49991,7 +51854,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_cmp_round_ss_mask() {
+    fn test_mm_mask_cmp_round_ss_mask() {
         let a = _mm_setr_ps(2., 1., 1., 1.);
         let b = _mm_setr_ps(1., 2., 2., 2.);
         let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
@@ -50001,7 +51864,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cmp_sd_mask() {
+    fn test_mm_cmp_sd_mask() {
         let a = _mm_setr_pd(2., 1.);
         let b = _mm_setr_pd(1., 2.);
         let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
@@ -50009,7 +51872,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_cmp_sd_mask() {
+    fn test_mm_mask_cmp_sd_mask() {
         let a = _mm_setr_pd(2., 1.);
         let b = _mm_setr_pd(1., 2.);
         let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
@@ -50019,7 +51882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cmp_round_sd_mask() {
+    fn test_mm_cmp_round_sd_mask() {
         let a = _mm_setr_pd(2., 1.);
         let b = _mm_setr_pd(1., 2.);
         let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
@@ -50027,7 +51890,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_cmp_round_sd_mask() {
+    fn test_mm_mask_cmp_round_sd_mask() {
         let a = _mm_setr_pd(2., 1.);
         let b = _mm_setr_pd(1., 2.);
         let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
@@ -50037,7 +51900,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmplt_epu32_mask() {
+    const fn test_mm512_cmplt_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50047,7 +51910,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmplt_epu32_mask() {
+    const fn test_mm512_mask_cmplt_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50058,7 +51921,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmplt_epu32_mask() {
+    const fn test_mm256_cmplt_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_cmplt_epu32_mask(a, b);
@@ -50066,7 +51929,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmplt_epu32_mask() {
+    const fn test_mm256_mask_cmplt_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
         let b = _mm256_set1_epi32(1);
         let mask = 0b11111111;
@@ -50075,7 +51938,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmplt_epu32_mask() {
+    const fn test_mm_cmplt_epu32_mask() {
         let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
         let b = _mm_set1_epi32(1);
         let r = _mm_cmplt_epu32_mask(a, b);
@@ -50083,7 +51946,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmplt_epu32_mask() {
+    const fn test_mm_mask_cmplt_epu32_mask() {
         let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
         let b = _mm_set1_epi32(1);
         let mask = 0b11111111;
@@ -50092,7 +51955,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpgt_epu32_mask() {
+    const fn test_mm512_cmpgt_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50102,7 +51965,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
+    const fn test_mm512_mask_cmpgt_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50113,7 +51976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpgt_epu32_mask() {
+    const fn test_mm256_cmpgt_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_cmpgt_epu32_mask(a, b);
@@ -50121,7 +51984,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
+    const fn test_mm256_mask_cmpgt_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
         let b = _mm256_set1_epi32(1);
         let mask = 0b11111111;
@@ -50130,7 +51993,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpgt_epu32_mask() {
+    const fn test_mm_cmpgt_epu32_mask() {
         let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
         let b = _mm_set1_epi32(1);
         let r = _mm_cmpgt_epu32_mask(a, b);
@@ -50138,7 +52001,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpgt_epu32_mask() {
+    const fn test_mm_mask_cmpgt_epu32_mask() {
         let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
         let b = _mm_set1_epi32(1);
         let mask = 0b11111111;
@@ -50147,7 +52010,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmple_epu32_mask() {
+    const fn test_mm512_cmple_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50159,7 +52022,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmple_epu32_mask() {
+    const fn test_mm512_mask_cmple_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50172,7 +52035,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmple_epu32_mask() {
+    const fn test_mm256_cmple_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_cmple_epu32_mask(a, b);
@@ -50180,7 +52043,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmple_epu32_mask() {
+    const fn test_mm256_mask_cmple_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
         let b = _mm256_set1_epi32(1);
         let mask = 0b11111111;
@@ -50189,7 +52052,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmple_epu32_mask() {
+    const fn test_mm_cmple_epu32_mask() {
         let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
         let b = _mm_set1_epi32(1);
         let r = _mm_cmple_epu32_mask(a, b);
@@ -50197,7 +52060,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmple_epu32_mask() {
+    const fn test_mm_mask_cmple_epu32_mask() {
         let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
         let b = _mm_set1_epi32(1);
         let mask = 0b11111111;
@@ -50206,7 +52069,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpge_epu32_mask() {
+    const fn test_mm512_cmpge_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50218,7 +52081,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpge_epu32_mask() {
+    const fn test_mm512_mask_cmpge_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50228,7 +52091,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpge_epu32_mask() {
+    const fn test_mm256_cmpge_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_cmpge_epu32_mask(a, b);
@@ -50236,7 +52099,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpge_epu32_mask() {
+    const fn test_mm256_mask_cmpge_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
         let b = _mm256_set1_epi32(1);
         let mask = 0b11111111;
@@ -50245,7 +52108,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpge_epu32_mask() {
+    const fn test_mm_cmpge_epu32_mask() {
         let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
         let b = _mm_set1_epi32(1);
         let r = _mm_cmpge_epu32_mask(a, b);
@@ -50253,7 +52116,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpge_epu32_mask() {
+    const fn test_mm_mask_cmpge_epu32_mask() {
         let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
         let b = _mm_set1_epi32(1);
         let mask = 0b11111111;
@@ -50262,7 +52125,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpeq_epu32_mask() {
+    const fn test_mm512_cmpeq_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50274,7 +52137,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
+    const fn test_mm512_mask_cmpeq_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50287,7 +52150,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpeq_epu32_mask() {
+    const fn test_mm256_cmpeq_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
         let m = _mm256_cmpeq_epu32_mask(b, a);
@@ -50295,7 +52158,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
+    const fn test_mm256_mask_cmpeq_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
         let mask = 0b01111010;
@@ -50304,7 +52167,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpeq_epu32_mask() {
+    const fn test_mm_cmpeq_epu32_mask() {
         let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
         let b = _mm_set_epi32(0, 1, 13, 42);
         let m = _mm_cmpeq_epu32_mask(b, a);
@@ -50312,7 +52175,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpeq_epu32_mask() {
+    const fn test_mm_mask_cmpeq_epu32_mask() {
         let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
         let b = _mm_set_epi32(0, 1, 13, 42);
         let mask = 0b11111111;
@@ -50321,7 +52184,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpneq_epu32_mask() {
+    const fn test_mm512_cmpneq_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50333,7 +52196,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
+    const fn test_mm512_mask_cmpneq_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
@@ -50346,7 +52209,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpneq_epu32_mask() {
+    const fn test_mm256_cmpneq_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
         let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
         let r = _mm256_cmpneq_epu32_mask(b, a);
@@ -50354,7 +52217,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
+    const fn test_mm256_mask_cmpneq_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
         let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
         let mask = 0b11111111;
@@ -50363,7 +52226,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpneq_epu32_mask() {
+    const fn test_mm_cmpneq_epu32_mask() {
         let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
         let b = _mm_set_epi32(0, 1, 13, 42);
         let r = _mm_cmpneq_epu32_mask(b, a);
@@ -50371,7 +52234,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpneq_epu32_mask() {
+    const fn test_mm_mask_cmpneq_epu32_mask() {
         let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
         let b = _mm_set_epi32(0, 1, 13, 42);
         let mask = 0b11111111;
@@ -50380,7 +52243,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmp_epu32_mask() {
+    const fn test_mm512_cmp_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50390,7 +52253,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmp_epu32_mask() {
+    const fn test_mm512_mask_cmp_epu32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50401,7 +52264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmp_epu32_mask() {
+    const fn test_mm256_cmp_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
@@ -50409,7 +52272,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_epu32_mask() {
+    const fn test_mm256_mask_cmp_epu32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let mask = 0b11111111;
@@ -50418,7 +52281,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmp_epu32_mask() {
+    const fn test_mm_cmp_epu32_mask() {
         let a = _mm_set_epi32(0, 1, -1, i32::MAX);
         let b = _mm_set1_epi32(1);
         let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
@@ -50426,7 +52289,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmp_epu32_mask() {
+    const fn test_mm_mask_cmp_epu32_mask() {
         let a = _mm_set_epi32(0, 1, -1, i32::MAX);
         let b = _mm_set1_epi32(1);
         let mask = 0b11111111;
@@ -50435,7 +52298,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmplt_epi32_mask() {
+    const fn test_mm512_cmplt_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50445,7 +52308,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmplt_epi32_mask() {
+    const fn test_mm512_mask_cmplt_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50456,7 +52319,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmplt_epi32_mask() {
+    const fn test_mm256_cmplt_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let r = _mm256_cmplt_epi32_mask(a, b);
@@ -50464,7 +52327,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmplt_epi32_mask() {
+    const fn test_mm256_mask_cmplt_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let mask = 0b11111111;
@@ -50473,7 +52336,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmplt_epi32_mask() {
+    const fn test_mm_cmplt_epi32_mask() {
         let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
         let b = _mm_set1_epi32(-1);
         let r = _mm_cmplt_epi32_mask(a, b);
@@ -50481,7 +52344,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmplt_epi32_mask() {
+    const fn test_mm_mask_cmplt_epi32_mask() {
         let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
         let b = _mm_set1_epi32(-1);
         let mask = 0b11111111;
@@ -50490,7 +52353,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpgt_epi32_mask() {
+    const fn test_mm512_cmpgt_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
@@ -50500,7 +52363,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
+    const fn test_mm512_mask_cmpgt_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
@@ -50511,7 +52374,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpgt_epi32_mask() {
+    const fn test_mm256_cmpgt_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let r = _mm256_cmpgt_epi32_mask(a, b);
@@ -50519,7 +52382,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
+    const fn test_mm256_mask_cmpgt_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let mask = 0b11111111;
@@ -50528,7 +52391,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpgt_epi32_mask() {
+    const fn test_mm_cmpgt_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 13);
         let b = _mm_set1_epi32(-1);
         let r = _mm_cmpgt_epi32_mask(a, b);
@@ -50536,7 +52399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpgt_epi32_mask() {
+    const fn test_mm_mask_cmpgt_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 13);
         let b = _mm_set1_epi32(-1);
         let mask = 0b11111111;
@@ -50545,7 +52408,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmple_epi32_mask() {
+    const fn test_mm512_cmple_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50557,7 +52420,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmple_epi32_mask() {
+    const fn test_mm512_mask_cmple_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50567,7 +52430,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmple_epi32_mask() {
+    const fn test_mm256_cmple_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let r = _mm256_cmple_epi32_mask(a, b);
@@ -50575,7 +52438,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmple_epi32_mask() {
+    const fn test_mm256_mask_cmple_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let mask = 0b11111111;
@@ -50584,7 +52447,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmple_epi32_mask() {
+    const fn test_mm_cmple_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 200);
         let b = _mm_set1_epi32(-1);
         let r = _mm_cmple_epi32_mask(a, b);
@@ -50592,7 +52455,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmple_epi32_mask() {
+    const fn test_mm_mask_cmple_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 200);
         let b = _mm_set1_epi32(-1);
         let mask = 0b11111111;
@@ -50601,7 +52464,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpge_epi32_mask() {
+    const fn test_mm512_cmpge_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50613,7 +52476,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpge_epi32_mask() {
+    const fn test_mm512_mask_cmpge_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
@@ -50626,7 +52489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpge_epi32_mask() {
+    const fn test_mm256_cmpge_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let r = _mm256_cmpge_epi32_mask(a, b);
@@ -50634,7 +52497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpge_epi32_mask() {
+    const fn test_mm256_mask_cmpge_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let mask = 0b11111111;
@@ -50643,7 +52506,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpge_epi32_mask() {
+    const fn test_mm_cmpge_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
         let b = _mm_set1_epi32(-1);
         let r = _mm_cmpge_epi32_mask(a, b);
@@ -50651,7 +52514,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpge_epi32_mask() {
+    const fn test_mm_mask_cmpge_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
         let b = _mm_set1_epi32(-1);
         let mask = 0b11111111;
@@ -50660,7 +52523,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpeq_epi32_mask() {
+    const fn test_mm512_cmpeq_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
@@ -50672,7 +52535,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
+    const fn test_mm512_mask_cmpeq_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
@@ -50685,7 +52548,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpeq_epi32_mask() {
+    const fn test_mm256_cmpeq_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
         let m = _mm256_cmpeq_epi32_mask(b, a);
@@ -50693,7 +52556,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
+    const fn test_mm256_mask_cmpeq_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
         let mask = 0b01111010;
@@ -50702,7 +52565,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpeq_epi32_mask() {
+    const fn test_mm_cmpeq_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 13);
         let b = _mm_set_epi32(0, 1, 13, 42);
         let m = _mm_cmpeq_epi32_mask(b, a);
@@ -50710,7 +52573,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpeq_epi32_mask() {
+    const fn test_mm_mask_cmpeq_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 13);
         let b = _mm_set_epi32(0, 1, 13, 42);
         let mask = 0b11111111;
@@ -50719,7 +52582,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpneq_epi32_mask() {
+    const fn test_mm512_cmpneq_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
@@ -50731,7 +52594,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
+    const fn test_mm512_mask_cmpneq_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
                                  0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
@@ -50744,7 +52607,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpneq_epi32_mask() {
+    const fn test_mm256_cmpneq_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
         let m = _mm256_cmpneq_epi32_mask(b, a);
@@ -50752,7 +52615,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
+    const fn test_mm256_mask_cmpneq_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
         let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
         let mask = 0b11111111;
@@ -50761,7 +52624,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpneq_epi32_mask() {
+    const fn test_mm_cmpneq_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 13);
         let b = _mm_set_epi32(0, 1, 13, 42);
         let r = _mm_cmpneq_epi32_mask(b, a);
@@ -50769,7 +52632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpneq_epi32_mask() {
+    const fn test_mm_mask_cmpneq_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 13);
         let b = _mm_set_epi32(0, 1, 13, 42);
         let mask = 0b11111111;
@@ -50778,7 +52641,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmp_epi32_mask() {
+    const fn test_mm512_cmp_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
@@ -50788,7 +52651,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmp_epi32_mask() {
+    const fn test_mm512_mask_cmp_epi32_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
                                  0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
@@ -50799,7 +52662,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmp_epi32_mask() {
+    const fn test_mm256_cmp_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
@@ -50807,7 +52670,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_epi32_mask() {
+    const fn test_mm256_mask_cmp_epi32_mask() {
         let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
         let b = _mm256_set1_epi32(-1);
         let mask = 0b01100110;
@@ -50816,7 +52679,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmp_epi32_mask() {
+    const fn test_mm_cmp_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 13);
         let b = _mm_set1_epi32(1);
         let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
@@ -50824,7 +52687,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmp_epi32_mask() {
+    const fn test_mm_mask_cmp_epi32_mask() {
         let a = _mm_set_epi32(0, 1, -1, 13);
         let b = _mm_set1_epi32(1);
         let mask = 0b11111111;
@@ -50833,7 +52696,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set_epi8() {
+    const fn test_mm512_set_epi8() {
         let r = _mm512_set1_epi8(2);
         assert_eq_m512i(
             r,
@@ -50846,7 +52709,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set_epi16() {
+    const fn test_mm512_set_epi16() {
         let r = _mm512_set1_epi16(2);
         assert_eq_m512i(
             r,
@@ -50858,7 +52721,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set_epi32() {
+    const fn test_mm512_set_epi32() {
         let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         assert_eq_m512i(
             r,
@@ -50867,7 +52730,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setr_epi32() {
+    const fn test_mm512_setr_epi32() {
         let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         assert_eq_m512i(
             r,
@@ -50876,7 +52739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set1_epi8() {
+    const fn test_mm512_set1_epi8() {
         let r = _mm512_set_epi8(
             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -50886,7 +52749,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set1_epi16() {
+    const fn test_mm512_set1_epi16() {
         let r = _mm512_set_epi16(
             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
             2, 2, 2,
@@ -50895,23 +52758,23 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set1_epi32() {
+    const fn test_mm512_set1_epi32() {
         let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
         assert_eq_m512i(r, _mm512_set1_epi32(2));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setzero_si512() {
+    const fn test_mm512_setzero_si512() {
         assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setzero_epi32() {
+    const fn test_mm512_setzero_epi32() {
         assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set_ps() {
+    const fn test_mm512_set_ps() {
         let r = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -50924,7 +52787,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setr_ps() {
+    const fn test_mm512_setr_ps() {
         let r = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -50937,7 +52800,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set1_ps() {
+    const fn test_mm512_set1_ps() {
         #[rustfmt::skip]
         let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
                                      2., 2., 2., 2., 2., 2., 2., 2.);
@@ -50945,13 +52808,13 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set4_epi32() {
+    const fn test_mm512_set4_epi32() {
         let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
         assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set4_ps() {
+    const fn test_mm512_set4_ps() {
         let r = _mm512_set_ps(
             4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
         );
@@ -50959,13 +52822,13 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setr4_epi32() {
+    const fn test_mm512_setr4_epi32() {
         let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
         assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setr4_ps() {
+    const fn test_mm512_setr4_ps() {
         let r = _mm512_set_ps(
             4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
         );
@@ -50973,39 +52836,41 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setzero_ps() {
+    const fn test_mm512_setzero_ps() {
         assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setzero() {
+    const fn test_mm512_setzero() {
         assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_loadu_pd() {
+    const fn test_mm512_loadu_pd() {
         let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
         let p = a.as_ptr();
-        let r = _mm512_loadu_pd(black_box(p));
+        let r = unsafe { _mm512_loadu_pd(black_box(p)) };
         let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
         assert_eq_m512d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_storeu_pd() {
+    const fn test_mm512_storeu_pd() {
         let a = _mm512_set1_pd(9.);
         let mut r = _mm512_undefined_pd();
-        _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
+        unsafe {
+            _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
+        }
         assert_eq_m512d(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_loadu_ps() {
+    const fn test_mm512_loadu_ps() {
         let a = &[
             4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
         ];
         let p = a.as_ptr();
-        let r = _mm512_loadu_ps(black_box(p));
+        let r = unsafe { _mm512_loadu_ps(black_box(p)) };
         let e = _mm512_setr_ps(
             4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
         );
@@ -51013,36 +52878,38 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_storeu_ps() {
+    const fn test_mm512_storeu_ps() {
         let a = _mm512_set1_ps(9.);
         let mut r = _mm512_undefined_ps();
-        _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
+        unsafe {
+            _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
+        }
         assert_eq_m512(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_loadu_epi32() {
+    const fn test_mm512_mask_loadu_epi32() {
         let src = _mm512_set1_epi32(42);
         let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_loadu_epi32(src, m, black_box(p)) };
         let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_loadu_epi32() {
+    const fn test_mm512_maskz_loadu_epi32() {
         let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_maskz_loadu_epi32(m, black_box(p));
+        let r = unsafe { _mm512_maskz_loadu_epi32(m, black_box(p)) };
         let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_load_epi32() {
+    const fn test_mm512_mask_load_epi32() {
         #[repr(align(64))]
         struct Align {
             data: [i32; 16], // 64 bytes
@@ -51053,13 +52920,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_mask_load_epi32(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_load_epi32(src, m, black_box(p)) };
         let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_load_epi32() {
+    const fn test_mm512_maskz_load_epi32() {
         #[repr(align(64))]
         struct Align {
             data: [i32; 16], // 64 bytes
@@ -51069,23 +52936,25 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_maskz_load_epi32(m, black_box(p));
+        let r = unsafe { _mm512_maskz_load_epi32(m, black_box(p)) };
         let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_storeu_epi32() {
+    const fn test_mm512_mask_storeu_epi32() {
         let mut r = [42_i32; 16];
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let m = 0b11101000_11001010;
-        _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
+        }
         let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
-        assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
+        assert_eq_m512i(unsafe { _mm512_loadu_epi32(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_store_epi32() {
+    const fn test_mm512_mask_store_epi32() {
         #[repr(align(64))]
         struct Align {
             data: [i32; 16],
@@ -51093,34 +52962,36 @@ mod tests {
         let mut r = Align { data: [42; 16] };
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let m = 0b11101000_11001010;
-        _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
-        assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
+        assert_eq_m512i(unsafe { _mm512_load_epi32(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_loadu_epi64() {
+    const fn test_mm512_mask_loadu_epi64() {
         let src = _mm512_set1_epi64(42);
         let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_loadu_epi64(src, m, black_box(p)) };
         let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_loadu_epi64() {
+    const fn test_mm512_maskz_loadu_epi64() {
         let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm512_maskz_loadu_epi64(m, black_box(p));
+        let r = unsafe { _mm512_maskz_loadu_epi64(m, black_box(p)) };
         let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_load_epi64() {
+    const fn test_mm512_mask_load_epi64() {
         #[repr(align(64))]
         struct Align {
             data: [i64; 8], // 64 bytes
@@ -51131,13 +53002,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11001010;
-        let r = _mm512_mask_load_epi64(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_load_epi64(src, m, black_box(p)) };
         let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_load_epi64() {
+    const fn test_mm512_maskz_load_epi64() {
         #[repr(align(64))]
         struct Align {
             data: [i64; 8], // 64 bytes
@@ -51147,23 +53018,25 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11001010;
-        let r = _mm512_maskz_load_epi64(m, black_box(p));
+        let r = unsafe { _mm512_maskz_load_epi64(m, black_box(p)) };
         let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_storeu_epi64() {
+    const fn test_mm512_mask_storeu_epi64() {
         let mut r = [42_i64; 8];
         let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let m = 0b11001010;
-        _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
+        }
         let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
-        assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
+        assert_eq_m512i(unsafe { _mm512_loadu_epi64(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_store_epi64() {
+    const fn test_mm512_mask_store_epi64() {
         #[repr(align(64))]
         struct Align {
             data: [i64; 8],
@@ -51172,13 +53045,15 @@ mod tests {
         let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let m = 0b11001010;
         let p = r.data.as_mut_ptr();
-        _mm512_mask_store_epi64(p, m, a);
+        unsafe {
+            _mm512_mask_store_epi64(p, m, a);
+        }
         let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
-        assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
+        assert_eq_m512i(unsafe { _mm512_load_epi64(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_loadu_ps() {
+    const fn test_mm512_mask_loadu_ps() {
         let src = _mm512_set1_ps(42.0);
         let a = &[
             1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
@@ -51186,7 +53061,7 @@ mod tests {
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_mask_loadu_ps(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_loadu_ps(src, m, black_box(p)) };
         let e = _mm512_setr_ps(
             42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
             16.0,
@@ -51195,14 +53070,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_loadu_ps() {
+    const fn test_mm512_maskz_loadu_ps() {
         let a = &[
             1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
             16.0,
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_maskz_loadu_ps(m, black_box(p));
+        let r = unsafe { _mm512_maskz_loadu_ps(m, black_box(p)) };
         let e = _mm512_setr_ps(
             0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
         );
@@ -51210,7 +53085,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_load_ps() {
+    const fn test_mm512_mask_load_ps() {
         #[repr(align(64))]
         struct Align {
             data: [f32; 16], // 64 bytes
@@ -51224,7 +53099,7 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_mask_load_ps(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_load_ps(src, m, black_box(p)) };
         let e = _mm512_setr_ps(
             42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
             16.0,
@@ -51233,7 +53108,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_load_ps() {
+    const fn test_mm512_maskz_load_ps() {
         #[repr(align(64))]
         struct Align {
             data: [f32; 16], // 64 bytes
@@ -51246,7 +53121,7 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_maskz_load_ps(m, black_box(p));
+        let r = unsafe { _mm512_maskz_load_ps(m, black_box(p)) };
         let e = _mm512_setr_ps(
             0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
         );
@@ -51254,22 +53129,24 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_storeu_ps() {
+    const fn test_mm512_mask_storeu_ps() {
         let mut r = [42_f32; 16];
         let a = _mm512_setr_ps(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
         let m = 0b11101000_11001010;
-        _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
+        }
         let e = _mm512_setr_ps(
             42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
             16.0,
         );
-        assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
+        assert_eq_m512(unsafe { _mm512_loadu_ps(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_store_ps() {
+    const fn test_mm512_mask_store_ps() {
         #[repr(align(64))]
         struct Align {
             data: [f32; 16],
@@ -51279,37 +53156,39 @@ mod tests {
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
         let m = 0b11101000_11001010;
-        _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm512_setr_ps(
             42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
             16.0,
         );
-        assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
+        assert_eq_m512(unsafe { _mm512_load_ps(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_loadu_pd() {
+    const fn test_mm512_mask_loadu_pd() {
         let src = _mm512_set1_pd(42.0);
         let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm512_mask_loadu_pd(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_loadu_pd(src, m, black_box(p)) };
         let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
         assert_eq_m512d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_loadu_pd() {
+    const fn test_mm512_maskz_loadu_pd() {
         let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm512_maskz_loadu_pd(m, black_box(p));
+        let r = unsafe { _mm512_maskz_loadu_pd(m, black_box(p)) };
         let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
         assert_eq_m512d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_load_pd() {
+    const fn test_mm512_mask_load_pd() {
         #[repr(align(64))]
         struct Align {
             data: [f64; 8], // 64 bytes
@@ -51320,13 +53199,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11001010;
-        let r = _mm512_mask_load_pd(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_load_pd(src, m, black_box(p)) };
         let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
         assert_eq_m512d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_load_pd() {
+    const fn test_mm512_maskz_load_pd() {
         #[repr(align(64))]
         struct Align {
             data: [f64; 8], // 64 bytes
@@ -51336,23 +53215,25 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11001010;
-        let r = _mm512_maskz_load_pd(m, black_box(p));
+        let r = unsafe { _mm512_maskz_load_pd(m, black_box(p)) };
         let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
         assert_eq_m512d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_storeu_pd() {
+    const fn test_mm512_mask_storeu_pd() {
         let mut r = [42_f64; 8];
         let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let m = 0b11001010;
-        _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
+        }
         let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
-        assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
+        assert_eq_m512d(unsafe { _mm512_loadu_pd(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_store_pd() {
+    const fn test_mm512_mask_store_pd() {
         #[repr(align(64))]
         struct Align {
             data: [f64; 8],
@@ -51360,34 +53241,36 @@ mod tests {
         let mut r = Align { data: [42.0; 8] };
         let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let m = 0b11001010;
-        _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
-        assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
+        assert_eq_m512d(unsafe { _mm512_load_pd(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_loadu_epi32() {
+    const fn test_mm256_mask_loadu_epi32() {
         let src = _mm256_set1_epi32(42);
         let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_loadu_epi32(src, m, black_box(p)) };
         let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_loadu_epi32() {
+    const fn test_mm256_maskz_loadu_epi32() {
         let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm256_maskz_loadu_epi32(m, black_box(p));
+        let r = unsafe { _mm256_maskz_loadu_epi32(m, black_box(p)) };
         let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_load_epi32() {
+    const fn test_mm256_mask_load_epi32() {
         #[repr(align(32))]
         struct Align {
             data: [i32; 8], // 32 bytes
@@ -51398,13 +53281,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11001010;
-        let r = _mm256_mask_load_epi32(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_load_epi32(src, m, black_box(p)) };
         let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_load_epi32() {
+    const fn test_mm256_maskz_load_epi32() {
         #[repr(align(32))]
         struct Align {
             data: [i32; 8], // 32 bytes
@@ -51414,23 +53297,25 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11001010;
-        let r = _mm256_maskz_load_epi32(m, black_box(p));
+        let r = unsafe { _mm256_maskz_load_epi32(m, black_box(p)) };
         let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_storeu_epi32() {
+    const fn test_mm256_mask_storeu_epi32() {
         let mut r = [42_i32; 8];
         let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let m = 0b11001010;
-        _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
+        }
         let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
-        assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
+        assert_eq_m256i(unsafe { _mm256_loadu_epi32(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_store_epi32() {
+    const fn test_mm256_mask_store_epi32() {
         #[repr(align(64))]
         struct Align {
             data: [i32; 8],
@@ -51438,34 +53323,36 @@ mod tests {
         let mut r = Align { data: [42; 8] };
         let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let m = 0b11001010;
-        _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
-        assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
+        assert_eq_m256i(unsafe { _mm256_load_epi32(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_loadu_epi64() {
+    const fn test_mm256_mask_loadu_epi64() {
         let src = _mm256_set1_epi64x(42);
         let a = &[1_i64, 2, 3, 4];
         let p = a.as_ptr();
         let m = 0b1010;
-        let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_loadu_epi64(src, m, black_box(p)) };
         let e = _mm256_setr_epi64x(42, 2, 42, 4);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_loadu_epi64() {
+    const fn test_mm256_maskz_loadu_epi64() {
         let a = &[1_i64, 2, 3, 4];
         let p = a.as_ptr();
         let m = 0b1010;
-        let r = _mm256_maskz_loadu_epi64(m, black_box(p));
+        let r = unsafe { _mm256_maskz_loadu_epi64(m, black_box(p)) };
         let e = _mm256_setr_epi64x(0, 2, 0, 4);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_load_epi64() {
+    const fn test_mm256_mask_load_epi64() {
         #[repr(align(32))]
         struct Align {
             data: [i64; 4], // 32 bytes
@@ -51476,13 +53363,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b1010;
-        let r = _mm256_mask_load_epi64(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_load_epi64(src, m, black_box(p)) };
         let e = _mm256_setr_epi64x(42, 2, 42, 4);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_load_epi64() {
+    const fn test_mm256_maskz_load_epi64() {
         #[repr(align(32))]
         struct Align {
             data: [i64; 4], // 32 bytes
@@ -51492,23 +53379,25 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b1010;
-        let r = _mm256_maskz_load_epi64(m, black_box(p));
+        let r = unsafe { _mm256_maskz_load_epi64(m, black_box(p)) };
         let e = _mm256_setr_epi64x(0, 2, 0, 4);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_storeu_epi64() {
+    const fn test_mm256_mask_storeu_epi64() {
         let mut r = [42_i64; 4];
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let m = 0b1010;
-        _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
+        }
         let e = _mm256_setr_epi64x(42, 2, 42, 4);
-        assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
+        assert_eq_m256i(unsafe { _mm256_loadu_epi64(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_store_epi64() {
+    const fn test_mm256_mask_store_epi64() {
         #[repr(align(32))]
         struct Align {
             data: [i64; 4],
@@ -51516,34 +53405,36 @@ mod tests {
         let mut r = Align { data: [42; 4] };
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let m = 0b1010;
-        _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm256_setr_epi64x(42, 2, 42, 4);
-        assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
+        assert_eq_m256i(unsafe { _mm256_load_epi64(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_loadu_ps() {
+    const fn test_mm256_mask_loadu_ps() {
         let src = _mm256_set1_ps(42.0);
         let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm256_mask_loadu_ps(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_loadu_ps(src, m, black_box(p)) };
         let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_loadu_ps() {
+    const fn test_mm256_maskz_loadu_ps() {
         let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
         let p = a.as_ptr();
         let m = 0b11001010;
-        let r = _mm256_maskz_loadu_ps(m, black_box(p));
+        let r = unsafe { _mm256_maskz_loadu_ps(m, black_box(p)) };
         let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_load_ps() {
+    const fn test_mm256_mask_load_ps() {
         #[repr(align(32))]
         struct Align {
             data: [f32; 8], // 32 bytes
@@ -51554,13 +53445,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11001010;
-        let r = _mm256_mask_load_ps(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_load_ps(src, m, black_box(p)) };
         let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_load_ps() {
+    const fn test_mm256_maskz_load_ps() {
         #[repr(align(32))]
         struct Align {
             data: [f32; 8], // 32 bytes
@@ -51570,23 +53461,25 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b11001010;
-        let r = _mm256_maskz_load_ps(m, black_box(p));
+        let r = unsafe { _mm256_maskz_load_ps(m, black_box(p)) };
         let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_storeu_ps() {
+    const fn test_mm256_mask_storeu_ps() {
         let mut r = [42_f32; 8];
         let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let m = 0b11001010;
-        _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
+        }
         let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
-        assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
+        assert_eq_m256(unsafe { _mm256_loadu_ps(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_store_ps() {
+    const fn test_mm256_mask_store_ps() {
         #[repr(align(32))]
         struct Align {
             data: [f32; 8],
@@ -51594,34 +53487,36 @@ mod tests {
         let mut r = Align { data: [42.0; 8] };
         let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let m = 0b11001010;
-        _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
-        assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
+        assert_eq_m256(unsafe { _mm256_load_ps(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_loadu_pd() {
+    const fn test_mm256_mask_loadu_pd() {
         let src = _mm256_set1_pd(42.0);
         let a = &[1.0_f64, 2.0, 3.0, 4.0];
         let p = a.as_ptr();
         let m = 0b1010;
-        let r = _mm256_mask_loadu_pd(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_loadu_pd(src, m, black_box(p)) };
         let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_loadu_pd() {
+    const fn test_mm256_maskz_loadu_pd() {
         let a = &[1.0_f64, 2.0, 3.0, 4.0];
         let p = a.as_ptr();
         let m = 0b1010;
-        let r = _mm256_maskz_loadu_pd(m, black_box(p));
+        let r = unsafe { _mm256_maskz_loadu_pd(m, black_box(p)) };
         let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_load_pd() {
+    const fn test_mm256_mask_load_pd() {
         #[repr(align(32))]
         struct Align {
             data: [f64; 4], // 32 bytes
@@ -51632,13 +53527,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b1010;
-        let r = _mm256_mask_load_pd(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_load_pd(src, m, black_box(p)) };
         let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_load_pd() {
+    const fn test_mm256_maskz_load_pd() {
         #[repr(align(32))]
         struct Align {
             data: [f64; 4], // 32 bytes
@@ -51648,23 +53543,25 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b1010;
-        let r = _mm256_maskz_load_pd(m, black_box(p));
+        let r = unsafe { _mm256_maskz_load_pd(m, black_box(p)) };
         let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_storeu_pd() {
+    const fn test_mm256_mask_storeu_pd() {
         let mut r = [42_f64; 4];
         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
         let m = 0b1010;
-        _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
+        }
         let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
-        assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
+        assert_eq_m256d(unsafe { _mm256_loadu_pd(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_store_pd() {
+    const fn test_mm256_mask_store_pd() {
         #[repr(align(32))]
         struct Align {
             data: [f64; 4],
@@ -51672,34 +53569,36 @@ mod tests {
         let mut r = Align { data: [42.0; 4] };
         let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
         let m = 0b1010;
-        _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
-        assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
+        assert_eq_m256d(unsafe { _mm256_load_pd(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_loadu_epi32() {
+    const fn test_mm_mask_loadu_epi32() {
         let src = _mm_set1_epi32(42);
         let a = &[1_i32, 2, 3, 4];
         let p = a.as_ptr();
         let m = 0b1010;
-        let r = _mm_mask_loadu_epi32(src, m, black_box(p));
+        let r = unsafe { _mm_mask_loadu_epi32(src, m, black_box(p)) };
         let e = _mm_setr_epi32(42, 2, 42, 4);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_loadu_epi32() {
+    const fn test_mm_maskz_loadu_epi32() {
         let a = &[1_i32, 2, 3, 4];
         let p = a.as_ptr();
         let m = 0b1010;
-        let r = _mm_maskz_loadu_epi32(m, black_box(p));
+        let r = unsafe { _mm_maskz_loadu_epi32(m, black_box(p)) };
         let e = _mm_setr_epi32(0, 2, 0, 4);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_load_epi32() {
+    const fn test_mm_mask_load_epi32() {
         #[repr(align(16))]
         struct Align {
             data: [i32; 4], // 32 bytes
@@ -51710,13 +53609,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b1010;
-        let r = _mm_mask_load_epi32(src, m, black_box(p));
+        let r = unsafe { _mm_mask_load_epi32(src, m, black_box(p)) };
         let e = _mm_setr_epi32(42, 2, 42, 4);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_load_epi32() {
+    const fn test_mm_maskz_load_epi32() {
         #[repr(align(16))]
         struct Align {
             data: [i32; 4], // 16 bytes
@@ -51726,23 +53625,25 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b1010;
-        let r = _mm_maskz_load_epi32(m, black_box(p));
+        let r = unsafe { _mm_maskz_load_epi32(m, black_box(p)) };
         let e = _mm_setr_epi32(0, 2, 0, 4);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_storeu_epi32() {
+    const fn test_mm_mask_storeu_epi32() {
         let mut r = [42_i32; 4];
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let m = 0b1010;
-        _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
+        }
         let e = _mm_setr_epi32(42, 2, 42, 4);
-        assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
+        assert_eq_m128i(unsafe { _mm_loadu_epi32(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_store_epi32() {
+    const fn test_mm_mask_store_epi32() {
         #[repr(align(16))]
         struct Align {
             data: [i32; 4], // 16 bytes
@@ -51750,34 +53651,36 @@ mod tests {
         let mut r = Align { data: [42; 4] };
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let m = 0b1010;
-        _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm_setr_epi32(42, 2, 42, 4);
-        assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
+        assert_eq_m128i(unsafe { _mm_load_epi32(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_loadu_epi64() {
+    const fn test_mm_mask_loadu_epi64() {
         let src = _mm_set1_epi64x(42);
         let a = &[1_i64, 2];
         let p = a.as_ptr();
         let m = 0b10;
-        let r = _mm_mask_loadu_epi64(src, m, black_box(p));
+        let r = unsafe { _mm_mask_loadu_epi64(src, m, black_box(p)) };
         let e = _mm_setr_epi64x(42, 2);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_loadu_epi64() {
+    const fn test_mm_maskz_loadu_epi64() {
         let a = &[1_i64, 2];
         let p = a.as_ptr();
         let m = 0b10;
-        let r = _mm_maskz_loadu_epi64(m, black_box(p));
+        let r = unsafe { _mm_maskz_loadu_epi64(m, black_box(p)) };
         let e = _mm_setr_epi64x(0, 2);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_load_epi64() {
+    const fn test_mm_mask_load_epi64() {
         #[repr(align(16))]
         struct Align {
             data: [i64; 2], // 16 bytes
@@ -51786,13 +53689,13 @@ mod tests {
         let a = Align { data: [1_i64, 2] };
         let p = a.data.as_ptr();
         let m = 0b10;
-        let r = _mm_mask_load_epi64(src, m, black_box(p));
+        let r = unsafe { _mm_mask_load_epi64(src, m, black_box(p)) };
         let e = _mm_setr_epi64x(42, 2);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_load_epi64() {
+    const fn test_mm_maskz_load_epi64() {
         #[repr(align(16))]
         struct Align {
             data: [i64; 2], // 16 bytes
@@ -51800,23 +53703,25 @@ mod tests {
         let a = Align { data: [1_i64, 2] };
         let p = a.data.as_ptr();
         let m = 0b10;
-        let r = _mm_maskz_load_epi64(m, black_box(p));
+        let r = unsafe { _mm_maskz_load_epi64(m, black_box(p)) };
         let e = _mm_setr_epi64x(0, 2);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_storeu_epi64() {
+    const fn test_mm_mask_storeu_epi64() {
         let mut r = [42_i64; 2];
         let a = _mm_setr_epi64x(1, 2);
         let m = 0b10;
-        _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
+        }
         let e = _mm_setr_epi64x(42, 2);
-        assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
+        assert_eq_m128i(unsafe { _mm_loadu_epi64(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_store_epi64() {
+    const fn test_mm_mask_store_epi64() {
         #[repr(align(16))]
         struct Align {
             data: [i64; 2], // 16 bytes
@@ -51824,34 +53729,36 @@ mod tests {
         let mut r = Align { data: [42; 2] };
         let a = _mm_setr_epi64x(1, 2);
         let m = 0b10;
-        _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm_setr_epi64x(42, 2);
-        assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
+        assert_eq_m128i(unsafe { _mm_load_epi64(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_loadu_ps() {
+    const fn test_mm_mask_loadu_ps() {
         let src = _mm_set1_ps(42.0);
         let a = &[1.0_f32, 2.0, 3.0, 4.0];
         let p = a.as_ptr();
         let m = 0b1010;
-        let r = _mm_mask_loadu_ps(src, m, black_box(p));
+        let r = unsafe { _mm_mask_loadu_ps(src, m, black_box(p)) };
         let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_loadu_ps() {
+    const fn test_mm_maskz_loadu_ps() {
         let a = &[1.0_f32, 2.0, 3.0, 4.0];
         let p = a.as_ptr();
         let m = 0b1010;
-        let r = _mm_maskz_loadu_ps(m, black_box(p));
+        let r = unsafe { _mm_maskz_loadu_ps(m, black_box(p)) };
         let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_load_ps() {
+    const fn test_mm_mask_load_ps() {
         #[repr(align(16))]
         struct Align {
             data: [f32; 4], // 16 bytes
@@ -51862,13 +53769,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b1010;
-        let r = _mm_mask_load_ps(src, m, black_box(p));
+        let r = unsafe { _mm_mask_load_ps(src, m, black_box(p)) };
         let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_load_ps() {
+    const fn test_mm_maskz_load_ps() {
         #[repr(align(16))]
         struct Align {
             data: [f32; 4], // 16 bytes
@@ -51878,23 +53785,25 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b1010;
-        let r = _mm_maskz_load_ps(m, black_box(p));
+        let r = unsafe { _mm_maskz_load_ps(m, black_box(p)) };
         let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_storeu_ps() {
+    const fn test_mm_mask_storeu_ps() {
         let mut r = [42_f32; 4];
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let m = 0b1010;
-        _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
+        }
         let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
-        assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
+        assert_eq_m128(unsafe { _mm_loadu_ps(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_store_ps() {
+    const fn test_mm_mask_store_ps() {
         #[repr(align(16))]
         struct Align {
             data: [f32; 4], // 16 bytes
@@ -51902,34 +53811,36 @@ mod tests {
         let mut r = Align { data: [42.0; 4] };
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let m = 0b1010;
-        _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
-        assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
+        assert_eq_m128(unsafe { _mm_load_ps(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_loadu_pd() {
+    const fn test_mm_mask_loadu_pd() {
         let src = _mm_set1_pd(42.0);
         let a = &[1.0_f64, 2.0];
         let p = a.as_ptr();
         let m = 0b10;
-        let r = _mm_mask_loadu_pd(src, m, black_box(p));
+        let r = unsafe { _mm_mask_loadu_pd(src, m, black_box(p)) };
         let e = _mm_setr_pd(42.0, 2.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_loadu_pd() {
+    const fn test_mm_maskz_loadu_pd() {
         let a = &[1.0_f64, 2.0];
         let p = a.as_ptr();
         let m = 0b10;
-        let r = _mm_maskz_loadu_pd(m, black_box(p));
+        let r = unsafe { _mm_maskz_loadu_pd(m, black_box(p)) };
         let e = _mm_setr_pd(0.0, 2.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_load_pd() {
+    const fn test_mm_mask_load_pd() {
         #[repr(align(16))]
         struct Align {
             data: [f64; 2], // 16 bytes
@@ -51940,13 +53851,13 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b10;
-        let r = _mm_mask_load_pd(src, m, black_box(p));
+        let r = unsafe { _mm_mask_load_pd(src, m, black_box(p)) };
         let e = _mm_setr_pd(42.0, 2.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_load_pd() {
+    const fn test_mm_maskz_load_pd() {
         #[repr(align(16))]
         struct Align {
             data: [f64; 2], // 16 bytes
@@ -51956,77 +53867,79 @@ mod tests {
         };
         let p = a.data.as_ptr();
         let m = 0b10;
-        let r = _mm_maskz_load_pd(m, black_box(p));
+        let r = unsafe { _mm_maskz_load_pd(m, black_box(p)) };
         let e = _mm_setr_pd(0.0, 2.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_load_ss() {
+    fn test_mm_mask_load_ss() {
         #[repr(align(16))]
         struct Align {
             data: f32,
         }
         let src = _mm_set_ss(2.0);
         let mem = Align { data: 1.0 };
-        let r = _mm_mask_load_ss(src, 0b1, &mem.data);
+        let r = unsafe { _mm_mask_load_ss(src, 0b1, &mem.data) };
         assert_eq_m128(r, _mm_set_ss(1.0));
-        let r = _mm_mask_load_ss(src, 0b0, &mem.data);
+        let r = unsafe { _mm_mask_load_ss(src, 0b0, &mem.data) };
         assert_eq_m128(r, _mm_set_ss(2.0));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_load_ss() {
+    fn test_mm_maskz_load_ss() {
         #[repr(align(16))]
         struct Align {
             data: f32,
         }
         let mem = Align { data: 1.0 };
-        let r = _mm_maskz_load_ss(0b1, &mem.data);
+        let r = unsafe { _mm_maskz_load_ss(0b1, &mem.data) };
         assert_eq_m128(r, _mm_set_ss(1.0));
-        let r = _mm_maskz_load_ss(0b0, &mem.data);
+        let r = unsafe { _mm_maskz_load_ss(0b0, &mem.data) };
         assert_eq_m128(r, _mm_set_ss(0.0));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_load_sd() {
+    fn test_mm_mask_load_sd() {
         #[repr(align(16))]
         struct Align {
             data: f64,
         }
         let src = _mm_set_sd(2.0);
         let mem = Align { data: 1.0 };
-        let r = _mm_mask_load_sd(src, 0b1, &mem.data);
+        let r = unsafe { _mm_mask_load_sd(src, 0b1, &mem.data) };
         assert_eq_m128d(r, _mm_set_sd(1.0));
-        let r = _mm_mask_load_sd(src, 0b0, &mem.data);
+        let r = unsafe { _mm_mask_load_sd(src, 0b0, &mem.data) };
         assert_eq_m128d(r, _mm_set_sd(2.0));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_load_sd() {
+    fn test_mm_maskz_load_sd() {
         #[repr(align(16))]
         struct Align {
             data: f64,
         }
         let mem = Align { data: 1.0 };
-        let r = _mm_maskz_load_sd(0b1, &mem.data);
+        let r = unsafe { _mm_maskz_load_sd(0b1, &mem.data) };
         assert_eq_m128d(r, _mm_set_sd(1.0));
-        let r = _mm_maskz_load_sd(0b0, &mem.data);
+        let r = unsafe { _mm_maskz_load_sd(0b0, &mem.data) };
         assert_eq_m128d(r, _mm_set_sd(0.0));
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_storeu_pd() {
+    const fn test_mm_mask_storeu_pd() {
         let mut r = [42_f64; 2];
         let a = _mm_setr_pd(1.0, 2.0);
         let m = 0b10;
-        _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
+        unsafe {
+            _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
+        }
         let e = _mm_setr_pd(42.0, 2.0);
-        assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
+        assert_eq_m128d(unsafe { _mm_loadu_pd(r.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_store_pd() {
+    const fn test_mm_mask_store_pd() {
         #[repr(align(16))]
         struct Align {
             data: [f64; 2], // 16 bytes
@@ -52034,53 +53947,63 @@ mod tests {
         let mut r = Align { data: [42.0; 2] };
         let a = _mm_setr_pd(1.0, 2.0);
         let m = 0b10;
-        _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
+        unsafe {
+            _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
+        }
         let e = _mm_setr_pd(42.0, 2.0);
-        assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
+        assert_eq_m128d(unsafe { _mm_load_pd(r.data.as_ptr()) }, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_store_ss() {
+    fn test_mm_mask_store_ss() {
         #[repr(align(16))]
         struct Align {
             data: f32,
         }
         let a = _mm_set_ss(2.0);
         let mut mem = Align { data: 1.0 };
-        _mm_mask_store_ss(&mut mem.data, 0b1, a);
+        unsafe {
+            _mm_mask_store_ss(&mut mem.data, 0b1, a);
+        }
         assert_eq!(mem.data, 2.0);
-        _mm_mask_store_ss(&mut mem.data, 0b0, a);
+        unsafe {
+            _mm_mask_store_ss(&mut mem.data, 0b0, a);
+        }
         assert_eq!(mem.data, 2.0);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_store_sd() {
+    fn test_mm_mask_store_sd() {
         #[repr(align(16))]
         struct Align {
             data: f64,
         }
         let a = _mm_set_sd(2.0);
         let mut mem = Align { data: 1.0 };
-        _mm_mask_store_sd(&mut mem.data, 0b1, a);
+        unsafe {
+            _mm_mask_store_sd(&mut mem.data, 0b1, a);
+        }
         assert_eq!(mem.data, 2.0);
-        _mm_mask_store_sd(&mut mem.data, 0b0, a);
+        unsafe {
+            _mm_mask_store_sd(&mut mem.data, 0b0, a);
+        }
         assert_eq!(mem.data, 2.0);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setr_pd() {
+    const fn test_mm512_setr_pd() {
         let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set_pd() {
+    const fn test_mm512_set_pd() {
         let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rol_epi32() {
+    const fn test_mm512_rol_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm512_rol_epi32::<1>(a);
         let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
@@ -52088,7 +54011,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rol_epi32() {
+    const fn test_mm512_mask_rol_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -52098,7 +54021,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rol_epi32() {
+    const fn test_mm512_maskz_rol_epi32() {
         let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
         let r = _mm512_maskz_rol_epi32::<1>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -52108,7 +54031,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rol_epi32() {
+    const fn test_mm256_rol_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm256_rol_epi32::<1>(a);
         let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
@@ -52116,7 +54039,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rol_epi32() {
+    const fn test_mm256_mask_rol_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -52126,7 +54049,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rol_epi32() {
+    const fn test_mm256_maskz_rol_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm256_maskz_rol_epi32::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -52136,7 +54059,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rol_epi32() {
+    const fn test_mm_rol_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let r = _mm_rol_epi32::<1>(a);
         let e = _mm_set_epi32(1 << 0, 2, 2, 2);
@@ -52144,7 +54067,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rol_epi32() {
+    const fn test_mm_mask_rol_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let r = _mm_mask_rol_epi32::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -52154,7 +54077,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rol_epi32() {
+    const fn test_mm_maskz_rol_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let r = _mm_maskz_rol_epi32::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -52164,7 +54087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_ror_epi32() {
+    const fn test_mm512_ror_epi32() {
         let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
         let r = _mm512_ror_epi32::<1>(a);
         let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
@@ -52172,7 +54095,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_ror_epi32() {
+    const fn test_mm512_mask_ror_epi32() {
         let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
         let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -52182,7 +54105,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_ror_epi32() {
+    const fn test_mm512_maskz_ror_epi32() {
         let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
         let r = _mm512_maskz_ror_epi32::<1>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -52192,7 +54115,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_ror_epi32() {
+    const fn test_mm256_ror_epi32() {
         let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
         let r = _mm256_ror_epi32::<1>(a);
         let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
@@ -52200,7 +54123,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_ror_epi32() {
+    const fn test_mm256_mask_ror_epi32() {
         let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
         let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -52210,7 +54133,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_ror_epi32() {
+    const fn test_mm256_maskz_ror_epi32() {
         let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
         let r = _mm256_maskz_ror_epi32::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -52220,7 +54143,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_ror_epi32() {
+    const fn test_mm_ror_epi32() {
         let a = _mm_set_epi32(1 << 0, 2, 2, 2);
         let r = _mm_ror_epi32::<1>(a);
         let e = _mm_set_epi32(1 << 31, 1, 1, 1);
@@ -52228,7 +54151,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_ror_epi32() {
+    const fn test_mm_mask_ror_epi32() {
         let a = _mm_set_epi32(1 << 0, 2, 2, 2);
         let r = _mm_mask_ror_epi32::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -52238,7 +54161,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_ror_epi32() {
+    const fn test_mm_maskz_ror_epi32() {
         let a = _mm_set_epi32(1 << 0, 2, 2, 2);
         let r = _mm_maskz_ror_epi32::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -52248,7 +54171,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_slli_epi32() {
+    const fn test_mm512_slli_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm512_slli_epi32::<1>(a);
         let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
@@ -52256,7 +54179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_slli_epi32() {
+    const fn test_mm512_mask_slli_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -52266,7 +54189,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_slli_epi32() {
+    const fn test_mm512_maskz_slli_epi32() {
         let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
         let r = _mm512_maskz_slli_epi32::<1>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -52276,7 +54199,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_slli_epi32() {
+    const fn test_mm256_mask_slli_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -52286,7 +54209,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_slli_epi32() {
+    const fn test_mm256_maskz_slli_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm256_maskz_slli_epi32::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -52296,7 +54219,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_slli_epi32() {
+    const fn test_mm_mask_slli_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let r = _mm_mask_slli_epi32::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -52306,7 +54229,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_slli_epi32() {
+    const fn test_mm_maskz_slli_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let r = _mm_maskz_slli_epi32::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -52316,7 +54239,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srli_epi32() {
+    const fn test_mm512_srli_epi32() {
         let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
         let r = _mm512_srli_epi32::<1>(a);
         let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
@@ -52324,7 +54247,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srli_epi32() {
+    const fn test_mm512_mask_srli_epi32() {
         let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
         let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -52334,7 +54257,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srli_epi32() {
+    const fn test_mm512_maskz_srli_epi32() {
         let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
         let r = _mm512_maskz_srli_epi32::<1>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -52344,7 +54267,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srli_epi32() {
+    const fn test_mm256_mask_srli_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -52354,7 +54277,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srli_epi32() {
+    const fn test_mm256_maskz_srli_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm256_maskz_srli_epi32::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -52364,7 +54287,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srli_epi32() {
+    const fn test_mm_mask_srli_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let r = _mm_mask_srli_epi32::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -52374,7 +54297,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srli_epi32() {
+    const fn test_mm_maskz_srli_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let r = _mm_maskz_srli_epi32::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -52384,7 +54307,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rolv_epi32() {
+    const fn test_mm512_rolv_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_rolv_epi32(a, b);
@@ -52393,7 +54316,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rolv_epi32() {
+    const fn test_mm512_mask_rolv_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_mask_rolv_epi32(a, 0, a, b);
@@ -52404,7 +54327,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rolv_epi32() {
+    const fn test_mm512_maskz_rolv_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_maskz_rolv_epi32(0, a, b);
@@ -52415,7 +54338,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rolv_epi32() {
+    const fn test_mm256_rolv_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_rolv_epi32(a, b);
@@ -52424,7 +54347,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rolv_epi32() {
+    const fn test_mm256_mask_rolv_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_mask_rolv_epi32(a, 0, a, b);
@@ -52435,7 +54358,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rolv_epi32() {
+    const fn test_mm256_maskz_rolv_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_maskz_rolv_epi32(0, a, b);
@@ -52446,7 +54369,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rolv_epi32() {
+    const fn test_mm_rolv_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let b = _mm_set1_epi32(1);
         let r = _mm_rolv_epi32(a, b);
@@ -52455,7 +54378,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rolv_epi32() {
+    const fn test_mm_mask_rolv_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let b = _mm_set1_epi32(1);
         let r = _mm_mask_rolv_epi32(a, 0, a, b);
@@ -52466,7 +54389,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rolv_epi32() {
+    const fn test_mm_maskz_rolv_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let b = _mm_set1_epi32(1);
         let r = _mm_maskz_rolv_epi32(0, a, b);
@@ -52477,7 +54400,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rorv_epi32() {
+    const fn test_mm512_rorv_epi32() {
         let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_rorv_epi32(a, b);
@@ -52486,7 +54409,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rorv_epi32() {
+    const fn test_mm512_mask_rorv_epi32() {
         let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_mask_rorv_epi32(a, 0, a, b);
@@ -52497,7 +54420,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rorv_epi32() {
+    const fn test_mm512_maskz_rorv_epi32() {
         let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
         let b = _mm512_set1_epi32(1);
         let r = _mm512_maskz_rorv_epi32(0, a, b);
@@ -52508,7 +54431,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rorv_epi32() {
+    const fn test_mm256_rorv_epi32() {
         let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_rorv_epi32(a, b);
@@ -52517,7 +54440,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rorv_epi32() {
+    const fn test_mm256_mask_rorv_epi32() {
         let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_mask_rorv_epi32(a, 0, a, b);
@@ -52528,7 +54451,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rorv_epi32() {
+    const fn test_mm256_maskz_rorv_epi32() {
         let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
         let b = _mm256_set1_epi32(1);
         let r = _mm256_maskz_rorv_epi32(0, a, b);
@@ -52539,7 +54462,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rorv_epi32() {
+    const fn test_mm_rorv_epi32() {
         let a = _mm_set_epi32(1 << 0, 2, 2, 2);
         let b = _mm_set1_epi32(1);
         let r = _mm_rorv_epi32(a, b);
@@ -52548,7 +54471,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rorv_epi32() {
+    const fn test_mm_mask_rorv_epi32() {
         let a = _mm_set_epi32(1 << 0, 2, 2, 2);
         let b = _mm_set1_epi32(1);
         let r = _mm_mask_rorv_epi32(a, 0, a, b);
@@ -52559,7 +54482,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rorv_epi32() {
+    const fn test_mm_maskz_rorv_epi32() {
         let a = _mm_set_epi32(1 << 0, 2, 2, 2);
         let b = _mm_set1_epi32(1);
         let r = _mm_maskz_rorv_epi32(0, a, b);
@@ -52570,7 +54493,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sllv_epi32() {
+    const fn test_mm512_sllv_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let count = _mm512_set1_epi32(1);
         let r = _mm512_sllv_epi32(a, count);
@@ -52579,7 +54502,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sllv_epi32() {
+    const fn test_mm512_mask_sllv_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let count = _mm512_set1_epi32(1);
         let r = _mm512_mask_sllv_epi32(a, 0, a, count);
@@ -52590,7 +54513,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sllv_epi32() {
+    const fn test_mm512_maskz_sllv_epi32() {
         let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
         let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm512_maskz_sllv_epi32(0, a, count);
@@ -52601,7 +54524,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sllv_epi32() {
+    const fn test_mm256_mask_sllv_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let count = _mm256_set1_epi32(1);
         let r = _mm256_mask_sllv_epi32(a, 0, a, count);
@@ -52612,7 +54535,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sllv_epi32() {
+    const fn test_mm256_maskz_sllv_epi32() {
         let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
         let count = _mm256_set1_epi32(1);
         let r = _mm256_maskz_sllv_epi32(0, a, count);
@@ -52623,7 +54546,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sllv_epi32() {
+    const fn test_mm_mask_sllv_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let count = _mm_set1_epi32(1);
         let r = _mm_mask_sllv_epi32(a, 0, a, count);
@@ -52634,7 +54557,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sllv_epi32() {
+    const fn test_mm_maskz_sllv_epi32() {
         let a = _mm_set_epi32(1 << 31, 1, 1, 1);
         let count = _mm_set1_epi32(1);
         let r = _mm_maskz_sllv_epi32(0, a, count);
@@ -52645,7 +54568,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srlv_epi32() {
+    const fn test_mm512_srlv_epi32() {
         let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
         let count = _mm512_set1_epi32(1);
         let r = _mm512_srlv_epi32(a, count);
@@ -52654,7 +54577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srlv_epi32() {
+    const fn test_mm512_mask_srlv_epi32() {
         let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
         let count = _mm512_set1_epi32(1);
         let r = _mm512_mask_srlv_epi32(a, 0, a, count);
@@ -52665,7 +54588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srlv_epi32() {
+    const fn test_mm512_maskz_srlv_epi32() {
         let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
         let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
         let r = _mm512_maskz_srlv_epi32(0, a, count);
@@ -52676,7 +54599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srlv_epi32() {
+    const fn test_mm256_mask_srlv_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm256_set1_epi32(1);
         let r = _mm256_mask_srlv_epi32(a, 0, a, count);
@@ -52687,7 +54610,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srlv_epi32() {
+    const fn test_mm256_maskz_srlv_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm256_set1_epi32(1);
         let r = _mm256_maskz_srlv_epi32(0, a, count);
@@ -52698,7 +54621,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srlv_epi32() {
+    const fn test_mm_mask_srlv_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set1_epi32(1);
         let r = _mm_mask_srlv_epi32(a, 0, a, count);
@@ -52709,7 +54632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srlv_epi32() {
+    const fn test_mm_maskz_srlv_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set1_epi32(1);
         let r = _mm_maskz_srlv_epi32(0, a, count);
@@ -52720,7 +54643,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sll_epi32() {
+    fn test_mm512_sll_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -52741,7 +54664,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sll_epi32() {
+    fn test_mm512_mask_sll_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -52764,7 +54687,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sll_epi32() {
+    fn test_mm512_maskz_sll_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -52781,7 +54704,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sll_epi32() {
+    fn test_mm256_mask_sll_epi32() {
         let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_mask_sll_epi32(a, 0, a, count);
@@ -52792,7 +54715,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sll_epi32() {
+    fn test_mm256_maskz_sll_epi32() {
         let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_maskz_sll_epi32(0, a, count);
@@ -52803,7 +54726,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sll_epi32() {
+    fn test_mm_mask_sll_epi32() {
         let a = _mm_set_epi32(1 << 13, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_mask_sll_epi32(a, 0, a, count);
@@ -52814,7 +54737,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sll_epi32() {
+    fn test_mm_maskz_sll_epi32() {
         let a = _mm_set_epi32(1 << 13, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_maskz_sll_epi32(0, a, count);
@@ -52825,7 +54748,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srl_epi32() {
+    fn test_mm512_srl_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -52840,7 +54763,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srl_epi32() {
+    fn test_mm512_mask_srl_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -52857,7 +54780,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srl_epi32() {
+    fn test_mm512_maskz_srl_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 31, 1 << 0, 1 << 1, 1 << 2,
@@ -52874,7 +54797,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srl_epi32() {
+    fn test_mm256_mask_srl_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_mask_srl_epi32(a, 0, a, count);
@@ -52885,7 +54808,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srl_epi32() {
+    fn test_mm256_maskz_srl_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_maskz_srl_epi32(0, a, count);
@@ -52896,7 +54819,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srl_epi32() {
+    fn test_mm_mask_srl_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_mask_srl_epi32(a, 0, a, count);
@@ -52907,7 +54830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srl_epi32() {
+    fn test_mm_maskz_srl_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_maskz_srl_epi32(0, a, count);
@@ -52918,7 +54841,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sra_epi32() {
+    fn test_mm512_sra_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
         let count = _mm_set_epi32(1, 0, 0, 2);
         let r = _mm512_sra_epi32(a, count);
@@ -52927,7 +54850,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sra_epi32() {
+    fn test_mm512_mask_sra_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
         let count = _mm_set_epi32(0, 0, 0, 2);
         let r = _mm512_mask_sra_epi32(a, 0, a, count);
@@ -52938,7 +54861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sra_epi32() {
+    fn test_mm512_maskz_sra_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
         let count = _mm_set_epi32(2, 0, 0, 2);
         let r = _mm512_maskz_sra_epi32(0, a, count);
@@ -52949,7 +54872,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sra_epi32() {
+    fn test_mm256_mask_sra_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_mask_sra_epi32(a, 0, a, count);
@@ -52960,7 +54883,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sra_epi32() {
+    fn test_mm256_maskz_sra_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm256_maskz_sra_epi32(0, a, count);
@@ -52971,7 +54894,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sra_epi32() {
+    fn test_mm_mask_sra_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_mask_sra_epi32(a, 0, a, count);
@@ -52982,7 +54905,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sra_epi32() {
+    fn test_mm_maskz_sra_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set_epi32(0, 0, 0, 1);
         let r = _mm_maskz_sra_epi32(0, a, count);
@@ -52993,7 +54916,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srav_epi32() {
+    const fn test_mm512_srav_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
         let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm512_srav_epi32(a, count);
@@ -53002,7 +54925,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srav_epi32() {
+    const fn test_mm512_mask_srav_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
         let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
         let r = _mm512_mask_srav_epi32(a, 0, a, count);
@@ -53013,7 +54936,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srav_epi32() {
+    const fn test_mm512_maskz_srav_epi32() {
         let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
         let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
         let r = _mm512_maskz_srav_epi32(0, a, count);
@@ -53024,7 +54947,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srav_epi32() {
+    const fn test_mm256_mask_srav_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm256_set1_epi32(1);
         let r = _mm256_mask_srav_epi32(a, 0, a, count);
@@ -53035,7 +54958,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srav_epi32() {
+    const fn test_mm256_maskz_srav_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let count = _mm256_set1_epi32(1);
         let r = _mm256_maskz_srav_epi32(0, a, count);
@@ -53046,7 +54969,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srav_epi32() {
+    const fn test_mm_mask_srav_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set1_epi32(1);
         let r = _mm_mask_srav_epi32(a, 0, a, count);
@@ -53057,7 +54980,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srav_epi32() {
+    const fn test_mm_maskz_srav_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let count = _mm_set1_epi32(1);
         let r = _mm_maskz_srav_epi32(0, a, count);
@@ -53068,7 +54991,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srai_epi32() {
+    const fn test_mm512_srai_epi32() {
         let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
         let r = _mm512_srai_epi32::<2>(a);
         let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
@@ -53076,7 +54999,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srai_epi32() {
+    const fn test_mm512_mask_srai_epi32() {
         let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
         let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -53086,7 +55009,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srai_epi32() {
+    const fn test_mm512_maskz_srai_epi32() {
         let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
         let r = _mm512_maskz_srai_epi32::<2>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -53096,7 +55019,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srai_epi32() {
+    const fn test_mm256_mask_srai_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -53106,7 +55029,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srai_epi32() {
+    const fn test_mm256_maskz_srai_epi32() {
         let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm256_maskz_srai_epi32::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -53116,7 +55039,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srai_epi32() {
+    const fn test_mm_mask_srai_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let r = _mm_mask_srai_epi32::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -53126,7 +55049,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srai_epi32() {
+    const fn test_mm_maskz_srai_epi32() {
         let a = _mm_set_epi32(1 << 5, 0, 0, 0);
         let r = _mm_maskz_srai_epi32::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -53136,7 +55059,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permute_ps() {
+    const fn test_mm512_permute_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53148,7 +55071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permute_ps() {
+    const fn test_mm512_mask_permute_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53162,7 +55085,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permute_ps() {
+    const fn test_mm512_maskz_permute_ps() {
         let a = _mm512_setr_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53176,7 +55099,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permute_ps() {
+    const fn test_mm256_mask_permute_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
         assert_eq_m256(r, a);
@@ -53186,7 +55109,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permute_ps() {
+    const fn test_mm256_maskz_permute_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -53196,7 +55119,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_permute_ps() {
+    const fn test_mm_mask_permute_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
         assert_eq_m128(r, a);
@@ -53206,7 +55129,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_permute_ps() {
+    const fn test_mm_maskz_permute_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -53216,7 +55139,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutevar_epi32() {
+    fn test_mm512_permutevar_epi32() {
         let idx = _mm512_set1_epi32(1);
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_permutevar_epi32(idx, a);
@@ -53225,7 +55148,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutevar_epi32() {
+    fn test_mm512_mask_permutevar_epi32() {
         let idx = _mm512_set1_epi32(1);
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
@@ -53236,7 +55159,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutevar_ps() {
+    fn test_mm512_permutevar_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53249,7 +55172,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutevar_ps() {
+    fn test_mm512_mask_permutevar_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53264,7 +55187,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutevar_ps() {
+    fn test_mm512_maskz_permutevar_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53279,7 +55202,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutevar_ps() {
+    fn test_mm256_mask_permutevar_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm256_set1_epi32(0b01);
         let r = _mm256_mask_permutevar_ps(a, 0, a, b);
@@ -53290,7 +55213,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutevar_ps() {
+    fn test_mm256_maskz_permutevar_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm256_set1_epi32(0b01);
         let r = _mm256_maskz_permutevar_ps(0, a, b);
@@ -53301,7 +55224,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_permutevar_ps() {
+    fn test_mm_mask_permutevar_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set1_epi32(0b01);
         let r = _mm_mask_permutevar_ps(a, 0, a, b);
@@ -53312,7 +55235,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_permutevar_ps() {
+    fn test_mm_maskz_permutevar_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set1_epi32(0b01);
         let r = _mm_maskz_permutevar_ps(0, a, b);
@@ -53323,7 +55246,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutexvar_epi32() {
+    fn test_mm512_permutexvar_epi32() {
         let idx = _mm512_set1_epi32(1);
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_permutexvar_epi32(idx, a);
@@ -53332,7 +55255,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutexvar_epi32() {
+    fn test_mm512_mask_permutexvar_epi32() {
         let idx = _mm512_set1_epi32(1);
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
@@ -53343,7 +55266,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutexvar_epi32() {
+    fn test_mm512_maskz_permutexvar_epi32() {
         let idx = _mm512_set1_epi32(1);
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
@@ -53354,7 +55277,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutexvar_epi32() {
+    fn test_mm256_permutexvar_epi32() {
         let idx = _mm256_set1_epi32(1);
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_permutexvar_epi32(idx, a);
@@ -53363,7 +55286,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutexvar_epi32() {
+    fn test_mm256_mask_permutexvar_epi32() {
         let idx = _mm256_set1_epi32(1);
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
@@ -53374,7 +55297,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutexvar_epi32() {
+    fn test_mm256_maskz_permutexvar_epi32() {
         let idx = _mm256_set1_epi32(1);
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
@@ -53385,7 +55308,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutexvar_ps() {
+    fn test_mm512_permutexvar_ps() {
         let idx = _mm512_set1_epi32(1);
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -53396,7 +55319,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutexvar_ps() {
+    fn test_mm512_mask_permutexvar_ps() {
         let idx = _mm512_set1_epi32(1);
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -53409,7 +55332,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutexvar_ps() {
+    fn test_mm512_maskz_permutexvar_ps() {
         let idx = _mm512_set1_epi32(1);
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -53424,7 +55347,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutexvar_ps() {
+    fn test_mm256_permutexvar_ps() {
         let idx = _mm256_set1_epi32(1);
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm256_permutexvar_ps(idx, a);
@@ -53433,7 +55356,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutexvar_ps() {
+    fn test_mm256_mask_permutexvar_ps() {
         let idx = _mm256_set1_epi32(1);
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
@@ -53444,7 +55367,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutexvar_ps() {
+    fn test_mm256_maskz_permutexvar_ps() {
         let idx = _mm256_set1_epi32(1);
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm256_maskz_permutexvar_ps(0, idx, a);
@@ -53455,7 +55378,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutex2var_epi32() {
+    fn test_mm512_permutex2var_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm512_set_epi32(
@@ -53473,7 +55396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutex2var_epi32() {
+    fn test_mm512_mask_permutex2var_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm512_set_epi32(
@@ -53493,7 +55416,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutex2var_epi32() {
+    fn test_mm512_maskz_permutex2var_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm512_set_epi32(
@@ -53511,7 +55434,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask2_permutex2var_epi32() {
+    fn test_mm512_mask2_permutex2var_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm512_set_epi32(
@@ -53535,7 +55458,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutex2var_epi32() {
+    fn test_mm256_permutex2var_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm256_set1_epi32(100);
@@ -53545,7 +55468,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutex2var_epi32() {
+    fn test_mm256_mask_permutex2var_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm256_set1_epi32(100);
@@ -53557,7 +55480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutex2var_epi32() {
+    fn test_mm256_maskz_permutex2var_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm256_set1_epi32(100);
@@ -53569,7 +55492,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask2_permutex2var_epi32() {
+    fn test_mm256_mask2_permutex2var_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm256_set1_epi32(100);
@@ -53581,7 +55504,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_permutex2var_epi32() {
+    fn test_mm_permutex2var_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
         let b = _mm_set1_epi32(100);
@@ -53591,7 +55514,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_permutex2var_epi32() {
+    fn test_mm_mask_permutex2var_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
         let b = _mm_set1_epi32(100);
@@ -53603,7 +55526,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_permutex2var_epi32() {
+    fn test_mm_maskz_permutex2var_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
         let b = _mm_set1_epi32(100);
@@ -53615,7 +55538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask2_permutex2var_epi32() {
+    fn test_mm_mask2_permutex2var_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
         let b = _mm_set1_epi32(100);
@@ -53627,7 +55550,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutex2var_ps() {
+    fn test_mm512_permutex2var_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53647,7 +55570,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutex2var_ps() {
+    fn test_mm512_mask_permutex2var_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53669,7 +55592,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutex2var_ps() {
+    fn test_mm512_maskz_permutex2var_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53691,7 +55614,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask2_permutex2var_ps() {
+    fn test_mm512_mask2_permutex2var_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -53713,7 +55636,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutex2var_ps() {
+    fn test_mm256_permutex2var_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm256_set1_ps(100.);
@@ -53723,7 +55646,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutex2var_ps() {
+    fn test_mm256_mask_permutex2var_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm256_set1_ps(100.);
@@ -53735,7 +55658,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutex2var_ps() {
+    fn test_mm256_maskz_permutex2var_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm256_set1_ps(100.);
@@ -53747,7 +55670,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask2_permutex2var_ps() {
+    fn test_mm256_mask2_permutex2var_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm256_set1_ps(100.);
@@ -53759,7 +55682,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_permutex2var_ps() {
+    fn test_mm_permutex2var_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
         let b = _mm_set1_ps(100.);
@@ -53769,7 +55692,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_permutex2var_ps() {
+    fn test_mm_mask_permutex2var_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
         let b = _mm_set1_ps(100.);
@@ -53781,7 +55704,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_permutex2var_ps() {
+    fn test_mm_maskz_permutex2var_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
         let b = _mm_set1_ps(100.);
@@ -53793,7 +55716,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask2_permutex2var_ps() {
+    fn test_mm_mask2_permutex2var_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
         let b = _mm_set1_ps(100.);
@@ -53805,7 +55728,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_shuffle_epi32() {
+    const fn test_mm512_shuffle_epi32() {
         let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
         let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
         let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
@@ -53813,7 +55736,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_shuffle_epi32() {
+    const fn test_mm512_mask_shuffle_epi32() {
         let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
         let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -53823,7 +55746,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_shuffle_epi32() {
+    const fn test_mm512_maskz_shuffle_epi32() {
         let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
         let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -53833,7 +55756,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_shuffle_epi32() {
+    const fn test_mm256_mask_shuffle_epi32() {
         let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
         let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -53843,7 +55766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_shuffle_epi32() {
+    const fn test_mm256_maskz_shuffle_epi32() {
         let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
         let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -53853,7 +55776,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_shuffle_epi32() {
+    const fn test_mm_mask_shuffle_epi32() {
         let a = _mm_set_epi32(1, 4, 5, 8);
         let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -53863,7 +55786,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_shuffle_epi32() {
+    const fn test_mm_maskz_shuffle_epi32() {
         let a = _mm_set_epi32(1, 4, 5, 8);
         let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -53873,7 +55796,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_shuffle_ps() {
+    const fn test_mm512_shuffle_ps() {
         let a = _mm512_setr_ps(
             1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
         );
@@ -53888,7 +55811,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_shuffle_ps() {
+    const fn test_mm512_mask_shuffle_ps() {
         let a = _mm512_setr_ps(
             1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
         );
@@ -53905,7 +55828,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_shuffle_ps() {
+    const fn test_mm512_maskz_shuffle_ps() {
         let a = _mm512_setr_ps(
             1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
         );
@@ -53922,7 +55845,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_shuffle_ps() {
+    const fn test_mm256_mask_shuffle_ps() {
         let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
@@ -53933,7 +55856,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_shuffle_ps() {
+    const fn test_mm256_maskz_shuffle_ps() {
         let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
@@ -53944,7 +55867,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_shuffle_ps() {
+    const fn test_mm_mask_shuffle_ps() {
         let a = _mm_set_ps(1., 4., 5., 8.);
         let b = _mm_set_ps(2., 3., 6., 7.);
         let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
@@ -53955,7 +55878,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_shuffle_ps() {
+    const fn test_mm_maskz_shuffle_ps() {
         let a = _mm_set_ps(1., 4., 5., 8.);
         let b = _mm_set_ps(2., 3., 6., 7.);
         let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
@@ -53966,7 +55889,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_shuffle_i32x4() {
+    const fn test_mm512_shuffle_i32x4() {
         let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
@@ -53975,7 +55898,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_shuffle_i32x4() {
+    const fn test_mm512_mask_shuffle_i32x4() {
         let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
@@ -53986,7 +55909,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_shuffle_i32x4() {
+    const fn test_mm512_maskz_shuffle_i32x4() {
         let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
@@ -53997,7 +55920,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_shuffle_i32x4() {
+    const fn test_mm256_shuffle_i32x4() {
         let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm256_shuffle_i32x4::<0b00>(a, b);
@@ -54006,7 +55929,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_shuffle_i32x4() {
+    const fn test_mm256_mask_shuffle_i32x4() {
         let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
@@ -54017,7 +55940,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_shuffle_i32x4() {
+    const fn test_mm256_maskz_shuffle_i32x4() {
         let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
@@ -54028,7 +55951,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_shuffle_f32x4() {
+    const fn test_mm512_shuffle_f32x4() {
         let a = _mm512_setr_ps(
             1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
         );
@@ -54043,7 +55966,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_shuffle_f32x4() {
+    const fn test_mm512_mask_shuffle_f32x4() {
         let a = _mm512_setr_ps(
             1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
         );
@@ -54060,7 +55983,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_shuffle_f32x4() {
+    const fn test_mm512_maskz_shuffle_f32x4() {
         let a = _mm512_setr_ps(
             1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
         );
@@ -54077,7 +56000,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_shuffle_f32x4() {
+    const fn test_mm256_shuffle_f32x4() {
         let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_shuffle_f32x4::<0b00>(a, b);
@@ -54086,7 +56009,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_shuffle_f32x4() {
+    const fn test_mm256_mask_shuffle_f32x4() {
         let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
@@ -54097,7 +56020,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_shuffle_f32x4() {
+    const fn test_mm256_maskz_shuffle_f32x4() {
         let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
@@ -54108,7 +56031,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_extractf32x4_ps() {
+    const fn test_mm512_extractf32x4_ps() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54118,7 +56041,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_extractf32x4_ps() {
+    const fn test_mm512_mask_extractf32x4_ps() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54131,7 +56054,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_extractf32x4_ps() {
+    const fn test_mm512_maskz_extractf32x4_ps() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54143,7 +56066,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_extractf32x4_ps() {
+    const fn test_mm256_extractf32x4_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_extractf32x4_ps::<1>(a);
         let e = _mm_set_ps(1., 2., 3., 4.);
@@ -54151,7 +56074,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_extractf32x4_ps() {
+    const fn test_mm256_mask_extractf32x4_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let src = _mm_set1_ps(100.);
         let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
@@ -54162,7 +56085,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_extractf32x4_ps() {
+    const fn test_mm256_maskz_extractf32x4_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -54172,7 +56095,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_extracti32x4_epi32() {
+    const fn test_mm512_extracti32x4_epi32() {
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_extracti32x4_epi32::<1>(a);
         let e = _mm_setr_epi32(5, 6, 7, 8);
@@ -54180,7 +56103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_extracti32x4_epi32() {
+    const fn test_mm512_mask_extracti32x4_epi32() {
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let src = _mm_set1_epi32(100);
         let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
@@ -54191,7 +56114,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm512_maskz_extracti32x4_epi32() {
+    const fn test_mm512_maskz_extracti32x4_epi32() {
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -54201,7 +56124,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_extracti32x4_epi32() {
+    const fn test_mm256_extracti32x4_epi32() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_extracti32x4_epi32::<1>(a);
         let e = _mm_set_epi32(1, 2, 3, 4);
@@ -54209,7 +56132,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_extracti32x4_epi32() {
+    const fn test_mm256_mask_extracti32x4_epi32() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set1_epi32(100);
         let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
@@ -54220,7 +56143,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_extracti32x4_epi32() {
+    const fn test_mm256_maskz_extracti32x4_epi32() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -54230,7 +56153,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_moveldup_ps() {
+    const fn test_mm512_moveldup_ps() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54242,7 +56165,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_moveldup_ps() {
+    const fn test_mm512_mask_moveldup_ps() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54256,7 +56179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_moveldup_ps() {
+    const fn test_mm512_maskz_moveldup_ps() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54270,7 +56193,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_moveldup_ps() {
+    const fn test_mm256_mask_moveldup_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_mask_moveldup_ps(a, 0, a);
         assert_eq_m256(r, a);
@@ -54280,7 +56203,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_moveldup_ps() {
+    const fn test_mm256_maskz_moveldup_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_maskz_moveldup_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -54290,7 +56213,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_moveldup_ps() {
+    const fn test_mm_mask_moveldup_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_mask_moveldup_ps(a, 0, a);
         assert_eq_m128(r, a);
@@ -54300,7 +56223,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_moveldup_ps() {
+    const fn test_mm_maskz_moveldup_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_maskz_moveldup_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -54310,7 +56233,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_movehdup_ps() {
+    const fn test_mm512_movehdup_ps() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54322,7 +56245,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_movehdup_ps() {
+    const fn test_mm512_mask_movehdup_ps() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54336,7 +56259,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_movehdup_ps() {
+    const fn test_mm512_maskz_movehdup_ps() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54350,7 +56273,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_movehdup_ps() {
+    const fn test_mm256_mask_movehdup_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_mask_movehdup_ps(a, 0, a);
         assert_eq_m256(r, a);
@@ -54360,7 +56283,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_movehdup_ps() {
+    const fn test_mm256_maskz_movehdup_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_maskz_movehdup_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -54370,7 +56293,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_movehdup_ps() {
+    const fn test_mm_mask_movehdup_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_mask_movehdup_ps(a, 0, a);
         assert_eq_m128(r, a);
@@ -54380,7 +56303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_movehdup_ps() {
+    const fn test_mm_maskz_movehdup_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let r = _mm_maskz_movehdup_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -54390,7 +56313,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_inserti32x4() {
+    const fn test_mm512_inserti32x4() {
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm_setr_epi32(17, 18, 19, 20);
         let r = _mm512_inserti32x4::<0>(a, b);
@@ -54399,7 +56322,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_inserti32x4() {
+    const fn test_mm512_mask_inserti32x4() {
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm_setr_epi32(17, 18, 19, 20);
         let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
@@ -54410,7 +56333,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_inserti32x4() {
+    const fn test_mm512_maskz_inserti32x4() {
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm_setr_epi32(17, 18, 19, 20);
         let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
@@ -54421,7 +56344,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_inserti32x4() {
+    const fn test_mm256_inserti32x4() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm256_inserti32x4::<1>(a, b);
@@ -54430,7 +56353,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_inserti32x4() {
+    const fn test_mm256_mask_inserti32x4() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
@@ -54441,7 +56364,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_inserti32x4() {
+    const fn test_mm256_maskz_inserti32x4() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
@@ -54452,7 +56375,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_insertf32x4() {
+    const fn test_mm512_insertf32x4() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54465,7 +56388,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_insertf32x4() {
+    const fn test_mm512_mask_insertf32x4() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54480,7 +56403,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_insertf32x4() {
+    const fn test_mm512_maskz_insertf32x4() {
         let a = _mm512_setr_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -54495,7 +56418,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_insertf32x4() {
+    const fn test_mm256_insertf32x4() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm256_insertf32x4::<1>(a, b);
@@ -54504,7 +56427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_insertf32x4() {
+    const fn test_mm256_mask_insertf32x4() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
@@ -54515,7 +56438,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_insertf32x4() {
+    const fn test_mm256_maskz_insertf32x4() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
@@ -54526,21 +56449,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castps128_ps512() {
+    const fn test_mm512_castps128_ps512() {
         let a = _mm_setr_ps(17., 18., 19., 20.);
         let r = _mm512_castps128_ps512(a);
         assert_eq_m128(_mm512_castps512_ps128(r), a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castps256_ps512() {
+    const fn test_mm512_castps256_ps512() {
         let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm512_castps256_ps512(a);
         assert_eq_m256(_mm512_castps512_ps256(r), a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_zextps128_ps512() {
+    const fn test_mm512_zextps128_ps512() {
         let a = _mm_setr_ps(17., 18., 19., 20.);
         let r = _mm512_zextps128_ps512(a);
         let e = _mm512_setr_ps(
@@ -54550,7 +56473,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_zextps256_ps512() {
+    const fn test_mm512_zextps256_ps512() {
         let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm512_zextps256_ps512(a);
         let e = _mm512_setr_ps(
@@ -54560,7 +56483,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castps512_ps128() {
+    const fn test_mm512_castps512_ps128() {
         let a = _mm512_setr_ps(
             17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         );
@@ -54570,7 +56493,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castps512_ps256() {
+    const fn test_mm512_castps512_ps256() {
         let a = _mm512_setr_ps(
             17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
         );
@@ -54580,7 +56503,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castps_pd() {
+    const fn test_mm512_castps_pd() {
         let a = _mm512_set1_ps(1.);
         let r = _mm512_castps_pd(a);
         let e = _mm512_set1_pd(0.007812501848093234);
@@ -54588,7 +56511,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castps_si512() {
+    const fn test_mm512_castps_si512() {
         let a = _mm512_set1_ps(1.);
         let r = _mm512_castps_si512(a);
         let e = _mm512_set1_epi32(1065353216);
@@ -54596,7 +56519,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_broadcastd_epi32() {
+    const fn test_mm512_broadcastd_epi32() {
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm512_broadcastd_epi32(a);
         let e = _mm512_set1_epi32(20);
@@ -54604,7 +56527,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_broadcastd_epi32() {
+    const fn test_mm512_mask_broadcastd_epi32() {
         let src = _mm512_set1_epi32(20);
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm512_mask_broadcastd_epi32(src, 0, a);
@@ -54615,7 +56538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_broadcastd_epi32() {
+    const fn test_mm512_maskz_broadcastd_epi32() {
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm512_maskz_broadcastd_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -54625,7 +56548,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_broadcastd_epi32() {
+    const fn test_mm256_mask_broadcastd_epi32() {
         let src = _mm256_set1_epi32(20);
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm256_mask_broadcastd_epi32(src, 0, a);
@@ -54636,7 +56559,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcastd_epi32() {
+    const fn test_mm256_maskz_broadcastd_epi32() {
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm256_maskz_broadcastd_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -54646,7 +56569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_broadcastd_epi32() {
+    const fn test_mm_mask_broadcastd_epi32() {
         let src = _mm_set1_epi32(20);
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm_mask_broadcastd_epi32(src, 0, a);
@@ -54657,7 +56580,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_broadcastd_epi32() {
+    const fn test_mm_maskz_broadcastd_epi32() {
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm_maskz_broadcastd_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -54667,7 +56590,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_broadcastss_ps() {
+    const fn test_mm512_broadcastss_ps() {
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm512_broadcastss_ps(a);
         let e = _mm512_set1_ps(20.);
@@ -54675,7 +56598,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_broadcastss_ps() {
+    const fn test_mm512_mask_broadcastss_ps() {
         let src = _mm512_set1_ps(20.);
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm512_mask_broadcastss_ps(src, 0, a);
@@ -54686,7 +56609,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_broadcastss_ps() {
+    const fn test_mm512_maskz_broadcastss_ps() {
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm512_maskz_broadcastss_ps(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -54698,7 +56621,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_broadcastss_ps() {
+    const fn test_mm256_mask_broadcastss_ps() {
         let src = _mm256_set1_ps(20.);
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm256_mask_broadcastss_ps(src, 0, a);
@@ -54709,7 +56632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcastss_ps() {
+    const fn test_mm256_maskz_broadcastss_ps() {
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm256_maskz_broadcastss_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -54719,7 +56642,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_broadcastss_ps() {
+    const fn test_mm_mask_broadcastss_ps() {
         let src = _mm_set1_ps(20.);
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm_mask_broadcastss_ps(src, 0, a);
@@ -54730,7 +56653,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_broadcastss_ps() {
+    const fn test_mm_maskz_broadcastss_ps() {
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm_maskz_broadcastss_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -54740,7 +56663,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_broadcast_i32x4() {
+    const fn test_mm512_broadcast_i32x4() {
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm512_broadcast_i32x4(a);
         let e = _mm512_set_epi32(
@@ -54750,7 +56673,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_broadcast_i32x4() {
+    const fn test_mm512_mask_broadcast_i32x4() {
         let src = _mm512_set1_epi32(20);
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm512_mask_broadcast_i32x4(src, 0, a);
@@ -54763,7 +56686,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_broadcast_i32x4() {
+    const fn test_mm512_maskz_broadcast_i32x4() {
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm512_maskz_broadcast_i32x4(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -54773,7 +56696,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_broadcast_i32x4() {
+    const fn test_mm256_broadcast_i32x4() {
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm256_broadcast_i32x4(a);
         let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
@@ -54781,7 +56704,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_broadcast_i32x4() {
+    const fn test_mm256_mask_broadcast_i32x4() {
         let src = _mm256_set1_epi32(20);
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm256_mask_broadcast_i32x4(src, 0, a);
@@ -54792,7 +56715,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcast_i32x4() {
+    const fn test_mm256_maskz_broadcast_i32x4() {
         let a = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm256_maskz_broadcast_i32x4(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -54802,7 +56725,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_broadcast_f32x4() {
+    const fn test_mm512_broadcast_f32x4() {
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm512_broadcast_f32x4(a);
         let e = _mm512_set_ps(
@@ -54812,7 +56735,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_broadcast_f32x4() {
+    const fn test_mm512_mask_broadcast_f32x4() {
         let src = _mm512_set1_ps(20.);
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm512_mask_broadcast_f32x4(src, 0, a);
@@ -54825,7 +56748,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_broadcast_f32x4() {
+    const fn test_mm512_maskz_broadcast_f32x4() {
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm512_maskz_broadcast_f32x4(0, a);
         assert_eq_m512(r, _mm512_setzero_ps());
@@ -54837,7 +56760,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_broadcast_f32x4() {
+    const fn test_mm256_broadcast_f32x4() {
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm256_broadcast_f32x4(a);
         let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
@@ -54845,7 +56768,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_broadcast_f32x4() {
+    const fn test_mm256_mask_broadcast_f32x4() {
         let src = _mm256_set1_ps(20.);
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm256_mask_broadcast_f32x4(src, 0, a);
@@ -54856,7 +56779,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcast_f32x4() {
+    const fn test_mm256_maskz_broadcast_f32x4() {
         let a = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm256_maskz_broadcast_f32x4(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -54866,7 +56789,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_blend_epi32() {
+    const fn test_mm512_mask_blend_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_set1_epi32(2);
         let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
@@ -54875,7 +56798,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_blend_epi32() {
+    const fn test_mm256_mask_blend_epi32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set1_epi32(2);
         let r = _mm256_mask_blend_epi32(0b11111111, a, b);
@@ -54884,7 +56807,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_blend_epi32() {
+    const fn test_mm_mask_blend_epi32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set1_epi32(2);
         let r = _mm_mask_blend_epi32(0b00001111, a, b);
@@ -54893,7 +56816,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_blend_ps() {
+    const fn test_mm512_mask_blend_ps() {
         let a = _mm512_set1_ps(1.);
         let b = _mm512_set1_ps(2.);
         let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
@@ -54904,7 +56827,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_blend_ps() {
+    const fn test_mm256_mask_blend_ps() {
         let a = _mm256_set1_ps(1.);
         let b = _mm256_set1_ps(2.);
         let r = _mm256_mask_blend_ps(0b11111111, a, b);
@@ -54913,7 +56836,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_blend_ps() {
+    const fn test_mm_mask_blend_ps() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let r = _mm_mask_blend_ps(0b00001111, a, b);
@@ -54922,7 +56845,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_unpackhi_epi32() {
+    const fn test_mm512_unpackhi_epi32() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm512_set_epi32(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -54933,7 +56856,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_unpackhi_epi32() {
+    const fn test_mm512_mask_unpackhi_epi32() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm512_set_epi32(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -54946,7 +56869,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_unpackhi_epi32() {
+    const fn test_mm512_maskz_unpackhi_epi32() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm512_set_epi32(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -54959,7 +56882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_unpackhi_epi32() {
+    const fn test_mm256_mask_unpackhi_epi32() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
@@ -54970,7 +56893,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_unpackhi_epi32() {
+    const fn test_mm256_maskz_unpackhi_epi32() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm256_maskz_unpackhi_epi32(0, a, b);
@@ -54981,7 +56904,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_unpackhi_epi32() {
+    const fn test_mm_mask_unpackhi_epi32() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let b = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
@@ -54992,7 +56915,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_unpackhi_epi32() {
+    const fn test_mm_maskz_unpackhi_epi32() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let b = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm_maskz_unpackhi_epi32(0, a, b);
@@ -55003,7 +56926,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_unpackhi_ps() {
+    const fn test_mm512_unpackhi_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -55018,7 +56941,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_unpackhi_ps() {
+    const fn test_mm512_mask_unpackhi_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -55035,7 +56958,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_unpackhi_ps() {
+    const fn test_mm512_maskz_unpackhi_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -55052,7 +56975,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_unpackhi_ps() {
+    const fn test_mm256_mask_unpackhi_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
@@ -55063,7 +56986,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_unpackhi_ps() {
+    const fn test_mm256_maskz_unpackhi_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm256_maskz_unpackhi_ps(0, a, b);
@@ -55074,7 +56997,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_unpackhi_ps() {
+    const fn test_mm_mask_unpackhi_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm_mask_unpackhi_ps(a, 0, a, b);
@@ -55085,7 +57008,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_unpackhi_ps() {
+    const fn test_mm_maskz_unpackhi_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm_maskz_unpackhi_ps(0, a, b);
@@ -55096,7 +57019,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_unpacklo_epi32() {
+    const fn test_mm512_unpacklo_epi32() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm512_set_epi32(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -55107,7 +57030,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_unpacklo_epi32() {
+    const fn test_mm512_mask_unpacklo_epi32() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm512_set_epi32(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -55120,7 +57043,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_unpacklo_epi32() {
+    const fn test_mm512_maskz_unpacklo_epi32() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let b = _mm512_set_epi32(
             17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
@@ -55133,7 +57056,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_unpacklo_epi32() {
+    const fn test_mm256_mask_unpacklo_epi32() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
@@ -55144,7 +57067,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_unpacklo_epi32() {
+    const fn test_mm256_maskz_unpacklo_epi32() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm256_maskz_unpacklo_epi32(0, a, b);
@@ -55155,7 +57078,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_unpacklo_epi32() {
+    const fn test_mm_mask_unpacklo_epi32() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let b = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
@@ -55166,7 +57089,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_unpacklo_epi32() {
+    const fn test_mm_maskz_unpacklo_epi32() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let b = _mm_set_epi32(17, 18, 19, 20);
         let r = _mm_maskz_unpacklo_epi32(0, a, b);
@@ -55177,7 +57100,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_unpacklo_ps() {
+    const fn test_mm512_unpacklo_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -55192,7 +57115,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_unpacklo_ps() {
+    const fn test_mm512_mask_unpacklo_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -55209,7 +57132,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_unpacklo_ps() {
+    const fn test_mm512_maskz_unpacklo_ps() {
         let a = _mm512_set_ps(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -55226,7 +57149,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_unpacklo_ps() {
+    const fn test_mm256_mask_unpacklo_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
@@ -55237,7 +57160,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_unpacklo_ps() {
+    const fn test_mm256_maskz_unpacklo_ps() {
         let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm256_maskz_unpacklo_ps(0, a, b);
@@ -55248,7 +57171,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_unpacklo_ps() {
+    const fn test_mm_mask_unpacklo_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm_mask_unpacklo_ps(a, 0, a, b);
@@ -55259,7 +57182,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_unpacklo_ps() {
+    const fn test_mm_maskz_unpacklo_ps() {
         let a = _mm_set_ps(1., 2., 3., 4.);
         let b = _mm_set_ps(17., 18., 19., 20.);
         let r = _mm_maskz_unpacklo_ps(0, a, b);
@@ -55270,7 +57193,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_alignr_epi32() {
+    const fn test_mm512_alignr_epi32() {
         let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
         let b = _mm512_set_epi32(
             32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
@@ -55287,7 +57210,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_alignr_epi32() {
+    const fn test_mm512_mask_alignr_epi32() {
         let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
         let b = _mm512_set_epi32(
             32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
@@ -55302,7 +57225,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_alignr_epi32() {
+    const fn test_mm512_maskz_alignr_epi32() {
         let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
         let b = _mm512_set_epi32(
             32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
@@ -55315,7 +57238,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_alignr_epi32() {
+    const fn test_mm256_alignr_epi32() {
         let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
         let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
         let r = _mm256_alignr_epi32::<0>(a, b);
@@ -55326,7 +57249,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_alignr_epi32() {
+    const fn test_mm256_mask_alignr_epi32() {
         let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
         let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
         let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
@@ -55337,7 +57260,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_alignr_epi32() {
+    const fn test_mm256_maskz_alignr_epi32() {
         let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
         let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
         let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
@@ -55348,7 +57271,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_alignr_epi32() {
+    const fn test_mm_alignr_epi32() {
         let a = _mm_set_epi32(4, 3, 2, 1);
         let b = _mm_set_epi32(8, 7, 6, 5);
         let r = _mm_alignr_epi32::<0>(a, b);
@@ -55359,7 +57282,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_alignr_epi32() {
+    const fn test_mm_mask_alignr_epi32() {
         let a = _mm_set_epi32(4, 3, 2, 1);
         let b = _mm_set_epi32(8, 7, 6, 5);
         let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
@@ -55370,7 +57293,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_alignr_epi32() {
+    const fn test_mm_maskz_alignr_epi32() {
         let a = _mm_set_epi32(4, 3, 2, 1);
         let b = _mm_set_epi32(8, 7, 6, 5);
         let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
@@ -55381,7 +57304,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_and_epi32() {
+    const fn test_mm512_and_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55402,7 +57325,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_and_epi32() {
+    const fn test_mm512_mask_and_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55431,7 +57354,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_and_epi32() {
+    const fn test_mm512_maskz_and_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55454,7 +57377,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_and_epi32() {
+    const fn test_mm256_mask_and_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_mask_and_epi32(a, 0, a, b);
@@ -55465,7 +57388,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_and_epi32() {
+    const fn test_mm256_maskz_and_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_maskz_and_epi32(0, a, b);
@@ -55476,7 +57399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_and_epi32() {
+    const fn test_mm_mask_and_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_mask_and_epi32(a, 0, a, b);
@@ -55487,7 +57410,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_and_epi32() {
+    const fn test_mm_maskz_and_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_maskz_and_epi32(0, a, b);
@@ -55498,7 +57421,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_and_si512() {
+    const fn test_mm512_and_si512() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55513,13 +57436,13 @@ mod tests {
             0, 0, 0, 0,
             0, 0, 0, 1 << 3 | 1 << 4,
         );
-        let r = _mm512_and_epi32(a, b);
+        let r = _mm512_and_si512(a, b);
         let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_or_epi32() {
+    const fn test_mm512_or_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55546,7 +57469,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_or_epi32() {
+    const fn test_mm512_mask_or_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55575,7 +57498,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_or_epi32() {
+    const fn test_mm512_maskz_or_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55604,7 +57527,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_or_epi32() {
+    const fn test_mm256_or_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_or_epi32(a, b);
@@ -55613,7 +57536,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_or_epi32() {
+    const fn test_mm256_mask_or_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_mask_or_epi32(a, 0, a, b);
@@ -55624,7 +57547,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_or_epi32() {
+    const fn test_mm256_maskz_or_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_maskz_or_epi32(0, a, b);
@@ -55635,7 +57558,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_or_epi32() {
+    const fn test_mm_or_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_or_epi32(a, b);
@@ -55644,7 +57567,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_or_epi32() {
+    const fn test_mm_mask_or_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_mask_or_epi32(a, 0, a, b);
@@ -55655,7 +57578,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_or_epi32() {
+    const fn test_mm_maskz_or_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_maskz_or_epi32(0, a, b);
@@ -55666,7 +57589,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_or_si512() {
+    const fn test_mm512_or_si512() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55681,7 +57604,7 @@ mod tests {
             0, 0, 0, 0,
             0, 0, 0, 1 << 3 | 1 << 4,
         );
-        let r = _mm512_or_epi32(a, b);
+        let r = _mm512_or_si512(a, b);
         #[rustfmt::skip]
         let e = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55693,7 +57616,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_xor_epi32() {
+    const fn test_mm512_xor_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55720,7 +57643,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_xor_epi32() {
+    const fn test_mm512_mask_xor_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55749,7 +57672,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_xor_epi32() {
+    const fn test_mm512_maskz_xor_epi32() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55772,7 +57695,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_xor_epi32() {
+    const fn test_mm256_xor_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_xor_epi32(a, b);
@@ -55781,7 +57704,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_xor_epi32() {
+    const fn test_mm256_mask_xor_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_mask_xor_epi32(a, 0, a, b);
@@ -55792,7 +57715,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_xor_epi32() {
+    const fn test_mm256_maskz_xor_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_maskz_xor_epi32(0, a, b);
@@ -55803,7 +57726,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_xor_epi32() {
+    const fn test_mm_xor_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_xor_epi32(a, b);
@@ -55812,7 +57735,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_xor_epi32() {
+    const fn test_mm_mask_xor_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_mask_xor_epi32(a, 0, a, b);
@@ -55823,7 +57746,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_xor_epi32() {
+    const fn test_mm_maskz_xor_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_maskz_xor_epi32(0, a, b);
@@ -55834,7 +57757,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_xor_si512() {
+    const fn test_mm512_xor_si512() {
         #[rustfmt::skip]
         let a = _mm512_set_epi32(
             1 << 1 | 1 << 2, 0, 0, 0,
@@ -55849,7 +57772,7 @@ mod tests {
             0, 0, 0, 0,
             0, 0, 0, 1 << 3 | 1 << 4,
         );
-        let r = _mm512_xor_epi32(a, b);
+        let r = _mm512_xor_si512(a, b);
         #[rustfmt::skip]
         let e = _mm512_set_epi32(
             1 << 2, 0, 0, 0,
@@ -55861,7 +57784,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_andnot_epi32() {
+    const fn test_mm512_andnot_epi32() {
         let a = _mm512_set1_epi32(0);
         let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
         let r = _mm512_andnot_epi32(a, b);
@@ -55870,7 +57793,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_andnot_epi32() {
+    const fn test_mm512_mask_andnot_epi32() {
         let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
         let r = _mm512_mask_andnot_epi32(a, 0, a, b);
@@ -55881,7 +57804,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_andnot_epi32() {
+    const fn test_mm512_maskz_andnot_epi32() {
         let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
         let r = _mm512_maskz_andnot_epi32(0, a, b);
@@ -55898,7 +57821,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_andnot_epi32() {
+    const fn test_mm256_mask_andnot_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
         let r = _mm256_mask_andnot_epi32(a, 0, a, b);
@@ -55909,7 +57832,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_andnot_epi32() {
+    const fn test_mm256_maskz_andnot_epi32() {
         let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
         let r = _mm256_maskz_andnot_epi32(0, a, b);
@@ -55920,7 +57843,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_andnot_epi32() {
+    const fn test_mm_mask_andnot_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 3 | 1 << 4);
         let r = _mm_mask_andnot_epi32(a, 0, a, b);
@@ -55931,7 +57854,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_andnot_epi32() {
+    const fn test_mm_maskz_andnot_epi32() {
         let a = _mm_set1_epi32(1 << 1 | 1 << 2);
         let b = _mm_set1_epi32(1 << 3 | 1 << 4);
         let r = _mm_maskz_andnot_epi32(0, a, b);
@@ -55942,7 +57865,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_cvtmask16_u32() {
+    const fn test_cvtmask16_u32() {
         let a: __mmask16 = 0b11001100_00110011;
         let r = _cvtmask16_u32(a);
         let e: u32 = 0b11001100_00110011;
@@ -55950,7 +57873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_cvtu32_mask16() {
+    const fn test_cvtu32_mask16() {
         let a: u32 = 0b11001100_00110011;
         let r = _cvtu32_mask16(a);
         let e: __mmask16 = 0b11001100_00110011;
@@ -55958,7 +57881,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_kand() {
+    const fn test_mm512_kand() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b11001100_00110011;
         let r = _mm512_kand(a, b);
@@ -55967,7 +57890,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_kand_mask16() {
+    const fn test_kand_mask16() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b11001100_00110011;
         let r = _kand_mask16(a, b);
@@ -55976,7 +57899,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_kor() {
+    const fn test_mm512_kor() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _mm512_kor(a, b);
@@ -55985,7 +57908,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_kor_mask16() {
+    const fn test_kor_mask16() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _kor_mask16(a, b);
@@ -55994,7 +57917,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_kxor() {
+    const fn test_mm512_kxor() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _mm512_kxor(a, b);
@@ -56003,7 +57926,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_kxor_mask16() {
+    const fn test_kxor_mask16() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _kxor_mask16(a, b);
@@ -56012,7 +57935,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_knot() {
+    const fn test_mm512_knot() {
         let a: u16 = 0b11001100_00110011;
         let r = _mm512_knot(a);
         let e: u16 = 0b00110011_11001100;
@@ -56020,7 +57943,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_knot_mask16() {
+    const fn test_knot_mask16() {
         let a: u16 = 0b11001100_00110011;
         let r = _knot_mask16(a);
         let e: u16 = 0b00110011_11001100;
@@ -56028,7 +57951,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_kandn() {
+    const fn test_mm512_kandn() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _mm512_kandn(a, b);
@@ -56037,7 +57960,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_kandn_mask16() {
+    const fn test_kandn_mask16() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _kandn_mask16(a, b);
@@ -56046,7 +57969,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_kxnor() {
+    const fn test_mm512_kxnor() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _mm512_kxnor(a, b);
@@ -56055,7 +57978,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_kxnor_mask16() {
+    const fn test_kxnor_mask16() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _kxnor_mask16(a, b);
@@ -56063,34 +57986,34 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kortest_mask16_u8() {
+    #[simd_test(enable = "avx512f")]
+    const fn test_kortest_mask16_u8() {
         let a: __mmask16 = 0b0110100101101001;
         let b: __mmask16 = 0b1011011010110110;
         let mut all_ones: u8 = 0;
-        let r = _kortest_mask16_u8(a, b, &mut all_ones);
+        let r = unsafe { _kortest_mask16_u8(a, b, &mut all_ones) };
         assert_eq!(r, 0);
         assert_eq!(all_ones, 1);
     }
 
-    #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kortestc_mask16_u8() {
+    #[simd_test(enable = "avx512f")]
+    const fn test_kortestc_mask16_u8() {
         let a: __mmask16 = 0b0110100101101001;
         let b: __mmask16 = 0b1011011010110110;
         let r = _kortestc_mask16_u8(a, b);
         assert_eq!(r, 1);
     }
 
-    #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kortestz_mask16_u8() {
+    #[simd_test(enable = "avx512f")]
+    const fn test_kortestz_mask16_u8() {
         let a: __mmask16 = 0b0110100101101001;
         let b: __mmask16 = 0b1011011010110110;
         let r = _kortestz_mask16_u8(a, b);
         assert_eq!(r, 0);
     }
 
-    #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kshiftli_mask16() {
+    #[simd_test(enable = "avx512f")]
+    const fn test_kshiftli_mask16() {
         let a: __mmask16 = 0b1001011011000011;
         let r = _kshiftli_mask16::<3>(a);
         let e: __mmask16 = 0b1011011000011000;
@@ -56109,8 +58032,8 @@ mod tests {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "avx512dq")]
-    unsafe fn test_kshiftri_mask16() {
+    #[simd_test(enable = "avx512f")]
+    const fn test_kshiftri_mask16() {
         let a: __mmask16 = 0b1010100100111100;
         let r = _kshiftri_mask16::<3>(a);
         let e: __mmask16 = 0b0001010100100111;
@@ -56130,24 +58053,26 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_load_mask16() {
+    const fn test_load_mask16() {
         let a: __mmask16 = 0b1001011011000011;
-        let r = _load_mask16(&a);
+        let r = unsafe { _load_mask16(&a) };
         let e: __mmask16 = 0b1001011011000011;
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_store_mask16() {
+    const fn test_store_mask16() {
         let a: __mmask16 = 0b0110100100111100;
         let mut r = 0;
-        _store_mask16(&mut r, a);
+        unsafe {
+            _store_mask16(&mut r, a);
+        }
         let e: __mmask16 = 0b0110100100111100;
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_kmov() {
+    const fn test_mm512_kmov() {
         let a: u16 = 0b11001100_00110011;
         let r = _mm512_kmov(a);
         let e: u16 = 0b11001100_00110011;
@@ -56155,7 +58080,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_int2mask() {
+    const fn test_mm512_int2mask() {
         let a: i32 = 0b11001100_00110011;
         let r = _mm512_int2mask(a);
         let e: u16 = 0b11001100_00110011;
@@ -56163,7 +58088,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask2int() {
+    const fn test_mm512_mask2int() {
         let k1: __mmask16 = 0b11001100_00110011;
         let r = _mm512_mask2int(k1);
         let e: i32 = 0b11001100_00110011;
@@ -56171,7 +58096,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_kunpackb() {
+    const fn test_mm512_kunpackb() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _mm512_kunpackb(a, b);
@@ -56180,7 +58105,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_kortestc() {
+    const fn test_mm512_kortestc() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _mm512_kortestc(a, b);
@@ -56191,7 +58116,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_kortestz() {
+    const fn test_mm512_kortestz() {
         let a: u16 = 0b11001100_00110011;
         let b: u16 = 0b00101110_00001011;
         let r = _mm512_kortestz(a, b);
@@ -56201,7 +58126,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_test_epi32_mask() {
+    const fn test_mm512_test_epi32_mask() {
         let a = _mm512_set1_epi32(1 << 0);
         let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
         let r = _mm512_test_epi32_mask(a, b);
@@ -56210,7 +58135,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_test_epi32_mask() {
+    const fn test_mm512_mask_test_epi32_mask() {
         let a = _mm512_set1_epi32(1 << 0);
         let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
         let r = _mm512_mask_test_epi32_mask(0, a, b);
@@ -56221,7 +58146,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_test_epi32_mask() {
+    const fn test_mm256_test_epi32_mask() {
         let a = _mm256_set1_epi32(1 << 0);
         let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
         let r = _mm256_test_epi32_mask(a, b);
@@ -56230,7 +58155,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_test_epi32_mask() {
+    const fn test_mm256_mask_test_epi32_mask() {
         let a = _mm256_set1_epi32(1 << 0);
         let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
         let r = _mm256_mask_test_epi32_mask(0, a, b);
@@ -56241,7 +58166,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_test_epi32_mask() {
+    const fn test_mm_test_epi32_mask() {
         let a = _mm_set1_epi32(1 << 0);
         let b = _mm_set1_epi32(1 << 0 | 1 << 1);
         let r = _mm_test_epi32_mask(a, b);
@@ -56250,7 +58175,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_test_epi32_mask() {
+    const fn test_mm_mask_test_epi32_mask() {
         let a = _mm_set1_epi32(1 << 0);
         let b = _mm_set1_epi32(1 << 0 | 1 << 1);
         let r = _mm_mask_test_epi32_mask(0, a, b);
@@ -56261,7 +58186,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_testn_epi32_mask() {
+    const fn test_mm512_testn_epi32_mask() {
         let a = _mm512_set1_epi32(1 << 0);
         let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
         let r = _mm512_testn_epi32_mask(a, b);
@@ -56270,7 +58195,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_testn_epi32_mask() {
+    const fn test_mm512_mask_testn_epi32_mask() {
         let a = _mm512_set1_epi32(1 << 0);
         let b = _mm512_set1_epi32(1 << 1);
         let r = _mm512_mask_test_epi32_mask(0, a, b);
@@ -56281,7 +58206,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_testn_epi32_mask() {
+    const fn test_mm256_testn_epi32_mask() {
         let a = _mm256_set1_epi32(1 << 0);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_testn_epi32_mask(a, b);
@@ -56290,7 +58215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_testn_epi32_mask() {
+    const fn test_mm256_mask_testn_epi32_mask() {
         let a = _mm256_set1_epi32(1 << 0);
         let b = _mm256_set1_epi32(1 << 1);
         let r = _mm256_mask_test_epi32_mask(0, a, b);
@@ -56301,7 +58226,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_testn_epi32_mask() {
+    const fn test_mm_testn_epi32_mask() {
         let a = _mm_set1_epi32(1 << 0);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_testn_epi32_mask(a, b);
@@ -56310,7 +58235,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_testn_epi32_mask() {
+    const fn test_mm_mask_testn_epi32_mask() {
         let a = _mm_set1_epi32(1 << 0);
         let b = _mm_set1_epi32(1 << 1);
         let r = _mm_mask_test_epi32_mask(0, a, b);
@@ -56321,8 +58246,8 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm512_stream_ps() {
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
+    fn test_mm512_stream_ps() {
         #[repr(align(64))]
         struct Memory {
             pub data: [f32; 16], // 64 bytes
@@ -56330,7 +58255,9 @@ mod tests {
         let a = _mm512_set1_ps(7.0);
         let mut mem = Memory { data: [-1.0; 16] };
 
-        _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
+        unsafe {
+            _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
+        }
         _mm_sfence();
         for i in 0..16 {
             assert_eq!(mem.data[i], get_m512(a, i));
@@ -56338,8 +58265,8 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm512_stream_pd() {
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
+    fn test_mm512_stream_pd() {
         #[repr(align(64))]
         struct Memory {
             pub data: [f64; 8],
@@ -56347,7 +58274,9 @@ mod tests {
         let a = _mm512_set1_pd(7.0);
         let mut mem = Memory { data: [-1.0; 8] };
 
-        _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
+        unsafe {
+            _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
+        }
         _mm_sfence();
         for i in 0..8 {
             assert_eq!(mem.data[i], get_m512d(a, i));
@@ -56355,8 +58284,8 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm512_stream_si512() {
+    #[cfg_attr(miri, ignore)] // Inline asm (for non-temporal store), which is not supported by Miri
+    fn test_mm512_stream_si512() {
         #[repr(align(64))]
         struct Memory {
             pub data: [i64; 8],
@@ -56364,7 +58293,9 @@ mod tests {
         let a = _mm512_set1_epi32(7);
         let mut mem = Memory { data: [-1; 8] };
 
-        _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
+        unsafe {
+            _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
+        }
         _mm_sfence();
         for i in 0..8 {
             assert_eq!(mem.data[i], get_m512i(a, i));
@@ -56372,98 +58303,98 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_stream_load_si512() {
+    fn test_mm512_stream_load_si512() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
+        let r = unsafe { _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _) };
         assert_eq_m512i(a, r);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_add_epi32() {
+    const fn test_mm512_reduce_add_epi32() {
         let a = _mm512_set1_epi32(1);
         let e: i32 = _mm512_reduce_add_epi32(a);
         assert_eq!(16, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_add_epi32() {
+    const fn test_mm512_mask_reduce_add_epi32() {
         let a = _mm512_set1_epi32(1);
         let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
         assert_eq!(8, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_add_ps() {
+    const fn test_mm512_reduce_add_ps() {
         let a = _mm512_set1_ps(1.);
         let e: f32 = _mm512_reduce_add_ps(a);
         assert_eq!(16., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_add_ps() {
+    const fn test_mm512_mask_reduce_add_ps() {
         let a = _mm512_set1_ps(1.);
         let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
         assert_eq!(8., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_mul_epi32() {
+    const fn test_mm512_reduce_mul_epi32() {
         let a = _mm512_set1_epi32(2);
         let e: i32 = _mm512_reduce_mul_epi32(a);
         assert_eq!(65536, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_mul_epi32() {
+    const fn test_mm512_mask_reduce_mul_epi32() {
         let a = _mm512_set1_epi32(2);
         let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
         assert_eq!(256, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_mul_ps() {
+    const fn test_mm512_reduce_mul_ps() {
         let a = _mm512_set1_ps(2.);
         let e: f32 = _mm512_reduce_mul_ps(a);
         assert_eq!(65536., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_mul_ps() {
+    const fn test_mm512_mask_reduce_mul_ps() {
         let a = _mm512_set1_ps(2.);
         let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
         assert_eq!(256., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_max_epi32() {
+    const fn test_mm512_reduce_max_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i32 = _mm512_reduce_max_epi32(a);
         assert_eq!(15, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_max_epi32() {
+    const fn test_mm512_mask_reduce_max_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_max_epu32() {
+    const fn test_mm512_reduce_max_epu32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u32 = _mm512_reduce_max_epu32(a);
         assert_eq!(15, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_max_epu32() {
+    const fn test_mm512_mask_reduce_max_epu32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_max_ps() {
+    fn test_mm512_reduce_max_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -56472,7 +58403,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_max_ps() {
+    fn test_mm512_mask_reduce_max_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -56481,35 +58412,35 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_min_epi32() {
+    const fn test_mm512_reduce_min_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i32 = _mm512_reduce_min_epi32(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_min_epi32() {
+    const fn test_mm512_mask_reduce_min_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_min_epu32() {
+    const fn test_mm512_reduce_min_epu32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u32 = _mm512_reduce_min_epu32(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_min_epu32() {
+    const fn test_mm512_mask_reduce_min_epu32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_min_ps() {
+    fn test_mm512_reduce_min_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -56518,7 +58449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_min_ps() {
+    fn test_mm512_mask_reduce_min_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -56527,35 +58458,35 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_and_epi32() {
+    const fn test_mm512_reduce_and_epi32() {
         let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e: i32 = _mm512_reduce_and_epi32(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_and_epi32() {
+    const fn test_mm512_mask_reduce_and_epi32() {
         let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_or_epi32() {
+    const fn test_mm512_reduce_or_epi32() {
         let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e: i32 = _mm512_reduce_or_epi32(a);
         assert_eq!(3, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_or_epi32() {
+    const fn test_mm512_mask_reduce_or_epi32() {
         let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
         let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_compress_epi32() {
+    fn test_mm512_mask_compress_epi32() {
         let src = _mm512_set1_epi32(200);
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_mask_compress_epi32(src, 0, a);
@@ -56568,7 +58499,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_compress_epi32() {
+    fn test_mm512_maskz_compress_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_compress_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -56578,7 +58509,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_compress_epi32() {
+    fn test_mm256_mask_compress_epi32() {
         let src = _mm256_set1_epi32(200);
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_mask_compress_epi32(src, 0, a);
@@ -56589,7 +58520,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_compress_epi32() {
+    fn test_mm256_maskz_compress_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_maskz_compress_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -56599,7 +58530,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_compress_epi32() {
+    fn test_mm_mask_compress_epi32() {
         let src = _mm_set1_epi32(200);
         let a = _mm_set_epi32(0, 1, 2, 3);
         let r = _mm_mask_compress_epi32(src, 0, a);
@@ -56610,7 +58541,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_compress_epi32() {
+    fn test_mm_maskz_compress_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let r = _mm_maskz_compress_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -56620,7 +58551,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_compress_ps() {
+    fn test_mm512_mask_compress_ps() {
         let src = _mm512_set1_ps(200.);
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -56635,7 +58566,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_compress_ps() {
+    fn test_mm512_maskz_compress_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -56649,7 +58580,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_compress_ps() {
+    fn test_mm256_mask_compress_ps() {
         let src = _mm256_set1_ps(200.);
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm256_mask_compress_ps(src, 0, a);
@@ -56660,7 +58591,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_compress_ps() {
+    fn test_mm256_maskz_compress_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm256_maskz_compress_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -56670,7 +58601,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_compress_ps() {
+    fn test_mm_mask_compress_ps() {
         let src = _mm_set1_ps(200.);
         let a = _mm_set_ps(0., 1., 2., 3.);
         let r = _mm_mask_compress_ps(src, 0, a);
@@ -56681,7 +58612,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_compress_ps() {
+    fn test_mm_maskz_compress_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let r = _mm_maskz_compress_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -56691,75 +58622,103 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_compressstoreu_epi32() {
+    fn test_mm512_mask_compressstoreu_epi32() {
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let mut r = [0_i32; 16];
-        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i32; 16]);
-        _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
+        unsafe {
+            _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
+        }
         assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_compressstoreu_epi32() {
+    fn test_mm256_mask_compressstoreu_epi32() {
         let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let mut r = [0_i32; 8];
-        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i32; 8]);
-        _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
+        unsafe {
+            _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
+        }
         assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_compressstoreu_epi32() {
+    fn test_mm_mask_compressstoreu_epi32() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let mut r = [0_i32; 4];
-        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i32; 4]);
-        _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
+        unsafe {
+            _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
+        }
         assert_eq!(&r, &[1, 2, 4, 0]);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_compressstoreu_epi64() {
+    fn test_mm512_mask_compressstoreu_epi64() {
         let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let mut r = [0_i64; 8];
-        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i64; 8]);
-        _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
+        unsafe {
+            _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
+        }
         assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_compressstoreu_epi64() {
+    fn test_mm256_mask_compressstoreu_epi64() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let mut r = [0_i64; 4];
-        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i64; 4]);
-        _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
+        unsafe {
+            _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
+        }
         assert_eq!(&r, &[1, 2, 4, 0]);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_compressstoreu_epi64() {
+    fn test_mm_mask_compressstoreu_epi64() {
         let a = _mm_setr_epi64x(1, 2);
         let mut r = [0_i64; 2];
-        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i64; 2]);
-        _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
+        unsafe {
+            _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
+        }
         assert_eq!(&r, &[2, 0]);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_compressstoreu_ps() {
+    fn test_mm512_mask_compressstoreu_ps() {
         let a = _mm512_setr_ps(
             1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
             13_f32, 14_f32, 15_f32, 16_f32,
         );
         let mut r = [0_f32; 16];
-        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_f32; 16]);
-        _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
+        unsafe {
+            _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
+        }
         assert_eq!(
             &r,
             &[
@@ -56770,12 +58729,16 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_compressstoreu_ps() {
+    fn test_mm256_mask_compressstoreu_ps() {
         let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
         let mut r = [0_f32; 8];
-        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_f32; 8]);
-        _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
+        unsafe {
+            _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
+        }
         assert_eq!(
             &r,
             &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
@@ -56783,47 +58746,63 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_compressstoreu_ps() {
+    fn test_mm_mask_compressstoreu_ps() {
         let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
         let mut r = [0.; 4];
-        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0.; 4]);
-        _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
+        unsafe {
+            _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
+        }
         assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_compressstoreu_pd() {
+    fn test_mm512_mask_compressstoreu_pd() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let mut r = [0.; 8];
-        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0.; 8]);
-        _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
+        unsafe {
+            _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
+        }
         assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_compressstoreu_pd() {
+    fn test_mm256_mask_compressstoreu_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let mut r = [0.; 4];
-        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0.; 4]);
-        _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
+        unsafe {
+            _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
+        }
         assert_eq!(&r, &[1., 2., 4., 0.]);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_compressstoreu_pd() {
+    fn test_mm_mask_compressstoreu_pd() {
         let a = _mm_setr_pd(1., 2.);
         let mut r = [0.; 2];
-        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0.; 2]);
-        _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
+        unsafe {
+            _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
+        }
         assert_eq!(&r, &[2., 0.]);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_expand_epi32() {
+    fn test_mm512_mask_expand_epi32() {
         let src = _mm512_set1_epi32(200);
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_mask_expand_epi32(src, 0, a);
@@ -56836,7 +58815,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_expand_epi32() {
+    fn test_mm512_maskz_expand_epi32() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_expand_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -56846,7 +58825,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_expand_epi32() {
+    fn test_mm256_mask_expand_epi32() {
         let src = _mm256_set1_epi32(200);
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_mask_expand_epi32(src, 0, a);
@@ -56857,7 +58836,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_expand_epi32() {
+    fn test_mm256_maskz_expand_epi32() {
         let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm256_maskz_expand_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -56867,7 +58846,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_expand_epi32() {
+    fn test_mm_mask_expand_epi32() {
         let src = _mm_set1_epi32(200);
         let a = _mm_set_epi32(0, 1, 2, 3);
         let r = _mm_mask_expand_epi32(src, 0, a);
@@ -56878,7 +58857,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_expand_epi32() {
+    fn test_mm_maskz_expand_epi32() {
         let a = _mm_set_epi32(0, 1, 2, 3);
         let r = _mm_maskz_expand_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -56888,7 +58867,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_expand_ps() {
+    fn test_mm512_mask_expand_ps() {
         let src = _mm512_set1_ps(200.);
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
@@ -56903,7 +58882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_expand_ps() {
+    fn test_mm512_maskz_expand_ps() {
         let a = _mm512_set_ps(
             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
         );
@@ -56917,7 +58896,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_expand_ps() {
+    fn test_mm256_mask_expand_ps() {
         let src = _mm256_set1_ps(200.);
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm256_mask_expand_ps(src, 0, a);
@@ -56928,7 +58907,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_expand_ps() {
+    fn test_mm256_maskz_expand_ps() {
         let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm256_maskz_expand_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -56938,7 +58917,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_expand_ps() {
+    fn test_mm_mask_expand_ps() {
         let src = _mm_set1_ps(200.);
         let a = _mm_set_ps(0., 1., 2., 3.);
         let r = _mm_mask_expand_ps(src, 0, a);
@@ -56949,7 +58928,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_expand_ps() {
+    fn test_mm_maskz_expand_ps() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let r = _mm_maskz_expand_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -56959,109 +58938,135 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_loadu_epi32() {
+    const fn test_mm512_loadu_epi32() {
         let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
         let p = a.as_ptr();
-        let r = _mm512_loadu_epi32(black_box(p));
+        let r = unsafe { _mm512_loadu_epi32(black_box(p)) };
         let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_loadu_epi32() {
+    const fn test_mm256_loadu_epi32() {
         let a = &[4, 3, 2, 5, 8, 9, 64, 50];
         let p = a.as_ptr();
-        let r = _mm256_loadu_epi32(black_box(p));
+        let r = unsafe { _mm256_loadu_epi32(black_box(p)) };
         let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_loadu_epi32() {
+    const fn test_mm_loadu_epi32() {
         let a = &[4, 3, 2, 5];
         let p = a.as_ptr();
-        let r = _mm_loadu_epi32(black_box(p));
+        let r = unsafe { _mm_loadu_epi32(black_box(p)) };
         let e = _mm_setr_epi32(4, 3, 2, 5);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
+    fn test_mm512_mask_cvtepi32_storeu_epi16() {
         let a = _mm512_set1_epi32(9);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
+        unsafe {
+            _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
+        }
         let e = _mm256_set1_epi16(9);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
+    fn test_mm256_mask_cvtepi32_storeu_epi16() {
         let a = _mm256_set1_epi32(9);
         let mut r = _mm_undefined_si128();
-        _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set1_epi16(9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
+    fn test_mm_mask_cvtepi32_storeu_epi16() {
         let a = _mm_set1_epi32(9);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
+    fn test_mm512_mask_cvtsepi32_storeu_epi16() {
         let a = _mm512_set1_epi32(i32::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
+        unsafe {
+            _mm512_mask_cvtsepi32_storeu_epi16(
+                &mut r as *mut _ as *mut i16,
+                0b11111111_11111111,
+                a,
+            );
+        }
         let e = _mm256_set1_epi16(i16::MAX);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
+    fn test_mm256_mask_cvtsepi32_storeu_epi16() {
         let a = _mm256_set1_epi32(i32::MAX);
         let mut r = _mm_undefined_si128();
-        _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set1_epi16(i16::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
+    fn test_mm_mask_cvtsepi32_storeu_epi16() {
         let a = _mm_set1_epi32(i32::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
+    fn test_mm512_mask_cvtusepi32_storeu_epi16() {
         let a = _mm512_set1_epi32(i32::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
+        unsafe {
+            _mm512_mask_cvtusepi32_storeu_epi16(
+                &mut r as *mut _ as *mut i16,
+                0b11111111_11111111,
+                a,
+            );
+        }
         let e = _mm256_set1_epi16(u16::MAX as i16);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
+    fn test_mm256_mask_cvtusepi32_storeu_epi16() {
         let a = _mm256_set1_epi32(i32::MAX);
         let mut r = _mm_undefined_si128();
-        _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set1_epi16(u16::MAX as i16);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
+    fn test_mm_mask_cvtusepi32_storeu_epi16() {
         let a = _mm_set1_epi32(i32::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set_epi16(
             0,
             0,
@@ -57076,46 +59081,56 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
+    fn test_mm512_mask_cvtepi32_storeu_epi8() {
         let a = _mm512_set1_epi32(9);
         let mut r = _mm_undefined_si128();
-        _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        unsafe {
+            _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        }
         let e = _mm_set1_epi8(9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
+    fn test_mm256_mask_cvtepi32_storeu_epi8() {
         let a = _mm256_set1_epi32(9);
         let mut r = _mm_set1_epi8(0);
-        _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
+    fn test_mm_mask_cvtepi32_storeu_epi8() {
         let a = _mm_set1_epi32(9);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
+    fn test_mm512_mask_cvtsepi32_storeu_epi8() {
         let a = _mm512_set1_epi32(i32::MAX);
         let mut r = _mm_undefined_si128();
-        _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        unsafe {
+            _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        }
         let e = _mm_set1_epi8(i8::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
+    fn test_mm256_mask_cvtsepi32_storeu_epi8() {
         let a = _mm256_set1_epi32(i32::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
@@ -57127,10 +59142,12 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
+    fn test_mm_mask_cvtsepi32_storeu_epi8() {
         let a = _mm_set1_epi32(i32::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
@@ -57142,19 +59159,23 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
+    fn test_mm512_mask_cvtusepi32_storeu_epi8() {
         let a = _mm512_set1_epi32(i32::MAX);
         let mut r = _mm_undefined_si128();
-        _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        unsafe {
+            _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
+        }
         let e = _mm_set1_epi8(u8::MAX as i8);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
+    fn test_mm256_mask_cvtusepi32_storeu_epi8() {
         let a = _mm256_set1_epi32(i32::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
@@ -57166,10 +59187,12 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
+    fn test_mm_mask_cvtusepi32_storeu_epi8() {
         let a = _mm_set1_epi32(i32::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
@@ -57181,48 +59204,56 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_storeu_epi32() {
+    const fn test_mm512_storeu_epi32() {
         let a = _mm512_set1_epi32(9);
         let mut r = _mm512_undefined_epi32();
-        _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
+        unsafe {
+            _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
+        }
         assert_eq_m512i(r, a);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_storeu_epi32() {
+    const fn test_mm256_storeu_epi32() {
         let a = _mm256_set1_epi32(9);
         let mut r = _mm256_undefined_si256();
-        _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
+        unsafe {
+            _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
+        }
         assert_eq_m256i(r, a);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_storeu_epi32() {
+    const fn test_mm_storeu_epi32() {
         let a = _mm_set1_epi32(9);
         let mut r = _mm_undefined_si128();
-        _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
+        unsafe {
+            _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
+        }
         assert_eq_m128i(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_loadu_si512() {
+    const fn test_mm512_loadu_si512() {
         let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
         let p = a.as_ptr().cast();
-        let r = _mm512_loadu_si512(black_box(p));
+        let r = unsafe { _mm512_loadu_si512(black_box(p)) };
         let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_storeu_si512() {
+    const fn test_mm512_storeu_si512() {
         let a = _mm512_set1_epi32(9);
         let mut r = _mm512_undefined_epi32();
-        _mm512_storeu_si512(&mut r as *mut _, a);
+        unsafe {
+            _mm512_storeu_si512(&mut r as *mut _, a);
+        }
         assert_eq_m512i(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_load_si512() {
+    const fn test_mm512_load_si512() {
         #[repr(align(64))]
         struct Align {
             data: [i32; 16], // 64 bytes
@@ -57231,21 +59262,23 @@ mod tests {
             data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
         };
         let p = (a.data).as_ptr().cast();
-        let r = _mm512_load_si512(black_box(p));
+        let r = unsafe { _mm512_load_si512(black_box(p)) };
         let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_store_si512() {
+    const fn test_mm512_store_si512() {
         let a = _mm512_set1_epi32(9);
         let mut r = _mm512_undefined_epi32();
-        _mm512_store_si512(&mut r as *mut _, a);
+        unsafe {
+            _mm512_store_si512(&mut r as *mut _, a);
+        }
         assert_eq_m512i(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_load_epi32() {
+    const fn test_mm512_load_epi32() {
         #[repr(align(64))]
         struct Align {
             data: [i32; 16], // 64 bytes
@@ -57254,13 +59287,13 @@ mod tests {
             data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
         };
         let p = (a.data).as_ptr();
-        let r = _mm512_load_epi32(black_box(p));
+        let r = unsafe { _mm512_load_epi32(black_box(p)) };
         let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_load_epi32() {
+    const fn test_mm256_load_epi32() {
         #[repr(align(64))]
         struct Align {
             data: [i32; 8],
@@ -57269,50 +59302,56 @@ mod tests {
             data: [4, 3, 2, 5, 8, 9, 64, 50],
         };
         let p = (a.data).as_ptr();
-        let r = _mm256_load_epi32(black_box(p));
+        let r = unsafe { _mm256_load_epi32(black_box(p)) };
         let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_load_epi32() {
+    const fn test_mm_load_epi32() {
         #[repr(align(64))]
         struct Align {
             data: [i32; 4],
         }
         let a = Align { data: [4, 3, 2, 5] };
         let p = (a.data).as_ptr();
-        let r = _mm_load_epi32(black_box(p));
+        let r = unsafe { _mm_load_epi32(black_box(p)) };
         let e = _mm_setr_epi32(4, 3, 2, 5);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_store_epi32() {
+    const fn test_mm512_store_epi32() {
         let a = _mm512_set1_epi32(9);
         let mut r = _mm512_undefined_epi32();
-        _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
+        unsafe {
+            _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
+        }
         assert_eq_m512i(r, a);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_store_epi32() {
+    const fn test_mm256_store_epi32() {
         let a = _mm256_set1_epi32(9);
         let mut r = _mm256_undefined_si256();
-        _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
+        unsafe {
+            _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
+        }
         assert_eq_m256i(r, a);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_store_epi32() {
+    const fn test_mm_store_epi32() {
         let a = _mm_set1_epi32(9);
         let mut r = _mm_undefined_si128();
-        _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
+        unsafe {
+            _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
+        }
         assert_eq_m128i(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_load_ps() {
+    const fn test_mm512_load_ps() {
         #[repr(align(64))]
         struct Align {
             data: [f32; 16], // 64 bytes
@@ -57323,7 +59362,7 @@ mod tests {
             ],
         };
         let p = (a.data).as_ptr();
-        let r = _mm512_load_ps(black_box(p));
+        let r = unsafe { _mm512_load_ps(black_box(p)) };
         let e = _mm512_setr_ps(
             4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
         );
@@ -57331,15 +59370,17 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_store_ps() {
+    const fn test_mm512_store_ps() {
         let a = _mm512_set1_ps(9.);
         let mut r = _mm512_undefined_ps();
-        _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
+        unsafe {
+            _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
+        }
         assert_eq_m512(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_set1_epi32() {
+    const fn test_mm512_mask_set1_epi32() {
         let src = _mm512_set1_epi32(2);
         let a: i32 = 11;
         let r = _mm512_mask_set1_epi32(src, 0, a);
@@ -57350,7 +59391,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_set1_epi32() {
+    const fn test_mm512_maskz_set1_epi32() {
         let a: i32 = 11;
         let r = _mm512_maskz_set1_epi32(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -57360,7 +59401,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_set1_epi32() {
+    const fn test_mm256_mask_set1_epi32() {
         let src = _mm256_set1_epi32(2);
         let a: i32 = 11;
         let r = _mm256_mask_set1_epi32(src, 0, a);
@@ -57370,8 +59411,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm256_maskz_set1_epi32() {
+    #[simd_test(enable = "avx512f,avx512vl")]
+    const fn test_mm256_maskz_set1_epi32() {
         let a: i32 = 11;
         let r = _mm256_maskz_set1_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -57381,7 +59422,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_set1_epi32() {
+    const fn test_mm_mask_set1_epi32() {
         let src = _mm_set1_epi32(2);
         let a: i32 = 11;
         let r = _mm_mask_set1_epi32(src, 0, a);
@@ -57391,8 +59432,8 @@ mod tests {
         assert_eq_m128i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_set1_epi32() {
+    #[simd_test(enable = "avx512f,avx512vl")]
+    const fn test_mm_maskz_set1_epi32() {
         let a: i32 = 11;
         let r = _mm_maskz_set1_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -57402,7 +59443,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_move_ss() {
+    const fn test_mm_mask_move_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
@@ -57415,7 +59456,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_move_ss() {
+    const fn test_mm_maskz_move_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_maskz_move_ss(0, a, b);
@@ -57427,7 +59468,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_move_sd() {
+    const fn test_mm_mask_move_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -57440,7 +59481,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_move_sd() {
+    const fn test_mm_maskz_move_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_move_sd(0, a, b);
@@ -57452,7 +59493,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_add_ss() {
+    const fn test_mm_mask_add_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
@@ -57465,7 +59506,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_add_ss() {
+    const fn test_mm_maskz_add_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_maskz_add_ss(0, a, b);
@@ -57477,7 +59518,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_add_sd() {
+    const fn test_mm_mask_add_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -57490,7 +59531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_add_sd() {
+    const fn test_mm_maskz_add_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_add_sd(0, a, b);
@@ -57502,7 +59543,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_sub_ss() {
+    const fn test_mm_mask_sub_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
@@ -57515,7 +59556,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_sub_ss() {
+    const fn test_mm_maskz_sub_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_maskz_sub_ss(0, a, b);
@@ -57527,7 +59568,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_sub_sd() {
+    const fn test_mm_mask_sub_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -57540,7 +59581,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_sub_sd() {
+    const fn test_mm_maskz_sub_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_sub_sd(0, a, b);
@@ -57552,7 +59593,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_mul_ss() {
+    const fn test_mm_mask_mul_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
@@ -57565,7 +59606,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_mul_ss() {
+    const fn test_mm_maskz_mul_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_maskz_mul_ss(0, a, b);
@@ -57577,7 +59618,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_mul_sd() {
+    const fn test_mm_mask_mul_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -57590,7 +59631,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_mul_sd() {
+    const fn test_mm_maskz_mul_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_mul_sd(0, a, b);
@@ -57602,7 +59643,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_div_ss() {
+    const fn test_mm_mask_div_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
@@ -57615,7 +59656,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_div_ss() {
+    const fn test_mm_maskz_div_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_maskz_div_ss(0, a, b);
@@ -57627,7 +59668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_div_sd() {
+    const fn test_mm_mask_div_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -57640,7 +59681,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_div_sd() {
+    const fn test_mm_maskz_div_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_div_sd(0, a, b);
@@ -57652,7 +59693,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_max_ss() {
+    fn test_mm_mask_max_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_mask_max_ss(a, 0, a, b);
@@ -57664,7 +59705,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_max_ss() {
+    fn test_mm_maskz_max_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_maskz_max_ss(0, a, b);
@@ -57676,7 +59717,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_max_sd() {
+    fn test_mm_mask_max_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_mask_max_sd(a, 0, a, b);
@@ -57688,7 +59729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_max_sd() {
+    fn test_mm_maskz_max_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_maskz_max_sd(0, a, b);
@@ -57700,7 +59741,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_min_ss() {
+    fn test_mm_mask_min_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_mask_min_ss(a, 0, a, b);
@@ -57712,7 +59753,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_min_ss() {
+    fn test_mm_maskz_min_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_maskz_min_ss(0, a, b);
@@ -57724,7 +59765,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_min_sd() {
+    fn test_mm_mask_min_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_mask_min_sd(a, 0, a, b);
@@ -57736,7 +59777,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_min_sd() {
+    fn test_mm_maskz_min_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_maskz_min_sd(0, a, b);
@@ -57748,7 +59789,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_sqrt_ss() {
+    fn test_mm_mask_sqrt_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
@@ -57761,7 +59802,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_sqrt_ss() {
+    fn test_mm_maskz_sqrt_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
         let r = _mm_maskz_sqrt_ss(0, a, b);
@@ -57773,7 +59814,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_sqrt_sd() {
+    fn test_mm_mask_sqrt_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -57786,7 +59827,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_sqrt_sd() {
+    fn test_mm_maskz_sqrt_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_sqrt_sd(0, a, b);
@@ -57798,7 +59839,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_rsqrt14_ss() {
+    fn test_mm_rsqrt14_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
         let r = _mm_rsqrt14_ss(a, b);
@@ -57807,7 +59848,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_rsqrt14_ss() {
+    fn test_mm_mask_rsqrt14_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
@@ -57820,7 +59861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_rsqrt14_ss() {
+    fn test_mm_maskz_rsqrt14_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
         let r = _mm_maskz_rsqrt14_ss(0, a, b);
@@ -57832,7 +59873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_rsqrt14_sd() {
+    fn test_mm_rsqrt14_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_rsqrt14_sd(a, b);
@@ -57841,7 +59882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_rsqrt14_sd() {
+    fn test_mm_mask_rsqrt14_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -57854,7 +59895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_rsqrt14_sd() {
+    fn test_mm_maskz_rsqrt14_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_rsqrt14_sd(0, a, b);
@@ -57866,7 +59907,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_rcp14_ss() {
+    fn test_mm_rcp14_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
         let r = _mm_rcp14_ss(a, b);
@@ -57875,7 +59916,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_rcp14_ss() {
+    fn test_mm_mask_rcp14_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
@@ -57888,7 +59929,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_rcp14_ss() {
+    fn test_mm_maskz_rcp14_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
         let r = _mm_maskz_rcp14_ss(0, a, b);
@@ -57900,7 +59941,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_rcp14_sd() {
+    fn test_mm_rcp14_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_rcp14_sd(a, b);
@@ -57909,7 +59950,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_rcp14_sd() {
+    fn test_mm_mask_rcp14_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -57922,7 +59963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_rcp14_sd() {
+    fn test_mm_maskz_rcp14_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_rcp14_sd(0, a, b);
@@ -57934,7 +59975,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_getexp_ss() {
+    fn test_mm_getexp_ss() {
         let a = _mm_set1_ps(2.);
         let b = _mm_set1_ps(3.);
         let r = _mm_getexp_ss(a, b);
@@ -57943,7 +59984,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_getexp_ss() {
+    fn test_mm_mask_getexp_ss() {
         let a = _mm_set1_ps(2.);
         let b = _mm_set1_ps(3.);
         let r = _mm_mask_getexp_ss(a, 0, a, b);
@@ -57955,7 +59996,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_getexp_ss() {
+    fn test_mm_maskz_getexp_ss() {
         let a = _mm_set1_ps(2.);
         let b = _mm_set1_ps(3.);
         let r = _mm_maskz_getexp_ss(0, a, b);
@@ -57967,7 +60008,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_getexp_sd() {
+    fn test_mm_getexp_sd() {
         let a = _mm_set1_pd(2.);
         let b = _mm_set1_pd(3.);
         let r = _mm_getexp_sd(a, b);
@@ -57976,7 +60017,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_getexp_sd() {
+    fn test_mm_mask_getexp_sd() {
         let a = _mm_set1_pd(2.);
         let b = _mm_set1_pd(3.);
         let r = _mm_mask_getexp_sd(a, 0, a, b);
@@ -57988,7 +60029,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_getexp_sd() {
+    fn test_mm_maskz_getexp_sd() {
         let a = _mm_set1_pd(2.);
         let b = _mm_set1_pd(3.);
         let r = _mm_maskz_getexp_sd(0, a, b);
@@ -58000,7 +60041,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_getmant_ss() {
+    fn test_mm_getmant_ss() {
         let a = _mm_set1_ps(20.);
         let b = _mm_set1_ps(10.);
         let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
@@ -58009,7 +60050,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_getmant_ss() {
+    fn test_mm_mask_getmant_ss() {
         let a = _mm_set1_ps(20.);
         let b = _mm_set1_ps(10.);
         let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
@@ -58021,7 +60062,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_getmant_ss() {
+    fn test_mm_maskz_getmant_ss() {
         let a = _mm_set1_ps(20.);
         let b = _mm_set1_ps(10.);
         let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
@@ -58033,7 +60074,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_getmant_sd() {
+    fn test_mm_getmant_sd() {
         let a = _mm_set1_pd(20.);
         let b = _mm_set1_pd(10.);
         let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
@@ -58042,7 +60083,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_getmant_sd() {
+    fn test_mm_mask_getmant_sd() {
         let a = _mm_set1_pd(20.);
         let b = _mm_set1_pd(10.);
         let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
@@ -58054,7 +60095,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_getmant_sd() {
+    fn test_mm_maskz_getmant_sd() {
         let a = _mm_set1_pd(20.);
         let b = _mm_set1_pd(10.);
         let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
@@ -58066,7 +60107,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_roundscale_ss() {
+    fn test_mm_roundscale_ss() {
         let a = _mm_set1_ps(2.2);
         let b = _mm_set1_ps(1.1);
         let r = _mm_roundscale_ss::<0>(a, b);
@@ -58075,7 +60116,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_roundscale_ss() {
+    fn test_mm_mask_roundscale_ss() {
         let a = _mm_set1_ps(2.2);
         let b = _mm_set1_ps(1.1);
         let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
@@ -58087,7 +60128,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_roundscale_ss() {
+    fn test_mm_maskz_roundscale_ss() {
         let a = _mm_set1_ps(2.2);
         let b = _mm_set1_ps(1.1);
         let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
@@ -58099,7 +60140,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_roundscale_sd() {
+    fn test_mm_roundscale_sd() {
         let a = _mm_set1_pd(2.2);
         let b = _mm_set1_pd(1.1);
         let r = _mm_roundscale_sd::<0>(a, b);
@@ -58108,7 +60149,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_roundscale_sd() {
+    fn test_mm_mask_roundscale_sd() {
         let a = _mm_set1_pd(2.2);
         let b = _mm_set1_pd(1.1);
         let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
@@ -58120,7 +60161,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_roundscale_sd() {
+    fn test_mm_maskz_roundscale_sd() {
         let a = _mm_set1_pd(2.2);
         let b = _mm_set1_pd(1.1);
         let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
@@ -58132,7 +60173,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_scalef_ss() {
+    fn test_mm_scalef_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(3.);
         let r = _mm_scalef_ss(a, b);
@@ -58141,7 +60182,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_scalef_ss() {
+    fn test_mm_mask_scalef_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(3.);
         let r = _mm_mask_scalef_ss(a, 0, a, b);
@@ -58153,7 +60194,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_scalef_ss() {
+    fn test_mm_maskz_scalef_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(3.);
         let r = _mm_maskz_scalef_ss(0, a, b);
@@ -58165,7 +60206,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_scalef_sd() {
+    fn test_mm_scalef_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(3.);
         let r = _mm_scalef_sd(a, b);
@@ -58174,7 +60215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_scalef_sd() {
+    fn test_mm_mask_scalef_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(3.);
         let r = _mm_mask_scalef_sd(a, 0, a, b);
@@ -58186,7 +60227,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_scalef_sd() {
+    fn test_mm_maskz_scalef_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(3.);
         let r = _mm_maskz_scalef_sd(0, a, b);
@@ -58198,7 +60239,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fmadd_ss() {
+    const fn test_mm_mask_fmadd_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58210,7 +60251,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fmadd_ss() {
+    const fn test_mm_maskz_fmadd_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58223,7 +60264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fmadd_ss() {
+    const fn test_mm_mask3_fmadd_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58235,7 +60276,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fmadd_sd() {
+    const fn test_mm_mask_fmadd_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58247,7 +60288,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fmadd_sd() {
+    const fn test_mm_maskz_fmadd_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58260,7 +60301,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fmadd_sd() {
+    const fn test_mm_mask3_fmadd_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58272,7 +60313,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fmsub_ss() {
+    const fn test_mm_mask_fmsub_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58284,7 +60325,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fmsub_ss() {
+    const fn test_mm_maskz_fmsub_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58297,7 +60338,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fmsub_ss() {
+    const fn test_mm_mask3_fmsub_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58309,7 +60350,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fmsub_sd() {
+    const fn test_mm_mask_fmsub_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58321,7 +60362,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fmsub_sd() {
+    const fn test_mm_maskz_fmsub_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58334,7 +60375,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fmsub_sd() {
+    const fn test_mm_mask3_fmsub_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58346,7 +60387,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fnmadd_ss() {
+    const fn test_mm_mask_fnmadd_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58358,7 +60399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fnmadd_ss() {
+    const fn test_mm_maskz_fnmadd_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58371,7 +60412,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fnmadd_ss() {
+    const fn test_mm_mask3_fnmadd_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58383,7 +60424,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fnmadd_sd() {
+    const fn test_mm_mask_fnmadd_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58395,7 +60436,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fnmadd_sd() {
+    const fn test_mm_maskz_fnmadd_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58408,7 +60449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fnmadd_sd() {
+    const fn test_mm_mask3_fnmadd_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58420,7 +60461,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fnmsub_ss() {
+    const fn test_mm_mask_fnmsub_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58432,7 +60473,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fnmsub_ss() {
+    const fn test_mm_maskz_fnmsub_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58445,7 +60486,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fnmsub_ss() {
+    const fn test_mm_mask3_fnmsub_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -58457,7 +60498,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fnmsub_sd() {
+    const fn test_mm_mask_fnmsub_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58469,7 +60510,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fnmsub_sd() {
+    const fn test_mm_maskz_fnmsub_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58482,7 +60523,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fnmsub_sd() {
+    const fn test_mm_mask3_fnmsub_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -58494,7 +60535,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_add_round_ss() {
+    fn test_mm_add_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58503,7 +60544,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_add_round_ss() {
+    fn test_mm_mask_add_round_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
@@ -58518,7 +60559,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_add_round_ss() {
+    fn test_mm_maskz_add_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58531,7 +60572,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_add_round_sd() {
+    fn test_mm_add_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58540,7 +60581,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_add_round_sd() {
+    fn test_mm_mask_add_round_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -58555,7 +60596,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_add_round_sd() {
+    fn test_mm_maskz_add_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58568,7 +60609,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_sub_round_ss() {
+    fn test_mm_sub_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58577,7 +60618,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_sub_round_ss() {
+    fn test_mm_mask_sub_round_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
@@ -58592,7 +60633,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_sub_round_ss() {
+    fn test_mm_maskz_sub_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58605,7 +60646,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_sub_round_sd() {
+    fn test_mm_sub_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58614,7 +60655,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_sub_round_sd() {
+    fn test_mm_mask_sub_round_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -58629,7 +60670,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_sub_round_sd() {
+    fn test_mm_maskz_sub_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58642,7 +60683,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mul_round_ss() {
+    fn test_mm_mul_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58651,7 +60692,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_mul_round_ss() {
+    fn test_mm_mask_mul_round_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
@@ -58666,7 +60707,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_mul_round_ss() {
+    fn test_mm_maskz_mul_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58679,7 +60720,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mul_round_sd() {
+    fn test_mm_mul_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58688,7 +60729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_mul_round_sd() {
+    fn test_mm_mask_mul_round_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -58703,7 +60744,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_mul_round_sd() {
+    fn test_mm_maskz_mul_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58716,7 +60757,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_div_round_ss() {
+    fn test_mm_div_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58725,7 +60766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_div_round_ss() {
+    fn test_mm_mask_div_round_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
@@ -58740,7 +60781,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_div_round_ss() {
+    fn test_mm_maskz_div_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 40.);
         let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58753,7 +60794,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_div_round_sd() {
+    fn test_mm_div_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58762,7 +60803,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_div_round_sd() {
+    fn test_mm_mask_div_round_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -58777,7 +60818,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_div_round_sd() {
+    fn test_mm_maskz_div_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58790,7 +60831,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_max_round_ss() {
+    fn test_mm_max_round_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
@@ -58799,7 +60840,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_max_round_ss() {
+    fn test_mm_mask_max_round_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -58811,7 +60852,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_max_round_ss() {
+    fn test_mm_maskz_max_round_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -58823,7 +60864,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_max_round_sd() {
+    fn test_mm_max_round_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
@@ -58832,7 +60873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_max_round_sd() {
+    fn test_mm_mask_max_round_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -58844,7 +60885,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_max_round_sd() {
+    fn test_mm_maskz_max_round_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -58856,7 +60897,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_min_round_ss() {
+    fn test_mm_min_round_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
@@ -58865,7 +60906,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_min_round_ss() {
+    fn test_mm_mask_min_round_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -58877,7 +60918,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_min_round_ss() {
+    fn test_mm_maskz_min_round_ss() {
         let a = _mm_set_ps(0., 1., 2., 3.);
         let b = _mm_set_ps(4., 5., 6., 7.);
         let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -58889,7 +60930,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_min_round_sd() {
+    fn test_mm_min_round_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
@@ -58898,7 +60939,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_min_round_sd() {
+    fn test_mm_mask_min_round_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -58910,7 +60951,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_min_round_sd() {
+    fn test_mm_maskz_min_round_sd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -58922,7 +60963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_sqrt_round_ss() {
+    fn test_mm_sqrt_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
         let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58931,7 +60972,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_sqrt_round_ss() {
+    fn test_mm_mask_sqrt_round_ss() {
         let src = _mm_set_ps(10., 11., 100., 110.);
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
@@ -58946,7 +60987,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_sqrt_round_ss() {
+    fn test_mm_maskz_sqrt_round_ss() {
         let a = _mm_set_ps(1., 2., 10., 20.);
         let b = _mm_set_ps(3., 4., 30., 4.);
         let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58959,7 +61000,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_sqrt_round_sd() {
+    fn test_mm_sqrt_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -58968,7 +61009,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_sqrt_round_sd() {
+    fn test_mm_mask_sqrt_round_sd() {
         let src = _mm_set_pd(10., 11.);
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
@@ -58983,7 +61024,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_sqrt_round_sd() {
+    fn test_mm_maskz_sqrt_round_sd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(3., 4.);
         let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -58996,7 +61037,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_getexp_round_ss() {
+    fn test_mm_getexp_round_ss() {
         let a = _mm_set1_ps(2.);
         let b = _mm_set1_ps(3.);
         let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
@@ -59005,7 +61046,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_getexp_round_ss() {
+    fn test_mm_mask_getexp_round_ss() {
         let a = _mm_set1_ps(2.);
         let b = _mm_set1_ps(3.);
         let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -59017,7 +61058,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_getexp_round_ss() {
+    fn test_mm_maskz_getexp_round_ss() {
         let a = _mm_set1_ps(2.);
         let b = _mm_set1_ps(3.);
         let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -59029,7 +61070,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_getexp_round_sd() {
+    fn test_mm_getexp_round_sd() {
         let a = _mm_set1_pd(2.);
         let b = _mm_set1_pd(3.);
         let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
@@ -59038,7 +61079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_getexp_round_sd() {
+    fn test_mm_mask_getexp_round_sd() {
         let a = _mm_set1_pd(2.);
         let b = _mm_set1_pd(3.);
         let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -59050,7 +61091,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_getexp_round_sd() {
+    fn test_mm_maskz_getexp_round_sd() {
         let a = _mm_set1_pd(2.);
         let b = _mm_set1_pd(3.);
         let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -59062,7 +61103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_getmant_round_ss() {
+    fn test_mm_getmant_round_ss() {
         let a = _mm_set1_ps(20.);
         let b = _mm_set1_ps(10.);
         let r =
@@ -59074,7 +61115,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_getmant_round_ss() {
+    fn test_mm_mask_getmant_round_ss() {
         let a = _mm_set1_ps(20.);
         let b = _mm_set1_ps(10.);
         let r = _mm_mask_getmant_round_ss::<
@@ -59094,7 +61135,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_getmant_round_ss() {
+    fn test_mm_maskz_getmant_round_ss() {
         let a = _mm_set1_ps(20.);
         let b = _mm_set1_ps(10.);
         let r = _mm_maskz_getmant_round_ss::<
@@ -59114,7 +61155,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_getmant_round_sd() {
+    fn test_mm_getmant_round_sd() {
         let a = _mm_set1_pd(20.);
         let b = _mm_set1_pd(10.);
         let r =
@@ -59126,7 +61167,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_getmant_round_sd() {
+    fn test_mm_mask_getmant_round_sd() {
         let a = _mm_set1_pd(20.);
         let b = _mm_set1_pd(10.);
         let r = _mm_mask_getmant_round_sd::<
@@ -59146,7 +61187,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_getmant_round_sd() {
+    fn test_mm_maskz_getmant_round_sd() {
         let a = _mm_set1_pd(20.);
         let b = _mm_set1_pd(10.);
         let r = _mm_maskz_getmant_round_sd::<
@@ -59166,7 +61207,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_roundscale_round_ss() {
+    fn test_mm_roundscale_round_ss() {
         let a = _mm_set1_ps(2.2);
         let b = _mm_set1_ps(1.1);
         let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
@@ -59175,7 +61216,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_roundscale_round_ss() {
+    fn test_mm_mask_roundscale_round_ss() {
         let a = _mm_set1_ps(2.2);
         let b = _mm_set1_ps(1.1);
         let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -59187,7 +61228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_roundscale_round_ss() {
+    fn test_mm_maskz_roundscale_round_ss() {
         let a = _mm_set1_ps(2.2);
         let b = _mm_set1_ps(1.1);
         let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -59199,7 +61240,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_roundscale_round_sd() {
+    fn test_mm_roundscale_round_sd() {
         let a = _mm_set1_pd(2.2);
         let b = _mm_set1_pd(1.1);
         let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
@@ -59208,7 +61249,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_roundscale_round_sd() {
+    fn test_mm_mask_roundscale_round_sd() {
         let a = _mm_set1_pd(2.2);
         let b = _mm_set1_pd(1.1);
         let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -59220,7 +61261,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_roundscale_round_sd() {
+    fn test_mm_maskz_roundscale_round_sd() {
         let a = _mm_set1_pd(2.2);
         let b = _mm_set1_pd(1.1);
         let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -59232,7 +61273,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_scalef_round_ss() {
+    fn test_mm_scalef_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(3.);
         let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -59241,7 +61282,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_scalef_round_ss() {
+    fn test_mm_mask_scalef_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(3.);
         let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -59257,7 +61298,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_scalef_round_ss() {
+    fn test_mm_maskz_scalef_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(3.);
         let r =
@@ -59272,7 +61313,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_scalef_round_sd() {
+    fn test_mm_scalef_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(3.);
         let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -59281,7 +61322,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_scalef_round_sd() {
+    fn test_mm_mask_scalef_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(3.);
         let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -59297,7 +61338,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_scalef_round_sd() {
+    fn test_mm_maskz_scalef_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(3.);
         let r =
@@ -59312,7 +61353,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fmadd_round_ss() {
+    fn test_mm_fmadd_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59322,7 +61363,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fmadd_round_ss() {
+    fn test_mm_mask_fmadd_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59338,7 +61379,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fmadd_round_ss() {
+    fn test_mm_maskz_fmadd_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59355,7 +61396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fmadd_round_ss() {
+    fn test_mm_mask3_fmadd_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59371,7 +61412,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fmadd_round_sd() {
+    fn test_mm_fmadd_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59381,7 +61422,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fmadd_round_sd() {
+    fn test_mm_mask_fmadd_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59397,7 +61438,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fmadd_round_sd() {
+    fn test_mm_maskz_fmadd_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59414,7 +61455,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fmadd_round_sd() {
+    fn test_mm_mask3_fmadd_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59430,7 +61471,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fmsub_round_ss() {
+    fn test_mm_fmsub_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59440,7 +61481,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fmsub_round_ss() {
+    fn test_mm_mask_fmsub_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59456,7 +61497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fmsub_round_ss() {
+    fn test_mm_maskz_fmsub_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59473,7 +61514,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fmsub_round_ss() {
+    fn test_mm_mask3_fmsub_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59489,7 +61530,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fmsub_round_sd() {
+    fn test_mm_fmsub_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59499,7 +61540,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fmsub_round_sd() {
+    fn test_mm_mask_fmsub_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59515,7 +61556,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fmsub_round_sd() {
+    fn test_mm_maskz_fmsub_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59532,7 +61573,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fmsub_round_sd() {
+    fn test_mm_mask3_fmsub_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59548,7 +61589,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fnmadd_round_ss() {
+    fn test_mm_fnmadd_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59558,7 +61599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fnmadd_round_ss() {
+    fn test_mm_mask_fnmadd_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59574,7 +61615,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fnmadd_round_ss() {
+    fn test_mm_maskz_fnmadd_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59591,7 +61632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fnmadd_round_ss() {
+    fn test_mm_mask3_fnmadd_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59607,7 +61648,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fnmadd_round_sd() {
+    fn test_mm_fnmadd_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59617,7 +61658,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fnmadd_round_sd() {
+    fn test_mm_mask_fnmadd_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59633,7 +61674,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fnmadd_round_sd() {
+    fn test_mm_maskz_fnmadd_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59650,7 +61691,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fnmadd_round_sd() {
+    fn test_mm_mask3_fnmadd_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59666,7 +61707,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fnmsub_round_ss() {
+    fn test_mm_fnmsub_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59676,7 +61717,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fnmsub_round_ss() {
+    fn test_mm_mask_fnmsub_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59692,7 +61733,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fnmsub_round_ss() {
+    fn test_mm_maskz_fnmsub_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59709,7 +61750,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fnmsub_round_ss() {
+    fn test_mm_mask3_fnmsub_round_ss() {
         let a = _mm_set1_ps(1.);
         let b = _mm_set1_ps(2.);
         let c = _mm_set1_ps(3.);
@@ -59725,7 +61766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fnmsub_round_sd() {
+    fn test_mm_fnmsub_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59735,7 +61776,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fnmsub_round_sd() {
+    fn test_mm_mask_fnmsub_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59751,7 +61792,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fnmsub_round_sd() {
+    fn test_mm_maskz_fnmsub_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59768,7 +61809,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask3_fnmsub_round_sd() {
+    fn test_mm_mask3_fnmsub_round_sd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let c = _mm_set1_pd(3.);
@@ -59784,139 +61825,139 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fixupimm_ss() {
+    fn test_mm_fixupimm_ss() {
         let a = _mm_set_ps(0., 0., 0., f32::NAN);
         let b = _mm_set1_ps(f32::MAX);
         let c = _mm_set1_epi32(i32::MAX);
         let r = _mm_fixupimm_ss::<5>(a, b, c);
-        let e = _mm_set_ps(0., 0., 0., -0.0);
+        let e = _mm_set_ps(f32::MAX, f32::MAX, f32::MAX, -0.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fixupimm_ss() {
+    fn test_mm_mask_fixupimm_ss() {
         let a = _mm_set_ps(0., 0., 0., f32::NAN);
         let b = _mm_set1_ps(f32::MAX);
         let c = _mm_set1_epi32(i32::MAX);
         let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
-        let e = _mm_set_ps(0., 0., 0., -0.0);
+        let e = _mm_set_ps(f32::MAX, f32::MAX, f32::MAX, -0.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fixupimm_ss() {
+    fn test_mm_maskz_fixupimm_ss() {
         let a = _mm_set_ps(0., 0., 0., f32::NAN);
         let b = _mm_set1_ps(f32::MAX);
         let c = _mm_set1_epi32(i32::MAX);
         let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
-        let e = _mm_set_ps(0., 0., 0., 0.0);
+        let e = _mm_set_ps(f32::MAX, f32::MAX, f32::MAX, 0.0);
         assert_eq_m128(r, e);
         let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
-        let e = _mm_set_ps(0., 0., 0., -0.0);
+        let e = _mm_set_ps(f32::MAX, f32::MAX, f32::MAX, -0.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fixupimm_sd() {
+    fn test_mm_fixupimm_sd() {
         let a = _mm_set_pd(0., f64::NAN);
         let b = _mm_set1_pd(f64::MAX);
         let c = _mm_set1_epi64x(i32::MAX as i64);
         let r = _mm_fixupimm_sd::<5>(a, b, c);
-        let e = _mm_set_pd(0., -0.0);
+        let e = _mm_set_pd(f64::MAX, -0.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fixupimm_sd() {
+    fn test_mm_mask_fixupimm_sd() {
         let a = _mm_set_pd(0., f64::NAN);
         let b = _mm_set1_pd(f64::MAX);
         let c = _mm_set1_epi64x(i32::MAX as i64);
         let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
-        let e = _mm_set_pd(0., -0.0);
+        let e = _mm_set_pd(f64::MAX, -0.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fixupimm_sd() {
+    fn test_mm_maskz_fixupimm_sd() {
         let a = _mm_set_pd(0., f64::NAN);
         let b = _mm_set1_pd(f64::MAX);
         let c = _mm_set1_epi64x(i32::MAX as i64);
         let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
-        let e = _mm_set_pd(0., 0.0);
+        let e = _mm_set_pd(f64::MAX, 0.0);
         assert_eq_m128d(r, e);
         let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
-        let e = _mm_set_pd(0., -0.0);
+        let e = _mm_set_pd(f64::MAX, -0.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fixupimm_round_ss() {
+    fn test_mm_fixupimm_round_ss() {
         let a = _mm_set_ps(1., 0., 0., f32::NAN);
         let b = _mm_set1_ps(f32::MAX);
         let c = _mm_set1_epi32(i32::MAX);
         let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
-        let e = _mm_set_ps(1., 0., 0., -0.0);
+        let e = _mm_set_ps(f32::MAX, f32::MAX, f32::MAX, -0.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fixupimm_round_ss() {
+    fn test_mm_mask_fixupimm_round_ss() {
         let a = _mm_set_ps(0., 0., 0., f32::NAN);
         let b = _mm_set1_ps(f32::MAX);
         let c = _mm_set1_epi32(i32::MAX);
         let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
-        let e = _mm_set_ps(0., 0., 0., -0.0);
+        let e = _mm_set_ps(f32::MAX, f32::MAX, f32::MAX, -0.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fixupimm_round_ss() {
+    fn test_mm_maskz_fixupimm_round_ss() {
         let a = _mm_set_ps(0., 0., 0., f32::NAN);
         let b = _mm_set1_ps(f32::MAX);
         let c = _mm_set1_epi32(i32::MAX);
         let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
-        let e = _mm_set_ps(0., 0., 0., 0.0);
+        let e = _mm_set_ps(f32::MAX, f32::MAX, f32::MAX, 0.0);
         assert_eq_m128(r, e);
         let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
-        let e = _mm_set_ps(0., 0., 0., -0.0);
+        let e = _mm_set_ps(f32::MAX, f32::MAX, f32::MAX, -0.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_fixupimm_round_sd() {
+    fn test_mm_fixupimm_round_sd() {
         let a = _mm_set_pd(0., f64::NAN);
         let b = _mm_set1_pd(f64::MAX);
         let c = _mm_set1_epi64x(i32::MAX as i64);
         let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
-        let e = _mm_set_pd(0., -0.0);
+        let e = _mm_set_pd(f64::MAX, -0.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_fixupimm_round_sd() {
+    fn test_mm_mask_fixupimm_round_sd() {
         let a = _mm_set_pd(0., f64::NAN);
         let b = _mm_set1_pd(f64::MAX);
         let c = _mm_set1_epi64x(i32::MAX as i64);
         let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
-        let e = _mm_set_pd(0., -0.0);
+        let e = _mm_set_pd(f64::MAX, -0.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_fixupimm_round_sd() {
+    fn test_mm_maskz_fixupimm_round_sd() {
         let a = _mm_set_pd(0., f64::NAN);
         let b = _mm_set1_pd(f64::MAX);
         let c = _mm_set1_epi64x(i32::MAX as i64);
         let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
-        let e = _mm_set_pd(0., 0.0);
+        let e = _mm_set_pd(f64::MAX, 0.0);
         assert_eq_m128d(r, e);
         let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
-        let e = _mm_set_pd(0., -0.0);
+        let e = _mm_set_pd(f64::MAX, -0.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_cvtss_sd() {
+    fn test_mm_mask_cvtss_sd() {
         let a = _mm_set_pd(6., -7.5);
         let b = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_mask_cvtss_sd(a, 0, a, b);
@@ -59927,7 +61968,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_cvtss_sd() {
+    fn test_mm_maskz_cvtss_sd() {
         let a = _mm_set_pd(6., -7.5);
         let b = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_maskz_cvtss_sd(0, a, b);
@@ -59939,7 +61980,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_cvtsd_ss() {
+    fn test_mm_mask_cvtsd_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b = _mm_set_pd(6., -7.5);
         let r = _mm_mask_cvtsd_ss(a, 0, a, b);
@@ -59950,7 +61991,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_cvtsd_ss() {
+    fn test_mm_maskz_cvtsd_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b = _mm_set_pd(6., -7.5);
         let r = _mm_maskz_cvtsd_ss(0, a, b);
@@ -59962,7 +62003,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundss_sd() {
+    fn test_mm_cvt_roundss_sd() {
         let a = _mm_set_pd(6., -7.5);
         let b = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
@@ -59971,7 +62012,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_cvt_roundss_sd() {
+    fn test_mm_mask_cvt_roundss_sd() {
         let a = _mm_set_pd(6., -7.5);
         let b = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -59982,7 +62023,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_cvt_roundss_sd() {
+    fn test_mm_maskz_cvt_roundss_sd() {
         let a = _mm_set_pd(6., -7.5);
         let b = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -59994,7 +62035,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsd_ss() {
+    fn test_mm_cvt_roundsd_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b = _mm_set_pd(6., -7.5);
         let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -60003,7 +62044,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_mask_cvt_roundsd_ss() {
+    fn test_mm_mask_cvt_roundsd_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b = _mm_set_pd(6., -7.5);
         let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
@@ -60016,7 +62057,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_maskz_cvt_roundsd_ss() {
+    fn test_mm_maskz_cvt_roundsd_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b = _mm_set_pd(6., -7.5);
         let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
@@ -60030,7 +62071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundss_si32() {
+    fn test_mm_cvt_roundss_si32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: i32 = -1;
@@ -60038,7 +62079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundss_i32() {
+    fn test_mm_cvt_roundss_i32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: i32 = -1;
@@ -60046,7 +62087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundss_u32() {
+    fn test_mm_cvt_roundss_u32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: u32 = u32::MAX;
@@ -60054,7 +62095,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtss_i32() {
+    fn test_mm_cvtss_i32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtss_i32(a);
         let e: i32 = -2;
@@ -60062,7 +62103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtss_u32() {
+    fn test_mm_cvtss_u32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtss_u32(a);
         let e: u32 = u32::MAX;
@@ -60070,7 +62111,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsd_si32() {
+    fn test_mm_cvt_roundsd_si32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: i32 = -1;
@@ -60078,7 +62119,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsd_i32() {
+    fn test_mm_cvt_roundsd_i32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: i32 = -1;
@@ -60086,7 +62127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsd_u32() {
+    fn test_mm_cvt_roundsd_u32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: u32 = u32::MAX;
@@ -60094,7 +62135,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtsd_i32() {
+    fn test_mm_cvtsd_i32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtsd_i32(a);
         let e: i32 = -2;
@@ -60102,7 +62143,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtsd_u32() {
+    fn test_mm_cvtsd_u32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtsd_u32(a);
         let e: u32 = u32::MAX;
@@ -60110,7 +62151,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundi32_ss() {
+    fn test_mm_cvt_roundi32_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: i32 = 9;
         let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -60119,7 +62160,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsi32_ss() {
+    fn test_mm_cvt_roundsi32_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: i32 = 9;
         let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -60128,7 +62169,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundu32_ss() {
+    fn test_mm_cvt_roundu32_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: u32 = 9;
         let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -60137,7 +62178,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvti32_ss() {
+    const fn test_mm_cvti32_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: i32 = 9;
         let r = _mm_cvti32_ss(a, b);
@@ -60146,7 +62187,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvti32_sd() {
+    const fn test_mm_cvti32_sd() {
         let a = _mm_set_pd(1., -1.5);
         let b: i32 = 9;
         let r = _mm_cvti32_sd(a, b);
@@ -60155,7 +62196,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundss_si32() {
+    fn test_mm_cvtt_roundss_si32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
         let e: i32 = -1;
@@ -60163,7 +62204,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundss_i32() {
+    fn test_mm_cvtt_roundss_i32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
         let e: i32 = -1;
@@ -60171,7 +62212,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundss_u32() {
+    fn test_mm_cvtt_roundss_u32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
         let e: u32 = u32::MAX;
@@ -60179,7 +62220,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvttss_i32() {
+    fn test_mm_cvttss_i32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvttss_i32(a);
         let e: i32 = -1;
@@ -60187,7 +62228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvttss_u32() {
+    fn test_mm_cvttss_u32() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvttss_u32(a);
         let e: u32 = u32::MAX;
@@ -60195,7 +62236,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundsd_si32() {
+    fn test_mm_cvtt_roundsd_si32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
         let e: i32 = -1;
@@ -60203,7 +62244,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundsd_i32() {
+    fn test_mm_cvtt_roundsd_i32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
         let e: i32 = -1;
@@ -60211,7 +62252,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundsd_u32() {
+    fn test_mm_cvtt_roundsd_u32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
         let e: u32 = u32::MAX;
@@ -60219,7 +62260,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvttsd_i32() {
+    fn test_mm_cvttsd_i32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvttsd_i32(a);
         let e: i32 = -1;
@@ -60227,7 +62268,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvttsd_u32() {
+    fn test_mm_cvttsd_u32() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvttsd_u32(a);
         let e: u32 = u32::MAX;
@@ -60235,7 +62276,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtu32_ss() {
+    const fn test_mm_cvtu32_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: u32 = 9;
         let r = _mm_cvtu32_ss(a, b);
@@ -60244,7 +62285,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtu32_sd() {
+    const fn test_mm_cvtu32_sd() {
         let a = _mm_set_pd(1., -1.5);
         let b: u32 = 9;
         let r = _mm_cvtu32_sd(a, b);
@@ -60253,7 +62294,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_comi_round_ss() {
+    fn test_mm_comi_round_ss() {
         let a = _mm_set1_ps(2.2);
         let b = _mm_set1_ps(1.1);
         let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
@@ -60262,7 +62303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_comi_round_sd() {
+    fn test_mm_comi_round_sd() {
         let a = _mm_set1_pd(2.2);
         let b = _mm_set1_pd(1.1);
         let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
@@ -60271,7 +62312,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtsi512_si32() {
+    const fn test_mm512_cvtsi512_si32() {
         let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_cvtsi512_si32(a);
         let e: i32 = 1;
@@ -60279,7 +62320,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtss_f32() {
+    const fn test_mm512_cvtss_f32() {
         let a = _mm512_setr_ps(
             312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
         );
@@ -60287,13 +62328,13 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtsd_f64() {
+    const fn test_mm512_cvtsd_f64() {
         let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
         assert_eq!(r, -1.1);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_shuffle_pd() {
+    const fn test_mm512_shuffle_pd() {
         let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
         let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
         let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
@@ -60302,7 +62343,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_shuffle_pd() {
+    const fn test_mm512_mask_shuffle_pd() {
         let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
         let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
         let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
@@ -60313,7 +62354,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_shuffle_pd() {
+    const fn test_mm512_maskz_shuffle_pd() {
         let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
         let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
         let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
@@ -60324,140 +62365,140 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_expandloadu_epi32() {
+    fn test_mm512_mask_expandloadu_epi32() {
         let src = _mm512_set1_epi32(42);
         let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_expandloadu_epi32(src, m, black_box(p)) };
         let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_expandloadu_epi32() {
+    fn test_mm512_maskz_expandloadu_epi32() {
         let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
+        let r = unsafe { _mm512_maskz_expandloadu_epi32(m, black_box(p)) };
         let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_expandloadu_epi32() {
+    fn test_mm256_mask_expandloadu_epi32() {
         let src = _mm256_set1_epi32(42);
         let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_expandloadu_epi32(src, m, black_box(p)) };
         let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_expandloadu_epi32() {
+    fn test_mm256_maskz_expandloadu_epi32() {
         let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
+        let r = unsafe { _mm256_maskz_expandloadu_epi32(m, black_box(p)) };
         let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_expandloadu_epi32() {
+    fn test_mm_mask_expandloadu_epi32() {
         let src = _mm_set1_epi32(42);
         let a = &[1_i32, 2, 3, 4];
         let p = a.as_ptr();
         let m = 0b11111000;
-        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
+        let r = unsafe { _mm_mask_expandloadu_epi32(src, m, black_box(p)) };
         let e = _mm_set_epi32(1, 42, 42, 42);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_expandloadu_epi32() {
+    fn test_mm_maskz_expandloadu_epi32() {
         let a = &[1_i32, 2, 3, 4];
         let p = a.as_ptr();
         let m = 0b11111000;
-        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
+        let r = unsafe { _mm_maskz_expandloadu_epi32(m, black_box(p)) };
         let e = _mm_set_epi32(1, 0, 0, 0);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_expandloadu_epi64() {
+    fn test_mm512_mask_expandloadu_epi64() {
         let src = _mm512_set1_epi64(42);
         let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_expandloadu_epi64(src, m, black_box(p)) };
         let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_expandloadu_epi64() {
+    fn test_mm512_maskz_expandloadu_epi64() {
         let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
+        let r = unsafe { _mm512_maskz_expandloadu_epi64(m, black_box(p)) };
         let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_expandloadu_epi64() {
+    fn test_mm256_mask_expandloadu_epi64() {
         let src = _mm256_set1_epi64x(42);
         let a = &[1_i64, 2, 3, 4];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_expandloadu_epi64(src, m, black_box(p)) };
         let e = _mm256_set_epi64x(1, 42, 42, 42);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_expandloadu_epi64() {
+    fn test_mm256_maskz_expandloadu_epi64() {
         let a = &[1_i64, 2, 3, 4];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
+        let r = unsafe { _mm256_maskz_expandloadu_epi64(m, black_box(p)) };
         let e = _mm256_set_epi64x(1, 0, 0, 0);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_expandloadu_epi64() {
+    fn test_mm_mask_expandloadu_epi64() {
         let src = _mm_set1_epi64x(42);
         let a = &[1_i64, 2];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
+        let r = unsafe { _mm_mask_expandloadu_epi64(src, m, black_box(p)) };
         let e = _mm_set_epi64x(42, 42);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_expandloadu_epi64() {
+    fn test_mm_maskz_expandloadu_epi64() {
         let a = &[1_i64, 2];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
+        let r = unsafe { _mm_maskz_expandloadu_epi64(m, black_box(p)) };
         let e = _mm_set_epi64x(0, 0);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_expandloadu_ps() {
+    fn test_mm512_mask_expandloadu_ps() {
         let src = _mm512_set1_ps(42.);
         let a = &[
             1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_expandloadu_ps(src, m, black_box(p)) };
         let e = _mm512_set_ps(
             8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
         );
@@ -60465,13 +62506,13 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_expandloadu_ps() {
+    fn test_mm512_maskz_expandloadu_ps() {
         let a = &[
             1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
+        let r = unsafe { _mm512_maskz_expandloadu_ps(m, black_box(p)) };
         let e = _mm512_set_ps(
             8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
         );
@@ -60479,106 +62520,106 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_expandloadu_ps() {
+    fn test_mm256_mask_expandloadu_ps() {
         let src = _mm256_set1_ps(42.);
         let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_expandloadu_ps(src, m, black_box(p)) };
         let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_expandloadu_ps() {
+    fn test_mm256_maskz_expandloadu_ps() {
         let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
+        let r = unsafe { _mm256_maskz_expandloadu_ps(m, black_box(p)) };
         let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_expandloadu_ps() {
+    fn test_mm_mask_expandloadu_ps() {
         let src = _mm_set1_ps(42.);
         let a = &[1.0f32, 2., 3., 4.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
+        let r = unsafe { _mm_mask_expandloadu_ps(src, m, black_box(p)) };
         let e = _mm_set_ps(1., 42., 42., 42.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_expandloadu_ps() {
+    fn test_mm_maskz_expandloadu_ps() {
         let a = &[1.0f32, 2., 3., 4.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
+        let r = unsafe { _mm_maskz_expandloadu_ps(m, black_box(p)) };
         let e = _mm_set_ps(1., 0., 0., 0.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_expandloadu_pd() {
+    fn test_mm512_mask_expandloadu_pd() {
         let src = _mm512_set1_pd(42.);
         let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_expandloadu_pd(src, m, black_box(p)) };
         let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
         assert_eq_m512d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_expandloadu_pd() {
+    fn test_mm512_maskz_expandloadu_pd() {
         let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
+        let r = unsafe { _mm512_maskz_expandloadu_pd(m, black_box(p)) };
         let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
         assert_eq_m512d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_expandloadu_pd() {
+    fn test_mm256_mask_expandloadu_pd() {
         let src = _mm256_set1_pd(42.);
         let a = &[1.0f64, 2., 3., 4.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_expandloadu_pd(src, m, black_box(p)) };
         let e = _mm256_set_pd(1., 42., 42., 42.);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_expandloadu_pd() {
+    fn test_mm256_maskz_expandloadu_pd() {
         let a = &[1.0f64, 2., 3., 4.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
+        let r = unsafe { _mm256_maskz_expandloadu_pd(m, black_box(p)) };
         let e = _mm256_set_pd(1., 0., 0., 0.);
         assert_eq_m256d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_expandloadu_pd() {
+    fn test_mm_mask_expandloadu_pd() {
         let src = _mm_set1_pd(42.);
         let a = &[1.0f64, 2.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
+        let r = unsafe { _mm_mask_expandloadu_pd(src, m, black_box(p)) };
         let e = _mm_set_pd(42., 42.);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_expandloadu_pd() {
+    fn test_mm_maskz_expandloadu_pd() {
         let a = &[1.0f64, 2.];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
+        let r = unsafe { _mm_maskz_expandloadu_pd(m, black_box(p)) };
         let e = _mm_set_pd(0., 0.);
         assert_eq_m128d(r, e);
     }
diff --git a/crates/core_arch/src/x86/avx512fp16.rs b/crates/core_arch/src/x86/avx512fp16.rs
index 293fda3064..6523e98d0c 100644
--- a/crates/core_arch/src/x86/avx512fp16.rs
+++ b/crates/core_arch/src/x86/avx512fp16.rs
@@ -9,7 +9,8 @@ use crate::ptr;
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_set_ph(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_ph(
     e7: f16,
     e6: f16,
     e5: f16,
@@ -28,7 +29,8 @@ pub fn _mm_set_ph(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_set_ph(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set_ph(
     e15: f16,
     e14: f16,
     e13: f16,
@@ -57,7 +59,8 @@ pub fn _mm256_set_ph(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_set_ph(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set_ph(
     e31: f16,
     e30: f16,
     e29: f16,
@@ -104,7 +107,8 @@ pub fn _mm512_set_ph(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_set_sh(a: f16) -> __m128h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_sh(a: f16) -> __m128h {
     __m128h([a, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
 }
 
@@ -114,7 +118,8 @@ pub fn _mm_set_sh(a: f16) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_set1_ph(a: f16) -> __m128h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set1_ph(a: f16) -> __m128h {
     unsafe { transmute(f16x8::splat(a)) }
 }
 
@@ -124,7 +129,8 @@ pub fn _mm_set1_ph(a: f16) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_set1_ph(a: f16) -> __m256h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_set1_ph(a: f16) -> __m256h {
     unsafe { transmute(f16x16::splat(a)) }
 }
 
@@ -134,7 +140,8 @@ pub fn _mm256_set1_ph(a: f16) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_set1_ph(a: f16) -> __m512h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_set1_ph(a: f16) -> __m512h {
     unsafe { transmute(f16x32::splat(a)) }
 }
 
@@ -144,7 +151,8 @@ pub fn _mm512_set1_ph(a: f16) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_setr_ph(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setr_ph(
     e0: f16,
     e1: f16,
     e2: f16,
@@ -163,7 +171,8 @@ pub fn _mm_setr_ph(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_setr_ph(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setr_ph(
     e0: f16,
     e1: f16,
     e2: f16,
@@ -192,7 +201,8 @@ pub fn _mm256_setr_ph(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_setr_ph(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setr_ph(
     e0: f16,
     e1: f16,
     e2: f16,
@@ -237,8 +247,9 @@ pub fn _mm512_setr_ph(
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_setzero_ph() -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setzero_ph() -> __m128h {
     unsafe { transmute(f16x8::ZERO) }
 }
 
@@ -247,8 +258,9 @@ pub fn _mm_setzero_ph() -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_setzero_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_setzero_ph() -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_setzero_ph() -> __m256h {
     f16x16::ZERO.as_m256h()
 }
 
@@ -257,8 +269,9 @@ pub fn _mm256_setzero_ph() -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setzero_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_setzero_ph() -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_setzero_ph() -> __m512h {
     f16x32::ZERO.as_m512h()
 }
 
@@ -270,8 +283,9 @@ pub fn _mm512_setzero_ph() -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_undefined_ph() -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_undefined_ph() -> __m128h {
     f16x8::ZERO.as_m128h()
 }
 
@@ -283,8 +297,9 @@ pub fn _mm_undefined_ph() -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_undefined_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_undefined_ph() -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_undefined_ph() -> __m256h {
     f16x16::ZERO.as_m256h()
 }
 
@@ -296,8 +311,9 @@ pub fn _mm256_undefined_ph() -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_undefined_ph() -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_undefined_ph() -> __m512h {
     f16x32::ZERO.as_m512h()
 }
 
@@ -307,8 +323,9 @@ pub fn _mm512_undefined_ph() -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_castpd_ph(a: __m128d) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castpd_ph(a: __m128d) -> __m128h {
     unsafe { transmute(a) }
 }
 
@@ -318,8 +335,9 @@ pub fn _mm_castpd_ph(a: __m128d) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castpd_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_castpd_ph(a: __m256d) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castpd_ph(a: __m256d) -> __m256h {
     unsafe { transmute(a) }
 }
 
@@ -329,8 +347,9 @@ pub fn _mm256_castpd_ph(a: __m256d) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castpd_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castpd_ph(a: __m512d) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castpd_ph(a: __m512d) -> __m512h {
     unsafe { transmute(a) }
 }
 
@@ -340,8 +359,9 @@ pub fn _mm512_castpd_ph(a: __m512d) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_pd)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_castph_pd(a: __m128h) -> __m128d {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castph_pd(a: __m128h) -> __m128d {
     unsafe { transmute(a) }
 }
 
@@ -351,8 +371,9 @@ pub fn _mm_castph_pd(a: __m128h) -> __m128d {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_pd)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_castph_pd(a: __m256h) -> __m256d {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castph_pd(a: __m256h) -> __m256d {
     unsafe { transmute(a) }
 }
 
@@ -362,8 +383,9 @@ pub fn _mm256_castph_pd(a: __m256h) -> __m256d {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_pd)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castph_pd(a: __m512h) -> __m512d {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castph_pd(a: __m512h) -> __m512d {
     unsafe { transmute(a) }
 }
 
@@ -373,8 +395,9 @@ pub fn _mm512_castph_pd(a: __m512h) -> __m512d {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_castps_ph(a: __m128) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castps_ph(a: __m128) -> __m128h {
     unsafe { transmute(a) }
 }
 
@@ -384,8 +407,9 @@ pub fn _mm_castps_ph(a: __m128) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castps_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_castps_ph(a: __m256) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castps_ph(a: __m256) -> __m256h {
     unsafe { transmute(a) }
 }
 
@@ -395,8 +419,9 @@ pub fn _mm256_castps_ph(a: __m256) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castps_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castps_ph(a: __m512) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castps_ph(a: __m512) -> __m512h {
     unsafe { transmute(a) }
 }
 
@@ -406,8 +431,9 @@ pub fn _mm512_castps_ph(a: __m512) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_ps)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_castph_ps(a: __m128h) -> __m128 {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castph_ps(a: __m128h) -> __m128 {
     unsafe { transmute(a) }
 }
 
@@ -417,8 +443,9 @@ pub fn _mm_castph_ps(a: __m128h) -> __m128 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_ps)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_castph_ps(a: __m256h) -> __m256 {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castph_ps(a: __m256h) -> __m256 {
     unsafe { transmute(a) }
 }
 
@@ -428,8 +455,9 @@ pub fn _mm256_castph_ps(a: __m256h) -> __m256 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_ps)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castph_ps(a: __m512h) -> __m512 {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castph_ps(a: __m512h) -> __m512 {
     unsafe { transmute(a) }
 }
 
@@ -439,8 +467,9 @@ pub fn _mm512_castph_ps(a: __m512h) -> __m512 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_castsi128_ph(a: __m128i) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castsi128_ph(a: __m128i) -> __m128h {
     unsafe { transmute(a) }
 }
 
@@ -450,8 +479,9 @@ pub fn _mm_castsi128_ph(a: __m128i) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castsi256_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
     unsafe { transmute(a) }
 }
 
@@ -461,8 +491,9 @@ pub fn _mm256_castsi256_ph(a: __m256i) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castsi512_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
     unsafe { transmute(a) }
 }
 
@@ -472,8 +503,9 @@ pub fn _mm512_castsi512_ph(a: __m512i) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castph_si128)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_castph_si128(a: __m128h) -> __m128i {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castph_si128(a: __m128h) -> __m128i {
     unsafe { transmute(a) }
 }
 
@@ -483,8 +515,9 @@ pub fn _mm_castph_si128(a: __m128h) -> __m128i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph_si256)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_castph_si256(a: __m256h) -> __m256i {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castph_si256(a: __m256h) -> __m256i {
     unsafe { transmute(a) }
 }
 
@@ -494,8 +527,9 @@ pub fn _mm256_castph_si256(a: __m256h) -> __m256i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph_si512)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castph_si512(a: __m512h) -> __m512i {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castph_si512(a: __m512h) -> __m512i {
     unsafe { transmute(a) }
 }
 
@@ -505,8 +539,9 @@ pub fn _mm512_castph_si512(a: __m512h) -> __m512i {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph256_ph128)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 
@@ -516,8 +551,9 @@ pub fn _mm256_castph256_ph128(a: __m256h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph128)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
 }
 
@@ -527,8 +563,9 @@ pub fn _mm512_castph512_ph128(a: __m512h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph512_ph256)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
     unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) }
 }
 
@@ -539,8 +576,9 @@ pub fn _mm512_castph512_ph256(a: __m512h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_castph128_ph256)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
     unsafe {
         simd_shuffle!(
             a,
@@ -557,8 +595,9 @@ pub fn _mm256_castph128_ph256(a: __m128h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph128_ph512)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
     unsafe {
         simd_shuffle!(
             a,
@@ -578,8 +617,9 @@ pub fn _mm512_castph128_ph512(a: __m128h) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_castph256_ph512)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
     unsafe {
         simd_shuffle!(
             a,
@@ -599,8 +639,9 @@ pub fn _mm512_castph256_ph512(a: __m256h) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_zextph128_ph256)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
     unsafe {
         simd_shuffle!(
             a,
@@ -617,8 +658,9 @@ pub fn _mm256_zextph128_ph256(a: __m128h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph256_ph512)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
     unsafe {
         simd_shuffle!(
             a,
@@ -638,8 +680,9 @@ pub fn _mm512_zextph256_ph512(a: __m256h) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_zextph128_ph512)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
     unsafe {
         simd_shuffle!(
             a,
@@ -652,34 +695,6 @@ pub fn _mm512_zextph128_ph512(a: __m128h) -> __m512h {
     }
 }
 
-macro_rules! cmp_asm { // FIXME: use LLVM intrinsics
-    ($mask_type: ty, $reg: ident, $a: expr, $b: expr) => {{
-        let dst: $mask_type;
-        asm!(
-            "vcmpph {k}, {a}, {b}, {imm8}",
-            k = lateout(kreg) dst,
-            a = in($reg) $a,
-            b = in($reg) $b,
-            imm8 = const IMM5,
-            options(pure, nomem, nostack)
-        );
-        dst
-    }};
-    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr, $b: expr) => {{
-        let dst: $mask_type;
-        asm!(
-            "vcmpph {k} {{ {mask} }}, {a}, {b}, {imm8}",
-            k = lateout(kreg) dst,
-            mask = in(kreg) $mask,
-            a = in($reg) $a,
-            b = in($reg) $b,
-            imm8 = const IMM5,
-            options(pure, nomem, nostack)
-        );
-        dst
-    }};
-}
-
 /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
 /// operand specified by imm8, and store the results in mask vector k.
 ///
@@ -687,12 +702,9 @@ macro_rules! cmp_asm { // FIXME: use LLVM intrinsics
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM5, 5);
-        cmp_asm!(__mmask8, xmm_reg, a, b)
-    }
+    _mm_mask_cmp_ph_mask::<IMM5>(!0, a, b)
 }
 
 /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
@@ -703,11 +715,11 @@ pub fn _mm_cmp_ph_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM5, 5);
-        cmp_asm!(__mmask8, k1, xmm_reg, a, b)
+        vcmpph_128(a, b, IMM5, k1)
     }
 }
 
@@ -718,12 +730,9 @@ pub fn _mm_mask_cmp_ph_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16 {
-    unsafe {
-        static_assert_uimm_bits!(IMM5, 5);
-        cmp_asm!(__mmask16, ymm_reg, a, b)
-    }
+    _mm256_mask_cmp_ph_mask::<IMM5>(!0, a, b)
 }
 
 /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
@@ -734,7 +743,7 @@ pub fn _mm256_cmp_ph_mask<const IMM5: i32>(a: __m256h, b: __m256h) -> __mmask16
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
     k1: __mmask16,
     a: __m256h,
@@ -742,7 +751,7 @@ pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
 ) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM5, 5);
-        cmp_asm!(__mmask16, k1, ymm_reg, a, b)
+        vcmpph_256(a, b, IMM5, k1)
     }
 }
 
@@ -753,12 +762,9 @@ pub fn _mm256_mask_cmp_ph_mask<const IMM5: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32 {
-    unsafe {
-        static_assert_uimm_bits!(IMM5, 5);
-        cmp_asm!(__mmask32, zmm_reg, a, b)
-    }
+    _mm512_mask_cmp_ph_mask::<IMM5>(!0, a, b)
 }
 
 /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
@@ -769,16 +775,13 @@ pub fn _mm512_cmp_ph_mask<const IMM5: i32>(a: __m512h, b: __m512h) -> __mmask32
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
     k1: __mmask32,
     a: __m512h,
     b: __m512h,
 ) -> __mmask32 {
-    unsafe {
-        static_assert_uimm_bits!(IMM5, 5);
-        cmp_asm!(__mmask32, k1, zmm_reg, a, b)
-    }
+    _mm512_mask_cmp_round_ph_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
 }
 
 /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
@@ -790,29 +793,12 @@ pub fn _mm512_mask_cmp_ph_mask<const IMM5: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
     a: __m512h,
     b: __m512h,
 ) -> __mmask32 {
-    unsafe {
-        static_assert_uimm_bits!(IMM5, 5);
-        static_assert_sae!(SAE);
-        if SAE == _MM_FROUND_NO_EXC {
-            let dst: __mmask32;
-            asm!(
-                "vcmpph {k}, {a}, {b}, {{sae}}, {imm8}",
-                k = lateout(kreg) dst,
-                a = in(zmm_reg) a,
-                b = in(zmm_reg) b,
-                imm8 = const IMM5,
-                options(pure, nomem, nostack)
-            );
-            dst
-        } else {
-            cmp_asm!(__mmask32, zmm_reg, a, b)
-        }
-    }
+    _mm512_mask_cmp_round_ph_mask::<IMM5, SAE>(!0, a, b)
 }
 
 /// Compare packed half-precision (16-bit) floating-point elements in a and b based on the comparison
@@ -825,7 +811,7 @@ pub fn _mm512_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
     k1: __mmask32,
     a: __m512h,
@@ -834,21 +820,7 @@ pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
     unsafe {
         static_assert_uimm_bits!(IMM5, 5);
         static_assert_sae!(SAE);
-        if SAE == _MM_FROUND_NO_EXC {
-            let dst: __mmask32;
-            asm!(
-                "vcmpph {k} {{{k1}}}, {a}, {b}, {{sae}}, {imm8}",
-                k = lateout(kreg) dst,
-                k1 = in(kreg) k1,
-                a = in(zmm_reg) a,
-                b = in(zmm_reg) b,
-                imm8 = const IMM5,
-                options(pure, nomem, nostack)
-            );
-            dst
-        } else {
-            cmp_asm!(__mmask32, k1, zmm_reg, a, b)
-        }
+        vcmpph_512(a, b, IMM5, k1, SAE)
     }
 }
 
@@ -860,7 +832,7 @@ pub fn _mm512_mask_cmp_round_ph_mask<const IMM5: i32, const SAE: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __mmask8 {
     static_assert_uimm_bits!(IMM5, 5);
     static_assert_sae!(SAE);
@@ -875,7 +847,7 @@ pub fn _mm_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
     k1: __mmask8,
     a: __m128h,
@@ -895,7 +867,7 @@ pub fn _mm_mask_cmp_round_sh_mask<const IMM5: i32, const SAE: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
     static_assert_uimm_bits!(IMM5, 5);
     _mm_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
@@ -908,7 +880,7 @@ pub fn _mm_cmp_sh_mask<const IMM5: i32>(a: __m128h, b: __m128h) -> __mmask8 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128h) -> __mmask8 {
     static_assert_uimm_bits!(IMM5, 5);
     _mm_mask_cmp_round_sh_mask::<IMM5, _MM_FROUND_CUR_DIRECTION>(k1, a, b)
@@ -922,7 +894,7 @@ pub fn _mm_mask_cmp_sh_mask<const IMM5: i32>(k1: __mmask8, a: __m128h, b: __m128
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h) -> i32 {
     unsafe {
         static_assert_uimm_bits!(IMM5, 5);
@@ -938,7 +910,7 @@ pub fn _mm_comi_round_sh<const IMM5: i32, const SAE: i32>(a: __m128h, b: __m128h
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
     static_assert_uimm_bits!(IMM5, 5);
     _mm_comi_round_sh::<IMM5, _MM_FROUND_CUR_DIRECTION>(a, b)
@@ -950,7 +922,7 @@ pub fn _mm_comi_sh<const IMM5: i32>(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_EQ_OS>(a, b)
 }
@@ -961,7 +933,7 @@ pub fn _mm_comieq_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_GE_OS>(a, b)
 }
@@ -972,7 +944,7 @@ pub fn _mm_comige_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_GT_OS>(a, b)
 }
@@ -983,7 +955,7 @@ pub fn _mm_comigt_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_LE_OS>(a, b)
 }
@@ -994,7 +966,7 @@ pub fn _mm_comile_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_LT_OS>(a, b)
 }
@@ -1005,9 +977,9 @@ pub fn _mm_comilt_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
-    _mm_comi_sh::<_CMP_NEQ_OS>(a, b)
+    _mm_comi_sh::<_CMP_NEQ_US>(a, b)
 }
 
 /// Compare the lower half-precision (16-bit) floating-point elements in a and b for equality, and
@@ -1016,7 +988,7 @@ pub fn _mm_comineq_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_EQ_OQ>(a, b)
 }
@@ -1027,7 +999,7 @@ pub fn _mm_ucomieq_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_GE_OQ>(a, b)
 }
@@ -1038,7 +1010,7 @@ pub fn _mm_ucomige_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_GT_OQ>(a, b)
 }
@@ -1049,7 +1021,7 @@ pub fn _mm_ucomigt_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_LE_OQ>(a, b)
 }
@@ -1060,7 +1032,7 @@ pub fn _mm_ucomile_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
     _mm_comi_sh::<_CMP_LT_OQ>(a, b)
 }
@@ -1071,9 +1043,9 @@ pub fn _mm_ucomilt_sh(a: __m128h, b: __m128h) -> i32 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
-    _mm_comi_sh::<_CMP_NEQ_OQ>(a, b)
+    _mm_comi_sh::<_CMP_NEQ_UQ>(a, b)
 }
 
 /// Load 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from memory into
@@ -1083,7 +1055,8 @@ pub fn _mm_ucomineq_sh(a: __m128h, b: __m128h) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
     *mem_addr.cast()
 }
 
@@ -1094,7 +1067,8 @@ pub unsafe fn _mm_load_ph(mem_addr: *const f16) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
     *mem_addr.cast()
 }
 
@@ -1105,7 +1079,8 @@ pub unsafe fn _mm256_load_ph(mem_addr: *const f16) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
     *mem_addr.cast()
 }
 
@@ -1116,7 +1091,8 @@ pub unsafe fn _mm512_load_ph(mem_addr: *const f16) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_sh(mem_addr: *const f16) -> __m128h {
     _mm_set_sh(*mem_addr)
 }
 
@@ -1165,7 +1141,8 @@ pub unsafe fn _mm_maskz_load_sh(k: __mmask8, mem_addr: *const f16) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
     ptr::read_unaligned(mem_addr.cast())
 }
 
@@ -1176,7 +1153,8 @@ pub unsafe fn _mm_loadu_ph(mem_addr: *const f16) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
     ptr::read_unaligned(mem_addr.cast())
 }
 
@@ -1187,7 +1165,8 @@ pub unsafe fn _mm256_loadu_ph(mem_addr: *const f16) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
     ptr::read_unaligned(mem_addr.cast())
 }
 
@@ -1198,8 +1177,9 @@ pub unsafe fn _mm512_loadu_ph(mem_addr: *const f16) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_move_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let mut mov: f16 = simd_extract!(src, 0);
         if (k & 1) != 0 {
@@ -1216,8 +1196,9 @@ pub fn _mm_mask_move_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_move_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let mut mov: f16 = 0.;
         if (k & 1) != 0 {
@@ -1233,8 +1214,9 @@ pub fn _mm_maskz_move_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sh)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let mov: f16 = simd_extract!(b, 0);
         simd_insert!(a, 0, mov)
@@ -1248,7 +1230,8 @@ pub fn _mm_move_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) {
     *mem_addr.cast() = a;
 }
 
@@ -1259,7 +1242,8 @@ pub unsafe fn _mm_store_ph(mem_addr: *mut f16, a: __m128h) {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) {
     *mem_addr.cast() = a;
 }
 
@@ -1270,7 +1254,8 @@ pub unsafe fn _mm256_store_ph(mem_addr: *mut f16, a: __m256h) {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) {
     *mem_addr.cast() = a;
 }
 
@@ -1280,7 +1265,8 @@ pub unsafe fn _mm512_store_ph(mem_addr: *mut f16, a: __m512h) {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_sh(mem_addr: *mut f16, a: __m128h) {
     *mem_addr = simd_extract!(a, 0);
 }
 
@@ -1307,7 +1293,8 @@ pub unsafe fn _mm_mask_store_sh(mem_addr: *mut f16, k: __mmask8, a: __m128h) {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) {
     ptr::write_unaligned(mem_addr.cast(), a);
 }
 
@@ -1318,7 +1305,8 @@ pub unsafe fn _mm_storeu_ph(mem_addr: *mut f16, a: __m128h) {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) {
     ptr::write_unaligned(mem_addr.cast(), a);
 }
 
@@ -1329,7 +1317,8 @@ pub unsafe fn _mm256_storeu_ph(mem_addr: *mut f16, a: __m256h) {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) {
     ptr::write_unaligned(mem_addr.cast(), a);
 }
 
@@ -1339,8 +1328,9 @@ pub unsafe fn _mm512_storeu_ph(mem_addr: *mut f16, a: __m512h) {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vaddph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_add(a, b) }
 }
 
@@ -1351,8 +1341,9 @@ pub fn _mm_add_ph(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vaddph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let r = _mm_add_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -1366,8 +1357,9 @@ pub fn _mm_mask_add_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vaddph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let r = _mm_add_ph(a, b);
         simd_select_bitmask(k, r, _mm_setzero_ph())
@@ -1380,8 +1372,9 @@ pub fn _mm_maskz_add_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vaddph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h {
     unsafe { simd_add(a, b) }
 }
 
@@ -1392,8 +1385,9 @@ pub fn _mm256_add_ph(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vaddph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe {
         let r = _mm256_add_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -1407,8 +1401,9 @@ pub fn _mm256_mask_add_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vaddph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe {
         let r = _mm256_add_ph(a, b);
         simd_select_bitmask(k, r, _mm256_setzero_ph())
@@ -1421,8 +1416,9 @@ pub fn _mm256_maskz_add_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h {
     unsafe { simd_add(a, b) }
 }
 
@@ -1433,8 +1429,9 @@ pub fn _mm512_add_ph(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         let r = _mm512_add_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -1448,8 +1445,9 @@ pub fn _mm512_mask_add_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         let r = _mm512_add_ph(a, b);
         simd_select_bitmask(k, r, _mm512_setzero_ph())
@@ -1470,7 +1468,7 @@ pub fn _mm512_maskz_add_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -1493,7 +1491,7 @@ pub fn _mm512_add_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m51
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask32,
@@ -1521,7 +1519,7 @@ pub fn _mm512_mask_add_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -1549,7 +1547,7 @@ pub fn _mm512_maskz_add_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -1571,7 +1569,7 @@ pub fn _mm_add_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_add_round_sh<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -1600,7 +1598,7 @@ pub fn _mm_mask_add_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_add_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
@@ -1613,8 +1611,9 @@ pub fn _mm_maskz_add_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) + _mm_cvtsh_h(b)) }
 }
 
@@ -1626,8 +1625,9 @@ pub fn _mm_add_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let extractsrc: f16 = simd_extract!(src, 0);
         let mut add: f16 = extractsrc;
@@ -1648,8 +1648,9 @@ pub fn _mm_mask_add_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vaddsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let mut add: f16 = 0.;
         if (k & 0b00000001) != 0 {
@@ -1667,8 +1668,9 @@ pub fn _mm_maskz_add_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsubph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_sub(a, b) }
 }
 
@@ -1679,8 +1681,9 @@ pub fn _mm_sub_ph(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsubph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let r = _mm_sub_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -1694,8 +1697,9 @@ pub fn _mm_mask_sub_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsubph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let r = _mm_sub_ph(a, b);
         simd_select_bitmask(k, r, _mm_setzero_ph())
@@ -1708,8 +1712,9 @@ pub fn _mm_maskz_sub_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsubph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h {
     unsafe { simd_sub(a, b) }
 }
 
@@ -1720,8 +1725,9 @@ pub fn _mm256_sub_ph(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsubph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe {
         let r = _mm256_sub_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -1735,8 +1741,9 @@ pub fn _mm256_mask_sub_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsubph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe {
         let r = _mm256_sub_ph(a, b);
         simd_select_bitmask(k, r, _mm256_setzero_ph())
@@ -1749,8 +1756,9 @@ pub fn _mm256_maskz_sub_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h {
     unsafe { simd_sub(a, b) }
 }
 
@@ -1761,8 +1769,9 @@ pub fn _mm512_sub_ph(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         let r = _mm512_sub_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -1776,8 +1785,9 @@ pub fn _mm512_mask_sub_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         let r = _mm512_sub_ph(a, b);
         simd_select_bitmask(k, r, _mm512_setzero_ph())
@@ -1798,7 +1808,7 @@ pub fn _mm512_maskz_sub_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -1821,7 +1831,7 @@ pub fn _mm512_sub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m51
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask32,
@@ -1850,7 +1860,7 @@ pub fn _mm512_mask_sub_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -1878,7 +1888,7 @@ pub fn _mm512_maskz_sub_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -1900,7 +1910,7 @@ pub fn _mm_sub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -1929,7 +1939,7 @@ pub fn _mm_mask_sub_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_sub_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
@@ -1942,8 +1952,9 @@ pub fn _mm_maskz_sub_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) - _mm_cvtsh_h(b)) }
 }
 
@@ -1955,8 +1966,9 @@ pub fn _mm_sub_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let extractsrc: f16 = simd_extract!(src, 0);
         let mut add: f16 = extractsrc;
@@ -1977,8 +1989,9 @@ pub fn _mm_mask_sub_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsubsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let mut add: f16 = 0.;
         if (k & 0b00000001) != 0 {
@@ -1996,8 +2009,9 @@ pub fn _mm_maskz_sub_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmulph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_mul(a, b) }
 }
 
@@ -2008,8 +2022,9 @@ pub fn _mm_mul_ph(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmulph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let r = _mm_mul_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -2023,8 +2038,9 @@ pub fn _mm_mask_mul_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmulph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let r = _mm_mul_ph(a, b);
         simd_select_bitmask(k, r, _mm_setzero_ph())
@@ -2037,8 +2053,9 @@ pub fn _mm_maskz_mul_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmulph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h {
     unsafe { simd_mul(a, b) }
 }
 
@@ -2049,8 +2066,9 @@ pub fn _mm256_mul_ph(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmulph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe {
         let r = _mm256_mul_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -2064,8 +2082,9 @@ pub fn _mm256_mask_mul_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmulph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe {
         let r = _mm256_mul_ph(a, b);
         simd_select_bitmask(k, r, _mm256_setzero_ph())
@@ -2078,8 +2097,9 @@ pub fn _mm256_maskz_mul_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h {
     unsafe { simd_mul(a, b) }
 }
 
@@ -2090,8 +2110,9 @@ pub fn _mm512_mul_ph(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         let r = _mm512_mul_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -2105,8 +2126,9 @@ pub fn _mm512_mask_mul_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         let r = _mm512_mul_ph(a, b);
         simd_select_bitmask(k, r, _mm512_setzero_ph())
@@ -2127,7 +2149,7 @@ pub fn _mm512_maskz_mul_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -2150,7 +2172,7 @@ pub fn _mm512_mul_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m51
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask32,
@@ -2179,7 +2201,7 @@ pub fn _mm512_mask_mul_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -2207,7 +2229,7 @@ pub fn _mm512_maskz_mul_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -2229,7 +2251,7 @@ pub fn _mm_mul_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -2258,7 +2280,7 @@ pub fn _mm_mask_mul_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_mul_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
@@ -2271,8 +2293,9 @@ pub fn _mm_maskz_mul_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) * _mm_cvtsh_h(b)) }
 }
 
@@ -2284,8 +2307,9 @@ pub fn _mm_mul_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let extractsrc: f16 = simd_extract!(src, 0);
         let mut add: f16 = extractsrc;
@@ -2306,8 +2330,9 @@ pub fn _mm_mask_mul_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmulsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let mut add: f16 = 0.;
         if (k & 0b00000001) != 0 {
@@ -2325,8 +2350,9 @@ pub fn _mm_maskz_mul_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vdivph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_div(a, b) }
 }
 
@@ -2337,8 +2363,9 @@ pub fn _mm_div_ph(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vdivph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let r = _mm_div_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -2352,8 +2379,9 @@ pub fn _mm_mask_div_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vdivph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let r = _mm_div_ph(a, b);
         simd_select_bitmask(k, r, _mm_setzero_ph())
@@ -2366,8 +2394,9 @@ pub fn _mm_maskz_div_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vdivph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h {
     unsafe { simd_div(a, b) }
 }
 
@@ -2378,8 +2407,9 @@ pub fn _mm256_div_ph(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vdivph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe {
         let r = _mm256_div_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -2393,8 +2423,9 @@ pub fn _mm256_mask_div_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vdivph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe {
         let r = _mm256_div_ph(a, b);
         simd_select_bitmask(k, r, _mm256_setzero_ph())
@@ -2407,8 +2438,9 @@ pub fn _mm256_maskz_div_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h {
     unsafe { simd_div(a, b) }
 }
 
@@ -2419,8 +2451,9 @@ pub fn _mm512_div_ph(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         let r = _mm512_div_ph(a, b);
         simd_select_bitmask(k, r, src)
@@ -2434,8 +2467,9 @@ pub fn _mm512_mask_div_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         let r = _mm512_div_ph(a, b);
         simd_select_bitmask(k, r, _mm512_setzero_ph())
@@ -2456,7 +2490,7 @@ pub fn _mm512_maskz_div_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -2479,7 +2513,7 @@ pub fn _mm512_div_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m51
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask32,
@@ -2508,7 +2542,7 @@ pub fn _mm512_mask_div_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -2536,7 +2570,7 @@ pub fn _mm512_maskz_div_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -2558,7 +2592,7 @@ pub fn _mm_div_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_div_round_sh<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -2587,7 +2621,7 @@ pub fn _mm_mask_div_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_div_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a, b)
@@ -2600,8 +2634,9 @@ pub fn _mm_maskz_div_round_sh<const ROUNDING: i32>(k: __mmask8, a: __m128h, b: _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_insert!(a, 0, _mm_cvtsh_h(a) / _mm_cvtsh_h(b)) }
 }
 
@@ -2613,8 +2648,9 @@ pub fn _mm_div_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let extractsrc: f16 = simd_extract!(src, 0);
         let mut add: f16 = extractsrc;
@@ -2635,8 +2671,9 @@ pub fn _mm_mask_div_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vdivsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe {
         let mut add: f16 = 0.;
         if (k & 0b00000001) != 0 {
@@ -2656,7 +2693,7 @@ pub fn _mm_maskz_div_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_mul_pch(_mm_undefined_ph(), 0xff, a, b)
 }
@@ -2669,7 +2706,7 @@ pub fn _mm_mul_pch(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { transmute(vfmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
 }
@@ -2682,7 +2719,7 @@ pub fn _mm_mask_mul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_mul_pch(_mm_setzero_ph(), k, a, b)
 }
@@ -2695,7 +2732,7 @@ pub fn _mm_maskz_mul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h {
     _mm256_mask_mul_pch(_mm256_undefined_ph(), 0xff, a, b)
 }
@@ -2708,7 +2745,7 @@ pub fn _mm256_mul_pch(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
     unsafe { transmute(vfmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
 }
@@ -2721,7 +2758,7 @@ pub fn _mm256_mask_mul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
     _mm256_mask_mul_pch(_mm256_setzero_ph(), k, a, b)
 }
@@ -2734,7 +2771,7 @@ pub fn _mm256_maskz_mul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_mul_pch(_mm512_undefined_ph(), 0xffff, a, b)
 }
@@ -2747,7 +2784,7 @@ pub fn _mm512_mul_pch(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_mul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -2760,7 +2797,7 @@ pub fn _mm512_mask_mul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_mul_pch(_mm512_setzero_ph(), k, a, b)
 }
@@ -2782,7 +2819,7 @@ pub fn _mm512_maskz_mul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_mul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
@@ -2805,7 +2842,7 @@ pub fn _mm512_mul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m5
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask16,
@@ -2841,7 +2878,7 @@ pub fn _mm512_mask_mul_round_pch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>(
     k: __mmask16,
     a: __m512h,
@@ -2860,7 +2897,7 @@ pub fn _mm512_maskz_mul_round_pch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), 0xff, a, b)
 }
@@ -2874,7 +2911,7 @@ pub fn _mm_mul_sch(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_mul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -2888,7 +2925,7 @@ pub fn _mm_mask_mul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_mul_sch(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -2911,7 +2948,7 @@ pub fn _mm_maskz_mul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_mul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -2935,7 +2972,7 @@ pub fn _mm_mul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -2972,7 +3009,7 @@ pub fn _mm_mask_mul_round_sch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -2990,7 +3027,7 @@ pub fn _mm_maskz_mul_round_sch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h {
     _mm_mul_pch(a, b)
 }
@@ -3003,7 +3040,7 @@ pub fn _mm_fmul_pch(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_mul_pch(src, k, a, b)
 }
@@ -3016,7 +3053,7 @@ pub fn _mm_mask_fmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_maskz_mul_pch(k, a, b)
 }
@@ -3029,7 +3066,7 @@ pub fn _mm_maskz_fmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h {
     _mm256_mul_pch(a, b)
 }
@@ -3042,7 +3079,7 @@ pub fn _mm256_fmul_pch(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
     _mm256_mask_mul_pch(src, k, a, b)
 }
@@ -3055,7 +3092,7 @@ pub fn _mm256_mask_fmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
     _mm256_maskz_mul_pch(k, a, b)
 }
@@ -3067,7 +3104,7 @@ pub fn _mm256_maskz_fmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h {
     _mm512_mul_pch(a, b)
 }
@@ -3080,7 +3117,7 @@ pub fn _mm512_fmul_pch(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_mul_pch(src, k, a, b)
 }
@@ -3093,7 +3130,7 @@ pub fn _mm512_mask_fmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
     _mm512_maskz_mul_pch(k, a, b)
 }
@@ -3113,7 +3150,7 @@ pub fn _mm512_maskz_fmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
     static_assert_rounding!(ROUNDING);
     _mm512_mul_round_pch::<ROUNDING>(a, b)
@@ -3135,7 +3172,7 @@ pub fn _mm512_fmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask16,
@@ -3162,7 +3199,7 @@ pub fn _mm512_mask_fmul_round_pch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>(
     k: __mmask16,
     a: __m512h,
@@ -3180,7 +3217,7 @@ pub fn _mm512_maskz_fmul_round_pch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h {
     _mm_mul_sch(a, b)
 }
@@ -3193,7 +3230,7 @@ pub fn _mm_fmul_sch(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_mul_sch(src, k, a, b)
 }
@@ -3206,7 +3243,7 @@ pub fn _mm_mask_fmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_maskz_mul_sch(k, a, b)
 }
@@ -3227,7 +3264,7 @@ pub fn _mm_maskz_fmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mul_round_sch::<ROUNDING>(a, b)
@@ -3250,7 +3287,7 @@ pub fn _mm_fmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -3278,7 +3315,7 @@ pub fn _mm_mask_fmul_round_sch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -3297,7 +3334,7 @@ pub fn _mm_maskz_fmul_round_sch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_cmul_pch(_mm_undefined_ph(), 0xff, a, b)
 }
@@ -3311,7 +3348,7 @@ pub fn _mm_cmul_pch(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { transmute(vfcmulcph_128(transmute(a), transmute(b), transmute(src), k)) }
 }
@@ -3325,7 +3362,7 @@ pub fn _mm_mask_cmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_cmul_pch(_mm_setzero_ph(), k, a, b)
 }
@@ -3339,7 +3376,7 @@ pub fn _mm_maskz_cmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h {
     _mm256_mask_cmul_pch(_mm256_undefined_ph(), 0xff, a, b)
 }
@@ -3353,7 +3390,7 @@ pub fn _mm256_cmul_pch(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
     unsafe { transmute(vfcmulcph_256(transmute(a), transmute(b), transmute(src), k)) }
 }
@@ -3367,7 +3404,7 @@ pub fn _mm256_mask_cmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
     _mm256_mask_cmul_pch(_mm256_setzero_ph(), k, a, b)
 }
@@ -3381,7 +3418,7 @@ pub fn _mm256_maskz_cmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_cmul_pch(_mm512_undefined_ph(), 0xffff, a, b)
 }
@@ -3395,7 +3432,7 @@ pub fn _mm512_cmul_pch(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_cmul_round_pch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -3409,7 +3446,7 @@ pub fn _mm512_mask_cmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_cmul_pch(_mm512_setzero_ph(), k, a, b)
 }
@@ -3432,7 +3469,7 @@ pub fn _mm512_maskz_cmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cmul_round_pch::<ROUNDING>(_mm512_undefined_ph(), 0xffff, a, b)
@@ -3456,7 +3493,7 @@ pub fn _mm512_cmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask16,
@@ -3493,7 +3530,7 @@ pub fn _mm512_mask_cmul_round_pch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>(
     k: __mmask16,
     a: __m512h,
@@ -3511,7 +3548,7 @@ pub fn _mm512_maskz_cmul_round_pch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_cmul_sch(f16x8::ZERO.as_m128h(), 0xff, a, b)
 }
@@ -3525,7 +3562,7 @@ pub fn _mm_cmul_sch(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_cmul_round_sch::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -3539,7 +3576,7 @@ pub fn _mm_mask_cmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_cmul_sch(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -3561,7 +3598,7 @@ pub fn _mm_maskz_cmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_cmul_round_sch::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -3585,7 +3622,7 @@ pub fn _mm_cmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -3622,7 +3659,7 @@ pub fn _mm_mask_cmul_round_sch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -3641,7 +3678,7 @@ pub fn _mm_maskz_cmul_round_sch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h {
     _mm_cmul_pch(a, b)
 }
@@ -3655,7 +3692,7 @@ pub fn _mm_fcmul_pch(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_cmul_pch(src, k, a, b)
 }
@@ -3669,7 +3706,7 @@ pub fn _mm_mask_fcmul_pch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_maskz_cmul_pch(k, a, b)
 }
@@ -3683,7 +3720,7 @@ pub fn _mm_maskz_fcmul_pch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h {
     _mm256_cmul_pch(a, b)
 }
@@ -3697,7 +3734,7 @@ pub fn _mm256_fcmul_pch(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
     _mm256_mask_cmul_pch(src, k, a, b)
 }
@@ -3711,7 +3748,7 @@ pub fn _mm256_mask_fcmul_pch(src: __m256h, k: __mmask8, a: __m256h, b: __m256h)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
     _mm256_maskz_cmul_pch(k, a, b)
 }
@@ -3725,7 +3762,7 @@ pub fn _mm256_maskz_fcmul_pch(k: __mmask8, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h {
     _mm512_cmul_pch(a, b)
 }
@@ -3739,7 +3776,7 @@ pub fn _mm512_fcmul_pch(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_cmul_pch(src, k, a, b)
 }
@@ -3753,7 +3790,7 @@ pub fn _mm512_mask_fcmul_pch(src: __m512h, k: __mmask16, a: __m512h, b: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
     _mm512_maskz_cmul_pch(k, a, b)
 }
@@ -3775,7 +3812,7 @@ pub fn _mm512_maskz_fcmul_pch(k: __mmask16, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
     static_assert_rounding!(ROUNDING);
     _mm512_cmul_round_pch::<ROUNDING>(a, b)
@@ -3799,7 +3836,7 @@ pub fn _mm512_fcmul_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask16,
@@ -3828,7 +3865,7 @@ pub fn _mm512_mask_fcmul_round_pch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>(
     k: __mmask16,
     a: __m512h,
@@ -3847,7 +3884,7 @@ pub fn _mm512_maskz_fcmul_round_pch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h {
     _mm_cmul_sch(a, b)
 }
@@ -3861,7 +3898,7 @@ pub fn _mm_fcmul_sch(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_cmul_sch(src, k, a, b)
 }
@@ -3875,7 +3912,7 @@ pub fn _mm_mask_fcmul_sch(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_maskz_cmul_sch(k, a, b)
 }
@@ -3897,7 +3934,7 @@ pub fn _mm_maskz_fcmul_sch(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_cmul_round_sch::<ROUNDING>(a, b)
@@ -3921,7 +3958,7 @@ pub fn _mm_fcmul_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m12
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -3950,7 +3987,7 @@ pub fn _mm_mask_fcmul_round_sch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmulcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -3966,8 +4003,9 @@ pub fn _mm_maskz_fcmul_round_sch<const ROUNDING: i32>(
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_abs_ph(v2: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_abs_ph(v2: __m128h) -> __m128h {
     unsafe { transmute(_mm_and_si128(transmute(v2), _mm_set1_epi16(i16::MAX))) }
 }
 
@@ -3977,8 +4015,9 @@ pub fn _mm_abs_ph(v2: __m128h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_abs_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_abs_ph(v2: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_abs_ph(v2: __m256h) -> __m256h {
     unsafe { transmute(_mm256_and_si256(transmute(v2), _mm256_set1_epi16(i16::MAX))) }
 }
 
@@ -3988,8 +4027,9 @@ pub fn _mm256_abs_ph(v2: __m256h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_abs_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_abs_ph(v2: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_abs_ph(v2: __m512h) -> __m512h {
     unsafe { transmute(_mm512_and_si512(transmute(v2), _mm512_set1_epi16(i16::MAX))) }
 }
 
@@ -4001,8 +4041,9 @@ pub fn _mm512_abs_ph(v2: __m512h) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conj_pch)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_conj_pch(a: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_conj_pch(a: __m128h) -> __m128h {
     unsafe { transmute(_mm_xor_si128(transmute(a), _mm_set1_epi32(i32::MIN))) }
 }
 
@@ -4014,8 +4055,9 @@ pub fn _mm_conj_pch(a: __m128h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conj_pch)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
     unsafe {
         let r: __m128 = transmute(_mm_conj_pch(a));
         transmute(simd_select_bitmask(k, r, transmute(src)))
@@ -4030,8 +4072,9 @@ pub fn _mm_mask_conj_pch(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conj_pch)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h {
     _mm_mask_conj_pch(_mm_setzero_ph(), k, a)
 }
 
@@ -4042,8 +4085,9 @@ pub fn _mm_maskz_conj_pch(k: __mmask8, a: __m128h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conj_pch)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_conj_pch(a: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_conj_pch(a: __m256h) -> __m256h {
     unsafe { transmute(_mm256_xor_si256(transmute(a), _mm256_set1_epi32(i32::MIN))) }
 }
 
@@ -4055,8 +4099,9 @@ pub fn _mm256_conj_pch(a: __m256h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conj_pch)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h {
     unsafe {
         let r: __m256 = transmute(_mm256_conj_pch(a));
         transmute(simd_select_bitmask(k, r, transmute(src)))
@@ -4071,8 +4116,9 @@ pub fn _mm256_mask_conj_pch(src: __m256h, k: __mmask8, a: __m256h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conj_pch)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h {
     _mm256_mask_conj_pch(_mm256_setzero_ph(), k, a)
 }
 
@@ -4083,8 +4129,9 @@ pub fn _mm256_maskz_conj_pch(k: __mmask8, a: __m256h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conj_pch)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_conj_pch(a: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_conj_pch(a: __m512h) -> __m512h {
     unsafe { transmute(_mm512_xor_si512(transmute(a), _mm512_set1_epi32(i32::MIN))) }
 }
 
@@ -4096,8 +4143,9 @@ pub fn _mm512_conj_pch(a: __m512h) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conj_pch)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h {
     unsafe {
         let r: __m512 = transmute(_mm512_conj_pch(a));
         transmute(simd_select_bitmask(k, r, transmute(src)))
@@ -4112,8 +4160,9 @@ pub fn _mm512_mask_conj_pch(src: __m512h, k: __mmask16, a: __m512h) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conj_pch)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h {
     _mm512_mask_conj_pch(_mm512_setzero_ph(), k, a)
 }
 
@@ -4125,7 +4174,7 @@ pub fn _mm512_maskz_conj_pch(k: __mmask16, a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     _mm_mask3_fmadd_pch(a, b, c, 0xff)
 }
@@ -4139,7 +4188,7 @@ pub fn _mm_fmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let r: __m128 = transmute(_mm_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
@@ -4156,7 +4205,7 @@ pub fn _mm_mask_fmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe {
         transmute(vfmaddcph_mask3_128(
@@ -4177,7 +4226,7 @@ pub fn _mm_mask3_fmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         transmute(vfmaddcph_maskz_128(
@@ -4197,7 +4246,7 @@ pub fn _mm_maskz_fmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     _mm256_mask3_fmadd_pch(a, b, c, 0xff)
 }
@@ -4211,7 +4260,7 @@ pub fn _mm256_fmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
     unsafe {
         let r: __m256 = transmute(_mm256_mask3_fmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
@@ -4228,7 +4277,7 @@ pub fn _mm256_mask_fmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
     unsafe {
         transmute(vfmaddcph_mask3_256(
@@ -4249,7 +4298,7 @@ pub fn _mm256_mask3_fmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe {
         transmute(vfmaddcph_maskz_256(
@@ -4269,7 +4318,7 @@ pub fn _mm256_maskz_fmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     _mm512_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
 }
@@ -4283,7 +4332,7 @@ pub fn _mm512_fmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
     _mm512_mask_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
 }
@@ -4297,7 +4346,7 @@ pub fn _mm512_mask_fmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
     _mm512_mask3_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
 }
@@ -4311,7 +4360,7 @@ pub fn _mm512_mask3_fmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     _mm512_maskz_fmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
 }
@@ -4333,7 +4382,7 @@ pub fn _mm512_maskz_fmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h)
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask3_fmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
@@ -4357,7 +4406,7 @@ pub fn _mm512_fmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>(
     a: __m512h,
     k: __mmask16,
@@ -4389,7 +4438,7 @@ pub fn _mm512_mask_fmadd_round_pch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -4426,7 +4475,7 @@ pub fn _mm512_mask3_fmadd_round_pch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>(
     k: __mmask16,
     a: __m512h,
@@ -4454,7 +4503,7 @@ pub fn _mm512_maskz_fmadd_round_pch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     _mm_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
 }
@@ -4469,7 +4518,7 @@ pub fn _mm_fmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     _mm_mask_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
 }
@@ -4484,7 +4533,7 @@ pub fn _mm_mask_fmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     _mm_mask3_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
 }
@@ -4499,7 +4548,7 @@ pub fn _mm_mask3_fmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     _mm_maskz_fmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
 }
@@ -4521,7 +4570,7 @@ pub fn _mm_maskz_fmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> _
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -4554,7 +4603,7 @@ pub fn _mm_fmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m12
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>(
     a: __m128h,
     k: __mmask8,
@@ -4588,7 +4637,7 @@ pub fn _mm_mask_fmadd_round_sch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>(
     a: __m128h,
     b: __m128h,
@@ -4622,7 +4671,7 @@ pub fn _mm_mask3_fmadd_round_sch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -4650,7 +4699,7 @@ pub fn _mm_maskz_fmadd_round_sch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     _mm_mask3_fcmadd_pch(a, b, c, 0xff)
 }
@@ -4665,7 +4714,7 @@ pub fn _mm_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let r: __m128 = transmute(_mm_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
@@ -4683,7 +4732,7 @@ pub fn _mm_mask_fcmadd_pch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe {
         transmute(vfcmaddcph_mask3_128(
@@ -4705,7 +4754,7 @@ pub fn _mm_mask3_fcmadd_pch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         transmute(vfcmaddcph_maskz_128(
@@ -4726,7 +4775,7 @@ pub fn _mm_maskz_fcmadd_pch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     _mm256_mask3_fcmadd_pch(a, b, c, 0xff)
 }
@@ -4741,7 +4790,7 @@ pub fn _mm256_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -> __m256h {
     unsafe {
         let r: __m256 = transmute(_mm256_mask3_fcmadd_pch(a, b, c, k)); // using `0xff` would have been fine here, but this is what CLang does
@@ -4759,7 +4808,7 @@ pub fn _mm256_mask_fcmadd_pch(a: __m256h, k: __mmask8, b: __m256h, c: __m256h) -
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8) -> __m256h {
     unsafe {
         transmute(vfcmaddcph_mask3_256(
@@ -4781,7 +4830,7 @@ pub fn _mm256_mask3_fcmadd_pch(a: __m256h, b: __m256h, c: __m256h, k: __mmask8)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe {
         transmute(vfcmaddcph_maskz_256(
@@ -4802,7 +4851,7 @@ pub fn _mm256_maskz_fcmadd_pch(k: __mmask8, a: __m256h, b: __m256h, c: __m256h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     _mm512_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
 }
@@ -4817,7 +4866,7 @@ pub fn _mm512_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h) -> __m512h {
     _mm512_mask_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
 }
@@ -4832,7 +4881,7 @@ pub fn _mm512_mask_fcmadd_pch(a: __m512h, k: __mmask16, b: __m512h, c: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16) -> __m512h {
     _mm512_mask3_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
 }
@@ -4847,7 +4896,7 @@ pub fn _mm512_mask3_fcmadd_pch(a: __m512h, b: __m512h, c: __m512h, k: __mmask16)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     _mm512_maskz_fcmadd_round_pch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
 }
@@ -4870,7 +4919,7 @@ pub fn _mm512_maskz_fcmadd_pch(k: __mmask16, a: __m512h, b: __m512h, c: __m512h)
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask3_fcmadd_round_pch::<ROUNDING>(a, b, c, 0xffff)
@@ -4895,7 +4944,7 @@ pub fn _mm512_fcmadd_round_pch<const ROUNDING: i32>(a: __m512h, b: __m512h, c: _
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>(
     a: __m512h,
     k: __mmask16,
@@ -4928,7 +4977,7 @@ pub fn _mm512_mask_fcmadd_round_pch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -4966,7 +5015,7 @@ pub fn _mm512_mask3_fcmadd_round_pch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>(
     k: __mmask16,
     a: __m512h,
@@ -4995,7 +5044,7 @@ pub fn _mm512_maskz_fcmadd_round_pch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     _mm_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c)
 }
@@ -5011,7 +5060,7 @@ pub fn _mm_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     _mm_mask_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, k, b, c)
 }
@@ -5027,7 +5076,7 @@ pub fn _mm_mask_fcmadd_sch(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     _mm_mask3_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(a, b, c, k)
 }
@@ -5043,7 +5092,7 @@ pub fn _mm_mask3_fcmadd_sch(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     _mm_maskz_fcmadd_round_sch::<_MM_FROUND_CUR_DIRECTION>(k, a, b, c)
 }
@@ -5067,7 +5116,7 @@ pub fn _mm_maskz_fcmadd_sch(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) ->
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -5101,7 +5150,7 @@ pub fn _mm_fcmadd_round_sch<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m1
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>(
     a: __m128h,
     k: __mmask8,
@@ -5136,7 +5185,7 @@ pub fn _mm_mask_fcmadd_round_sch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>(
     a: __m128h,
     b: __m128h,
@@ -5171,7 +5220,7 @@ pub fn _mm_mask3_fcmadd_round_sch<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfcmaddcsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -5197,8 +5246,9 @@ pub fn _mm_maskz_fcmadd_round_sch<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_fma(a, b, c) }
 }
 
@@ -5210,8 +5260,9 @@ pub fn _mm_fmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), a) }
 }
 
@@ -5223,8 +5274,9 @@ pub fn _mm_mask_fmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), c) }
 }
 
@@ -5236,8 +5288,9 @@ pub fn _mm_mask3_fmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmadd_ph(a, b, c), _mm_setzero_ph()) }
 }
 
@@ -5248,8 +5301,9 @@ pub fn _mm_maskz_fmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_fma(a, b, c) }
 }
 
@@ -5261,8 +5315,9 @@ pub fn _mm256_fmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), a) }
 }
 
@@ -5274,8 +5329,9 @@ pub fn _mm256_mask_fmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), c) }
 }
 
@@ -5287,8 +5343,9 @@ pub fn _mm256_mask3_fmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmadd_ph(a, b, c), _mm256_setzero_ph()) }
 }
 
@@ -5299,8 +5356,9 @@ pub fn _mm256_maskz_fmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_fma(a, b, c) }
 }
 
@@ -5312,8 +5370,9 @@ pub fn _mm512_fmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), a) }
 }
 
@@ -5325,8 +5384,9 @@ pub fn _mm512_mask_fmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), c) }
 }
 
@@ -5338,8 +5398,9 @@ pub fn _mm512_mask3_fmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmadd_ph(a, b, c), _mm512_setzero_ph()) }
 }
 
@@ -5359,7 +5420,7 @@ pub fn _mm512_maskz_fmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -5384,7 +5445,7 @@ pub fn _mm512_fmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>(
     a: __m512h,
     k: __mmask32,
@@ -5414,7 +5475,7 @@ pub fn _mm512_mask_fmadd_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -5444,7 +5505,7 @@ pub fn _mm512_mask3_fmadd_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -5469,8 +5530,9 @@ pub fn _mm512_maskz_fmadd_round_ph<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let extracta: f16 = simd_extract!(a, 0);
         let extractb: f16 = simd_extract!(b, 0);
@@ -5489,8 +5551,9 @@ pub fn _mm_fmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let mut fmadd: f16 = simd_extract!(a, 0);
         if k & 1 != 0 {
@@ -5511,8 +5574,9 @@ pub fn _mm_mask_fmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe {
         let mut fmadd: f16 = simd_extract!(c, 0);
         if k & 1 != 0 {
@@ -5533,8 +5597,9 @@ pub fn _mm_mask3_fmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let mut fmadd: f16 = 0.0;
         if k & 1 != 0 {
@@ -5564,7 +5629,7 @@ pub fn _mm_maskz_fmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -5594,7 +5659,7 @@ pub fn _mm_fmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>(
     a: __m128h,
     k: __mmask8,
@@ -5631,7 +5696,7 @@ pub fn _mm_mask_fmadd_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>(
     a: __m128h,
     b: __m128h,
@@ -5668,7 +5733,7 @@ pub fn _mm_mask3_fmadd_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -5696,8 +5761,9 @@ pub fn _mm_maskz_fmadd_round_sh<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
@@ -5709,8 +5775,9 @@ pub fn _mm_fmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), a) }
 }
 
@@ -5722,8 +5789,9 @@ pub fn _mm_mask_fmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), c) }
 }
 
@@ -5735,8 +5803,9 @@ pub fn _mm_mask3_fmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmsub_ph(a, b, c), _mm_setzero_ph()) }
 }
 
@@ -5747,8 +5816,9 @@ pub fn _mm_maskz_fmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
@@ -5760,8 +5830,9 @@ pub fn _mm256_fmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), a) }
 }
 
@@ -5773,8 +5844,9 @@ pub fn _mm256_mask_fmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), c) }
 }
 
@@ -5786,8 +5858,9 @@ pub fn _mm256_mask3_fmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmsub_ph(a, b, c), _mm256_setzero_ph()) }
 }
 
@@ -5798,8 +5871,9 @@ pub fn _mm256_maskz_fmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
@@ -5811,8 +5885,9 @@ pub fn _mm512_fmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), a) }
 }
 
@@ -5824,8 +5899,9 @@ pub fn _mm512_mask_fmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), c) }
 }
 
@@ -5837,8 +5913,9 @@ pub fn _mm512_mask3_fmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmsub_ph(a, b, c), _mm512_setzero_ph()) }
 }
 
@@ -5858,7 +5935,7 @@ pub fn _mm512_maskz_fmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -5883,7 +5960,7 @@ pub fn _mm512_fmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>(
     a: __m512h,
     k: __mmask32,
@@ -5913,7 +5990,7 @@ pub fn _mm512_mask_fmsub_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -5943,7 +6020,7 @@ pub fn _mm512_mask3_fmsub_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -5968,8 +6045,9 @@ pub fn _mm512_maskz_fmsub_round_ph<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let extracta: f16 = simd_extract!(a, 0);
         let extractb: f16 = simd_extract!(b, 0);
@@ -5988,8 +6066,9 @@ pub fn _mm_fmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let mut fmsub: f16 = simd_extract!(a, 0);
         if k & 1 != 0 {
@@ -6010,8 +6089,9 @@ pub fn _mm_mask_fmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe {
         let mut fmsub: f16 = simd_extract!(c, 0);
         if k & 1 != 0 {
@@ -6032,8 +6112,9 @@ pub fn _mm_mask3_fmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let mut fmsub: f16 = 0.0;
         if k & 1 != 0 {
@@ -6063,7 +6144,7 @@ pub fn _mm_maskz_fmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -6093,7 +6174,7 @@ pub fn _mm_fmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>(
     a: __m128h,
     k: __mmask8,
@@ -6130,7 +6211,7 @@ pub fn _mm_mask_fmsub_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>(
     a: __m128h,
     b: __m128h,
@@ -6159,7 +6240,7 @@ pub fn _mm_mask3_fmsub_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -6186,8 +6267,9 @@ pub fn _mm_maskz_fmsub_round_sh<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
@@ -6199,8 +6281,9 @@ pub fn _mm_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), a) }
 }
 
@@ -6212,8 +6295,9 @@ pub fn _mm_mask_fnmadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), c) }
 }
 
@@ -6225,8 +6309,9 @@ pub fn _mm_mask3_fnmadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fnmadd_ph(a, b, c), _mm_setzero_ph()) }
 }
 
@@ -6237,8 +6322,9 @@ pub fn _mm_maskz_fnmadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
@@ -6250,8 +6336,9 @@ pub fn _mm256_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), a) }
 }
 
@@ -6263,8 +6350,9 @@ pub fn _mm256_mask_fnmadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), c) }
 }
 
@@ -6276,8 +6364,9 @@ pub fn _mm256_mask3_fnmadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fnmadd_ph(a, b, c), _mm256_setzero_ph()) }
 }
 
@@ -6288,8 +6377,9 @@ pub fn _mm256_maskz_fnmadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
@@ -6301,8 +6391,9 @@ pub fn _mm512_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), a) }
 }
 
@@ -6314,8 +6405,9 @@ pub fn _mm512_mask_fnmadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), c) }
 }
 
@@ -6327,8 +6419,9 @@ pub fn _mm512_mask3_fnmadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fnmadd_ph(a, b, c), _mm512_setzero_ph()) }
 }
 
@@ -6348,7 +6441,7 @@ pub fn _mm512_maskz_fnmadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h)
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -6373,7 +6466,7 @@ pub fn _mm512_fnmadd_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>(
     a: __m512h,
     k: __mmask32,
@@ -6403,7 +6496,7 @@ pub fn _mm512_mask_fnmadd_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -6433,7 +6526,7 @@ pub fn _mm512_mask3_fnmadd_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -6458,8 +6551,9 @@ pub fn _mm512_maskz_fnmadd_round_ph<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let extracta: f16 = simd_extract!(a, 0);
         let extractb: f16 = simd_extract!(b, 0);
@@ -6478,8 +6572,9 @@ pub fn _mm_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let mut fnmadd: f16 = simd_extract!(a, 0);
         if k & 1 != 0 {
@@ -6500,8 +6595,9 @@ pub fn _mm_mask_fnmadd_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe {
         let mut fnmadd: f16 = simd_extract!(c, 0);
         if k & 1 != 0 {
@@ -6522,8 +6618,9 @@ pub fn _mm_mask3_fnmadd_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let mut fnmadd: f16 = 0.0;
         if k & 1 != 0 {
@@ -6553,7 +6650,7 @@ pub fn _mm_maskz_fnmadd_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> _
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -6583,7 +6680,7 @@ pub fn _mm_fnmadd_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m12
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>(
     a: __m128h,
     k: __mmask8,
@@ -6620,7 +6717,7 @@ pub fn _mm_mask_fnmadd_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>(
     a: __m128h,
     b: __m128h,
@@ -6657,7 +6754,7 @@ pub fn _mm_mask3_fnmadd_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -6684,8 +6781,9 @@ pub fn _mm_maskz_fnmadd_round_sh<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
@@ -6697,8 +6795,9 @@ pub fn _mm_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), a) }
 }
 
@@ -6710,8 +6809,9 @@ pub fn _mm_mask_fnmsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), c) }
 }
 
@@ -6723,8 +6823,9 @@ pub fn _mm_mask3_fnmsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fnmsub_ph(a, b, c), _mm_setzero_ph()) }
 }
 
@@ -6735,8 +6836,9 @@ pub fn _mm_maskz_fnmsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
@@ -6748,8 +6850,9 @@ pub fn _mm256_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), a) }
 }
 
@@ -6761,8 +6864,9 @@ pub fn _mm256_mask_fnmsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), c) }
 }
 
@@ -6774,8 +6878,9 @@ pub fn _mm256_mask3_fnmsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fnmsub_ph(a, b, c), _mm256_setzero_ph()) }
 }
 
@@ -6786,8 +6891,9 @@ pub fn _mm256_maskz_fnmsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
@@ -6799,8 +6905,9 @@ pub fn _mm512_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), a) }
 }
 
@@ -6812,8 +6919,9 @@ pub fn _mm512_mask_fnmsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), c) }
 }
 
@@ -6825,8 +6933,9 @@ pub fn _mm512_mask3_fnmsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fnmsub_ph(a, b, c), _mm512_setzero_ph()) }
 }
 
@@ -6846,7 +6955,7 @@ pub fn _mm512_maskz_fnmsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h)
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -6871,7 +6980,7 @@ pub fn _mm512_fnmsub_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h, c: __
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>(
     a: __m512h,
     k: __mmask32,
@@ -6901,7 +7010,7 @@ pub fn _mm512_mask_fnmsub_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -6931,7 +7040,7 @@ pub fn _mm512_mask3_fnmsub_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -6956,8 +7065,9 @@ pub fn _mm512_maskz_fnmsub_round_ph<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let extracta: f16 = simd_extract!(a, 0);
         let extractb: f16 = simd_extract!(b, 0);
@@ -6976,8 +7086,9 @@ pub fn _mm_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let mut fnmsub: f16 = simd_extract!(a, 0);
         if k & 1 != 0 {
@@ -6998,8 +7109,9 @@ pub fn _mm_mask_fnmsub_sh(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe {
         let mut fnmsub: f16 = simd_extract!(c, 0);
         if k & 1 != 0 {
@@ -7020,8 +7132,9 @@ pub fn _mm_mask3_fnmsub_sh(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let mut fnmsub: f16 = 0.0;
         if k & 1 != 0 {
@@ -7051,7 +7164,7 @@ pub fn _mm_maskz_fnmsub_sh(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> _
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -7081,7 +7194,7 @@ pub fn _mm_fnmsub_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h, c: __m12
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>(
     a: __m128h,
     k: __mmask8,
@@ -7118,7 +7231,7 @@ pub fn _mm_mask_fnmsub_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>(
     a: __m128h,
     b: __m128h,
@@ -7155,7 +7268,7 @@ pub fn _mm_mask3_fnmsub_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -7182,8 +7295,9 @@ pub fn _mm_maskz_fnmsub_round_sh<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -7199,8 +7313,9 @@ pub fn _mm_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), a) }
 }
 
@@ -7212,8 +7327,9 @@ pub fn _mm_mask_fmaddsub_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), c) }
 }
 
@@ -7225,8 +7341,9 @@ pub fn _mm_mask3_fmaddsub_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmaddsub_ph(a, b, c), _mm_setzero_ph()) }
 }
 
@@ -7237,8 +7354,9 @@ pub fn _mm_maskz_fmaddsub_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -7258,8 +7376,9 @@ pub fn _mm256_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), a) }
 }
 
@@ -7271,8 +7390,9 @@ pub fn _mm256_mask_fmaddsub_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), c) }
 }
 
@@ -7284,8 +7404,9 @@ pub fn _mm256_mask3_fmaddsub_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ph(a, b, c), _mm256_setzero_ph()) }
 }
 
@@ -7296,8 +7417,9 @@ pub fn _mm256_maskz_fmaddsub_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -7320,8 +7442,9 @@ pub fn _mm512_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), a) }
 }
 
@@ -7333,8 +7456,9 @@ pub fn _mm512_mask_fmaddsub_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), c) }
 }
 
@@ -7346,8 +7470,9 @@ pub fn _mm512_mask3_fmaddsub_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ph(a, b, c), _mm512_setzero_ph()) }
 }
 
@@ -7367,7 +7492,7 @@ pub fn _mm512_maskz_fmaddsub_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -7396,7 +7521,7 @@ pub fn _mm512_fmaddsub_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>(
     a: __m512h,
     k: __mmask32,
@@ -7426,7 +7551,7 @@ pub fn _mm512_mask_fmaddsub_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -7456,7 +7581,7 @@ pub fn _mm512_mask3_fmaddsub_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -7480,8 +7605,9 @@ pub fn _mm512_maskz_fmaddsub_round_ph<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     _mm_fmaddsub_ph(a, b, unsafe { simd_neg(c) })
 }
 
@@ -7493,8 +7619,9 @@ pub fn _mm_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), a) }
 }
 
@@ -7506,8 +7633,9 @@ pub fn _mm_mask_fmsubadd_ph(a: __m128h, k: __mmask8, b: __m128h, c: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), c) }
 }
 
@@ -7519,8 +7647,9 @@ pub fn _mm_mask3_fmsubadd_ph(a: __m128h, b: __m128h, c: __m128h, k: __mmask8) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_fmsubadd_ph(a, b, c), _mm_setzero_ph()) }
 }
 
@@ -7531,8 +7660,9 @@ pub fn _mm_maskz_fmsubadd_ph(k: __mmask8, a: __m128h, b: __m128h, c: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     _mm256_fmaddsub_ph(a, b, unsafe { simd_neg(c) })
 }
 
@@ -7544,8 +7674,9 @@ pub fn _mm256_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), a) }
 }
 
@@ -7557,8 +7688,9 @@ pub fn _mm256_mask_fmsubadd_ph(a: __m256h, k: __mmask16, b: __m256h, c: __m256h)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), c) }
 }
 
@@ -7570,8 +7702,9 @@ pub fn _mm256_mask3_fmsubadd_ph(a: __m256h, b: __m256h, c: __m256h, k: __mmask16
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ph(a, b, c), _mm256_setzero_ph()) }
 }
 
@@ -7582,8 +7715,9 @@ pub fn _mm256_maskz_fmsubadd_ph(k: __mmask16, a: __m256h, b: __m256h, c: __m256h
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     _mm512_fmaddsub_ph(a, b, unsafe { simd_neg(c) })
 }
 
@@ -7595,8 +7729,9 @@ pub fn _mm512_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), a) }
 }
 
@@ -7608,8 +7743,9 @@ pub fn _mm512_mask_fmsubadd_ph(a: __m512h, k: __mmask32, b: __m512h, c: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), c) }
 }
 
@@ -7621,8 +7757,9 @@ pub fn _mm512_mask3_fmsubadd_ph(a: __m512h, b: __m512h, c: __m512h, k: __mmask32
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ph(a, b, c), _mm512_setzero_ph()) }
 }
 
@@ -7642,7 +7779,7 @@ pub fn _mm512_maskz_fmsubadd_ph(k: __mmask32, a: __m512h, b: __m512h, c: __m512h
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -7671,7 +7808,7 @@ pub fn _mm512_fmsubadd_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>(
     a: __m512h,
     k: __mmask32,
@@ -7701,7 +7838,7 @@ pub fn _mm512_mask_fmsubadd_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>(
     a: __m512h,
     b: __m512h,
@@ -7731,7 +7868,7 @@ pub fn _mm512_mask3_fmsubadd_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -7755,7 +7892,7 @@ pub fn _mm512_maskz_fmsubadd_round_ph<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrcpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_rcp_ph(a: __m128h) -> __m128h {
     _mm_mask_rcp_ph(_mm_undefined_ph(), 0xff, a)
 }
@@ -7768,7 +7905,7 @@ pub fn _mm_rcp_ph(a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrcpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
     unsafe { vrcpph_128(a, src, k) }
 }
@@ -7781,7 +7918,7 @@ pub fn _mm_mask_rcp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrcpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h {
     _mm_mask_rcp_ph(_mm_setzero_ph(), k, a)
 }
@@ -7793,7 +7930,7 @@ pub fn _mm_maskz_rcp_ph(k: __mmask8, a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrcpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_rcp_ph(a: __m256h) -> __m256h {
     _mm256_mask_rcp_ph(_mm256_undefined_ph(), 0xffff, a)
 }
@@ -7806,7 +7943,7 @@ pub fn _mm256_rcp_ph(a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrcpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
     unsafe { vrcpph_256(a, src, k) }
 }
@@ -7819,7 +7956,7 @@ pub fn _mm256_mask_rcp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrcpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h {
     _mm256_mask_rcp_ph(_mm256_setzero_ph(), k, a)
 }
@@ -7831,7 +7968,7 @@ pub fn _mm256_maskz_rcp_ph(k: __mmask16, a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrcpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_rcp_ph(a: __m512h) -> __m512h {
     _mm512_mask_rcp_ph(_mm512_undefined_ph(), 0xffffffff, a)
 }
@@ -7844,7 +7981,7 @@ pub fn _mm512_rcp_ph(a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrcpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
     unsafe { vrcpph_512(a, src, k) }
 }
@@ -7857,7 +7994,7 @@ pub fn _mm512_mask_rcp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrcpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h {
     _mm512_mask_rcp_ph(_mm512_setzero_ph(), k, a)
 }
@@ -7871,7 +8008,7 @@ pub fn _mm512_maskz_rcp_ph(k: __mmask32, a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrcpsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
 }
@@ -7885,7 +8022,7 @@ pub fn _mm_rcp_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrcpsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { vrcpsh(a, b, src, k) }
 }
@@ -7899,7 +8036,7 @@ pub fn _mm_mask_rcp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrcpsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_rcp_sh(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -7912,7 +8049,7 @@ pub fn _mm_maskz_rcp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h {
     _mm_mask_rsqrt_ph(_mm_undefined_ph(), 0xff, a)
 }
@@ -7926,7 +8063,7 @@ pub fn _mm_rsqrt_ph(a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
     unsafe { vrsqrtph_128(a, src, k) }
 }
@@ -7940,7 +8077,7 @@ pub fn _mm_mask_rsqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
     _mm_mask_rsqrt_ph(_mm_setzero_ph(), k, a)
 }
@@ -7953,7 +8090,7 @@ pub fn _mm_maskz_rsqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h {
     _mm256_mask_rsqrt_ph(_mm256_undefined_ph(), 0xffff, a)
 }
@@ -7967,7 +8104,7 @@ pub fn _mm256_rsqrt_ph(a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
     unsafe { vrsqrtph_256(a, src, k) }
 }
@@ -7981,7 +8118,7 @@ pub fn _mm256_mask_rsqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
     _mm256_mask_rsqrt_ph(_mm256_setzero_ph(), k, a)
 }
@@ -7994,7 +8131,7 @@ pub fn _mm256_maskz_rsqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h {
     _mm512_mask_rsqrt_ph(_mm512_undefined_ph(), 0xffffffff, a)
 }
@@ -8008,7 +8145,7 @@ pub fn _mm512_rsqrt_ph(a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
     unsafe { vrsqrtph_512(a, src, k) }
 }
@@ -8022,7 +8159,7 @@ pub fn _mm512_mask_rsqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
     _mm512_mask_rsqrt_ph(_mm512_setzero_ph(), k, a)
 }
@@ -8036,7 +8173,7 @@ pub fn _mm512_maskz_rsqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrsqrtsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
 }
@@ -8050,7 +8187,7 @@ pub fn _mm_rsqrt_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrsqrtsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { vrsqrtsh(a, b, src, k) }
 }
@@ -8064,7 +8201,7 @@ pub fn _mm_mask_rsqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrsqrtsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_rsqrt_sh(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -8076,7 +8213,7 @@ pub fn _mm_maskz_rsqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_sqrt_ph(a: __m128h) -> __m128h {
     unsafe { simd_fsqrt(a) }
 }
@@ -8088,7 +8225,7 @@ pub fn _mm_sqrt_ph(a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), src) }
 }
@@ -8100,7 +8237,7 @@ pub fn _mm_mask_sqrt_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_sqrt_ph(a), _mm_setzero_ph()) }
 }
@@ -8112,7 +8249,7 @@ pub fn _mm_maskz_sqrt_ph(k: __mmask8, a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h {
     unsafe { simd_fsqrt(a) }
 }
@@ -8124,7 +8261,7 @@ pub fn _mm256_sqrt_ph(a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), src) }
 }
@@ -8136,7 +8273,7 @@ pub fn _mm256_mask_sqrt_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_sqrt_ph(a), _mm256_setzero_ph()) }
 }
@@ -8148,7 +8285,7 @@ pub fn _mm256_maskz_sqrt_ph(k: __mmask16, a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h {
     unsafe { simd_fsqrt(a) }
 }
@@ -8160,7 +8297,7 @@ pub fn _mm512_sqrt_ph(a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), src) }
 }
@@ -8172,7 +8309,7 @@ pub fn _mm512_mask_sqrt_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_sqrt_ph(a), _mm512_setzero_ph()) }
 }
@@ -8192,7 +8329,7 @@ pub fn _mm512_maskz_sqrt_ph(k: __mmask32, a: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -8215,7 +8352,7 @@ pub fn _mm512_sqrt_round_ph<const ROUNDING: i32>(a: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask32,
@@ -8242,7 +8379,7 @@ pub fn _mm512_mask_sqrt_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -8258,7 +8395,7 @@ pub fn _mm512_maskz_sqrt_round_ph<const ROUNDING: i32>(k: __mmask32, a: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
 }
@@ -8271,7 +8408,7 @@ pub fn _mm_sqrt_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_sqrt_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -8284,7 +8421,7 @@ pub fn _mm_mask_sqrt_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_sqrt_sh(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -8305,7 +8442,7 @@ pub fn _mm_maskz_sqrt_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_sqrt_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -8327,7 +8464,7 @@ pub fn _mm_sqrt_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -8356,7 +8493,7 @@ pub fn _mm_mask_sqrt_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vsqrtsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -8374,7 +8511,7 @@ pub fn _mm_maskz_sqrt_round_sh<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h {
     unsafe { vmaxph_128(a, b) }
 }
@@ -8388,7 +8525,7 @@ pub fn _mm_max_ph(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), src) }
 }
@@ -8402,7 +8539,7 @@ pub fn _mm_mask_max_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_max_ph(a, b), _mm_setzero_ph()) }
 }
@@ -8415,7 +8552,7 @@ pub fn _mm_maskz_max_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h {
     unsafe { vmaxph_256(a, b) }
 }
@@ -8429,7 +8566,7 @@ pub fn _mm256_max_ph(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), src) }
 }
@@ -8443,7 +8580,7 @@ pub fn _mm256_mask_max_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_max_ph(a, b), _mm256_setzero_ph()) }
 }
@@ -8456,7 +8593,7 @@ pub fn _mm256_maskz_max_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmaxph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h {
     _mm512_max_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
 }
@@ -8470,7 +8607,7 @@ pub fn _mm512_max_ph(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmaxph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), src) }
 }
@@ -8484,7 +8621,7 @@ pub fn _mm512_mask_max_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmaxph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_max_ph(a, b), _mm512_setzero_ph()) }
 }
@@ -8499,7 +8636,7 @@ pub fn _mm512_maskz_max_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         static_assert_sae!(SAE);
@@ -8517,7 +8654,7 @@ pub fn _mm512_max_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_max_round_ph<const SAE: i32>(
     src: __m512h,
     k: __mmask32,
@@ -8540,7 +8677,7 @@ pub fn _mm512_mask_max_round_ph<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vmaxph, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         static_assert_sae!(SAE);
@@ -8557,7 +8694,7 @@ pub fn _mm512_maskz_max_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_max_sh(_mm_undefined_ph(), 0xff, a, b)
 }
@@ -8571,7 +8708,7 @@ pub fn _mm_max_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_max_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -8585,7 +8722,7 @@ pub fn _mm_mask_max_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_max_sh(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -8600,7 +8737,7 @@ pub fn _mm_maskz_max_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_sae!(SAE);
     _mm_mask_max_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
@@ -8617,7 +8754,7 @@ pub fn _mm_max_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_max_round_sh<const SAE: i32>(
     src: __m128h,
     k: __mmask8,
@@ -8641,7 +8778,7 @@ pub fn _mm_mask_max_round_sh<const SAE: i32>(
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vmaxsh, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     static_assert_sae!(SAE);
     _mm_mask_max_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b)
@@ -8655,7 +8792,7 @@ pub fn _mm_maskz_max_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h {
     unsafe { vminph_128(a, b) }
 }
@@ -8669,7 +8806,7 @@ pub fn _mm_min_ph(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), src) }
 }
@@ -8683,7 +8820,7 @@ pub fn _mm_mask_min_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_min_ph(a, b), _mm_setzero_ph()) }
 }
@@ -8696,7 +8833,7 @@ pub fn _mm_maskz_min_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h {
     unsafe { vminph_256(a, b) }
 }
@@ -8710,7 +8847,7 @@ pub fn _mm256_min_ph(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), src) }
 }
@@ -8724,7 +8861,7 @@ pub fn _mm256_mask_min_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_min_ph(a, b), _mm256_setzero_ph()) }
 }
@@ -8737,7 +8874,7 @@ pub fn _mm256_maskz_min_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vminph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h {
     _mm512_min_round_ph::<_MM_FROUND_CUR_DIRECTION>(a, b)
 }
@@ -8751,7 +8888,7 @@ pub fn _mm512_min_ph(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vminph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), src) }
 }
@@ -8765,7 +8902,7 @@ pub fn _mm512_mask_min_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vminph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_min_ph(a, b), _mm512_setzero_ph()) }
 }
@@ -8779,7 +8916,7 @@ pub fn _mm512_maskz_min_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vminph, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         static_assert_sae!(SAE);
@@ -8797,7 +8934,7 @@ pub fn _mm512_min_round_ph<const SAE: i32>(a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vminph, SAE = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_min_round_ph<const SAE: i32>(
     src: __m512h,
     k: __mmask32,
@@ -8820,7 +8957,7 @@ pub fn _mm512_mask_min_round_ph<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vminph, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe {
         static_assert_sae!(SAE);
@@ -8837,7 +8974,7 @@ pub fn _mm512_maskz_min_round_ph<const SAE: i32>(k: __mmask32, a: __m512h, b: __
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_min_sh(_mm_undefined_ph(), 0xff, a, b)
 }
@@ -8851,7 +8988,7 @@ pub fn _mm_min_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_min_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -8865,7 +9002,7 @@ pub fn _mm_mask_min_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_min_sh(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -8880,7 +9017,7 @@ pub fn _mm_maskz_min_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_sae!(SAE);
     _mm_mask_min_round_sh::<SAE>(_mm_undefined_ph(), 0xff, a, b)
@@ -8897,7 +9034,7 @@ pub fn _mm_min_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_min_round_sh<const SAE: i32>(
     src: __m128h,
     k: __mmask8,
@@ -8921,7 +9058,7 @@ pub fn _mm_mask_min_round_sh<const SAE: i32>(
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vminsh, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     static_assert_sae!(SAE);
     _mm_mask_min_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b)
@@ -8935,7 +9072,7 @@ pub fn _mm_maskz_min_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetexpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_getexp_ph(a: __m128h) -> __m128h {
     _mm_mask_getexp_ph(_mm_undefined_ph(), 0xff, a)
 }
@@ -8949,7 +9086,7 @@ pub fn _mm_getexp_ph(a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetexpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
     unsafe { vgetexpph_128(a, src, k) }
 }
@@ -8963,7 +9100,7 @@ pub fn _mm_mask_getexp_ph(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetexpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h {
     _mm_mask_getexp_ph(_mm_setzero_ph(), k, a)
 }
@@ -8976,7 +9113,7 @@ pub fn _mm_maskz_getexp_ph(k: __mmask8, a: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetexpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_getexp_ph(a: __m256h) -> __m256h {
     _mm256_mask_getexp_ph(_mm256_undefined_ph(), 0xffff, a)
 }
@@ -8990,7 +9127,7 @@ pub fn _mm256_getexp_ph(a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetexpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
     unsafe { vgetexpph_256(a, src, k) }
 }
@@ -9004,7 +9141,7 @@ pub fn _mm256_mask_getexp_ph(src: __m256h, k: __mmask16, a: __m256h) -> __m256h
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetexpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h {
     _mm256_mask_getexp_ph(_mm256_setzero_ph(), k, a)
 }
@@ -9017,7 +9154,7 @@ pub fn _mm256_maskz_getexp_ph(k: __mmask16, a: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_getexp_ph(a: __m512h) -> __m512h {
     _mm512_mask_getexp_ph(_mm512_undefined_ph(), 0xffffffff, a)
 }
@@ -9031,7 +9168,7 @@ pub fn _mm512_getexp_ph(a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
     _mm512_mask_getexp_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a)
 }
@@ -9045,7 +9182,7 @@ pub fn _mm512_mask_getexp_ph(src: __m512h, k: __mmask32, a: __m512h) -> __m512h
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h {
     _mm512_mask_getexp_ph(_mm512_setzero_ph(), k, a)
 }
@@ -9060,7 +9197,7 @@ pub fn _mm512_maskz_getexp_ph(k: __mmask32, a: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h {
     static_assert_sae!(SAE);
     _mm512_mask_getexp_round_ph::<SAE>(_mm512_undefined_ph(), 0xffffffff, a)
@@ -9076,7 +9213,7 @@ pub fn _mm512_getexp_round_ph<const SAE: i32>(a: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_getexp_round_ph<const SAE: i32>(
     src: __m512h,
     k: __mmask32,
@@ -9098,7 +9235,7 @@ pub fn _mm512_mask_getexp_round_ph<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpph, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512h {
     static_assert_sae!(SAE);
     _mm512_mask_getexp_round_ph::<SAE>(_mm512_setzero_ph(), k, a)
@@ -9113,7 +9250,7 @@ pub fn _mm512_maskz_getexp_round_ph<const SAE: i32>(k: __mmask32, a: __m512h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_getexp_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
 }
@@ -9128,7 +9265,7 @@ pub fn _mm_getexp_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_getexp_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -9143,7 +9280,7 @@ pub fn _mm_mask_getexp_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_getexp_sh(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -9159,7 +9296,7 @@ pub fn _mm_maskz_getexp_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_sae!(SAE);
     _mm_mask_getexp_round_sh::<SAE>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -9176,7 +9313,7 @@ pub fn _mm_getexp_round_sh<const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_getexp_round_sh<const SAE: i32>(
     src: __m128h,
     k: __mmask8,
@@ -9200,7 +9337,7 @@ pub fn _mm_mask_getexp_round_sh<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetexpsh, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     static_assert_sae!(SAE);
     _mm_mask_getexp_round_sh::<SAE>(f16x8::ZERO.as_m128h(), k, a, b)
@@ -9228,7 +9365,7 @@ pub fn _mm_maskz_getexp_round_sh<const SAE: i32>(k: __mmask8, a: __m128h, b: __m
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(1, 2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
     a: __m128h,
 ) -> __m128h {
@@ -9260,7 +9397,7 @@ pub fn _mm_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTIS
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_getmant_ph<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9299,7 +9436,7 @@ pub fn _mm_mask_getmant_ph<
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_getmant_ph<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9334,7 +9471,7 @@ pub fn _mm_maskz_getmant_ph<
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(1, 2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
     a: __m256h,
 ) -> __m256h {
@@ -9366,7 +9503,7 @@ pub fn _mm256_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MAN
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_getmant_ph<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9405,7 +9542,7 @@ pub fn _mm256_mask_getmant_ph<
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_getmant_ph<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9440,7 +9577,7 @@ pub fn _mm256_maskz_getmant_ph<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(1, 2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
     a: __m512h,
 ) -> __m512h {
@@ -9472,7 +9609,7 @@ pub fn _mm512_getmant_ph<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MAN
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_getmant_ph<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9509,7 +9646,7 @@ pub fn _mm512_mask_getmant_ph<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_getmant_ph<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9547,7 +9684,7 @@ pub fn _mm512_maskz_getmant_ph<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
 #[rustc_legacy_const_generics(1, 2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_getmant_round_ph<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9587,7 +9724,7 @@ pub fn _mm512_getmant_round_ph<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
 #[rustc_legacy_const_generics(3, 4, 5)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_getmant_round_ph<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9631,7 +9768,7 @@ pub fn _mm512_mask_getmant_round_ph<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantph, NORM = 0, SIGN = 0, SAE = 8))]
 #[rustc_legacy_const_generics(2, 3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_getmant_round_ph<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9669,7 +9806,7 @@ pub fn _mm512_maskz_getmant_round_ph<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
     a: __m128h,
     b: __m128h,
@@ -9703,7 +9840,7 @@ pub fn _mm_getmant_sh<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTIS
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(4, 5)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_getmant_sh<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9742,7 +9879,7 @@ pub fn _mm_mask_getmant_sh<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0))]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_getmant_sh<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9782,7 +9919,7 @@ pub fn _mm_maskz_getmant_sh<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
 #[rustc_legacy_const_generics(2, 3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_getmant_round_sh<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9823,7 +9960,7 @@ pub fn _mm_getmant_round_sh<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
 #[rustc_legacy_const_generics(4, 5, 6)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_getmant_round_sh<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9868,7 +10005,7 @@ pub fn _mm_mask_getmant_round_sh<
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vgetmantsh, NORM = 0, SIGN = 0, SAE = 8))]
 #[rustc_legacy_const_generics(3, 4, 5)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_getmant_round_sh<
     const NORM: _MM_MANTISSA_NORM_ENUM,
     const SIGN: _MM_MANTISSA_SIGN_ENUM,
@@ -9900,7 +10037,7 @@ pub fn _mm_maskz_getmant_round_sh<
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_roundscale_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
@@ -9923,7 +10060,7 @@ pub fn _mm_roundscale_ph<const IMM8: i32>(a: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
@@ -9948,7 +10085,7 @@ pub fn _mm_mask_roundscale_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_roundscale_ph::<IMM8>(_mm_setzero_ph(), k, a)
@@ -9970,7 +10107,7 @@ pub fn _mm_maskz_roundscale_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m1
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_mask_roundscale_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
@@ -9993,7 +10130,7 @@ pub fn _mm256_roundscale_ph<const IMM8: i32>(a: __m256h) -> __m256h {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_roundscale_ph<const IMM8: i32>(
     src: __m256h,
     k: __mmask16,
@@ -10022,7 +10159,7 @@ pub fn _mm256_mask_roundscale_ph<const IMM8: i32>(
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_mask_roundscale_ph::<IMM8>(_mm256_setzero_ph(), k, a)
@@ -10044,7 +10181,7 @@ pub fn _mm256_maskz_roundscale_ph<const IMM8: i32>(k: __mmask16, a: __m256h) ->
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_mask_roundscale_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
@@ -10067,7 +10204,7 @@ pub fn _mm512_roundscale_ph<const IMM8: i32>(a: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_roundscale_ph<const IMM8: i32>(
     src: __m512h,
     k: __mmask32,
@@ -10094,7 +10231,7 @@ pub fn _mm512_mask_roundscale_ph<const IMM8: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_mask_roundscale_ph::<IMM8>(_mm512_setzero_ph(), k, a)
@@ -10117,7 +10254,7 @@ pub fn _mm512_maskz_roundscale_ph<const IMM8: i32>(k: __mmask32, a: __m512h) ->
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(1, 2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
     static_assert_uimm_bits!(IMM8, 8);
     static_assert_sae!(SAE);
@@ -10142,7 +10279,7 @@ pub fn _mm512_roundscale_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
     src: __m512h,
     k: __mmask32,
@@ -10172,7 +10309,7 @@ pub fn _mm512_mask_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscaleph, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
     k: __mmask32,
     a: __m512h,
@@ -10199,7 +10336,7 @@ pub fn _mm512_maskz_roundscale_round_ph<const IMM8: i32, const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_roundscale_sh::<IMM8>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -10222,7 +10359,7 @@ pub fn _mm_roundscale_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_roundscale_sh<const IMM8: i32>(
     src: __m128h,
     k: __mmask8,
@@ -10250,7 +10387,7 @@ pub fn _mm_mask_roundscale_sh<const IMM8: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_roundscale_sh::<IMM8>(f16x8::ZERO.as_m128h(), k, a, b)
@@ -10275,7 +10412,7 @@ pub fn _mm_maskz_roundscale_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m1
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     static_assert_sae!(SAE);
@@ -10301,7 +10438,7 @@ pub fn _mm_roundscale_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: _
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(4, 5)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
     src: __m128h,
     k: __mmask8,
@@ -10334,7 +10471,7 @@ pub fn _mm_mask_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vrndscalesh, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
     k: __mmask8,
     a: __m128h,
@@ -10352,7 +10489,7 @@ pub fn _mm_maskz_roundscale_round_sh<const IMM8: i32, const SAE: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vscalefph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_scalef_ph(_mm_undefined_ph(), 0xff, a, b)
 }
@@ -10364,7 +10501,7 @@ pub fn _mm_scalef_ph(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vscalefph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { vscalefph_128(a, b, src, k) }
 }
@@ -10376,7 +10513,7 @@ pub fn _mm_mask_scalef_ph(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vscalefph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_scalef_ph(_mm_setzero_ph(), k, a, b)
 }
@@ -10388,7 +10525,7 @@ pub fn _mm_maskz_scalef_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vscalefph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h {
     _mm256_mask_scalef_ph(_mm256_undefined_ph(), 0xffff, a, b)
 }
@@ -10400,7 +10537,7 @@ pub fn _mm256_scalef_ph(a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vscalefph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe { vscalefph_256(a, b, src, k) }
 }
@@ -10412,7 +10549,7 @@ pub fn _mm256_mask_scalef_ph(src: __m256h, k: __mmask16, a: __m256h, b: __m256h)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vscalefph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     _mm256_mask_scalef_ph(_mm256_setzero_ph(), k, a, b)
 }
@@ -10424,7 +10561,7 @@ pub fn _mm256_maskz_scalef_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_scalef_ph(_mm512_undefined_ph(), 0xffffffff, a, b)
 }
@@ -10436,7 +10573,7 @@ pub fn _mm512_scalef_ph(a: __m512h, b: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_scalef_round_ph::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -10448,7 +10585,7 @@ pub fn _mm512_mask_scalef_ph(src: __m512h, k: __mmask32, a: __m512h, b: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     _mm512_mask_scalef_ph(_mm512_setzero_ph(), k, a, b)
 }
@@ -10469,7 +10606,7 @@ pub fn _mm512_maskz_scalef_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __m512h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_scalef_round_ph::<ROUNDING>(_mm512_undefined_ph(), 0xffffffff, a, b)
@@ -10491,7 +10628,7 @@ pub fn _mm512_scalef_round_ph<const ROUNDING: i32>(a: __m512h, b: __m512h) -> __
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask32,
@@ -10520,7 +10657,7 @@ pub fn _mm512_mask_scalef_round_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>(
     k: __mmask32,
     a: __m512h,
@@ -10538,7 +10675,7 @@ pub fn _mm512_maskz_scalef_round_ph<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
 }
@@ -10551,7 +10688,7 @@ pub fn _mm_scalef_sh(a: __m128h, b: __m128h) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_scalef_round_sh::<_MM_FROUND_CUR_DIRECTION>(src, k, a, b)
 }
@@ -10564,7 +10701,7 @@ pub fn _mm_mask_scalef_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefsh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     _mm_mask_scalef_sh(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -10586,7 +10723,7 @@ pub fn _mm_maskz_scalef_sh(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_scalef_round_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -10609,7 +10746,7 @@ pub fn _mm_scalef_round_sh<const ROUNDING: i32>(a: __m128h, b: __m128h) -> __m12
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -10639,7 +10776,7 @@ pub fn _mm_mask_scalef_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vscalefsh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -10665,7 +10802,7 @@ pub fn _mm_maskz_scalef_round_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_reduce_ph::<IMM8>(_mm_undefined_ph(), 0xff, a)
@@ -10688,7 +10825,7 @@ pub fn _mm_reduce_ph<const IMM8: i32>(a: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h) -> __m128h {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
@@ -10713,7 +10850,7 @@ pub fn _mm_mask_reduce_ph<const IMM8: i32>(src: __m128h, k: __mmask8, a: __m128h
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_reduce_ph::<IMM8>(_mm_setzero_ph(), k, a)
@@ -10735,7 +10872,7 @@ pub fn _mm_maskz_reduce_ph<const IMM8: i32>(k: __mmask8, a: __m128h) -> __m128h
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_mask_reduce_ph::<IMM8>(_mm256_undefined_ph(), 0xffff, a)
@@ -10758,7 +10895,7 @@ pub fn _mm256_reduce_ph<const IMM8: i32>(a: __m256h) -> __m256h {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m256h) -> __m256h {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
@@ -10783,7 +10920,7 @@ pub fn _mm256_mask_reduce_ph<const IMM8: i32>(src: __m256h, k: __mmask16, a: __m
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m256h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_mask_reduce_ph::<IMM8>(_mm256_setzero_ph(), k, a)
@@ -10805,7 +10942,7 @@ pub fn _mm256_maskz_reduce_ph<const IMM8: i32>(k: __mmask16, a: __m256h) -> __m2
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_mask_reduce_ph::<IMM8>(_mm512_undefined_ph(), 0xffffffff, a)
@@ -10828,7 +10965,7 @@ pub fn _mm512_reduce_ph<const IMM8: i32>(a: __m512h) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m512h) -> __m512h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_mask_reduce_round_ph::<IMM8, _MM_FROUND_CUR_DIRECTION>(src, k, a)
@@ -10851,7 +10988,7 @@ pub fn _mm512_mask_reduce_ph<const IMM8: i32>(src: __m512h, k: __mmask32, a: __m
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m512h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_mask_reduce_ph::<IMM8>(_mm512_setzero_ph(), k, a)
@@ -10875,7 +11012,7 @@ pub fn _mm512_maskz_reduce_ph<const IMM8: i32>(k: __mmask32, a: __m512h) -> __m5
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(1, 2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __m512h {
     static_assert_uimm_bits!(IMM8, 8);
     static_assert_sae!(SAE);
@@ -10901,7 +11038,7 @@ pub fn _mm512_reduce_round_ph<const IMM8: i32, const SAE: i32>(a: __m512h) -> __
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
     src: __m512h,
     k: __mmask32,
@@ -10933,7 +11070,7 @@ pub fn _mm512_mask_reduce_round_ph<const IMM8: i32, const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreduceph, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
     k: __mmask32,
     a: __m512h,
@@ -10960,7 +11097,7 @@ pub fn _mm512_maskz_reduce_round_ph<const IMM8: i32, const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -10984,7 +11121,7 @@ pub fn _mm_reduce_sh<const IMM8: i32>(a: __m128h, b: __m128h) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_reduce_sh<const IMM8: i32>(
     src: __m128h,
     k: __mmask8,
@@ -11013,7 +11150,7 @@ pub fn _mm_mask_reduce_sh<const IMM8: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_mask_reduce_sh::<IMM8>(f16x8::ZERO.as_m128h(), k, a, b)
@@ -11038,7 +11175,7 @@ pub fn _mm_maskz_reduce_sh<const IMM8: i32>(k: __mmask8, a: __m128h, b: __m128h)
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(2, 3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m128h) -> __m128h {
     static_assert_uimm_bits!(IMM8, 8);
     static_assert_sae!(SAE);
@@ -11065,7 +11202,7 @@ pub fn _mm_reduce_round_sh<const IMM8: i32, const SAE: i32>(a: __m128h, b: __m12
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(4, 5)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
     src: __m128h,
     k: __mmask8,
@@ -11099,7 +11236,7 @@ pub fn _mm_mask_reduce_round_sh<const IMM8: i32, const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vreducesh, IMM8 = 0, SAE = 8))]
 #[rustc_legacy_const_generics(3, 4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
     k: __mmask8,
     a: __m128h,
@@ -11117,7 +11254,8 @@ pub fn _mm_maskz_reduce_round_sh<const IMM8: i32, const SAE: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_reduce_add_ph(a: __m128h) -> f16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_add_ph(a: __m128h) -> f16 {
     unsafe {
         let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
         let a = _mm_add_ph(a, b);
@@ -11134,7 +11272,8 @@ pub fn _mm_reduce_add_ph(a: __m128h) -> f16 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
     unsafe {
         let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
         let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -11149,7 +11288,8 @@ pub fn _mm256_reduce_add_ph(a: __m256h) -> f16 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
     unsafe {
         let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
         let q = simd_shuffle!(
@@ -11170,7 +11310,8 @@ pub fn _mm512_reduce_add_ph(a: __m512h) -> f16 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
     unsafe {
         let b = simd_shuffle!(a, a, [4, 5, 6, 7, 0, 1, 2, 3]);
         let a = _mm_mul_ph(a, b);
@@ -11187,7 +11328,8 @@ pub fn _mm_reduce_mul_ph(a: __m128h) -> f16 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
     unsafe {
         let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
         let q = simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
@@ -11202,7 +11344,8 @@ pub fn _mm256_reduce_mul_ph(a: __m256h) -> f16 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_reduce_mul_ph(a: __m512h) -> f16 {
     unsafe {
         let p = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
         let q = simd_shuffle!(
@@ -11324,32 +11467,6 @@ pub fn _mm512_reduce_max_ph(a: __m512h) -> f16 {
     }
 }
 
-macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics
-    ($mask_type: ty, $reg: ident, $a: expr) => {{
-        let dst: $mask_type;
-        asm!(
-            "vfpclassph {k}, {src}, {imm8}",
-            k = lateout(kreg) dst,
-            src = in($reg) $a,
-            imm8 = const IMM8,
-            options(pure, nomem, nostack)
-        );
-        dst
-    }};
-    ($mask_type: ty, $mask: expr, $reg: ident, $a: expr) => {{
-        let dst: $mask_type;
-        asm!(
-            "vfpclassph {k} {{ {mask} }}, {src}, {imm8}",
-            k = lateout(kreg) dst,
-            mask = in(kreg) $mask,
-            src = in($reg) $a,
-            imm8 = const IMM8,
-            options(pure, nomem, nostack)
-        );
-        dst
-    }};
-}
-
 /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
 /// by imm8, and store the results in mask vector k.
 /// imm can be a combination of:
@@ -11368,11 +11485,11 @@ macro_rules! fpclass_asm { // FIXME: use LLVM intrinsics
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
-        fpclass_asm!(__mmask8, xmm_reg, a)
+        vfpclassph_128(a, IMM8)
     }
 }
 
@@ -11395,12 +11512,9 @@ pub fn _mm_fpclass_ph_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        fpclass_asm!(__mmask8, k1, xmm_reg, a)
-    }
+    _mm_fpclass_ph_mask::<IMM8>(a) & k1
 }
 
 /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
@@ -11421,11 +11535,11 @@ pub fn _mm_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
-        fpclass_asm!(__mmask16, ymm_reg, a)
+        vfpclassph_256(a, IMM8)
     }
 }
 
@@ -11448,12 +11562,9 @@ pub fn _mm256_fpclass_ph_mask<const IMM8: i32>(a: __m256h) -> __mmask16 {
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -> __mmask16 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        fpclass_asm!(__mmask16, k1, ymm_reg, a)
-    }
+    _mm256_fpclass_ph_mask::<IMM8>(a) & k1
 }
 
 /// Test packed half-precision (16-bit) floating-point elements in a for special categories specified
@@ -11474,11 +11585,11 @@ pub fn _mm256_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask16, a: __m256h) -
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
-        fpclass_asm!(__mmask32, zmm_reg, a)
+        vfpclassph_512(a, IMM8)
     }
 }
 
@@ -11501,12 +11612,9 @@ pub fn _mm512_fpclass_ph_mask<const IMM8: i32>(a: __m512h) -> __mmask32 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfpclassph, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -> __mmask32 {
-    unsafe {
-        static_assert_uimm_bits!(IMM8, 8);
-        fpclass_asm!(__mmask32, k1, zmm_reg, a)
-    }
+    _mm512_fpclass_ph_mask::<IMM8>(a) & k1
 }
 
 /// Test the lower half-precision (16-bit) floating-point element in a for special categories specified
@@ -11527,7 +11635,7 @@ pub fn _mm512_mask_fpclass_ph_mask<const IMM8: i32>(k1: __mmask32, a: __m512h) -
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
     _mm_mask_fpclass_sh_mask::<IMM8>(0xff, a)
 }
@@ -11551,7 +11659,7 @@ pub fn _mm_fpclass_sh_mask<const IMM8: i32>(a: __m128h) -> __mmask8 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vfpclasssh, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __mmask8 {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
@@ -11565,8 +11673,9 @@ pub fn _mm_mask_fpclass_sh_mask<const IMM8: i32>(k1: __mmask8, a: __m128h) -> __
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_blend_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
     unsafe { simd_select_bitmask(k, b, a) }
 }
 
@@ -11576,8 +11685,9 @@ pub fn _mm_mask_blend_ph(k: __mmask8, a: __m128h, b: __m128h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_blend_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
     unsafe { simd_select_bitmask(k, b, a) }
 }
 
@@ -11587,8 +11697,9 @@ pub fn _mm256_mask_blend_ph(k: __mmask16, a: __m256h, b: __m256h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_blend_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
     unsafe { simd_select_bitmask(k, b, a) }
 }
 
@@ -11598,7 +11709,7 @@ pub fn _mm512_mask_blend_ph(k: __mmask32, a: __m512h, b: __m512h) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutex2var_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h {
     _mm_castsi128_ph(_mm_permutex2var_epi16(
         _mm_castph_si128(a),
@@ -11613,7 +11724,7 @@ pub fn _mm_permutex2var_ph(a: __m128h, idx: __m128i, b: __m128h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutex2var_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h {
     _mm256_castsi256_ph(_mm256_permutex2var_epi16(
         _mm256_castph_si256(a),
@@ -11628,7 +11739,7 @@ pub fn _mm256_permutex2var_ph(a: __m256h, idx: __m256i, b: __m256h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutex2var_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h {
     _mm512_castsi512_ph(_mm512_permutex2var_epi16(
         _mm512_castph_si512(a),
@@ -11643,7 +11754,7 @@ pub fn _mm512_permutex2var_ph(a: __m512h, idx: __m512i, b: __m512h) -> __m512h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_permutexvar_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h {
     _mm_castsi128_ph(_mm_permutexvar_epi16(idx, _mm_castph_si128(a)))
 }
@@ -11654,7 +11765,7 @@ pub fn _mm_permutexvar_ph(idx: __m128i, a: __m128h) -> __m128h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_permutexvar_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h {
     _mm256_castsi256_ph(_mm256_permutexvar_epi16(idx, _mm256_castph_si256(a)))
 }
@@ -11665,7 +11776,7 @@ pub fn _mm256_permutexvar_ph(idx: __m256i, a: __m256h) -> __m256h {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_permutexvar_ph)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h {
     _mm512_castsi512_ph(_mm512_permutexvar_epi16(idx, _mm512_castph_si512(a)))
 }
@@ -11677,7 +11788,7 @@ pub fn _mm512_permutexvar_ph(idx: __m512i, a: __m512h) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h {
     unsafe { vcvtw2ph_128(a.as_i16x8(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -11690,7 +11801,7 @@ pub fn _mm_cvtepi16_ph(a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_cvtepi16_ph(a), src) }
 }
@@ -11702,7 +11813,7 @@ pub fn _mm_mask_cvtepi16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h {
     _mm_mask_cvtepi16_ph(_mm_setzero_ph(), k, a)
 }
@@ -11714,7 +11825,7 @@ pub fn _mm_maskz_cvtepi16_ph(k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h {
     unsafe { vcvtw2ph_256(a.as_i16x16(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -11727,7 +11838,7 @@ pub fn _mm256_cvtepi16_ph(a: __m256i) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_cvtepi16_ph(a), src) }
 }
@@ -11739,7 +11850,7 @@ pub fn _mm256_mask_cvtepi16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h {
     _mm256_mask_cvtepi16_ph(_mm256_setzero_ph(), k, a)
 }
@@ -11751,7 +11862,7 @@ pub fn _mm256_maskz_cvtepi16_ph(k: __mmask16, a: __m256i) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h {
     unsafe { vcvtw2ph_512(a.as_i16x32(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -11764,7 +11875,7 @@ pub fn _mm512_cvtepi16_ph(a: __m512i) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_cvtepi16_ph(a), src) }
 }
@@ -11776,7 +11887,7 @@ pub fn _mm512_mask_cvtepi16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h {
     _mm512_mask_cvtepi16_ph(_mm512_setzero_ph(), k, a)
 }
@@ -11797,7 +11908,7 @@ pub fn _mm512_maskz_cvtepi16_ph(k: __mmask32, a: __m512i) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -11822,7 +11933,7 @@ pub fn _mm512_cvt_roundepi16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask32,
@@ -11850,7 +11961,7 @@ pub fn _mm512_mask_cvt_roundepi16_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtw2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundepi16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
@@ -11863,7 +11974,7 @@ pub fn _mm512_maskz_cvt_roundepi16_ph<const ROUNDING: i32>(k: __mmask32, a: __m5
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h {
     unsafe { vcvtuw2ph_128(a.as_u16x8(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -11876,7 +11987,7 @@ pub fn _mm_cvtepu16_ph(a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm_cvtepu16_ph(a), src) }
 }
@@ -11888,7 +11999,7 @@ pub fn _mm_mask_cvtepu16_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h {
     _mm_mask_cvtepu16_ph(_mm_setzero_ph(), k, a)
 }
@@ -11900,7 +12011,7 @@ pub fn _mm_maskz_cvtepu16_ph(k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h {
     unsafe { vcvtuw2ph_256(a.as_u16x16(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -11913,7 +12024,7 @@ pub fn _mm256_cvtepu16_ph(a: __m256i) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm256_cvtepu16_ph(a), src) }
 }
@@ -11925,7 +12036,7 @@ pub fn _mm256_mask_cvtepu16_ph(src: __m256h, k: __mmask16, a: __m256i) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h {
     _mm256_mask_cvtepu16_ph(_mm256_setzero_ph(), k, a)
 }
@@ -11937,7 +12048,7 @@ pub fn _mm256_maskz_cvtepu16_ph(k: __mmask16, a: __m256i) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h {
     unsafe { vcvtuw2ph_512(a.as_u16x32(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -11950,7 +12061,7 @@ pub fn _mm512_cvtepu16_ph(a: __m512i) -> __m512h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512h {
     unsafe { simd_select_bitmask(k, _mm512_cvtepu16_ph(a), src) }
 }
@@ -11962,7 +12073,7 @@ pub fn _mm512_mask_cvtepu16_ph(src: __m512h, k: __mmask32, a: __m512i) -> __m512
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h {
     _mm512_mask_cvtepu16_ph(_mm512_setzero_ph(), k, a)
 }
@@ -11983,7 +12094,7 @@ pub fn _mm512_maskz_cvtepu16_ph(k: __mmask32, a: __m512i) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -12008,7 +12119,7 @@ pub fn _mm512_cvt_roundepu16_ph<const ROUNDING: i32>(a: __m512i) -> __m512h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>(
     src: __m512h,
     k: __mmask32,
@@ -12036,7 +12147,7 @@ pub fn _mm512_mask_cvt_roundepu16_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuw2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m512i) -> __m512h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundepu16_ph::<ROUNDING>(_mm512_setzero_ph(), k, a)
@@ -12049,7 +12160,7 @@ pub fn _mm512_maskz_cvt_roundepu16_ph<const ROUNDING: i32>(k: __mmask32, a: __m5
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h {
     _mm_mask_cvtepi32_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -12062,7 +12173,7 @@ pub fn _mm_cvtepi32_ph(a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
     unsafe { vcvtdq2ph_128(a.as_i32x4(), src, k) }
 }
@@ -12075,7 +12186,7 @@ pub fn _mm_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h {
     _mm_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
 }
@@ -12087,7 +12198,7 @@ pub fn _mm_maskz_cvtepi32_ph(k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h {
     unsafe { vcvtdq2ph_256(a.as_i32x8(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -12100,7 +12211,7 @@ pub fn _mm256_cvtepi32_ph(a: __m256i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm256_cvtepi32_ph(a), src) }
 }
@@ -12112,7 +12223,7 @@ pub fn _mm256_mask_cvtepi32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h {
     _mm256_mask_cvtepi32_ph(_mm_setzero_ph(), k, a)
 }
@@ -12124,7 +12235,7 @@ pub fn _mm256_maskz_cvtepi32_ph(k: __mmask8, a: __m256i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h {
     unsafe { vcvtdq2ph_512(a.as_i32x16(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -12137,7 +12248,7 @@ pub fn _mm512_cvtepi32_ph(a: __m512i) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm512_cvtepi32_ph(a), src) }
 }
@@ -12149,7 +12260,7 @@ pub fn _mm512_mask_cvtepi32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h {
     _mm512_mask_cvtepi32_ph(f16x16::ZERO.as_m256h(), k, a)
 }
@@ -12170,7 +12281,7 @@ pub fn _mm512_maskz_cvtepi32_ph(k: __mmask16, a: __m512i) -> __m256h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -12195,7 +12306,7 @@ pub fn _mm512_cvt_roundepi32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>(
     src: __m256h,
     k: __mmask16,
@@ -12223,7 +12334,7 @@ pub fn _mm512_mask_cvt_roundepi32_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtdq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundepi32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
@@ -12237,7 +12348,7 @@ pub fn _mm512_maskz_cvt_roundepi32_ph<const ROUNDING: i32>(k: __mmask16, a: __m5
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsi2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h {
     unsafe { vcvtsi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -12259,7 +12370,7 @@ pub fn _mm_cvti32_sh(a: __m128h, b: i32) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -12274,7 +12385,7 @@ pub fn _mm_cvt_roundi32_sh<const ROUNDING: i32>(a: __m128h, b: i32) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h {
     _mm_mask_cvtepu32_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -12287,7 +12398,7 @@ pub fn _mm_cvtepu32_ph(a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
     unsafe { vcvtudq2ph_128(a.as_u32x4(), src, k) }
 }
@@ -12300,7 +12411,7 @@ pub fn _mm_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h {
     _mm_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
 }
@@ -12312,7 +12423,7 @@ pub fn _mm_maskz_cvtepu32_ph(k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h {
     unsafe { vcvtudq2ph_256(a.as_u32x8(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -12325,7 +12436,7 @@ pub fn _mm256_cvtepu32_ph(a: __m256i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm256_cvtepu32_ph(a), src) }
 }
@@ -12337,7 +12448,7 @@ pub fn _mm256_mask_cvtepu32_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h {
     _mm256_mask_cvtepu32_ph(_mm_setzero_ph(), k, a)
 }
@@ -12349,7 +12460,7 @@ pub fn _mm256_maskz_cvtepu32_ph(k: __mmask8, a: __m256i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h {
     unsafe { vcvtudq2ph_512(a.as_u32x16(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -12362,7 +12473,7 @@ pub fn _mm512_cvtepu32_ph(a: __m512i) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256h {
     unsafe { simd_select_bitmask(k, _mm512_cvtepu32_ph(a), src) }
 }
@@ -12374,7 +12485,7 @@ pub fn _mm512_mask_cvtepu32_ph(src: __m256h, k: __mmask16, a: __m512i) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h {
     _mm512_mask_cvtepu32_ph(f16x16::ZERO.as_m256h(), k, a)
 }
@@ -12395,7 +12506,7 @@ pub fn _mm512_maskz_cvtepu32_ph(k: __mmask16, a: __m512i) -> __m256h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -12420,7 +12531,7 @@ pub fn _mm512_cvt_roundepu32_ph<const ROUNDING: i32>(a: __m512i) -> __m256h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>(
     src: __m256h,
     k: __mmask16,
@@ -12448,7 +12559,7 @@ pub fn _mm512_mask_cvt_roundepu32_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtudq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m256h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundepu32_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
@@ -12462,7 +12573,7 @@ pub fn _mm512_maskz_cvt_roundepu32_ph<const ROUNDING: i32>(k: __mmask16, a: __m5
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtusi2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h {
     unsafe { vcvtusi2sh(a, b, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -12484,7 +12595,7 @@ pub fn _mm_cvtu32_sh(a: __m128h, b: u32) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -12499,7 +12610,7 @@ pub fn _mm_cvt_roundu32_sh<const ROUNDING: i32>(a: __m128h, b: u32) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h {
     _mm_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -12512,7 +12623,7 @@ pub fn _mm_cvtepi64_ph(a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
     unsafe { vcvtqq2ph_128(a.as_i64x2(), src, k) }
 }
@@ -12525,7 +12636,7 @@ pub fn _mm_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h {
     _mm_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
 }
@@ -12537,7 +12648,7 @@ pub fn _mm_maskz_cvtepi64_ph(k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h {
     _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -12550,7 +12661,7 @@ pub fn _mm256_cvtepi64_ph(a: __m256i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
     unsafe { vcvtqq2ph_256(a.as_i64x4(), src, k) }
 }
@@ -12563,7 +12674,7 @@ pub fn _mm256_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h {
     _mm256_mask_cvtepi64_ph(_mm_setzero_ph(), k, a)
 }
@@ -12575,7 +12686,7 @@ pub fn _mm256_maskz_cvtepi64_ph(k: __mmask8, a: __m256i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h {
     unsafe { vcvtqq2ph_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -12588,7 +12699,7 @@ pub fn _mm512_cvtepi64_ph(a: __m512i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm512_cvtepi64_ph(a), src) }
 }
@@ -12600,7 +12711,7 @@ pub fn _mm512_mask_cvtepi64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h {
     _mm512_mask_cvtepi64_ph(f16x8::ZERO.as_m128h(), k, a)
 }
@@ -12621,7 +12732,7 @@ pub fn _mm512_maskz_cvtepi64_ph(k: __mmask8, a: __m512i) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -12646,7 +12757,7 @@ pub fn _mm512_cvt_roundepi64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -12674,7 +12785,7 @@ pub fn _mm512_mask_cvt_roundepi64_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtqq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundepi64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
@@ -12687,7 +12798,7 @@ pub fn _mm512_maskz_cvt_roundepi64_ph<const ROUNDING: i32>(k: __mmask8, a: __m51
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h {
     _mm_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -12700,7 +12811,7 @@ pub fn _mm_cvtepu64_ph(a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
     unsafe { vcvtuqq2ph_128(a.as_u64x2(), src, k) }
 }
@@ -12713,7 +12824,7 @@ pub fn _mm_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h {
     _mm_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
 }
@@ -12725,7 +12836,7 @@ pub fn _mm_maskz_cvtepu64_ph(k: __mmask8, a: __m128i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h {
     _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -12738,7 +12849,7 @@ pub fn _mm256_cvtepu64_ph(a: __m256i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h {
     unsafe { vcvtuqq2ph_256(a.as_u64x4(), src, k) }
 }
@@ -12751,7 +12862,7 @@ pub fn _mm256_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m256i) -> __m128h
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h {
     _mm256_mask_cvtepu64_ph(_mm_setzero_ph(), k, a)
 }
@@ -12763,7 +12874,7 @@ pub fn _mm256_maskz_cvtepu64_ph(k: __mmask8, a: __m256i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h {
     unsafe { vcvtuqq2ph_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION) }
 }
@@ -12776,7 +12887,7 @@ pub fn _mm512_cvtepu64_ph(a: __m512i) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h {
     unsafe { simd_select_bitmask(k, _mm512_cvtepu64_ph(a), src) }
 }
@@ -12788,7 +12899,7 @@ pub fn _mm512_mask_cvtepu64_ph(src: __m128h, k: __mmask8, a: __m512i) -> __m128h
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h {
     _mm512_mask_cvtepu64_ph(f16x8::ZERO.as_m128h(), k, a)
 }
@@ -12809,7 +12920,7 @@ pub fn _mm512_maskz_cvtepu64_ph(k: __mmask8, a: __m512i) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -12834,7 +12945,7 @@ pub fn _mm512_cvt_roundepu64_ph<const ROUNDING: i32>(a: __m512i) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -12862,7 +12973,7 @@ pub fn _mm512_mask_cvt_roundepu64_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtuqq2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundepu64_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
@@ -12875,7 +12986,7 @@ pub fn _mm512_maskz_cvt_roundepu64_ph<const ROUNDING: i32>(k: __mmask8, a: __m51
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtps2phx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtxps_ph(a: __m128) -> __m128h {
     _mm_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -12888,7 +12999,7 @@ pub fn _mm_cvtxps_ph(a: __m128) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtps2phx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h {
     unsafe { vcvtps2phx_128(a, src, k) }
 }
@@ -12901,7 +13012,7 @@ pub fn _mm_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m128) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtps2phx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h {
     _mm_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
 }
@@ -12913,7 +13024,7 @@ pub fn _mm_maskz_cvtxps_ph(k: __mmask8, a: __m128) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtps2phx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h {
     _mm256_mask_cvtxps_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -12926,7 +13037,7 @@ pub fn _mm256_cvtxps_ph(a: __m256) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtps2phx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h {
     unsafe { vcvtps2phx_256(a, src, k) }
 }
@@ -12939,7 +13050,7 @@ pub fn _mm256_mask_cvtxps_ph(src: __m128h, k: __mmask8, a: __m256) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtps2phx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h {
     _mm256_mask_cvtxps_ph(_mm_setzero_ph(), k, a)
 }
@@ -12951,7 +13062,7 @@ pub fn _mm256_maskz_cvtxps_ph(k: __mmask8, a: __m256) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtps2phx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h {
     _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), 0xffff, a)
 }
@@ -12964,7 +13075,7 @@ pub fn _mm512_cvtxps_ph(a: __m512) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtps2phx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h {
     unsafe { vcvtps2phx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -12977,7 +13088,7 @@ pub fn _mm512_mask_cvtxps_ph(src: __m256h, k: __mmask16, a: __m512) -> __m256h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtps2phx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h {
     _mm512_mask_cvtxps_ph(f16x16::ZERO.as_m256h(), k, a)
 }
@@ -12998,7 +13109,7 @@ pub fn _mm512_maskz_cvtxps_ph(k: __mmask16, a: __m512) -> __m256h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvtx_roundps_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), 0xffff, a)
@@ -13021,7 +13132,7 @@ pub fn _mm512_cvtx_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>(
     src: __m256h,
     k: __mmask16,
@@ -13050,7 +13161,7 @@ pub fn _mm512_mask_cvtx_roundps_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtps2phx, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvtx_roundps_ph::<ROUNDING>(f16x16::ZERO.as_m256h(), k, a)
@@ -13064,7 +13175,7 @@ pub fn _mm512_maskz_cvtx_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtss2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h {
     _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
 }
@@ -13078,7 +13189,7 @@ pub fn _mm_cvtss_sh(a: __m128h, b: __m128) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtss2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __m128h {
     unsafe { vcvtss2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -13092,7 +13203,7 @@ pub fn _mm_mask_cvtss_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128) -> __
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtss2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h {
     _mm_mask_cvtss_sh(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -13114,7 +13225,7 @@ pub fn _mm_maskz_cvtss_sh(k: __mmask8, a: __m128h, b: __m128) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_cvt_roundss_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -13138,7 +13249,7 @@ pub fn _mm_cvt_roundss_sh<const ROUNDING: i32>(a: __m128h, b: __m128) -> __m128h
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -13169,7 +13280,7 @@ pub fn _mm_mask_cvt_roundss_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtss2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -13186,7 +13297,7 @@ pub fn _mm_maskz_cvt_roundss_sh<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h {
     _mm_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -13199,7 +13310,7 @@ pub fn _mm_cvtpd_ph(a: __m128d) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h {
     unsafe { vcvtpd2ph_128(a, src, k) }
 }
@@ -13212,7 +13323,7 @@ pub fn _mm_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m128d) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h {
     _mm_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
 }
@@ -13224,7 +13335,7 @@ pub fn _mm_maskz_cvtpd_ph(k: __mmask8, a: __m128d) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h {
     _mm256_mask_cvtpd_ph(_mm_setzero_ph(), 0xff, a)
 }
@@ -13237,7 +13348,7 @@ pub fn _mm256_cvtpd_ph(a: __m256d) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h {
     unsafe { vcvtpd2ph_256(a, src, k) }
 }
@@ -13250,7 +13361,7 @@ pub fn _mm256_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m256d) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h {
     _mm256_mask_cvtpd_ph(_mm_setzero_ph(), k, a)
 }
@@ -13262,7 +13373,7 @@ pub fn _mm256_maskz_cvtpd_ph(k: __mmask8, a: __m256d) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h {
     _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), 0xff, a)
 }
@@ -13275,7 +13386,7 @@ pub fn _mm512_cvtpd_ph(a: __m512d) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h {
     unsafe { vcvtpd2ph_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -13288,7 +13399,7 @@ pub fn _mm512_mask_cvtpd_ph(src: __m128h, k: __mmask8, a: __m512d) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h {
     _mm512_mask_cvtpd_ph(f16x8::ZERO.as_m128h(), k, a)
 }
@@ -13309,7 +13420,7 @@ pub fn _mm512_maskz_cvtpd_ph(k: __mmask8, a: __m512d) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a)
@@ -13332,7 +13443,7 @@ pub fn _mm512_cvt_roundpd_ph<const ROUNDING: i32>(a: __m512d) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -13361,7 +13472,7 @@ pub fn _mm512_mask_cvt_roundpd_ph<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtpd2ph, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundpd_ph::<ROUNDING>(f16x8::ZERO.as_m128h(), k, a)
@@ -13375,7 +13486,7 @@ pub fn _mm512_maskz_cvt_roundpd_ph<const ROUNDING: i32>(k: __mmask8, a: __m512d)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsd2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h {
     _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), 0xff, a, b)
 }
@@ -13389,7 +13500,7 @@ pub fn _mm_cvtsd_sh(a: __m128h, b: __m128d) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsd2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
     unsafe { vcvtsd2sh(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -13403,7 +13514,7 @@ pub fn _mm_mask_cvtsd_sh(src: __m128h, k: __mmask8, a: __m128h, b: __m128d) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsd2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
     _mm_mask_cvtsd_sh(f16x8::ZERO.as_m128h(), k, a, b)
 }
@@ -13425,7 +13536,7 @@ pub fn _mm_maskz_cvtsd_sh(k: __mmask8, a: __m128h, b: __m128d) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128h {
     static_assert_rounding!(ROUNDING);
     _mm_mask_cvt_roundsd_sh::<ROUNDING>(f16x8::ZERO.as_m128h(), 0xff, a, b)
@@ -13449,7 +13560,7 @@ pub fn _mm_cvt_roundsd_sh<const ROUNDING: i32>(a: __m128h, b: __m128d) -> __m128
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>(
     src: __m128h,
     k: __mmask8,
@@ -13480,7 +13591,7 @@ pub fn _mm_mask_cvt_roundsd_sh<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsd2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
     k: __mmask8,
     a: __m128h,
@@ -13497,7 +13608,7 @@ pub fn _mm_maskz_cvt_roundsd_sh<const ROUNDING: i32>(
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
     _mm_mask_cvtph_epi16(_mm_undefined_si128(), 0xff, a)
 }
@@ -13510,7 +13621,7 @@ pub fn _mm_cvtph_epi16(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvtph2w_128(a, src.as_i16x8(), k)) }
 }
@@ -13522,7 +13633,7 @@ pub fn _mm_mask_cvtph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvtph_epi16(_mm_setzero_si128(), k, a)
 }
@@ -13534,7 +13645,7 @@ pub fn _mm_maskz_cvtph_epi16(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i {
     _mm256_mask_cvtph_epi16(_mm256_undefined_si256(), 0xffff, a)
 }
@@ -13547,7 +13658,7 @@ pub fn _mm256_cvtph_epi16(a: __m256h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
     unsafe { transmute(vcvtph2w_256(a, src.as_i16x16(), k)) }
 }
@@ -13559,7 +13670,7 @@ pub fn _mm256_mask_cvtph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i {
     _mm256_mask_cvtph_epi16(_mm256_setzero_si256(), k, a)
 }
@@ -13571,7 +13682,7 @@ pub fn _mm256_maskz_cvtph_epi16(k: __mmask16, a: __m256h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i {
     _mm512_mask_cvtph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
 }
@@ -13584,7 +13695,7 @@ pub fn _mm512_cvtph_epi16(a: __m512h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
     unsafe {
         transmute(vcvtph2w_512(
@@ -13603,7 +13714,7 @@ pub fn _mm512_mask_cvtph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i {
     _mm512_mask_cvtph_epi16(_mm512_setzero_si512(), k, a)
 }
@@ -13624,7 +13735,7 @@ pub fn _mm512_maskz_cvtph_epi16(k: __mmask32, a: __m512h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_undefined_epi32(), 0xffffffff, a)
@@ -13647,7 +13758,7 @@ pub fn _mm512_cvt_roundph_epi16<const ROUNDING: i32>(a: __m512h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
     src: __m512i,
     k: __mmask32,
@@ -13675,7 +13786,7 @@ pub fn _mm512_mask_cvt_roundph_epi16<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2w, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m512h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epi16::<ROUNDING>(_mm512_setzero_si512(), k, a)
@@ -13688,7 +13799,7 @@ pub fn _mm512_maskz_cvt_roundph_epi16<const ROUNDING: i32>(k: __mmask32, a: __m5
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
     _mm_mask_cvtph_epu16(_mm_undefined_si128(), 0xff, a)
 }
@@ -13701,7 +13812,7 @@ pub fn _mm_cvtph_epu16(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvtph2uw_128(a, src.as_u16x8(), k)) }
 }
@@ -13713,7 +13824,7 @@ pub fn _mm_mask_cvtph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvtph_epu16(_mm_setzero_si128(), k, a)
 }
@@ -13725,7 +13836,7 @@ pub fn _mm_maskz_cvtph_epu16(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i {
     _mm256_mask_cvtph_epu16(_mm256_undefined_si256(), 0xffff, a)
 }
@@ -13738,7 +13849,7 @@ pub fn _mm256_cvtph_epu16(a: __m256h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
     unsafe { transmute(vcvtph2uw_256(a, src.as_u16x16(), k)) }
 }
@@ -13750,7 +13861,7 @@ pub fn _mm256_mask_cvtph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i {
     _mm256_mask_cvtph_epu16(_mm256_setzero_si256(), k, a)
 }
@@ -13762,7 +13873,7 @@ pub fn _mm256_maskz_cvtph_epu16(k: __mmask16, a: __m256h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i {
     _mm512_mask_cvtph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
 }
@@ -13775,7 +13886,7 @@ pub fn _mm512_cvtph_epu16(a: __m512h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
     unsafe {
         transmute(vcvtph2uw_512(
@@ -13794,7 +13905,7 @@ pub fn _mm512_mask_cvtph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
     _mm512_mask_cvtph_epu16(_mm512_setzero_si512(), k, a)
 }
@@ -13809,7 +13920,7 @@ pub fn _mm512_maskz_cvtph_epu16(k: __mmask32, a: __m512h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
@@ -13826,7 +13937,7 @@ pub fn _mm512_cvt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>(
     src: __m512i,
     k: __mmask32,
@@ -13848,7 +13959,7 @@ pub fn _mm512_mask_cvt_roundph_epu16<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uw, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
@@ -13861,7 +13972,7 @@ pub fn _mm512_maskz_cvt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
     _mm_mask_cvttph_epi16(_mm_undefined_si128(), 0xff, a)
 }
@@ -13874,7 +13985,7 @@ pub fn _mm_cvttph_epi16(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvttph2w_128(a, src.as_i16x8(), k)) }
 }
@@ -13887,7 +13998,7 @@ pub fn _mm_mask_cvttph_epi16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvttph_epi16(_mm_setzero_si128(), k, a)
 }
@@ -13899,7 +14010,7 @@ pub fn _mm_maskz_cvttph_epi16(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i {
     _mm256_mask_cvttph_epi16(_mm256_undefined_si256(), 0xffff, a)
 }
@@ -13912,7 +14023,7 @@ pub fn _mm256_cvttph_epi16(a: __m256h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
     unsafe { transmute(vcvttph2w_256(a, src.as_i16x16(), k)) }
 }
@@ -13925,7 +14036,7 @@ pub fn _mm256_mask_cvttph_epi16(src: __m256i, k: __mmask16, a: __m256h) -> __m25
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i {
     _mm256_mask_cvttph_epi16(_mm256_setzero_si256(), k, a)
 }
@@ -13937,7 +14048,7 @@ pub fn _mm256_maskz_cvttph_epi16(k: __mmask16, a: __m256h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i {
     _mm512_mask_cvttph_epi16(_mm512_undefined_epi32(), 0xffffffff, a)
 }
@@ -13950,7 +14061,7 @@ pub fn _mm512_cvttph_epi16(a: __m512h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
     unsafe {
         transmute(vcvttph2w_512(
@@ -13970,7 +14081,7 @@ pub fn _mm512_mask_cvttph_epi16(src: __m512i, k: __mmask32, a: __m512h) -> __m51
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2w))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i {
     _mm512_mask_cvttph_epi16(_mm512_setzero_si512(), k, a)
 }
@@ -13985,7 +14096,7 @@ pub fn _mm512_maskz_cvttph_epi16(k: __mmask32, a: __m512h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
@@ -14002,7 +14113,7 @@ pub fn _mm512_cvtt_roundph_epi16<const SAE: i32>(a: __m512h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
     src: __m512i,
     k: __mmask32,
@@ -14025,7 +14136,7 @@ pub fn _mm512_mask_cvtt_roundph_epi16<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2w, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epi16::<SAE>(_mm512_setzero_si512(), k, a)
@@ -14038,7 +14149,7 @@ pub fn _mm512_maskz_cvtt_roundph_epi16<const SAE: i32>(k: __mmask32, a: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
     _mm_mask_cvttph_epu16(_mm_undefined_si128(), 0xff, a)
 }
@@ -14051,7 +14162,7 @@ pub fn _mm_cvttph_epu16(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvttph2uw_128(a, src.as_u16x8(), k)) }
 }
@@ -14064,7 +14175,7 @@ pub fn _mm_mask_cvttph_epu16(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvttph_epu16(_mm_setzero_si128(), k, a)
 }
@@ -14076,7 +14187,7 @@ pub fn _mm_maskz_cvttph_epu16(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
     _mm256_mask_cvttph_epu16(_mm256_undefined_si256(), 0xffff, a)
 }
@@ -14089,7 +14200,7 @@ pub fn _mm256_cvttph_epu16(a: __m256h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m256i {
     unsafe { transmute(vcvttph2uw_256(a, src.as_u16x16(), k)) }
 }
@@ -14102,7 +14213,7 @@ pub fn _mm256_mask_cvttph_epu16(src: __m256i, k: __mmask16, a: __m256h) -> __m25
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
     _mm256_mask_cvttph_epu16(_mm256_setzero_si256(), k, a)
 }
@@ -14114,7 +14225,7 @@ pub fn _mm256_maskz_cvttph_epu16(k: __mmask16, a: __m256h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
     _mm512_mask_cvttph_epu16(_mm512_undefined_epi32(), 0xffffffff, a)
 }
@@ -14127,7 +14238,7 @@ pub fn _mm512_cvttph_epu16(a: __m512h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m512i {
     unsafe {
         transmute(vcvttph2uw_512(
@@ -14147,7 +14258,7 @@ pub fn _mm512_mask_cvttph_epu16(src: __m512i, k: __mmask32, a: __m512h) -> __m51
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uw))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
     _mm512_mask_cvttph_epu16(_mm512_setzero_si512(), k, a)
 }
@@ -14162,7 +14273,7 @@ pub fn _mm512_maskz_cvttph_epu16(k: __mmask32, a: __m512h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_undefined_epi32(), 0xffffffff, a)
@@ -14179,7 +14290,7 @@ pub fn _mm512_cvtt_roundph_epu16<const SAE: i32>(a: __m512h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
     src: __m512i,
     k: __mmask32,
@@ -14202,7 +14313,7 @@ pub fn _mm512_mask_cvtt_roundph_epu16<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uw, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epu16::<SAE>(_mm512_setzero_si512(), k, a)
@@ -14215,7 +14326,7 @@ pub fn _mm512_maskz_cvtt_roundph_epu16<const SAE: i32>(k: __mmask32, a: __m512h)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i {
     _mm_mask_cvtph_epi32(_mm_undefined_si128(), 0xff, a)
 }
@@ -14227,7 +14338,7 @@ pub fn _mm_cvtph_epi32(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvtph2dq_128(a, src.as_i32x4(), k)) }
 }
@@ -14239,7 +14350,7 @@ pub fn _mm_mask_cvtph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvtph_epi32(_mm_setzero_si128(), k, a)
 }
@@ -14251,7 +14362,7 @@ pub fn _mm_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i {
     _mm256_mask_cvtph_epi32(_mm256_undefined_si256(), 0xff, a)
 }
@@ -14263,7 +14374,7 @@ pub fn _mm256_cvtph_epi32(a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
     unsafe { transmute(vcvtph2dq_256(a, src.as_i32x8(), k)) }
 }
@@ -14275,7 +14386,7 @@ pub fn _mm256_mask_cvtph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i {
     _mm256_mask_cvtph_epi32(_mm256_setzero_si256(), k, a)
 }
@@ -14287,7 +14398,7 @@ pub fn _mm256_maskz_cvtph_epi32(k: __mmask8, a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i {
     _mm512_mask_cvtph_epi32(_mm512_undefined_epi32(), 0xffff, a)
 }
@@ -14299,7 +14410,7 @@ pub fn _mm512_cvtph_epi32(a: __m256h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
     unsafe {
         transmute(vcvtph2dq_512(
@@ -14318,7 +14429,7 @@ pub fn _mm512_mask_cvtph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i {
     _mm512_mask_cvtph_epi32(_mm512_setzero_si512(), k, a)
 }
@@ -14339,7 +14450,7 @@ pub fn _mm512_maskz_cvtph_epi32(k: __mmask16, a: __m256h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
@@ -14361,7 +14472,7 @@ pub fn _mm512_cvt_roundph_epi32<const ROUNDING: i32>(a: __m256h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>(
     src: __m512i,
     k: __mmask16,
@@ -14389,7 +14500,7 @@ pub fn _mm512_mask_cvt_roundph_epi32<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2dq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epi32::<ROUNDING>(_mm512_setzero_si512(), k, a)
@@ -14402,7 +14513,7 @@ pub fn _mm512_maskz_cvt_roundph_epi32<const ROUNDING: i32>(k: __mmask16, a: __m2
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2si))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtsh_i32(a: __m128h) -> i32 {
     unsafe { vcvtsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -14423,7 +14534,7 @@ pub fn _mm_cvtsh_i32(a: __m128h) -> i32 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -14438,7 +14549,7 @@ pub fn _mm_cvt_roundsh_i32<const ROUNDING: i32>(a: __m128h) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i {
     _mm_mask_cvtph_epu32(_mm_undefined_si128(), 0xff, a)
 }
@@ -14450,7 +14561,7 @@ pub fn _mm_cvtph_epu32(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvtph2udq_128(a, src.as_u32x4(), k)) }
 }
@@ -14462,7 +14573,7 @@ pub fn _mm_mask_cvtph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvtph_epu32(_mm_setzero_si128(), k, a)
 }
@@ -14474,7 +14585,7 @@ pub fn _mm_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i {
     _mm256_mask_cvtph_epu32(_mm256_undefined_si256(), 0xff, a)
 }
@@ -14486,7 +14597,7 @@ pub fn _mm256_cvtph_epu32(a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
     unsafe { transmute(vcvtph2udq_256(a, src.as_u32x8(), k)) }
 }
@@ -14498,7 +14609,7 @@ pub fn _mm256_mask_cvtph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i {
     _mm256_mask_cvtph_epu32(_mm256_setzero_si256(), k, a)
 }
@@ -14510,7 +14621,7 @@ pub fn _mm256_maskz_cvtph_epu32(k: __mmask8, a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i {
     _mm512_mask_cvtph_epu32(_mm512_undefined_epi32(), 0xffff, a)
 }
@@ -14522,7 +14633,7 @@ pub fn _mm512_cvtph_epu32(a: __m256h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
     unsafe {
         transmute(vcvtph2udq_512(
@@ -14541,7 +14652,7 @@ pub fn _mm512_mask_cvtph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i {
     _mm512_mask_cvtph_epu32(_mm512_setzero_si512(), k, a)
 }
@@ -14562,7 +14673,7 @@ pub fn _mm512_maskz_cvtph_epu32(k: __mmask16, a: __m256h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_undefined_epi32(), 0xffff, a)
@@ -14584,7 +14695,7 @@ pub fn _mm512_cvt_roundph_epu32<const ROUNDING: i32>(a: __m256h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>(
     src: __m512i,
     k: __mmask16,
@@ -14612,7 +14723,7 @@ pub fn _mm512_mask_cvt_roundph_epu32<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2udq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m256h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epu32::<ROUNDING>(_mm512_setzero_si512(), k, a)
@@ -14625,7 +14736,7 @@ pub fn _mm512_maskz_cvt_roundph_epu32<const ROUNDING: i32>(k: __mmask16, a: __m2
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2usi))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtsh_u32(a: __m128h) -> u32 {
     unsafe { vcvtsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -14640,7 +14751,7 @@ pub fn _mm_cvtsh_u32(a: __m128h) -> u32 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2usi, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
     unsafe {
         static_assert_rounding!(SAE);
@@ -14655,7 +14766,7 @@ pub fn _mm_cvt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i {
     _mm_mask_cvttph_epi32(_mm_undefined_si128(), 0xff, a)
 }
@@ -14667,7 +14778,7 @@ pub fn _mm_cvttph_epi32(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvttph2dq_128(a, src.as_i32x4(), k)) }
 }
@@ -14679,7 +14790,7 @@ pub fn _mm_mask_cvttph_epi32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvttph_epi32(_mm_setzero_si128(), k, a)
 }
@@ -14691,7 +14802,7 @@ pub fn _mm_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i {
     _mm256_mask_cvttph_epi32(_mm256_undefined_si256(), 0xff, a)
 }
@@ -14703,7 +14814,7 @@ pub fn _mm256_cvttph_epi32(a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
     unsafe { transmute(vcvttph2dq_256(a, src.as_i32x8(), k)) }
 }
@@ -14715,7 +14826,7 @@ pub fn _mm256_mask_cvttph_epi32(src: __m256i, k: __mmask8, a: __m128h) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i {
     _mm256_mask_cvttph_epi32(_mm256_setzero_si256(), k, a)
 }
@@ -14727,7 +14838,7 @@ pub fn _mm256_maskz_cvttph_epi32(k: __mmask8, a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i {
     _mm512_mask_cvttph_epi32(_mm512_undefined_epi32(), 0xffff, a)
 }
@@ -14739,7 +14850,7 @@ pub fn _mm512_cvttph_epi32(a: __m256h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
     unsafe {
         transmute(vcvttph2dq_512(
@@ -14758,7 +14869,7 @@ pub fn _mm512_mask_cvttph_epi32(src: __m512i, k: __mmask16, a: __m256h) -> __m51
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2dq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i {
     _mm512_mask_cvttph_epi32(_mm512_setzero_si512(), k, a)
 }
@@ -14773,7 +14884,7 @@ pub fn _mm512_maskz_cvttph_epi32(k: __mmask16, a: __m256h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
@@ -14789,7 +14900,7 @@ pub fn _mm512_cvtt_roundph_epi32<const SAE: i32>(a: __m256h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>(
     src: __m512i,
     k: __mmask16,
@@ -14811,7 +14922,7 @@ pub fn _mm512_mask_cvtt_roundph_epi32<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2dq, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epi32::<SAE>(_mm512_setzero_si512(), k, a)
@@ -14824,7 +14935,7 @@ pub fn _mm512_maskz_cvtt_roundph_epi32<const SAE: i32>(k: __mmask16, a: __m256h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttsh2si))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttsh_i32(a: __m128h) -> i32 {
     unsafe { vcvttsh2si32(a, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -14839,7 +14950,7 @@ pub fn _mm_cvttsh_i32(a: __m128h) -> i32 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 {
     unsafe {
         static_assert_sae!(SAE);
@@ -14854,7 +14965,7 @@ pub fn _mm_cvtt_roundsh_i32<const SAE: i32>(a: __m128h) -> i32 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i {
     _mm_mask_cvttph_epu32(_mm_undefined_si128(), 0xff, a)
 }
@@ -14866,7 +14977,7 @@ pub fn _mm_cvttph_epu32(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvttph2udq_128(a, src.as_u32x4(), k)) }
 }
@@ -14878,7 +14989,7 @@ pub fn _mm_mask_cvttph_epu32(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvttph_epu32(_mm_setzero_si128(), k, a)
 }
@@ -14890,7 +15001,7 @@ pub fn _mm_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i {
     _mm256_mask_cvttph_epu32(_mm256_undefined_si256(), 0xff, a)
 }
@@ -14902,7 +15013,7 @@ pub fn _mm256_cvttph_epu32(a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
     unsafe { transmute(vcvttph2udq_256(a, src.as_u32x8(), k)) }
 }
@@ -14914,7 +15025,7 @@ pub fn _mm256_mask_cvttph_epu32(src: __m256i, k: __mmask8, a: __m128h) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i {
     _mm256_mask_cvttph_epu32(_mm256_setzero_si256(), k, a)
 }
@@ -14926,7 +15037,7 @@ pub fn _mm256_maskz_cvttph_epu32(k: __mmask8, a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i {
     _mm512_mask_cvttph_epu32(_mm512_undefined_epi32(), 0xffff, a)
 }
@@ -14938,7 +15049,7 @@ pub fn _mm512_cvttph_epu32(a: __m256h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m512i {
     unsafe {
         transmute(vcvttph2udq_512(
@@ -14957,7 +15068,7 @@ pub fn _mm512_mask_cvttph_epu32(src: __m512i, k: __mmask16, a: __m256h) -> __m51
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2udq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i {
     _mm512_mask_cvttph_epu32(_mm512_setzero_si512(), k, a)
 }
@@ -14972,7 +15083,7 @@ pub fn _mm512_maskz_cvttph_epu32(k: __mmask16, a: __m256h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_undefined_epi32(), 0xffff, a)
@@ -14988,7 +15099,7 @@ pub fn _mm512_cvtt_roundph_epu32<const SAE: i32>(a: __m256h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>(
     src: __m512i,
     k: __mmask16,
@@ -15010,7 +15121,7 @@ pub fn _mm512_mask_cvtt_roundph_epu32<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2udq, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epu32::<SAE>(_mm512_setzero_si512(), k, a)
@@ -15023,7 +15134,7 @@ pub fn _mm512_maskz_cvtt_roundph_epu32<const SAE: i32>(k: __mmask16, a: __m256h)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttsh2usi))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttsh_u32(a: __m128h) -> u32 {
     unsafe { vcvttsh2usi32(a, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -15038,7 +15149,7 @@ pub fn _mm_cvttsh_u32(a: __m128h) -> u32 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
     unsafe {
         static_assert_sae!(SAE);
@@ -15053,7 +15164,7 @@ pub fn _mm_cvtt_roundsh_u32<const SAE: i32>(a: __m128h) -> u32 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i {
     _mm_mask_cvtph_epi64(_mm_undefined_si128(), 0xff, a)
 }
@@ -15065,7 +15176,7 @@ pub fn _mm_cvtph_epi64(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvtph2qq_128(a, src.as_i64x2(), k)) }
 }
@@ -15077,7 +15188,7 @@ pub fn _mm_mask_cvtph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvtph_epi64(_mm_setzero_si128(), k, a)
 }
@@ -15089,7 +15200,7 @@ pub fn _mm_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i {
     _mm256_mask_cvtph_epi64(_mm256_undefined_si256(), 0xff, a)
 }
@@ -15101,7 +15212,7 @@ pub fn _mm256_cvtph_epi64(a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
     unsafe { transmute(vcvtph2qq_256(a, src.as_i64x4(), k)) }
 }
@@ -15113,7 +15224,7 @@ pub fn _mm256_mask_cvtph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i {
     _mm256_mask_cvtph_epi64(_mm256_setzero_si256(), k, a)
 }
@@ -15125,7 +15236,7 @@ pub fn _mm256_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i {
     _mm512_mask_cvtph_epi64(_mm512_undefined_epi32(), 0xff, a)
 }
@@ -15137,7 +15248,7 @@ pub fn _mm512_cvtph_epi64(a: __m128h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
     unsafe {
         transmute(vcvtph2qq_512(
@@ -15156,7 +15267,7 @@ pub fn _mm512_mask_cvtph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i {
     _mm512_mask_cvtph_epi64(_mm512_setzero_si512(), k, a)
 }
@@ -15177,7 +15288,7 @@ pub fn _mm512_maskz_cvtph_epi64(k: __mmask8, a: __m128h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
@@ -15199,7 +15310,7 @@ pub fn _mm512_cvt_roundph_epi64<const ROUNDING: i32>(a: __m128h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>(
     src: __m512i,
     k: __mmask8,
@@ -15227,7 +15338,7 @@ pub fn _mm512_mask_cvt_roundph_epi64<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2qq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
@@ -15240,7 +15351,7 @@ pub fn _mm512_maskz_cvt_roundph_epi64<const ROUNDING: i32>(k: __mmask8, a: __m12
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i {
     _mm_mask_cvtph_epu64(_mm_undefined_si128(), 0xff, a)
 }
@@ -15252,7 +15363,7 @@ pub fn _mm_cvtph_epu64(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvtph2uqq_128(a, src.as_u64x2(), k)) }
 }
@@ -15264,7 +15375,7 @@ pub fn _mm_mask_cvtph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvtph_epu64(_mm_setzero_si128(), k, a)
 }
@@ -15276,7 +15387,7 @@ pub fn _mm_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i {
     _mm256_mask_cvtph_epu64(_mm256_undefined_si256(), 0xff, a)
 }
@@ -15288,7 +15399,7 @@ pub fn _mm256_cvtph_epu64(a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
     unsafe { transmute(vcvtph2uqq_256(a, src.as_u64x4(), k)) }
 }
@@ -15300,7 +15411,7 @@ pub fn _mm256_mask_cvtph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i {
     _mm256_mask_cvtph_epu64(_mm256_setzero_si256(), k, a)
 }
@@ -15312,7 +15423,7 @@ pub fn _mm256_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i {
     _mm512_mask_cvtph_epu64(_mm512_undefined_epi32(), 0xff, a)
 }
@@ -15324,7 +15435,7 @@ pub fn _mm512_cvtph_epu64(a: __m128h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
     unsafe {
         transmute(vcvtph2uqq_512(
@@ -15343,7 +15454,7 @@ pub fn _mm512_mask_cvtph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i {
     _mm512_mask_cvtph_epu64(_mm512_setzero_si512(), k, a)
 }
@@ -15364,7 +15475,7 @@ pub fn _mm512_maskz_cvtph_epu64(k: __mmask8, a: __m128h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
@@ -15386,7 +15497,7 @@ pub fn _mm512_cvt_roundph_epu64<const ROUNDING: i32>(a: __m128h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>(
     src: __m512i,
     k: __mmask8,
@@ -15414,7 +15525,7 @@ pub fn _mm512_mask_cvt_roundph_epu64<const ROUNDING: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2uqq, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m128h) -> __m512i {
     static_assert_rounding!(ROUNDING);
     _mm512_mask_cvt_roundph_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
@@ -15427,7 +15538,7 @@ pub fn _mm512_maskz_cvt_roundph_epu64<const ROUNDING: i32>(k: __mmask8, a: __m12
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i {
     _mm_mask_cvttph_epi64(_mm_undefined_si128(), 0xff, a)
 }
@@ -15439,7 +15550,7 @@ pub fn _mm_cvttph_epi64(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvttph2qq_128(a, src.as_i64x2(), k)) }
 }
@@ -15451,7 +15562,7 @@ pub fn _mm_mask_cvttph_epi64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvttph_epi64(_mm_setzero_si128(), k, a)
 }
@@ -15463,7 +15574,7 @@ pub fn _mm_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i {
     _mm256_mask_cvttph_epi64(_mm256_undefined_si256(), 0xff, a)
 }
@@ -15475,7 +15586,7 @@ pub fn _mm256_cvttph_epi64(a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
     unsafe { transmute(vcvttph2qq_256(a, src.as_i64x4(), k)) }
 }
@@ -15487,7 +15598,7 @@ pub fn _mm256_mask_cvttph_epi64(src: __m256i, k: __mmask8, a: __m128h) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i {
     _mm256_mask_cvttph_epi64(_mm256_setzero_si256(), k, a)
 }
@@ -15499,7 +15610,7 @@ pub fn _mm256_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i {
     _mm512_mask_cvttph_epi64(_mm512_undefined_epi32(), 0xff, a)
 }
@@ -15511,7 +15622,7 @@ pub fn _mm512_cvttph_epi64(a: __m128h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
     unsafe {
         transmute(vcvttph2qq_512(
@@ -15530,7 +15641,7 @@ pub fn _mm512_mask_cvttph_epi64(src: __m512i, k: __mmask8, a: __m128h) -> __m512
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2qq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i {
     _mm512_mask_cvttph_epi64(_mm512_setzero_si512(), k, a)
 }
@@ -15545,7 +15656,7 @@ pub fn _mm512_maskz_cvttph_epi64(k: __mmask8, a: __m128h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
@@ -15561,7 +15672,7 @@ pub fn _mm512_cvtt_roundph_epi64<const SAE: i32>(a: __m128h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>(
     src: __m512i,
     k: __mmask8,
@@ -15583,7 +15694,7 @@ pub fn _mm512_mask_cvtt_roundph_epi64<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2qq, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epi64::<SAE>(_mm512_setzero_si512(), k, a)
@@ -15596,7 +15707,7 @@ pub fn _mm512_maskz_cvtt_roundph_epi64<const SAE: i32>(k: __mmask8, a: __m128h)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i {
     _mm_mask_cvttph_epu64(_mm_undefined_si128(), 0xff, a)
 }
@@ -15608,7 +15719,7 @@ pub fn _mm_cvttph_epu64(a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
     unsafe { transmute(vcvttph2uqq_128(a, src.as_u64x2(), k)) }
 }
@@ -15620,7 +15731,7 @@ pub fn _mm_mask_cvttph_epu64(src: __m128i, k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i {
     _mm_mask_cvttph_epu64(_mm_setzero_si128(), k, a)
 }
@@ -15632,7 +15743,7 @@ pub fn _mm_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m128i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i {
     _mm256_mask_cvttph_epu64(_mm256_undefined_si256(), 0xff, a)
 }
@@ -15644,7 +15755,7 @@ pub fn _mm256_cvttph_epu64(a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256i {
     unsafe { transmute(vcvttph2uqq_256(a, src.as_u64x4(), k)) }
 }
@@ -15656,7 +15767,7 @@ pub fn _mm256_mask_cvttph_epu64(src: __m256i, k: __mmask8, a: __m128h) -> __m256
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i {
     _mm256_mask_cvttph_epu64(_mm256_setzero_si256(), k, a)
 }
@@ -15668,7 +15779,7 @@ pub fn _mm256_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m256i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i {
     _mm512_mask_cvttph_epu64(_mm512_undefined_epi32(), 0xff, a)
 }
@@ -15680,7 +15791,7 @@ pub fn _mm512_cvttph_epu64(a: __m128h) -> __m512i {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512i {
     unsafe {
         transmute(vcvttph2uqq_512(
@@ -15699,7 +15810,7 @@ pub fn _mm512_mask_cvttph_epu64(src: __m512i, k: __mmask8, a: __m128h) -> __m512
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i {
     _mm512_mask_cvttph_epu64(_mm512_setzero_si512(), k, a)
 }
@@ -15714,7 +15825,7 @@ pub fn _mm512_maskz_cvttph_epu64(k: __mmask8, a: __m128h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
@@ -15730,7 +15841,7 @@ pub fn _mm512_cvtt_roundph_epu64<const SAE: i32>(a: __m128h) -> __m512i {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>(
     src: __m512i,
     k: __mmask8,
@@ -15752,7 +15863,7 @@ pub fn _mm512_mask_cvtt_roundph_epu64<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttph2uqq, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512i {
     static_assert_sae!(SAE);
     _mm512_mask_cvtt_roundph_epu64::<SAE>(_mm512_setzero_si512(), k, a)
@@ -15765,7 +15876,7 @@ pub fn _mm512_maskz_cvtt_roundph_epu64<const SAE: i32>(k: __mmask8, a: __m128h)
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2psx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 {
     _mm_mask_cvtxph_ps(_mm_setzero_ps(), 0xff, a)
 }
@@ -15778,7 +15889,7 @@ pub fn _mm_cvtxph_ps(a: __m128h) -> __m128 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2psx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 {
     unsafe { vcvtph2psx_128(a, src, k) }
 }
@@ -15791,7 +15902,7 @@ pub fn _mm_mask_cvtxph_ps(src: __m128, k: __mmask8, a: __m128h) -> __m128 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2psx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 {
     _mm_mask_cvtxph_ps(_mm_setzero_ps(), k, a)
 }
@@ -15803,7 +15914,7 @@ pub fn _mm_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m128 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2psx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 {
     _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), 0xff, a)
 }
@@ -15816,7 +15927,7 @@ pub fn _mm256_cvtxph_ps(a: __m128h) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2psx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 {
     unsafe { vcvtph2psx_256(a, src, k) }
 }
@@ -15829,7 +15940,7 @@ pub fn _mm256_mask_cvtxph_ps(src: __m256, k: __mmask8, a: __m128h) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2psx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 {
     _mm256_mask_cvtxph_ps(_mm256_setzero_ps(), k, a)
 }
@@ -15841,7 +15952,7 @@ pub fn _mm256_maskz_cvtxph_ps(k: __mmask8, a: __m128h) -> __m256 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2psx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 {
     _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), 0xffff, a)
 }
@@ -15854,7 +15965,7 @@ pub fn _mm512_cvtxph_ps(a: __m256h) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2psx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 {
     unsafe { vcvtph2psx_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -15867,7 +15978,7 @@ pub fn _mm512_mask_cvtxph_ps(src: __m512, k: __mmask16, a: __m256h) -> __m512 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2psx))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 {
     _mm512_mask_cvtxph_ps(_mm512_setzero_ps(), k, a)
 }
@@ -15882,7 +15993,7 @@ pub fn _mm512_maskz_cvtxph_ps(k: __mmask16, a: __m256h) -> __m512 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 {
     static_assert_sae!(SAE);
     _mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), 0xffff, a)
@@ -15899,7 +16010,7 @@ pub fn _mm512_cvtx_roundph_ps<const SAE: i32>(a: __m256h) -> __m512 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>(
     src: __m512,
     k: __mmask16,
@@ -15922,7 +16033,7 @@ pub fn _mm512_mask_cvtx_roundph_ps<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2psx, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) -> __m512 {
     static_assert_sae!(SAE);
     _mm512_mask_cvtx_roundph_ps::<SAE>(_mm512_setzero_ps(), k, a)
@@ -15936,7 +16047,7 @@ pub fn _mm512_maskz_cvtx_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256h) ->
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2ss))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 {
     _mm_mask_cvtsh_ss(a, 0xff, a, b)
 }
@@ -15950,7 +16061,7 @@ pub fn _mm_cvtsh_ss(a: __m128, b: __m128h) -> __m128 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2ss))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m128 {
     unsafe { vcvtsh2ss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -15964,7 +16075,7 @@ pub fn _mm_mask_cvtsh_ss(src: __m128, k: __mmask8, a: __m128, b: __m128h) -> __m
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2ss))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
     _mm_mask_cvtsh_ss(_mm_set_ss(0.0), k, a, b)
 }
@@ -15980,7 +16091,7 @@ pub fn _mm_maskz_cvtsh_ss(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 {
     static_assert_sae!(SAE);
     _mm_mask_cvt_roundsh_ss::<SAE>(_mm_undefined_ps(), 0xff, a, b)
@@ -15998,7 +16109,7 @@ pub fn _mm_cvt_roundsh_ss<const SAE: i32>(a: __m128, b: __m128h) -> __m128 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>(
     src: __m128,
     k: __mmask8,
@@ -16023,7 +16134,7 @@ pub fn _mm_mask_cvt_roundsh_ss<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2ss, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128h) -> __m128 {
     static_assert_sae!(SAE);
     _mm_mask_cvt_roundsh_ss::<SAE>(_mm_set_ss(0.0), k, a, b)
@@ -16036,7 +16147,7 @@ pub fn _mm_maskz_cvt_roundsh_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m12
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2pd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtph_pd(a: __m128h) -> __m128d {
     _mm_mask_cvtph_pd(_mm_setzero_pd(), 0xff, a)
 }
@@ -16049,7 +16160,7 @@ pub fn _mm_cvtph_pd(a: __m128h) -> __m128d {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2pd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d {
     unsafe { vcvtph2pd_128(a, src, k) }
 }
@@ -16062,7 +16173,7 @@ pub fn _mm_mask_cvtph_pd(src: __m128d, k: __mmask8, a: __m128h) -> __m128d {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2pd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d {
     _mm_mask_cvtph_pd(_mm_setzero_pd(), k, a)
 }
@@ -16074,7 +16185,7 @@ pub fn _mm_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m128d {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2pd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d {
     _mm256_mask_cvtph_pd(_mm256_setzero_pd(), 0xff, a)
 }
@@ -16087,7 +16198,7 @@ pub fn _mm256_cvtph_pd(a: __m128h) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2pd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d {
     unsafe { vcvtph2pd_256(a, src, k) }
 }
@@ -16100,7 +16211,7 @@ pub fn _mm256_mask_cvtph_pd(src: __m256d, k: __mmask8, a: __m128h) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx512fp16,avx512vl")]
 #[cfg_attr(test, assert_instr(vcvtph2pd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d {
     _mm256_mask_cvtph_pd(_mm256_setzero_pd(), k, a)
 }
@@ -16112,7 +16223,7 @@ pub fn _mm256_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m256d {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2pd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d {
     _mm512_mask_cvtph_pd(_mm512_setzero_pd(), 0xff, a)
 }
@@ -16125,7 +16236,7 @@ pub fn _mm512_cvtph_pd(a: __m128h) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2pd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d {
     unsafe { vcvtph2pd_512(a, src, k, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -16138,7 +16249,7 @@ pub fn _mm512_mask_cvtph_pd(src: __m512d, k: __mmask8, a: __m128h) -> __m512d {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2pd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d {
     _mm512_mask_cvtph_pd(_mm512_setzero_pd(), k, a)
 }
@@ -16153,7 +16264,7 @@ pub fn _mm512_maskz_cvtph_pd(k: __mmask8, a: __m128h) -> __m512d {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d {
     static_assert_sae!(SAE);
     _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), 0xff, a)
@@ -16170,7 +16281,7 @@ pub fn _mm512_cvt_roundph_pd<const SAE: i32>(a: __m128h) -> __m512d {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>(
     src: __m512d,
     k: __mmask8,
@@ -16193,7 +16304,7 @@ pub fn _mm512_mask_cvt_roundph_pd<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtph2pd, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> __m512d {
     static_assert_sae!(SAE);
     _mm512_mask_cvt_roundph_pd::<SAE>(_mm512_setzero_pd(), k, a)
@@ -16207,7 +16318,7 @@ pub fn _mm512_maskz_cvt_roundph_pd<const SAE: i32>(k: __mmask8, a: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2sd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d {
     _mm_mask_cvtsh_sd(a, 0xff, a, b)
 }
@@ -16221,7 +16332,7 @@ pub fn _mm_cvtsh_sd(a: __m128d, b: __m128h) -> __m128d {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2sd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
     unsafe { vcvtsh2sd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -16234,7 +16345,7 @@ pub fn _mm_mask_cvtsh_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128h) -> _
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2sd))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
     _mm_mask_cvtsh_sd(_mm_set_sd(0.0), k, a, b)
 }
@@ -16250,7 +16361,7 @@ pub fn _mm_maskz_cvtsh_sd(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d {
     static_assert_sae!(SAE);
     _mm_mask_cvt_roundsh_sd::<SAE>(a, 0xff, a, b)
@@ -16268,7 +16379,7 @@ pub fn _mm_cvt_roundsh_sd<const SAE: i32>(a: __m128d, b: __m128h) -> __m128d {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
 #[rustc_legacy_const_generics(4)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>(
     src: __m128d,
     k: __mmask8,
@@ -16292,7 +16403,7 @@ pub fn _mm_mask_cvt_roundsh_sd<const SAE: i32>(
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2sd, SAE = 8))]
 #[rustc_legacy_const_generics(3)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128h) -> __m128d {
     static_assert_sae!(SAE);
     _mm_mask_cvt_roundsh_sd::<SAE>(_mm_set_sd(0.0), k, a, b)
@@ -16304,7 +16415,8 @@ pub fn _mm_maskz_cvt_roundsh_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m1
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_cvtsh_h(a: __m128h) -> f16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsh_h(a: __m128h) -> f16 {
     unsafe { simd_extract!(a, 0) }
 }
 
@@ -16314,7 +16426,8 @@ pub fn _mm_cvtsh_h(a: __m128h) -> f16 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm256_cvtsh_h(a: __m256h) -> f16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtsh_h(a: __m256h) -> f16 {
     unsafe { simd_extract!(a, 0) }
 }
 
@@ -16324,7 +16437,8 @@ pub fn _mm256_cvtsh_h(a: __m256h) -> f16 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm512_cvtsh_h(a: __m512h) -> f16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_cvtsh_h(a: __m512h) -> f16 {
     unsafe { simd_extract!(a, 0) }
 }
 
@@ -16333,8 +16447,9 @@ pub fn _mm512_cvtsh_h(a: __m512h) -> f16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si16)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi128_si16(a: __m128i) -> i16 {
     unsafe { simd_extract!(a.as_i16x8(), 0) }
 }
 
@@ -16343,17 +16458,25 @@ pub fn _mm_cvtsi128_si16(a: __m128i) -> i16 {
 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi16_si128)
 #[inline]
 #[target_feature(enable = "avx512fp16")]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
-pub fn _mm_cvtsi16_si128(a: i16) -> __m128i {
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi16_si128(a: i16) -> __m128i {
     unsafe { transmute(simd_insert!(i16x8::ZERO, 0, a)) }
 }
 
 #[allow(improper_ctypes)]
-unsafe extern "C" {
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.x86.avx512fp16.mask.cmp.ph.128"]
+    fn vcmpph_128(a: __m128h, b: __m128h, imm5: i32, mask: __mmask8) -> __mmask8;
+    #[link_name = "llvm.x86.avx512fp16.mask.cmp.ph.256"]
+    fn vcmpph_256(a: __m256h, b: __m256h, imm5: i32, mask: __mmask16) -> __mmask16;
+    #[link_name = "llvm.x86.avx512fp16.mask.cmp.ph.512"]
+    fn vcmpph_512(a: __m512h, b: __m512h, imm5: i32, mask: __mmask32, sae: i32) -> __mmask32;
+
     #[link_name = "llvm.x86.avx512fp16.mask.cmp.sh"]
-    fn vcmpsh(a: __m128h, b: __m128h, imm8: i32, mask: __mmask8, sae: i32) -> __mmask8;
+    fn vcmpsh(a: __m128h, b: __m128h, imm5: i32, mask: __mmask8, sae: i32) -> __mmask8;
     #[link_name = "llvm.x86.avx512fp16.vcomi.sh"]
-    fn vcomish(a: __m128h, b: __m128h, imm8: i32, sae: i32) -> i32;
+    fn vcomish(a: __m128h, b: __m128h, imm5: i32, sae: i32) -> i32;
 
     #[link_name = "llvm.x86.avx512fp16.add.ph.512"]
     fn vaddph(a: __m512h, b: __m512h, rounding: i32) -> __m512h;
@@ -16536,6 +16659,13 @@ unsafe extern "C" {
     fn vreducesh(a: __m128h, b: __m128h, src: __m128h, k: __mmask8, imm8: i32, sae: i32)
     -> __m128h;
 
+    #[link_name = "llvm.x86.avx512fp16.fpclass.ph.128"]
+    fn vfpclassph_128(a: __m128h, imm8: i32) -> __mmask8;
+    #[link_name = "llvm.x86.avx512fp16.fpclass.ph.256"]
+    fn vfpclassph_256(a: __m256h, imm8: i32) -> __mmask16;
+    #[link_name = "llvm.x86.avx512fp16.fpclass.ph.512"]
+    fn vfpclassph_512(a: __m512h, imm8: i32) -> __mmask32;
+
     #[link_name = "llvm.x86.avx512fp16.mask.fpclass.sh"]
     fn vfpclasssh(a: __m128h, imm8: i32, k: __mmask8) -> __mmask8;
 
@@ -16708,25 +16838,28 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use crate::core_arch::x86::*;
-    use crate::mem::transmute;
     use crate::ptr::{addr_of, addr_of_mut};
     use stdarch_test::simd_test;
 
     #[target_feature(enable = "avx512fp16")]
-    unsafe fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    const fn _mm_set1_pch(re: f16, im: f16) -> __m128h {
         _mm_setr_ph(re, im, re, im, re, im, re, im)
     }
 
     #[target_feature(enable = "avx512fp16")]
-    unsafe fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    const fn _mm256_set1_pch(re: f16, im: f16) -> __m256h {
         _mm256_setr_ph(
             re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
         )
     }
 
     #[target_feature(enable = "avx512fp16")]
-    unsafe fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    const fn _mm512_set1_pch(re: f16, im: f16) -> __m512h {
         _mm512_setr_ph(
             re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im, re, im,
             re, im, re, im, re, im, re, im, re, im,
@@ -16734,14 +16867,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_set_ph() {
+    const fn test_mm_set_ph() {
         let r = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let e = _mm_setr_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         assert_eq_m128h(r, e);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_set_ph() {
+    const fn test_mm256_set_ph() {
         let r = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -16752,7 +16885,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_set_ph() {
+    const fn test_mm512_set_ph() {
         let r = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -16767,21 +16900,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_set_sh() {
+    const fn test_mm_set_sh() {
         let r = _mm_set_sh(1.0);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0);
         assert_eq_m128h(r, e);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_set1_ph() {
+    const fn test_mm_set1_ph() {
         let r = _mm_set1_ph(1.0);
         let e = _mm_set_ph(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0);
         assert_eq_m128h(r, e);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_set1_ph() {
+    const fn test_mm256_set1_ph() {
         let r = _mm256_set1_ph(1.0);
         let e = _mm256_set_ph(
             1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
@@ -16790,7 +16923,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_set1_ph() {
+    const fn test_mm512_set1_ph() {
         let r = _mm512_set1_ph(1.0);
         let e = _mm512_set_ph(
             1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
@@ -16800,14 +16933,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_setr_ph() {
+    const fn test_mm_setr_ph() {
         let r = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let e = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         assert_eq_m128h(r, e);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_setr_ph() {
+    const fn test_mm256_setr_ph() {
         let r = _mm256_setr_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -16818,7 +16951,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_setr_ph() {
+    const fn test_mm512_setr_ph() {
         let r = _mm512_setr_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -16833,28 +16966,28 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_setzero_ph() {
+    const fn test_mm_setzero_ph() {
         let r = _mm_setzero_ph();
         let e = _mm_set1_ph(0.0);
         assert_eq_m128h(r, e);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_setzero_ph() {
+    const fn test_mm256_setzero_ph() {
         let r = _mm256_setzero_ph();
         let e = _mm256_set1_ph(0.0);
         assert_eq_m256h(r, e);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_setzero_ph() {
+    const fn test_mm512_setzero_ph() {
         let r = _mm512_setzero_ph();
         let e = _mm512_set1_ph(0.0);
         assert_eq_m512h(r, e);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_castsi128_ph() {
+    const fn test_mm_castsi128_ph() {
         let a = _mm_set1_epi16(0x3c00);
         let r = _mm_castsi128_ph(a);
         let e = _mm_set1_ph(1.0);
@@ -16862,7 +16995,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_castsi256_ph() {
+    const fn test_mm256_castsi256_ph() {
         let a = _mm256_set1_epi16(0x3c00);
         let r = _mm256_castsi256_ph(a);
         let e = _mm256_set1_ph(1.0);
@@ -16870,7 +17003,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_castsi512_ph() {
+    const fn test_mm512_castsi512_ph() {
         let a = _mm512_set1_epi16(0x3c00);
         let r = _mm512_castsi512_ph(a);
         let e = _mm512_set1_ph(1.0);
@@ -16878,7 +17011,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_castph_si128() {
+    const fn test_mm_castph_si128() {
         let a = _mm_set1_ph(1.0);
         let r = _mm_castph_si128(a);
         let e = _mm_set1_epi16(0x3c00);
@@ -16886,7 +17019,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm256_castph_si256() {
+    const fn test_mm256_castph_si256() {
         let a = _mm256_set1_ph(1.0);
         let r = _mm256_castph_si256(a);
         let e = _mm256_set1_epi16(0x3c00);
@@ -16894,7 +17027,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_castph_si512() {
+    const fn test_mm512_castph_si512() {
         let a = _mm512_set1_ph(1.0);
         let r = _mm512_castph_si512(a);
         let e = _mm512_set1_epi16(0x3c00);
@@ -16902,7 +17035,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_castps_ph() {
+    const fn test_mm_castps_ph() {
         let a = _mm_castsi128_ps(_mm_set1_epi16(0x3c00));
         let r = _mm_castps_ph(a);
         let e = _mm_set1_ph(1.0);
@@ -16910,7 +17043,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_castps_ph() {
+    const fn test_mm256_castps_ph() {
         let a = _mm256_castsi256_ps(_mm256_set1_epi16(0x3c00));
         let r = _mm256_castps_ph(a);
         let e = _mm256_set1_ph(1.0);
@@ -16918,7 +17051,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_castps_ph() {
+    const fn test_mm512_castps_ph() {
         let a = _mm512_castsi512_ps(_mm512_set1_epi16(0x3c00));
         let r = _mm512_castps_ph(a);
         let e = _mm512_set1_ph(1.0);
@@ -16926,7 +17059,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_castph_ps() {
+    const fn test_mm_castph_ps() {
         let a = _mm_castsi128_ph(_mm_set1_epi32(0x3f800000));
         let r = _mm_castph_ps(a);
         let e = _mm_set1_ps(1.0);
@@ -16934,7 +17067,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm256_castph_ps() {
+    const fn test_mm256_castph_ps() {
         let a = _mm256_castsi256_ph(_mm256_set1_epi32(0x3f800000));
         let r = _mm256_castph_ps(a);
         let e = _mm256_set1_ps(1.0);
@@ -16942,7 +17075,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_castph_ps() {
+    const fn test_mm512_castph_ps() {
         let a = _mm512_castsi512_ph(_mm512_set1_epi32(0x3f800000));
         let r = _mm512_castph_ps(a);
         let e = _mm512_set1_ps(1.0);
@@ -16950,7 +17083,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_castpd_ph() {
+    const fn test_mm_castpd_ph() {
         let a = _mm_castsi128_pd(_mm_set1_epi16(0x3c00));
         let r = _mm_castpd_ph(a);
         let e = _mm_set1_ph(1.0);
@@ -16958,7 +17091,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_castpd_ph() {
+    const fn test_mm256_castpd_ph() {
         let a = _mm256_castsi256_pd(_mm256_set1_epi16(0x3c00));
         let r = _mm256_castpd_ph(a);
         let e = _mm256_set1_ph(1.0);
@@ -16966,7 +17099,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_castpd_ph() {
+    const fn test_mm512_castpd_ph() {
         let a = _mm512_castsi512_pd(_mm512_set1_epi16(0x3c00));
         let r = _mm512_castpd_ph(a);
         let e = _mm512_set1_ph(1.0);
@@ -16974,7 +17107,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_castph_pd() {
+    const fn test_mm_castph_pd() {
         let a = _mm_castsi128_ph(_mm_set1_epi64x(0x3ff0000000000000));
         let r = _mm_castph_pd(a);
         let e = _mm_set1_pd(1.0);
@@ -16982,7 +17115,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm256_castph_pd() {
+    const fn test_mm256_castph_pd() {
         let a = _mm256_castsi256_ph(_mm256_set1_epi64x(0x3ff0000000000000));
         let r = _mm256_castph_pd(a);
         let e = _mm256_set1_pd(1.0);
@@ -16990,7 +17123,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_castph_pd() {
+    const fn test_mm512_castph_pd() {
         let a = _mm512_castsi512_ph(_mm512_set1_epi64(0x3ff0000000000000));
         let r = _mm512_castph_pd(a);
         let e = _mm512_set1_pd(1.0);
@@ -16998,7 +17131,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_castph256_ph128() {
+    const fn test_mm256_castph256_ph128() {
         let a = _mm256_setr_ph(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -17008,7 +17141,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_castph512_ph128() {
+    const fn test_mm512_castph512_ph128() {
         let a = _mm512_setr_ph(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
             20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
@@ -17019,7 +17152,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_castph512_ph256() {
+    const fn test_mm512_castph512_ph256() {
         let a = _mm512_setr_ph(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
             20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
@@ -17032,21 +17165,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_castph128_ph256() {
+    const fn test_mm256_castph128_ph256() {
         let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_castph128_ph256(a);
         assert_eq_m128h(_mm256_castph256_ph128(r), a);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_castph128_ph512() {
+    const fn test_mm512_castph128_ph512() {
         let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_castph128_ph512(a);
         assert_eq_m128h(_mm512_castph512_ph128(r), a);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_castph256_ph512() {
+    const fn test_mm512_castph256_ph512() {
         let a = _mm256_setr_ph(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -17055,7 +17188,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_zextph128_ph256() {
+    const fn test_mm256_zextph128_ph256() {
         let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm256_zextph128_ph256(a);
         let e = _mm256_setr_ph(
@@ -17065,7 +17198,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_zextph128_ph512() {
+    const fn test_mm512_zextph128_ph512() {
         let a = _mm_setr_ph(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_zextph128_ph512(a);
         let e = _mm512_setr_ph(
@@ -17076,7 +17209,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_zextph256_ph512() {
+    const fn test_mm512_zextph256_ph512() {
         let a = _mm256_setr_ph(
             1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
         );
@@ -17089,7 +17222,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cmp_ph_mask() {
+    fn test_mm_cmp_ph_mask() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
         let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
@@ -17097,7 +17230,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cmp_ph_mask() {
+    fn test_mm_mask_cmp_ph_mask() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(1.0, 2.0, 3.0, 4.0, -5.0, -6.0, -7.0, -8.0);
         let r = _mm_mask_cmp_ph_mask::<_CMP_EQ_OQ>(0b01010101, a, b);
@@ -17105,7 +17238,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cmp_ph_mask() {
+    fn test_mm256_cmp_ph_mask() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -17118,7 +17251,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_ph_mask() {
+    fn test_mm256_mask_cmp_ph_mask() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -17131,7 +17264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cmp_ph_mask() {
+    fn test_mm512_cmp_ph_mask() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17147,7 +17280,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cmp_ph_mask() {
+    fn test_mm512_mask_cmp_ph_mask() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17163,7 +17296,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cmp_round_ph_mask() {
+    fn test_mm512_cmp_round_ph_mask() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17179,7 +17312,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cmp_round_ph_mask() {
+    fn test_mm512_mask_cmp_round_ph_mask() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17199,7 +17332,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cmp_round_sh_mask() {
+    fn test_mm_cmp_round_sh_mask() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
@@ -17207,7 +17340,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_mask_cmp_round_sh_mask() {
+    fn test_mm_mask_cmp_round_sh_mask() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_mask_cmp_round_sh_mask::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(0, a, b);
@@ -17215,7 +17348,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cmp_sh_mask() {
+    fn test_mm_cmp_sh_mask() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_cmp_sh_mask::<_CMP_EQ_OQ>(a, b);
@@ -17223,7 +17356,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_mask_cmp_sh_mask() {
+    fn test_mm_mask_cmp_sh_mask() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_mask_cmp_sh_mask::<_CMP_EQ_OQ>(0, a, b);
@@ -17231,7 +17364,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_comi_round_sh() {
+    fn test_mm_comi_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_comi_round_sh::<_CMP_EQ_OQ, _MM_FROUND_NO_EXC>(a, b);
@@ -17239,7 +17372,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_comi_sh() {
+    fn test_mm_comi_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_comi_sh::<_CMP_EQ_OQ>(a, b);
@@ -17247,7 +17380,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_comieq_sh() {
+    fn test_mm_comieq_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_comieq_sh(a, b);
@@ -17255,7 +17388,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_comige_sh() {
+    fn test_mm_comige_sh() {
         let a = _mm_set_sh(2.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_comige_sh(a, b);
@@ -17263,7 +17396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_comigt_sh() {
+    fn test_mm_comigt_sh() {
         let a = _mm_set_sh(2.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_comigt_sh(a, b);
@@ -17271,7 +17404,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_comile_sh() {
+    fn test_mm_comile_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_comile_sh(a, b);
@@ -17279,7 +17412,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_comilt_sh() {
+    fn test_mm_comilt_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_comilt_sh(a, b);
@@ -17287,7 +17420,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_comineq_sh() {
+    fn test_mm_comineq_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_comineq_sh(a, b);
@@ -17295,7 +17428,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_ucomieq_sh() {
+    fn test_mm_ucomieq_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_ucomieq_sh(a, b);
@@ -17303,7 +17436,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_ucomige_sh() {
+    fn test_mm_ucomige_sh() {
         let a = _mm_set_sh(2.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_ucomige_sh(a, b);
@@ -17311,7 +17444,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_ucomigt_sh() {
+    fn test_mm_ucomigt_sh() {
         let a = _mm_set_sh(2.0);
         let b = _mm_set_sh(1.0);
         let r = _mm_ucomigt_sh(a, b);
@@ -17319,7 +17452,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_ucomile_sh() {
+    fn test_mm_ucomile_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_ucomile_sh(a, b);
@@ -17327,7 +17460,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_ucomilt_sh() {
+    fn test_mm_ucomilt_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_ucomilt_sh(a, b);
@@ -17335,7 +17468,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_ucomineq_sh() {
+    fn test_mm_ucomineq_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_ucomineq_sh(a, b);
@@ -17343,72 +17476,72 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_load_ph() {
+    const fn test_mm_load_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
-        let b = _mm_load_ph(addr_of!(a).cast());
+        let b = unsafe { _mm_load_ph(addr_of!(a).cast()) };
         assert_eq_m128h(a, b);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_load_ph() {
+    const fn test_mm256_load_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
-        let b = _mm256_load_ph(addr_of!(a).cast());
+        let b = unsafe { _mm256_load_ph(addr_of!(a).cast()) };
         assert_eq_m256h(a, b);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_load_ph() {
+    const fn test_mm512_load_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
             31.0, 32.0,
         );
-        let b = _mm512_load_ph(addr_of!(a).cast());
+        let b = unsafe { _mm512_load_ph(addr_of!(a).cast()) };
         assert_eq_m512h(a, b);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_load_sh() {
+    const fn test_mm_load_sh() {
         let a = _mm_set_sh(1.0);
-        let b = _mm_load_sh(addr_of!(a).cast());
+        let b = unsafe { _mm_load_sh(addr_of!(a).cast()) };
         assert_eq_m128h(a, b);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_load_sh() {
+    fn test_mm_mask_load_sh() {
         let a = _mm_set_sh(1.0);
         let src = _mm_set_sh(2.);
-        let b = _mm_mask_load_sh(src, 1, addr_of!(a).cast());
+        let b = unsafe { _mm_mask_load_sh(src, 1, addr_of!(a).cast()) };
         assert_eq_m128h(a, b);
-        let b = _mm_mask_load_sh(src, 0, addr_of!(a).cast());
+        let b = unsafe { _mm_mask_load_sh(src, 0, addr_of!(a).cast()) };
         assert_eq_m128h(src, b);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_load_sh() {
+    fn test_mm_maskz_load_sh() {
         let a = _mm_set_sh(1.0);
-        let b = _mm_maskz_load_sh(1, addr_of!(a).cast());
+        let b = unsafe { _mm_maskz_load_sh(1, addr_of!(a).cast()) };
         assert_eq_m128h(a, b);
-        let b = _mm_maskz_load_sh(0, addr_of!(a).cast());
+        let b = unsafe { _mm_maskz_load_sh(0, addr_of!(a).cast()) };
         assert_eq_m128h(_mm_setzero_ph(), b);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_loadu_ph() {
+    const fn test_mm_loadu_ph() {
         let array = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
-        let r = _mm_loadu_ph(array.as_ptr());
+        let r = unsafe { _mm_loadu_ph(array.as_ptr()) };
         let e = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         assert_eq_m128h(r, e);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_loadu_ph() {
+    const fn test_mm256_loadu_ph() {
         let array = [
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         ];
-        let r = _mm256_loadu_ph(array.as_ptr());
+        let r = unsafe { _mm256_loadu_ph(array.as_ptr()) };
         let e = _mm256_setr_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -17416,13 +17549,13 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_loadu_ph() {
+    const fn test_mm512_loadu_ph() {
         let array = [
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
             31.0, 32.0,
         ];
-        let r = _mm512_loadu_ph(array.as_ptr());
+        let r = unsafe { _mm512_loadu_ph(array.as_ptr()) };
         let e = _mm512_setr_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17432,7 +17565,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_move_sh() {
+    const fn test_mm_move_sh() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_sh(9.0);
         let r = _mm_move_sh(a, b);
@@ -17441,7 +17574,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_move_sh() {
+    const fn test_mm_mask_move_sh() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_sh(9.0);
         let src = _mm_set_sh(10.0);
@@ -17451,7 +17584,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_move_sh() {
+    const fn test_mm_maskz_move_sh() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_sh(9.0);
         let r = _mm_maskz_move_sh(0, a, b);
@@ -17460,85 +17593,103 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_store_ph() {
+    const fn test_mm_store_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let mut b = _mm_setzero_ph();
-        _mm_store_ph(addr_of_mut!(b).cast(), a);
+        unsafe {
+            _mm_store_ph(addr_of_mut!(b).cast(), a);
+        }
         assert_eq_m128h(a, b);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_store_ph() {
+    const fn test_mm256_store_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
         let mut b = _mm256_setzero_ph();
-        _mm256_store_ph(addr_of_mut!(b).cast(), a);
+        unsafe {
+            _mm256_store_ph(addr_of_mut!(b).cast(), a);
+        }
         assert_eq_m256h(a, b);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_store_ph() {
+    const fn test_mm512_store_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
             31.0, 32.0,
         );
         let mut b = _mm512_setzero_ph();
-        _mm512_store_ph(addr_of_mut!(b).cast(), a);
+        unsafe {
+            _mm512_store_ph(addr_of_mut!(b).cast(), a);
+        }
         assert_eq_m512h(a, b);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_store_sh() {
+    const fn test_mm_store_sh() {
         let a = _mm_set_sh(1.0);
         let mut b = _mm_setzero_ph();
-        _mm_store_sh(addr_of_mut!(b).cast(), a);
+        unsafe {
+            _mm_store_sh(addr_of_mut!(b).cast(), a);
+        }
         assert_eq_m128h(a, b);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_store_sh() {
+    fn test_mm_mask_store_sh() {
         let a = _mm_set_sh(1.0);
         let mut b = _mm_setzero_ph();
-        _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
+        unsafe {
+            _mm_mask_store_sh(addr_of_mut!(b).cast(), 0, a);
+        }
         assert_eq_m128h(_mm_setzero_ph(), b);
-        _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
+        unsafe {
+            _mm_mask_store_sh(addr_of_mut!(b).cast(), 1, a);
+        }
         assert_eq_m128h(a, b);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_storeu_ph() {
+    const fn test_mm_storeu_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let mut array = [0.0; 8];
-        _mm_storeu_ph(array.as_mut_ptr(), a);
-        assert_eq_m128h(a, _mm_loadu_ph(array.as_ptr()));
+        unsafe {
+            _mm_storeu_ph(array.as_mut_ptr(), a);
+        }
+        assert_eq_m128h(a, unsafe { _mm_loadu_ph(array.as_ptr()) });
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_storeu_ph() {
+    const fn test_mm256_storeu_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
         let mut array = [0.0; 16];
-        _mm256_storeu_ph(array.as_mut_ptr(), a);
-        assert_eq_m256h(a, _mm256_loadu_ph(array.as_ptr()));
+        unsafe {
+            _mm256_storeu_ph(array.as_mut_ptr(), a);
+        }
+        assert_eq_m256h(a, unsafe { _mm256_loadu_ph(array.as_ptr()) });
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_storeu_ph() {
+    const fn test_mm512_storeu_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
             31.0, 32.0,
         );
         let mut array = [0.0; 32];
-        _mm512_storeu_ph(array.as_mut_ptr(), a);
-        assert_eq_m512h(a, _mm512_loadu_ph(array.as_ptr()));
+        unsafe {
+            _mm512_storeu_ph(array.as_mut_ptr(), a);
+        }
+        assert_eq_m512h(a, unsafe { _mm512_loadu_ph(array.as_ptr()) });
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_add_ph() {
+    const fn test_mm_add_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         let r = _mm_add_ph(a, b);
@@ -17547,7 +17698,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_add_ph() {
+    const fn test_mm_mask_add_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
@@ -17557,7 +17708,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_add_ph() {
+    const fn test_mm_maskz_add_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         let r = _mm_maskz_add_ph(0b01010101, a, b);
@@ -17566,7 +17717,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_add_ph() {
+    const fn test_mm256_add_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -17579,7 +17730,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_add_ph() {
+    const fn test_mm256_mask_add_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -17597,7 +17748,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_add_ph() {
+    const fn test_mm256_maskz_add_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -17612,7 +17763,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_add_ph() {
+    const fn test_mm512_add_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17629,7 +17780,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_add_ph() {
+    const fn test_mm512_mask_add_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17653,7 +17804,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_add_ph() {
+    const fn test_mm512_maskz_add_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17673,7 +17824,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_add_round_ph() {
+    fn test_mm512_add_round_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17690,7 +17841,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_add_round_ph() {
+    fn test_mm512_mask_add_round_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17719,7 +17870,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_add_round_ph() {
+    fn test_mm512_maskz_add_round_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17743,7 +17894,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_add_round_sh() {
+    fn test_mm_add_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_add_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -17752,7 +17903,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_add_round_sh() {
+    fn test_mm_mask_add_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let src = _mm_set_sh(4.0);
@@ -17769,7 +17920,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_add_round_sh() {
+    fn test_mm_maskz_add_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r =
@@ -17783,7 +17934,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_add_sh() {
+    const fn test_mm_add_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_add_sh(a, b);
@@ -17792,7 +17943,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_add_sh() {
+    const fn test_mm_mask_add_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let src = _mm_set_sh(4.0);
@@ -17805,7 +17956,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_add_sh() {
+    const fn test_mm_maskz_add_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_maskz_add_sh(0, a, b);
@@ -17817,7 +17968,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_sub_ph() {
+    const fn test_mm_sub_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         let r = _mm_sub_ph(a, b);
@@ -17826,7 +17977,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_sub_ph() {
+    const fn test_mm_mask_sub_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
@@ -17836,7 +17987,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_sub_ph() {
+    const fn test_mm_maskz_sub_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         let r = _mm_maskz_sub_ph(0b01010101, a, b);
@@ -17845,7 +17996,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_sub_ph() {
+    const fn test_mm256_sub_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -17861,7 +18012,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_sub_ph() {
+    const fn test_mm256_mask_sub_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -17879,7 +18030,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_sub_ph() {
+    const fn test_mm256_maskz_sub_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -17894,7 +18045,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_sub_ph() {
+    const fn test_mm512_sub_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17915,7 +18066,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_sub_ph() {
+    const fn test_mm512_mask_sub_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17939,7 +18090,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_sub_ph() {
+    const fn test_mm512_maskz_sub_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17959,7 +18110,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_sub_round_ph() {
+    fn test_mm512_sub_round_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -17980,7 +18131,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_sub_round_ph() {
+    fn test_mm512_mask_sub_round_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -18009,7 +18160,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_sub_round_ph() {
+    fn test_mm512_maskz_sub_round_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -18033,7 +18184,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_sub_round_sh() {
+    fn test_mm_sub_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_sub_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -18042,7 +18193,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_sub_round_sh() {
+    fn test_mm_mask_sub_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let src = _mm_set_sh(4.0);
@@ -18059,7 +18210,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_sub_round_sh() {
+    fn test_mm_maskz_sub_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r =
@@ -18073,7 +18224,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_sub_sh() {
+    const fn test_mm_sub_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_sub_sh(a, b);
@@ -18082,7 +18233,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_sub_sh() {
+    const fn test_mm_mask_sub_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let src = _mm_set_sh(4.0);
@@ -18095,7 +18246,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_sub_sh() {
+    const fn test_mm_maskz_sub_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_maskz_sub_sh(0, a, b);
@@ -18107,7 +18258,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mul_ph() {
+    const fn test_mm_mul_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         let r = _mm_mul_ph(a, b);
@@ -18116,7 +18267,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_mul_ph() {
+    const fn test_mm_mask_mul_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
@@ -18126,7 +18277,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_mul_ph() {
+    const fn test_mm_maskz_mul_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0);
         let r = _mm_maskz_mul_ph(0b01010101, a, b);
@@ -18135,7 +18286,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mul_ph() {
+    const fn test_mm256_mul_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -18151,7 +18302,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_mul_ph() {
+    const fn test_mm256_mask_mul_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -18169,7 +18320,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_mul_ph() {
+    const fn test_mm256_maskz_mul_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -18184,7 +18335,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mul_ph() {
+    const fn test_mm512_mul_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -18205,7 +18356,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_mul_ph() {
+    const fn test_mm512_mask_mul_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -18229,7 +18380,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_mul_ph() {
+    const fn test_mm512_maskz_mul_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -18249,7 +18400,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mul_round_ph() {
+    fn test_mm512_mul_round_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -18270,7 +18421,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_mul_round_ph() {
+    fn test_mm512_mask_mul_round_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -18299,7 +18450,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_mul_round_ph() {
+    fn test_mm512_maskz_mul_round_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -18323,7 +18474,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mul_round_sh() {
+    fn test_mm_mul_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_mul_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -18332,7 +18483,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_mul_round_sh() {
+    fn test_mm_mask_mul_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let src = _mm_set_sh(4.0);
@@ -18349,7 +18500,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_mul_round_sh() {
+    fn test_mm_maskz_mul_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r =
@@ -18363,7 +18514,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mul_sh() {
+    const fn test_mm_mul_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_mul_sh(a, b);
@@ -18372,7 +18523,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_mul_sh() {
+    const fn test_mm_mask_mul_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let src = _mm_set_sh(4.0);
@@ -18385,7 +18536,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_mul_sh() {
+    const fn test_mm_maskz_mul_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_maskz_mul_sh(0, a, b);
@@ -18397,7 +18548,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_div_ph() {
+    const fn test_mm_div_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let r = _mm_div_ph(a, b);
@@ -18406,7 +18557,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_div_ph() {
+    const fn test_mm_mask_div_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let src = _mm_set_ph(4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0);
@@ -18416,7 +18567,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_div_ph() {
+    const fn test_mm_maskz_div_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let r = _mm_maskz_div_ph(0b01010101, a, b);
@@ -18425,7 +18576,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_div_ph() {
+    const fn test_mm256_div_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let r = _mm256_div_ph(a, b);
@@ -18434,7 +18585,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_div_ph() {
+    const fn test_mm256_mask_div_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let src = _mm256_set_ph(
@@ -18449,7 +18600,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_div_ph() {
+    const fn test_mm256_maskz_div_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let r = _mm256_maskz_div_ph(0b0101010101010101, a, b);
@@ -18460,7 +18611,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_div_ph() {
+    const fn test_mm512_div_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let r = _mm512_div_ph(a, b);
@@ -18469,7 +18620,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_div_ph() {
+    const fn test_mm512_mask_div_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let src = _mm512_set_ph(
@@ -18486,7 +18637,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_div_ph() {
+    const fn test_mm512_maskz_div_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let r = _mm512_maskz_div_ph(0b01010101010101010101010101010101, a, b);
@@ -18498,7 +18649,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_div_round_ph() {
+    fn test_mm512_div_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let r = _mm512_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -18507,7 +18658,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_div_round_ph() {
+    fn test_mm512_mask_div_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let src = _mm512_set_ph(
@@ -18529,7 +18680,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_div_round_ph() {
+    fn test_mm512_maskz_div_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let r = _mm512_maskz_div_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -18545,7 +18696,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_div_round_sh() {
+    fn test_mm_div_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_div_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -18554,7 +18705,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_div_round_sh() {
+    fn test_mm_mask_div_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let src = _mm_set_sh(4.0);
@@ -18571,7 +18722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_div_round_sh() {
+    fn test_mm_maskz_div_round_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r =
@@ -18585,7 +18736,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_div_sh() {
+    const fn test_mm_div_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_div_sh(a, b);
@@ -18594,7 +18745,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_div_sh() {
+    const fn test_mm_mask_div_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let src = _mm_set_sh(4.0);
@@ -18607,7 +18758,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_div_sh() {
+    const fn test_mm_maskz_div_sh() {
         let a = _mm_set_sh(1.0);
         let b = _mm_set_sh(2.0);
         let r = _mm_maskz_div_sh(0, a, b);
@@ -18619,7 +18770,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mul_pch() {
+    fn test_mm_mul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 1.0);
         let r = _mm_mul_pch(a, b);
@@ -18628,7 +18779,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_mul_pch() {
+    fn test_mm_mask_mul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 1.0);
         let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
@@ -18638,7 +18789,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_mul_pch() {
+    fn test_mm_maskz_mul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 1.0);
         let r = _mm_maskz_mul_pch(0b0101, a, b);
@@ -18647,7 +18798,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mul_pch() {
+    fn test_mm256_mul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 1.0);
         let r = _mm256_mul_pch(a, b);
@@ -18656,7 +18807,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_mul_pch() {
+    fn test_mm256_mask_mul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 1.0);
         let src = _mm256_setr_ph(
@@ -18670,7 +18821,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_mul_pch() {
+    fn test_mm256_maskz_mul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 1.0);
         let r = _mm256_maskz_mul_pch(0b01010101, a, b);
@@ -18681,7 +18832,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mul_pch() {
+    fn test_mm512_mul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_mul_pch(a, b);
@@ -18690,7 +18841,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_mul_pch() {
+    fn test_mm512_mask_mul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let src = _mm512_setr_ph(
@@ -18708,7 +18859,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_mul_pch() {
+    fn test_mm512_maskz_mul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_maskz_mul_pch(0b0101010101010101, a, b);
@@ -18720,7 +18871,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mul_round_pch() {
+    fn test_mm512_mul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -18729,7 +18880,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_mul_round_pch() {
+    fn test_mm512_mask_mul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let src = _mm512_setr_ph(
@@ -18752,7 +18903,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_mul_round_pch() {
+    fn test_mm512_maskz_mul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_maskz_mul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -18768,7 +18919,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mul_round_sch() {
+    fn test_mm_mul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let r = _mm_mul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -18777,7 +18928,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_mul_round_sch() {
+    fn test_mm_mask_mul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
@@ -18789,7 +18940,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_mul_round_sch() {
+    fn test_mm_maskz_mul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let r =
@@ -18799,7 +18950,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mul_sch() {
+    fn test_mm_mul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let r = _mm_mul_sch(a, b);
@@ -18808,7 +18959,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_mul_sch() {
+    fn test_mm_mask_mul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
@@ -18818,7 +18969,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_mul_sch() {
+    fn test_mm_maskz_mul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let r = _mm_maskz_mul_sch(0, a, b);
@@ -18827,7 +18978,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmul_pch() {
+    fn test_mm_fmul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 1.0);
         let r = _mm_fmul_pch(a, b);
@@ -18836,7 +18987,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmul_pch() {
+    fn test_mm_mask_fmul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 1.0);
         let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
@@ -18846,7 +18997,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmul_pch() {
+    fn test_mm_maskz_fmul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 1.0);
         let r = _mm_maskz_fmul_pch(0b0101, a, b);
@@ -18855,7 +19006,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fmul_pch() {
+    fn test_mm256_fmul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 1.0);
         let r = _mm256_fmul_pch(a, b);
@@ -18864,7 +19015,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fmul_pch() {
+    fn test_mm256_mask_fmul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 1.0);
         let src = _mm256_setr_ph(
@@ -18878,7 +19029,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fmul_pch() {
+    fn test_mm256_maskz_fmul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 1.0);
         let r = _mm256_maskz_fmul_pch(0b01010101, a, b);
@@ -18889,7 +19040,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmul_pch() {
+    fn test_mm512_fmul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_fmul_pch(a, b);
@@ -18898,7 +19049,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmul_pch() {
+    fn test_mm512_mask_fmul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let src = _mm512_setr_ph(
@@ -18916,7 +19067,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmul_pch() {
+    fn test_mm512_maskz_fmul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_maskz_fmul_pch(0b0101010101010101, a, b);
@@ -18928,7 +19079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmul_round_pch() {
+    fn test_mm512_fmul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -18937,7 +19088,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmul_round_pch() {
+    fn test_mm512_mask_fmul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let src = _mm512_setr_ph(
@@ -18960,7 +19111,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmul_round_pch() {
+    fn test_mm512_maskz_fmul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_maskz_fmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -18976,7 +19127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmul_round_sch() {
+    fn test_mm_fmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let r = _mm_fmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -18985,7 +19136,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmul_round_sch() {
+    fn test_mm_mask_fmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
@@ -18997,7 +19148,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmul_round_sch() {
+    fn test_mm_maskz_fmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let r =
@@ -19007,7 +19158,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmul_sch() {
+    fn test_mm_fmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let r = _mm_fmul_sch(a, b);
@@ -19016,7 +19167,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmul_sch() {
+    fn test_mm_mask_fmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
@@ -19026,7 +19177,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmul_sch() {
+    fn test_mm_maskz_fmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 1.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let r = _mm_maskz_fmul_sch(0, a, b);
@@ -19035,7 +19186,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cmul_pch() {
+    fn test_mm_cmul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, -1.0);
         let r = _mm_cmul_pch(a, b);
@@ -19044,7 +19195,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cmul_pch() {
+    fn test_mm_mask_cmul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, -1.0);
         let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
@@ -19054,7 +19205,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cmul_pch() {
+    fn test_mm_maskz_cmul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, -1.0);
         let r = _mm_maskz_cmul_pch(0b0101, a, b);
@@ -19063,7 +19214,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cmul_pch() {
+    fn test_mm256_cmul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, -1.0);
         let r = _mm256_cmul_pch(a, b);
@@ -19072,7 +19223,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cmul_pch() {
+    fn test_mm256_mask_cmul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, -1.0);
         let src = _mm256_setr_ph(
@@ -19086,7 +19237,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cmul_pch() {
+    fn test_mm256_maskz_cmul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, -1.0);
         let r = _mm256_maskz_cmul_pch(0b01010101, a, b);
@@ -19097,7 +19248,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cmul_pch() {
+    fn test_mm512_cmul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let r = _mm512_cmul_pch(a, b);
@@ -19106,7 +19257,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cmul_pch() {
+    fn test_mm512_mask_cmul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let src = _mm512_setr_ph(
@@ -19124,7 +19275,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cmul_pch() {
+    fn test_mm512_maskz_cmul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let r = _mm512_maskz_cmul_pch(0b0101010101010101, a, b);
@@ -19136,7 +19287,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cmul_round_pch() {
+    fn test_mm512_cmul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let r = _mm512_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -19145,7 +19296,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cmul_round_pch() {
+    fn test_mm512_mask_cmul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let src = _mm512_setr_ph(
@@ -19168,7 +19319,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cmul_round_pch() {
+    fn test_mm512_maskz_cmul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let r = _mm512_maskz_cmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -19184,7 +19335,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cmul_sch() {
+    fn test_mm_cmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let r = _mm_cmul_sch(a, b);
@@ -19193,7 +19344,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cmul_sch() {
+    fn test_mm_mask_cmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
@@ -19203,7 +19354,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cmul_sch() {
+    fn test_mm_maskz_cmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let r = _mm_maskz_cmul_sch(0, a, b);
@@ -19212,7 +19363,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cmul_round_sch() {
+    fn test_mm_cmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let r = _mm_cmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -19221,7 +19372,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cmul_round_sch() {
+    fn test_mm_mask_cmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
@@ -19233,7 +19384,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cmul_round_sch() {
+    fn test_mm_maskz_cmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let r =
@@ -19243,7 +19394,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fcmul_pch() {
+    fn test_mm_fcmul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, -1.0);
         let r = _mm_fcmul_pch(a, b);
@@ -19252,7 +19403,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fcmul_pch() {
+    fn test_mm_mask_fcmul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, -1.0);
         let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
@@ -19262,7 +19413,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fcmul_pch() {
+    fn test_mm_maskz_fcmul_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, -1.0);
         let r = _mm_maskz_fcmul_pch(0b0101, a, b);
@@ -19271,7 +19422,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fcmul_pch() {
+    fn test_mm256_fcmul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, -1.0);
         let r = _mm256_fcmul_pch(a, b);
@@ -19280,7 +19431,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fcmul_pch() {
+    fn test_mm256_mask_fcmul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, -1.0);
         let src = _mm256_setr_ph(
@@ -19294,7 +19445,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fcmul_pch() {
+    fn test_mm256_maskz_fcmul_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, -1.0);
         let r = _mm256_maskz_fcmul_pch(0b01010101, a, b);
@@ -19305,7 +19456,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fcmul_pch() {
+    fn test_mm512_fcmul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let r = _mm512_fcmul_pch(a, b);
@@ -19314,7 +19465,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fcmul_pch() {
+    fn test_mm512_mask_fcmul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let src = _mm512_setr_ph(
@@ -19332,7 +19483,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fcmul_pch() {
+    fn test_mm512_maskz_fcmul_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let r = _mm512_maskz_fcmul_pch(0b0101010101010101, a, b);
@@ -19344,7 +19495,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fcmul_round_pch() {
+    fn test_mm512_fcmul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let r = _mm512_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -19353,7 +19504,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fcmul_round_pch() {
+    fn test_mm512_mask_fcmul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let src = _mm512_setr_ph(
@@ -19376,7 +19527,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fcmul_round_pch() {
+    fn test_mm512_maskz_fcmul_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, -1.0);
         let r = _mm512_maskz_fcmul_round_pch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -19392,7 +19543,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fcmul_sch() {
+    fn test_mm_fcmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let r = _mm_fcmul_sch(a, b);
@@ -19401,7 +19552,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fcmul_sch() {
+    fn test_mm_mask_fcmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
@@ -19411,7 +19562,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fcmul_sch() {
+    fn test_mm_maskz_fcmul_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let r = _mm_maskz_fcmul_sch(0, a, b);
@@ -19420,7 +19571,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fcmul_round_sch() {
+    fn test_mm_fcmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let r = _mm_fcmul_round_sch::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -19429,7 +19580,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fcmul_round_sch() {
+    fn test_mm_mask_fcmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let src = _mm_setr_ph(14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0);
@@ -19441,7 +19592,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fcmul_round_sch() {
+    fn test_mm_maskz_fcmul_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, -1.0, 8.0, -9.0, 10.0, -11.0, 12.0, -13.0);
         let r =
@@ -19451,7 +19602,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_abs_ph() {
+    const fn test_mm_abs_ph() {
         let a = _mm_set_ph(-1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0);
         let r = _mm_abs_ph(a);
         let e = _mm_set_ph(1.0, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0);
@@ -19459,7 +19610,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_abs_ph() {
+    const fn test_mm256_abs_ph() {
         let a = _mm256_set_ph(
             -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
             -14.0,
@@ -19472,7 +19623,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_abs_ph() {
+    const fn test_mm512_abs_ph() {
         let a = _mm512_set_ph(
             -1.0, 0.0, 1.0, -2.0, 3.0, -4.0, 5.0, -6.0, 7.0, -8.0, 9.0, -10.0, 11.0, -12.0, 13.0,
             -14.0, 15.0, -16.0, 17.0, -18.0, 19.0, -20.0, 21.0, -22.0, 23.0, -24.0, 25.0, -26.0,
@@ -19488,7 +19639,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_conj_pch() {
+    const fn test_mm_conj_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let r = _mm_conj_pch(a);
         let e = _mm_set1_pch(0.0, -1.0);
@@ -19496,7 +19647,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_conj_pch() {
+    const fn test_mm_mask_conj_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let src = _mm_setr_ph(2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0);
         let r = _mm_mask_conj_pch(src, 0b0101, a);
@@ -19505,7 +19656,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_conj_pch() {
+    const fn test_mm_maskz_conj_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let r = _mm_maskz_conj_pch(0b0101, a);
         let e = _mm_setr_ph(0.0, -1.0, 0.0, 0.0, 0.0, -1.0, 0.0, 0.0);
@@ -19513,7 +19664,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_conj_pch() {
+    const fn test_mm256_conj_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let r = _mm256_conj_pch(a);
         let e = _mm256_set1_pch(0.0, -1.0);
@@ -19521,7 +19672,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_conj_pch() {
+    const fn test_mm256_mask_conj_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let src = _mm256_setr_ph(
             2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
@@ -19534,7 +19685,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_conj_pch() {
+    const fn test_mm256_maskz_conj_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let r = _mm256_maskz_conj_pch(0b01010101, a);
         let e = _mm256_setr_ph(
@@ -19544,7 +19695,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_conj_pch() {
+    const fn test_mm512_conj_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_conj_pch(a);
         let e = _mm512_set1_pch(0.0, -1.0);
@@ -19552,7 +19703,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_conj_pch() {
+    const fn test_mm512_mask_conj_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let src = _mm512_setr_ph(
             2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0,
@@ -19569,7 +19720,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_conj_pch() {
+    const fn test_mm512_maskz_conj_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let r = _mm512_maskz_conj_pch(0b0101010101010101, a);
         let e = _mm512_setr_ph(
@@ -19580,7 +19731,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmadd_pch() {
+    fn test_mm_fmadd_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 2.0);
         let c = _mm_set1_pch(0.0, 3.0);
@@ -19590,7 +19741,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmadd_pch() {
+    fn test_mm_mask_fmadd_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 2.0);
         let c = _mm_set1_pch(0.0, 3.0);
@@ -19600,7 +19751,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmadd_pch() {
+    fn test_mm_mask3_fmadd_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 2.0);
         let c = _mm_set1_pch(0.0, 3.0);
@@ -19610,7 +19761,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmadd_pch() {
+    fn test_mm_maskz_fmadd_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 2.0);
         let c = _mm_set1_pch(0.0, 3.0);
@@ -19620,7 +19771,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fmadd_pch() {
+    fn test_mm256_fmadd_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 2.0);
         let c = _mm256_set1_pch(0.0, 3.0);
@@ -19630,7 +19781,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fmadd_pch() {
+    fn test_mm256_mask_fmadd_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 2.0);
         let c = _mm256_set1_pch(0.0, 3.0);
@@ -19642,7 +19793,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask3_fmadd_pch() {
+    fn test_mm256_mask3_fmadd_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 2.0);
         let c = _mm256_set1_pch(0.0, 3.0);
@@ -19654,7 +19805,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fmadd_pch() {
+    fn test_mm256_maskz_fmadd_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 2.0);
         let c = _mm256_set1_pch(0.0, 3.0);
@@ -19666,7 +19817,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmadd_pch() {
+    fn test_mm512_fmadd_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19676,7 +19827,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmadd_pch() {
+    fn test_mm512_mask_fmadd_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19689,7 +19840,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmadd_pch() {
+    fn test_mm512_mask3_fmadd_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19702,7 +19853,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmadd_pch() {
+    fn test_mm512_maskz_fmadd_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19715,7 +19866,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmadd_round_pch() {
+    fn test_mm512_fmadd_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19726,7 +19877,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmadd_round_pch() {
+    fn test_mm512_mask_fmadd_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19744,7 +19895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmadd_round_pch() {
+    fn test_mm512_mask3_fmadd_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19762,7 +19913,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmadd_round_pch() {
+    fn test_mm512_maskz_fmadd_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19780,7 +19931,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmadd_sch() {
+    fn test_mm_fmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -19790,7 +19941,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmadd_sch() {
+    fn test_mm_mask_fmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -19803,7 +19954,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmadd_sch() {
+    fn test_mm_mask3_fmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -19816,7 +19967,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmadd_sch() {
+    fn test_mm_maskz_fmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -19829,7 +19980,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmadd_round_sch() {
+    fn test_mm_fmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -19839,7 +19990,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmadd_round_sch() {
+    fn test_mm_mask_fmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -19856,7 +20007,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmadd_round_sch() {
+    fn test_mm_mask3_fmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -19873,7 +20024,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmadd_round_sch() {
+    fn test_mm_maskz_fmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -19890,7 +20041,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fcmadd_pch() {
+    fn test_mm_fcmadd_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 2.0);
         let c = _mm_set1_pch(0.0, 3.0);
@@ -19900,7 +20051,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fcmadd_pch() {
+    fn test_mm_mask_fcmadd_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 2.0);
         let c = _mm_set1_pch(0.0, 3.0);
@@ -19910,7 +20061,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fcmadd_pch() {
+    fn test_mm_mask3_fcmadd_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 2.0);
         let c = _mm_set1_pch(0.0, 3.0);
@@ -19920,7 +20071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fcmadd_pch() {
+    fn test_mm_maskz_fcmadd_pch() {
         let a = _mm_set1_pch(0.0, 1.0);
         let b = _mm_set1_pch(0.0, 2.0);
         let c = _mm_set1_pch(0.0, 3.0);
@@ -19930,7 +20081,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fcmadd_pch() {
+    fn test_mm256_fcmadd_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 2.0);
         let c = _mm256_set1_pch(0.0, 3.0);
@@ -19940,7 +20091,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fcmadd_pch() {
+    fn test_mm256_mask_fcmadd_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 2.0);
         let c = _mm256_set1_pch(0.0, 3.0);
@@ -19952,7 +20103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask3_fcmadd_pch() {
+    fn test_mm256_mask3_fcmadd_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 2.0);
         let c = _mm256_set1_pch(0.0, 3.0);
@@ -19964,7 +20115,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fcmadd_pch() {
+    fn test_mm256_maskz_fcmadd_pch() {
         let a = _mm256_set1_pch(0.0, 1.0);
         let b = _mm256_set1_pch(0.0, 2.0);
         let c = _mm256_set1_pch(0.0, 3.0);
@@ -19976,7 +20127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fcmadd_pch() {
+    fn test_mm512_fcmadd_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19986,7 +20137,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fcmadd_pch() {
+    fn test_mm512_mask_fcmadd_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -19999,7 +20150,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fcmadd_pch() {
+    fn test_mm512_mask3_fcmadd_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -20012,7 +20163,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fcmadd_pch() {
+    fn test_mm512_maskz_fcmadd_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -20025,7 +20176,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fcmadd_round_pch() {
+    fn test_mm512_fcmadd_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -20036,7 +20187,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fcmadd_round_pch() {
+    fn test_mm512_mask_fcmadd_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -20054,7 +20205,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fcmadd_round_pch() {
+    fn test_mm512_mask3_fcmadd_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -20072,7 +20223,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fcmadd_round_pch() {
+    fn test_mm512_maskz_fcmadd_round_pch() {
         let a = _mm512_set1_pch(0.0, 1.0);
         let b = _mm512_set1_pch(0.0, 2.0);
         let c = _mm512_set1_pch(0.0, 3.0);
@@ -20090,7 +20241,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fcmadd_sch() {
+    fn test_mm_fcmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -20100,7 +20251,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fcmadd_sch() {
+    fn test_mm_mask_fcmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -20113,7 +20264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fcmadd_sch() {
+    fn test_mm_mask3_fcmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -20126,7 +20277,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fcmadd_sch() {
+    fn test_mm_maskz_fcmadd_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -20139,7 +20290,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fcmadd_round_sch() {
+    fn test_mm_fcmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -20149,7 +20300,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fcmadd_round_sch() {
+    fn test_mm_mask_fcmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -20166,7 +20317,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fcmadd_round_sch() {
+    fn test_mm_mask3_fcmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -20183,7 +20334,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fcmadd_round_sch() {
+    fn test_mm_maskz_fcmadd_round_sch() {
         let a = _mm_setr_ph(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
         let b = _mm_setr_ph(0.0, 2.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0);
         let c = _mm_setr_ph(0.0, 3.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0);
@@ -20200,7 +20351,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmadd_ph() {
+    const fn test_mm_fmadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20210,7 +20361,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmadd_ph() {
+    const fn test_mm_mask_fmadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20220,7 +20371,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmadd_ph() {
+    const fn test_mm_mask3_fmadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20230,7 +20381,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmadd_ph() {
+    const fn test_mm_maskz_fmadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20240,7 +20391,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fmadd_ph() {
+    const fn test_mm256_fmadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20250,7 +20401,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fmadd_ph() {
+    const fn test_mm256_mask_fmadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20262,7 +20413,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask3_fmadd_ph() {
+    const fn test_mm256_mask3_fmadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20274,7 +20425,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fmadd_ph() {
+    const fn test_mm256_maskz_fmadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20286,7 +20437,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmadd_ph() {
+    const fn test_mm512_fmadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20296,7 +20447,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmadd_ph() {
+    const fn test_mm512_mask_fmadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20309,7 +20460,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmadd_ph() {
+    const fn test_mm512_mask3_fmadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20322,7 +20473,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmadd_ph() {
+    const fn test_mm512_maskz_fmadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20335,7 +20486,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmadd_round_ph() {
+    fn test_mm512_fmadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20345,7 +20496,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmadd_round_ph() {
+    fn test_mm512_mask_fmadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20363,7 +20514,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmadd_round_ph() {
+    fn test_mm512_mask3_fmadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20381,7 +20532,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmadd_round_ph() {
+    fn test_mm512_maskz_fmadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20399,7 +20550,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmadd_sh() {
+    const fn test_mm_fmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20409,7 +20560,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmadd_sh() {
+    const fn test_mm_mask_fmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20422,7 +20573,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmadd_sh() {
+    const fn test_mm_mask3_fmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20435,7 +20586,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmadd_sh() {
+    const fn test_mm_maskz_fmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20448,7 +20599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmadd_round_sh() {
+    fn test_mm_fmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20458,7 +20609,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmadd_round_sh() {
+    fn test_mm_mask_fmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20475,7 +20626,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmadd_round_sh() {
+    fn test_mm_mask3_fmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20492,7 +20643,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmadd_round_sh() {
+    fn test_mm_maskz_fmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20509,7 +20660,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmsub_ph() {
+    const fn test_mm_fmsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20519,7 +20670,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmsub_ph() {
+    const fn test_mm_mask_fmsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20529,7 +20680,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmsub_ph() {
+    const fn test_mm_mask3_fmsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20539,7 +20690,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmsub_ph() {
+    const fn test_mm_maskz_fmsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20549,7 +20700,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fmsub_ph() {
+    const fn test_mm256_fmsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20559,7 +20710,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fmsub_ph() {
+    const fn test_mm256_mask_fmsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20571,7 +20722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask3_fmsub_ph() {
+    const fn test_mm256_mask3_fmsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20583,7 +20734,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fmsub_ph() {
+    const fn test_mm256_maskz_fmsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20595,7 +20746,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmsub_ph() {
+    const fn test_mm512_fmsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20605,7 +20756,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmsub_ph() {
+    const fn test_mm512_mask_fmsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20618,7 +20769,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmsub_ph() {
+    const fn test_mm512_mask3_fmsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20631,7 +20782,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmsub_ph() {
+    const fn test_mm512_maskz_fmsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20644,7 +20795,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmsub_round_ph() {
+    fn test_mm512_fmsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20654,7 +20805,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmsub_round_ph() {
+    fn test_mm512_mask_fmsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20672,7 +20823,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmsub_round_ph() {
+    fn test_mm512_mask3_fmsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20690,7 +20841,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmsub_round_ph() {
+    fn test_mm512_maskz_fmsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20708,7 +20859,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmsub_sh() {
+    const fn test_mm_fmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20718,7 +20869,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmsub_sh() {
+    const fn test_mm_mask_fmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20731,7 +20882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmsub_sh() {
+    const fn test_mm_mask3_fmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20744,7 +20895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmsub_sh() {
+    const fn test_mm_maskz_fmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20757,7 +20908,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmsub_round_sh() {
+    fn test_mm_fmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20766,8 +20917,8 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_mask_fmsub_round_sh() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm_mask_fmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20783,8 +20934,8 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_mask3_fmsub_round_sh() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm_mask3_fmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20800,8 +20951,8 @@ mod tests {
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_maskz_fmsub_round_sh() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm_maskz_fmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -20818,7 +20969,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fnmadd_ph() {
+    const fn test_mm_fnmadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20828,7 +20979,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fnmadd_ph() {
+    const fn test_mm_mask_fnmadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20838,7 +20989,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fnmadd_ph() {
+    const fn test_mm_mask3_fnmadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20848,7 +20999,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fnmadd_ph() {
+    const fn test_mm_maskz_fnmadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -20858,7 +21009,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fnmadd_ph() {
+    const fn test_mm256_fnmadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20868,7 +21019,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fnmadd_ph() {
+    const fn test_mm256_mask_fnmadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20880,7 +21031,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask3_fnmadd_ph() {
+    const fn test_mm256_mask3_fnmadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20892,7 +21043,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fnmadd_ph() {
+    const fn test_mm256_maskz_fnmadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -20904,7 +21055,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fnmadd_ph() {
+    const fn test_mm512_fnmadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20914,7 +21065,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fnmadd_ph() {
+    const fn test_mm512_mask_fnmadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20927,7 +21078,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fnmadd_ph() {
+    const fn test_mm512_mask3_fnmadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20940,7 +21091,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fnmadd_ph() {
+    const fn test_mm512_maskz_fnmadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20953,7 +21104,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fnmadd_round_ph() {
+    fn test_mm512_fnmadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20964,7 +21115,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fnmadd_round_ph() {
+    fn test_mm512_mask_fnmadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -20982,7 +21133,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fnmadd_round_ph() {
+    fn test_mm512_mask3_fnmadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21000,7 +21151,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fnmadd_round_ph() {
+    fn test_mm512_maskz_fnmadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21018,7 +21169,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fnmadd_sh() {
+    const fn test_mm_fnmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21028,7 +21179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fnmadd_sh() {
+    const fn test_mm_mask_fnmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21041,7 +21192,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fnmadd_sh() {
+    const fn test_mm_mask3_fnmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21054,7 +21205,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fnmadd_sh() {
+    const fn test_mm_maskz_fnmadd_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21067,7 +21218,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fnmadd_round_sh() {
+    fn test_mm_fnmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21077,7 +21228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fnmadd_round_sh() {
+    fn test_mm_mask_fnmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21094,7 +21245,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fnmadd_round_sh() {
+    fn test_mm_mask3_fnmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21111,7 +21262,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fnmadd_round_sh() {
+    fn test_mm_maskz_fnmadd_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21128,7 +21279,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fnmsub_ph() {
+    const fn test_mm_fnmsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21138,7 +21289,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fnmsub_ph() {
+    const fn test_mm_mask_fnmsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21148,7 +21299,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fnmsub_ph() {
+    const fn test_mm_mask3_fnmsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21158,7 +21309,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fnmsub_ph() {
+    const fn test_mm_maskz_fnmsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21168,7 +21319,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fnmsub_ph() {
+    const fn test_mm256_fnmsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21178,7 +21329,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fnmsub_ph() {
+    const fn test_mm256_mask_fnmsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21190,7 +21341,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask3_fnmsub_ph() {
+    const fn test_mm256_mask3_fnmsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21202,7 +21353,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fnmsub_ph() {
+    const fn test_mm256_maskz_fnmsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21214,7 +21365,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fnmsub_ph() {
+    const fn test_mm512_fnmsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21224,7 +21375,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fnmsub_ph() {
+    const fn test_mm512_mask_fnmsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21237,7 +21388,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fnmsub_ph() {
+    const fn test_mm512_mask3_fnmsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21250,7 +21401,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fnmsub_ph() {
+    const fn test_mm512_maskz_fnmsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21263,7 +21414,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fnmsub_round_ph() {
+    fn test_mm512_fnmsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21274,7 +21425,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fnmsub_round_ph() {
+    fn test_mm512_mask_fnmsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21292,7 +21443,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fnmsub_round_ph() {
+    fn test_mm512_mask3_fnmsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21310,7 +21461,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fnmsub_round_ph() {
+    fn test_mm512_maskz_fnmsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21328,7 +21479,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fnmsub_sh() {
+    const fn test_mm_fnmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21338,7 +21489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fnmsub_sh() {
+    const fn test_mm_mask_fnmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21351,7 +21502,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fnmsub_sh() {
+    const fn test_mm_mask3_fnmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21364,7 +21515,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fnmsub_sh() {
+    const fn test_mm_maskz_fnmsub_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21377,7 +21528,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fnmsub_round_sh() {
+    fn test_mm_fnmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21387,7 +21538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fnmsub_round_sh() {
+    fn test_mm_mask_fnmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21404,7 +21555,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fnmsub_round_sh() {
+    fn test_mm_mask3_fnmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21421,7 +21572,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fnmsub_round_sh() {
+    fn test_mm_maskz_fnmsub_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
         let c = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -21438,7 +21589,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmaddsub_ph() {
+    const fn test_mm_fmaddsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21448,7 +21599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmaddsub_ph() {
+    const fn test_mm_mask_fmaddsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21458,7 +21609,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmaddsub_ph() {
+    const fn test_mm_mask3_fmaddsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21468,7 +21619,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmaddsub_ph() {
+    const fn test_mm_maskz_fmaddsub_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21478,7 +21629,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fmaddsub_ph() {
+    const fn test_mm256_fmaddsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21490,7 +21641,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fmaddsub_ph() {
+    const fn test_mm256_mask_fmaddsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21502,7 +21653,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask3_fmaddsub_ph() {
+    const fn test_mm256_mask3_fmaddsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21514,7 +21665,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fmaddsub_ph() {
+    const fn test_mm256_maskz_fmaddsub_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21526,7 +21677,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmaddsub_ph() {
+    const fn test_mm512_fmaddsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21539,7 +21690,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmaddsub_ph() {
+    const fn test_mm512_mask_fmaddsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21552,7 +21703,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmaddsub_ph() {
+    const fn test_mm512_mask3_fmaddsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21565,7 +21716,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmaddsub_ph() {
+    const fn test_mm512_maskz_fmaddsub_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21578,7 +21729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmaddsub_round_ph() {
+    fn test_mm512_fmaddsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21592,7 +21743,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmaddsub_round_ph() {
+    fn test_mm512_mask_fmaddsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21610,7 +21761,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmaddsub_round_ph() {
+    fn test_mm512_mask3_fmaddsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21628,7 +21779,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmaddsub_round_ph() {
+    fn test_mm512_maskz_fmaddsub_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21646,7 +21797,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fmsubadd_ph() {
+    const fn test_mm_fmsubadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21656,7 +21807,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fmsubadd_ph() {
+    const fn test_mm_mask_fmsubadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21666,7 +21817,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask3_fmsubadd_ph() {
+    const fn test_mm_mask3_fmsubadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21676,7 +21827,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_fmsubadd_ph() {
+    const fn test_mm_maskz_fmsubadd_ph() {
         let a = _mm_set1_ph(1.0);
         let b = _mm_set1_ph(2.0);
         let c = _mm_set1_ph(3.0);
@@ -21686,7 +21837,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fmsubadd_ph() {
+    const fn test_mm256_fmsubadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21698,7 +21849,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fmsubadd_ph() {
+    const fn test_mm256_mask_fmsubadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21710,7 +21861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask3_fmsubadd_ph() {
+    const fn test_mm256_mask3_fmsubadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21722,7 +21873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_fmsubadd_ph() {
+    const fn test_mm256_maskz_fmsubadd_ph() {
         let a = _mm256_set1_ph(1.0);
         let b = _mm256_set1_ph(2.0);
         let c = _mm256_set1_ph(3.0);
@@ -21734,7 +21885,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmsubadd_ph() {
+    const fn test_mm512_fmsubadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21747,7 +21898,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmsubadd_ph() {
+    const fn test_mm512_mask_fmsubadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21760,7 +21911,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmsubadd_ph() {
+    const fn test_mm512_mask3_fmsubadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21773,7 +21924,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmsubadd_ph() {
+    const fn test_mm512_maskz_fmsubadd_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21786,7 +21937,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fmsubadd_round_ph() {
+    fn test_mm512_fmsubadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21800,7 +21951,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fmsubadd_round_ph() {
+    fn test_mm512_mask_fmsubadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21818,7 +21969,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask3_fmsubadd_round_ph() {
+    fn test_mm512_mask3_fmsubadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21836,7 +21987,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_fmsubadd_round_ph() {
+    fn test_mm512_maskz_fmsubadd_round_ph() {
         let a = _mm512_set1_ph(1.0);
         let b = _mm512_set1_ph(2.0);
         let c = _mm512_set1_ph(3.0);
@@ -21854,7 +22005,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_rcp_ph() {
+    fn test_mm_rcp_ph() {
         let a = _mm_set1_ph(2.0);
         let r = _mm_rcp_ph(a);
         let e = _mm_set1_ph(0.5);
@@ -21862,7 +22013,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_rcp_ph() {
+    fn test_mm_mask_rcp_ph() {
         let a = _mm_set1_ph(2.0);
         let src = _mm_set1_ph(1.0);
         let r = _mm_mask_rcp_ph(src, 0b01010101, a);
@@ -21871,7 +22022,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_rcp_ph() {
+    fn test_mm_maskz_rcp_ph() {
         let a = _mm_set1_ph(2.0);
         let r = _mm_maskz_rcp_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
@@ -21879,7 +22030,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_rcp_ph() {
+    fn test_mm256_rcp_ph() {
         let a = _mm256_set1_ph(2.0);
         let r = _mm256_rcp_ph(a);
         let e = _mm256_set1_ph(0.5);
@@ -21887,7 +22038,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_rcp_ph() {
+    fn test_mm256_mask_rcp_ph() {
         let a = _mm256_set1_ph(2.0);
         let src = _mm256_set1_ph(1.0);
         let r = _mm256_mask_rcp_ph(src, 0b0101010101010101, a);
@@ -21898,7 +22049,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_rcp_ph() {
+    fn test_mm256_maskz_rcp_ph() {
         let a = _mm256_set1_ph(2.0);
         let r = _mm256_maskz_rcp_ph(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -21908,7 +22059,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_rcp_ph() {
+    fn test_mm512_rcp_ph() {
         let a = _mm512_set1_ph(2.0);
         let r = _mm512_rcp_ph(a);
         let e = _mm512_set1_ph(0.5);
@@ -21916,7 +22067,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_rcp_ph() {
+    fn test_mm512_mask_rcp_ph() {
         let a = _mm512_set1_ph(2.0);
         let src = _mm512_set1_ph(1.0);
         let r = _mm512_mask_rcp_ph(src, 0b01010101010101010101010101010101, a);
@@ -21928,7 +22079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_rcp_ph() {
+    fn test_mm512_maskz_rcp_ph() {
         let a = _mm512_set1_ph(2.0);
         let r = _mm512_maskz_rcp_ph(0b01010101010101010101010101010101, a);
         let e = _mm512_set_ph(
@@ -21939,7 +22090,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_rcp_sh() {
+    fn test_mm_rcp_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r = _mm_rcp_sh(a, b);
@@ -21948,7 +22099,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_rcp_sh() {
+    fn test_mm_mask_rcp_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
@@ -21961,7 +22112,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_rcp_sh() {
+    fn test_mm_maskz_rcp_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r = _mm_maskz_rcp_sh(0, a, b);
@@ -21973,7 +22124,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_rsqrt_ph() {
+    fn test_mm_rsqrt_ph() {
         let a = _mm_set1_ph(4.0);
         let r = _mm_rsqrt_ph(a);
         let e = _mm_set1_ph(0.5);
@@ -21981,7 +22132,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_rsqrt_ph() {
+    fn test_mm_mask_rsqrt_ph() {
         let a = _mm_set1_ph(4.0);
         let src = _mm_set1_ph(1.0);
         let r = _mm_mask_rsqrt_ph(src, 0b01010101, a);
@@ -21990,7 +22141,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_rsqrt_ph() {
+    fn test_mm_maskz_rsqrt_ph() {
         let a = _mm_set1_ph(4.0);
         let r = _mm_maskz_rsqrt_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 0.5, 0.0, 0.5, 0.0, 0.5, 0.0, 0.5);
@@ -21998,7 +22149,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_rsqrt_ph() {
+    fn test_mm256_rsqrt_ph() {
         let a = _mm256_set1_ph(4.0);
         let r = _mm256_rsqrt_ph(a);
         let e = _mm256_set1_ph(0.5);
@@ -22006,7 +22157,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_rsqrt_ph() {
+    fn test_mm256_mask_rsqrt_ph() {
         let a = _mm256_set1_ph(4.0);
         let src = _mm256_set1_ph(1.0);
         let r = _mm256_mask_rsqrt_ph(src, 0b0101010101010101, a);
@@ -22017,7 +22168,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_rsqrt_ph() {
+    fn test_mm256_maskz_rsqrt_ph() {
         let a = _mm256_set1_ph(4.0);
         let r = _mm256_maskz_rsqrt_ph(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -22027,7 +22178,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_rsqrt_ph() {
+    fn test_mm512_rsqrt_ph() {
         let a = _mm512_set1_ph(4.0);
         let r = _mm512_rsqrt_ph(a);
         let e = _mm512_set1_ph(0.5);
@@ -22035,7 +22186,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_rsqrt_ph() {
+    fn test_mm512_mask_rsqrt_ph() {
         let a = _mm512_set1_ph(4.0);
         let src = _mm512_set1_ph(1.0);
         let r = _mm512_mask_rsqrt_ph(src, 0b01010101010101010101010101010101, a);
@@ -22047,7 +22198,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_rsqrt_ph() {
+    fn test_mm512_maskz_rsqrt_ph() {
         let a = _mm512_set1_ph(4.0);
         let r = _mm512_maskz_rsqrt_ph(0b01010101010101010101010101010101, a);
         let e = _mm512_set_ph(
@@ -22058,7 +22209,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_rsqrt_sh() {
+    fn test_mm_rsqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
         let r = _mm_rsqrt_sh(a, b);
@@ -22067,7 +22218,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_rsqrt_sh() {
+    fn test_mm_mask_rsqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
         let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
@@ -22080,7 +22231,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_rsqrt_sh() {
+    fn test_mm_maskz_rsqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
         let r = _mm_maskz_rsqrt_sh(0, a, b);
@@ -22092,7 +22243,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_sqrt_ph() {
+    fn test_mm_sqrt_ph() {
         let a = _mm_set1_ph(4.0);
         let r = _mm_sqrt_ph(a);
         let e = _mm_set1_ph(2.0);
@@ -22100,7 +22251,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_sqrt_ph() {
+    fn test_mm_mask_sqrt_ph() {
         let a = _mm_set1_ph(4.0);
         let src = _mm_set1_ph(1.0);
         let r = _mm_mask_sqrt_ph(src, 0b01010101, a);
@@ -22109,7 +22260,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_sqrt_ph() {
+    fn test_mm_maskz_sqrt_ph() {
         let a = _mm_set1_ph(4.0);
         let r = _mm_maskz_sqrt_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0);
@@ -22117,7 +22268,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_sqrt_ph() {
+    fn test_mm256_sqrt_ph() {
         let a = _mm256_set1_ph(4.0);
         let r = _mm256_sqrt_ph(a);
         let e = _mm256_set1_ph(2.0);
@@ -22125,7 +22276,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_sqrt_ph() {
+    fn test_mm256_mask_sqrt_ph() {
         let a = _mm256_set1_ph(4.0);
         let src = _mm256_set1_ph(1.0);
         let r = _mm256_mask_sqrt_ph(src, 0b0101010101010101, a);
@@ -22136,7 +22287,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_sqrt_ph() {
+    fn test_mm256_maskz_sqrt_ph() {
         let a = _mm256_set1_ph(4.0);
         let r = _mm256_maskz_sqrt_ph(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -22146,7 +22297,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_sqrt_ph() {
+    fn test_mm512_sqrt_ph() {
         let a = _mm512_set1_ph(4.0);
         let r = _mm512_sqrt_ph(a);
         let e = _mm512_set1_ph(2.0);
@@ -22154,7 +22305,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_sqrt_ph() {
+    fn test_mm512_mask_sqrt_ph() {
         let a = _mm512_set1_ph(4.0);
         let src = _mm512_set1_ph(1.0);
         let r = _mm512_mask_sqrt_ph(src, 0b01010101010101010101010101010101, a);
@@ -22166,7 +22317,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_sqrt_ph() {
+    fn test_mm512_maskz_sqrt_ph() {
         let a = _mm512_set1_ph(4.0);
         let r = _mm512_maskz_sqrt_ph(0b01010101010101010101010101010101, a);
         let e = _mm512_set_ph(
@@ -22177,7 +22328,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_sqrt_round_ph() {
+    fn test_mm512_sqrt_round_ph() {
         let a = _mm512_set1_ph(4.0);
         let r = _mm512_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set1_ph(2.0);
@@ -22185,7 +22336,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_sqrt_round_ph() {
+    fn test_mm512_mask_sqrt_round_ph() {
         let a = _mm512_set1_ph(4.0);
         let src = _mm512_set1_ph(1.0);
         let r = _mm512_mask_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -22201,7 +22352,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_sqrt_round_ph() {
+    fn test_mm512_maskz_sqrt_round_ph() {
         let a = _mm512_set1_ph(4.0);
         let r = _mm512_maskz_sqrt_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01010101010101010101010101010101,
@@ -22215,7 +22366,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_sqrt_sh() {
+    fn test_mm_sqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
         let r = _mm_sqrt_sh(a, b);
@@ -22224,7 +22375,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_sqrt_sh() {
+    fn test_mm_mask_sqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
         let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
@@ -22237,7 +22388,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_sqrt_sh() {
+    fn test_mm_maskz_sqrt_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
         let r = _mm_maskz_sqrt_sh(0, a, b);
@@ -22249,7 +22400,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_sqrt_round_sh() {
+    fn test_mm_sqrt_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
         let r = _mm_sqrt_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -22258,7 +22409,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_sqrt_round_sh() {
+    fn test_mm_mask_sqrt_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
         let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
@@ -22275,7 +22426,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_sqrt_round_sh() {
+    fn test_mm_maskz_sqrt_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(4.0, 40.0, 41.0, 42.0, 43.0, 44.0, 45.0, 46.0);
         let r =
@@ -22289,7 +22440,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_max_ph() {
+    fn test_mm_max_ph() {
         let a = _mm_set1_ph(2.0);
         let b = _mm_set1_ph(1.0);
         let r = _mm_max_ph(a, b);
@@ -22298,7 +22449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_max_ph() {
+    fn test_mm_mask_max_ph() {
         let a = _mm_set1_ph(2.0);
         let b = _mm_set1_ph(1.0);
         let src = _mm_set1_ph(3.0);
@@ -22308,7 +22459,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_max_ph() {
+    fn test_mm_maskz_max_ph() {
         let a = _mm_set1_ph(2.0);
         let b = _mm_set1_ph(1.0);
         let r = _mm_maskz_max_ph(0b01010101, a, b);
@@ -22317,7 +22468,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_max_ph() {
+    fn test_mm256_max_ph() {
         let a = _mm256_set1_ph(2.0);
         let b = _mm256_set1_ph(1.0);
         let r = _mm256_max_ph(a, b);
@@ -22326,7 +22477,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_max_ph() {
+    fn test_mm256_mask_max_ph() {
         let a = _mm256_set1_ph(2.0);
         let b = _mm256_set1_ph(1.0);
         let src = _mm256_set1_ph(3.0);
@@ -22338,7 +22489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_max_ph() {
+    fn test_mm256_maskz_max_ph() {
         let a = _mm256_set1_ph(2.0);
         let b = _mm256_set1_ph(1.0);
         let r = _mm256_maskz_max_ph(0b0101010101010101, a, b);
@@ -22349,7 +22500,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_max_ph() {
+    fn test_mm512_max_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let r = _mm512_max_ph(a, b);
@@ -22358,7 +22509,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_max_ph() {
+    fn test_mm512_mask_max_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let src = _mm512_set1_ph(3.0);
@@ -22371,7 +22522,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_max_ph() {
+    fn test_mm512_maskz_max_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let r = _mm512_maskz_max_ph(0b01010101010101010101010101010101, a, b);
@@ -22383,7 +22534,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_max_round_ph() {
+    fn test_mm512_max_round_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let r = _mm512_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -22392,7 +22543,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_max_round_ph() {
+    fn test_mm512_mask_max_round_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let src = _mm512_set1_ph(3.0);
@@ -22410,7 +22561,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_max_round_ph() {
+    fn test_mm512_maskz_max_round_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let r = _mm512_maskz_max_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -22426,7 +22577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_max_sh() {
+    fn test_mm_max_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r = _mm_max_sh(a, b);
@@ -22435,7 +22586,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_max_sh() {
+    fn test_mm_mask_max_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
@@ -22448,7 +22599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_max_sh() {
+    fn test_mm_maskz_max_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r = _mm_maskz_max_sh(0, a, b);
@@ -22460,7 +22611,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_max_round_sh() {
+    fn test_mm_max_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r = _mm_max_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -22469,7 +22620,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_max_round_sh() {
+    fn test_mm_mask_max_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
@@ -22486,7 +22637,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_max_round_sh() {
+    fn test_mm_maskz_max_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r =
@@ -22500,7 +22651,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_min_ph() {
+    fn test_mm_min_ph() {
         let a = _mm_set1_ph(2.0);
         let b = _mm_set1_ph(1.0);
         let r = _mm_min_ph(a, b);
@@ -22509,7 +22660,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_min_ph() {
+    fn test_mm_mask_min_ph() {
         let a = _mm_set1_ph(2.0);
         let b = _mm_set1_ph(1.0);
         let src = _mm_set1_ph(3.0);
@@ -22519,7 +22670,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_min_ph() {
+    fn test_mm_maskz_min_ph() {
         let a = _mm_set1_ph(2.0);
         let b = _mm_set1_ph(1.0);
         let r = _mm_maskz_min_ph(0b01010101, a, b);
@@ -22528,7 +22679,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_min_ph() {
+    fn test_mm256_min_ph() {
         let a = _mm256_set1_ph(2.0);
         let b = _mm256_set1_ph(1.0);
         let r = _mm256_min_ph(a, b);
@@ -22537,7 +22688,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_min_ph() {
+    fn test_mm256_mask_min_ph() {
         let a = _mm256_set1_ph(2.0);
         let b = _mm256_set1_ph(1.0);
         let src = _mm256_set1_ph(3.0);
@@ -22549,7 +22700,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_min_ph() {
+    fn test_mm256_maskz_min_ph() {
         let a = _mm256_set1_ph(2.0);
         let b = _mm256_set1_ph(1.0);
         let r = _mm256_maskz_min_ph(0b0101010101010101, a, b);
@@ -22560,7 +22711,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_min_ph() {
+    fn test_mm512_min_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let r = _mm512_min_ph(a, b);
@@ -22569,7 +22720,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_min_ph() {
+    fn test_mm512_mask_min_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let src = _mm512_set1_ph(3.0);
@@ -22582,7 +22733,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_min_ph() {
+    fn test_mm512_maskz_min_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let r = _mm512_maskz_min_ph(0b01010101010101010101010101010101, a, b);
@@ -22594,7 +22745,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_min_round_ph() {
+    fn test_mm512_min_round_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let r = _mm512_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -22603,7 +22754,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_min_round_ph() {
+    fn test_mm512_mask_min_round_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let src = _mm512_set1_ph(3.0);
@@ -22621,7 +22772,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_min_round_ph() {
+    fn test_mm512_maskz_min_round_ph() {
         let a = _mm512_set1_ph(2.0);
         let b = _mm512_set1_ph(1.0);
         let r = _mm512_maskz_min_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -22637,7 +22788,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_min_sh() {
+    fn test_mm_min_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r = _mm_min_sh(a, b);
@@ -22646,7 +22797,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_min_sh() {
+    fn test_mm_mask_min_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
@@ -22659,7 +22810,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_min_sh() {
+    fn test_mm_maskz_min_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r = _mm_maskz_min_sh(0, a, b);
@@ -22671,7 +22822,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_min_round_sh() {
+    fn test_mm_min_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r = _mm_min_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -22680,7 +22831,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_min_round_sh() {
+    fn test_mm_mask_min_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let src = _mm_setr_ph(3.0, 30.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0);
@@ -22697,7 +22848,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_min_round_sh() {
+    fn test_mm_maskz_min_round_sh() {
         let a = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let b = _mm_setr_ph(2.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0);
         let r =
@@ -22711,7 +22862,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_getexp_ph() {
+    fn test_mm_getexp_ph() {
         let a = _mm_set1_ph(3.0);
         let r = _mm_getexp_ph(a);
         let e = _mm_set1_ph(1.0);
@@ -22719,7 +22870,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_getexp_ph() {
+    fn test_mm_mask_getexp_ph() {
         let a = _mm_set1_ph(3.0);
         let src = _mm_set1_ph(4.0);
         let r = _mm_mask_getexp_ph(src, 0b01010101, a);
@@ -22728,7 +22879,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_getexp_ph() {
+    fn test_mm_maskz_getexp_ph() {
         let a = _mm_set1_ph(3.0);
         let r = _mm_maskz_getexp_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
@@ -22736,7 +22887,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_getexp_ph() {
+    fn test_mm256_getexp_ph() {
         let a = _mm256_set1_ph(3.0);
         let r = _mm256_getexp_ph(a);
         let e = _mm256_set1_ph(1.0);
@@ -22744,7 +22895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_getexp_ph() {
+    fn test_mm256_mask_getexp_ph() {
         let a = _mm256_set1_ph(3.0);
         let src = _mm256_set1_ph(4.0);
         let r = _mm256_mask_getexp_ph(src, 0b0101010101010101, a);
@@ -22755,7 +22906,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_getexp_ph() {
+    fn test_mm256_maskz_getexp_ph() {
         let a = _mm256_set1_ph(3.0);
         let r = _mm256_maskz_getexp_ph(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -22765,7 +22916,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_getexp_ph() {
+    fn test_mm512_getexp_ph() {
         let a = _mm512_set1_ph(3.0);
         let r = _mm512_getexp_ph(a);
         let e = _mm512_set1_ph(1.0);
@@ -22773,7 +22924,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_getexp_ph() {
+    fn test_mm512_mask_getexp_ph() {
         let a = _mm512_set1_ph(3.0);
         let src = _mm512_set1_ph(4.0);
         let r = _mm512_mask_getexp_ph(src, 0b01010101010101010101010101010101, a);
@@ -22785,7 +22936,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_getexp_ph() {
+    fn test_mm512_maskz_getexp_ph() {
         let a = _mm512_set1_ph(3.0);
         let r = _mm512_maskz_getexp_ph(0b01010101010101010101010101010101, a);
         let e = _mm512_set_ph(
@@ -22796,7 +22947,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_getexp_round_ph() {
+    fn test_mm512_getexp_round_ph() {
         let a = _mm512_set1_ph(3.0);
         let r = _mm512_getexp_round_ph::<_MM_FROUND_NO_EXC>(a);
         let e = _mm512_set1_ph(1.0);
@@ -22804,7 +22955,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_getexp_round_ph() {
+    fn test_mm512_mask_getexp_round_ph() {
         let a = _mm512_set1_ph(3.0);
         let src = _mm512_set1_ph(4.0);
         let r = _mm512_mask_getexp_round_ph::<_MM_FROUND_NO_EXC>(
@@ -22820,7 +22971,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_getexp_round_ph() {
+    fn test_mm512_maskz_getexp_round_ph() {
         let a = _mm512_set1_ph(3.0);
         let r = _mm512_maskz_getexp_round_ph::<_MM_FROUND_NO_EXC>(
             0b01010101010101010101010101010101,
@@ -22834,7 +22985,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_getexp_sh() {
+    fn test_mm_getexp_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_getexp_sh(a, b);
@@ -22843,7 +22994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_getexp_sh() {
+    fn test_mm_mask_getexp_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -22856,7 +23007,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_getexp_sh() {
+    fn test_mm_maskz_getexp_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_maskz_getexp_sh(0, a, b);
@@ -22868,7 +23019,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_getexp_round_sh() {
+    fn test_mm_getexp_round_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_getexp_round_sh::<_MM_FROUND_NO_EXC>(a, b);
@@ -22877,7 +23028,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_getexp_round_sh() {
+    fn test_mm_mask_getexp_round_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(4.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -22890,7 +23041,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_getexp_round_sh() {
+    fn test_mm_maskz_getexp_round_sh() {
         let a = _mm_setr_ph(4.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_maskz_getexp_round_sh::<_MM_FROUND_NO_EXC>(0, a, b);
@@ -22902,7 +23053,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_getmant_ph() {
+    fn test_mm_getmant_ph() {
         let a = _mm_set1_ph(10.0);
         let r = _mm_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
         let e = _mm_set1_ph(1.25);
@@ -22910,7 +23061,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_getmant_ph() {
+    fn test_mm_mask_getmant_ph() {
         let a = _mm_set1_ph(10.0);
         let src = _mm_set1_ph(20.0);
         let r = _mm_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(src, 0b01010101, a);
@@ -22919,7 +23070,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_getmant_ph() {
+    fn test_mm_maskz_getmant_ph() {
         let a = _mm_set1_ph(10.0);
         let r = _mm_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0b01010101, a);
         let e = _mm_set_ph(0.0, 1.25, 0.0, 1.25, 0.0, 1.25, 0.0, 1.25);
@@ -22927,7 +23078,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_getmant_ph() {
+    fn test_mm256_getmant_ph() {
         let a = _mm256_set1_ph(10.0);
         let r = _mm256_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
         let e = _mm256_set1_ph(1.25);
@@ -22935,7 +23086,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_getmant_ph() {
+    fn test_mm256_mask_getmant_ph() {
         let a = _mm256_set1_ph(10.0);
         let src = _mm256_set1_ph(20.0);
         let r = _mm256_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
@@ -22951,7 +23102,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_getmant_ph() {
+    fn test_mm256_maskz_getmant_ph() {
         let a = _mm256_set1_ph(10.0);
         let r = _mm256_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
             0b0101010101010101,
@@ -22964,7 +23115,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_getmant_ph() {
+    fn test_mm512_getmant_ph() {
         let a = _mm512_set1_ph(10.0);
         let r = _mm512_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
         let e = _mm512_set1_ph(1.25);
@@ -22972,7 +23123,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_getmant_ph() {
+    fn test_mm512_mask_getmant_ph() {
         let a = _mm512_set1_ph(10.0);
         let src = _mm512_set1_ph(20.0);
         let r = _mm512_mask_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
@@ -22989,7 +23140,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_getmant_ph() {
+    fn test_mm512_maskz_getmant_ph() {
         let a = _mm512_set1_ph(10.0);
         let r = _mm512_maskz_getmant_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(
             0b01010101010101010101010101010101,
@@ -23003,7 +23154,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_getmant_round_ph() {
+    fn test_mm512_getmant_round_ph() {
         let a = _mm512_set1_ph(10.0);
         let r =
             _mm512_getmant_round_ph::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
@@ -23014,7 +23165,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_getmant_round_ph() {
+    fn test_mm512_mask_getmant_round_ph() {
         let a = _mm512_set1_ph(10.0);
         let src = _mm512_set1_ph(20.0);
         let r = _mm512_mask_getmant_round_ph::<
@@ -23031,7 +23182,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_getmant_round_ph() {
+    fn test_mm512_maskz_getmant_round_ph() {
         let a = _mm512_set1_ph(10.0);
         let r = _mm512_maskz_getmant_round_ph::<
             _MM_MANT_NORM_P75_1P5,
@@ -23046,7 +23197,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_getmant_sh() {
+    fn test_mm_getmant_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a, b);
@@ -23055,7 +23206,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_getmant_sh() {
+    fn test_mm_mask_getmant_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -23068,7 +23219,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_getmant_sh() {
+    fn test_mm_maskz_getmant_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_maskz_getmant_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(0, a, b);
@@ -23080,7 +23231,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_getmant_round_sh() {
+    fn test_mm_getmant_round_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_getmant_round_sh::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN, _MM_FROUND_NO_EXC>(
@@ -23091,7 +23242,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_getmant_round_sh() {
+    fn test_mm_mask_getmant_round_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(20.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -23112,7 +23263,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_getmant_round_sh() {
+    fn test_mm_maskz_getmant_round_sh() {
         let a = _mm_setr_ph(15.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(10.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_maskz_getmant_round_sh::<
@@ -23132,7 +23283,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_roundscale_ph() {
+    fn test_mm_roundscale_ph() {
         let a = _mm_set1_ph(1.1);
         let r = _mm_roundscale_ph::<0>(a);
         let e = _mm_set1_ph(1.0);
@@ -23140,7 +23291,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_roundscale_ph() {
+    fn test_mm_mask_roundscale_ph() {
         let a = _mm_set1_ph(1.1);
         let src = _mm_set1_ph(2.0);
         let r = _mm_mask_roundscale_ph::<0>(src, 0b01010101, a);
@@ -23149,7 +23300,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_roundscale_ph() {
+    fn test_mm_maskz_roundscale_ph() {
         let a = _mm_set1_ph(1.1);
         let r = _mm_maskz_roundscale_ph::<0>(0b01010101, a);
         let e = _mm_set_ph(0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
@@ -23157,7 +23308,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_roundscale_ph() {
+    fn test_mm256_roundscale_ph() {
         let a = _mm256_set1_ph(1.1);
         let r = _mm256_roundscale_ph::<0>(a);
         let e = _mm256_set1_ph(1.0);
@@ -23165,7 +23316,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_roundscale_ph() {
+    fn test_mm256_mask_roundscale_ph() {
         let a = _mm256_set1_ph(1.1);
         let src = _mm256_set1_ph(2.0);
         let r = _mm256_mask_roundscale_ph::<0>(src, 0b0101010101010101, a);
@@ -23176,7 +23327,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_roundscale_ph() {
+    fn test_mm256_maskz_roundscale_ph() {
         let a = _mm256_set1_ph(1.1);
         let r = _mm256_maskz_roundscale_ph::<0>(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -23186,7 +23337,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_roundscale_ph() {
+    fn test_mm512_roundscale_ph() {
         let a = _mm512_set1_ph(1.1);
         let r = _mm512_roundscale_ph::<0>(a);
         let e = _mm512_set1_ph(1.0);
@@ -23194,7 +23345,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_roundscale_ph() {
+    fn test_mm512_mask_roundscale_ph() {
         let a = _mm512_set1_ph(1.1);
         let src = _mm512_set1_ph(2.0);
         let r = _mm512_mask_roundscale_ph::<0>(src, 0b01010101010101010101010101010101, a);
@@ -23206,7 +23357,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_roundscale_ph() {
+    fn test_mm512_maskz_roundscale_ph() {
         let a = _mm512_set1_ph(1.1);
         let r = _mm512_maskz_roundscale_ph::<0>(0b01010101010101010101010101010101, a);
         let e = _mm512_set_ph(
@@ -23217,7 +23368,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_roundscale_round_ph() {
+    fn test_mm512_roundscale_round_ph() {
         let a = _mm512_set1_ph(1.1);
         let r = _mm512_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(a);
         let e = _mm512_set1_ph(1.0);
@@ -23225,7 +23376,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_roundscale_round_ph() {
+    fn test_mm512_mask_roundscale_round_ph() {
         let a = _mm512_set1_ph(1.1);
         let src = _mm512_set1_ph(2.0);
         let r = _mm512_mask_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
@@ -23241,7 +23392,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_roundscale_round_ph() {
+    fn test_mm512_maskz_roundscale_round_ph() {
         let a = _mm512_set1_ph(1.1);
         let r = _mm512_maskz_roundscale_round_ph::<0, _MM_FROUND_NO_EXC>(
             0b01010101010101010101010101010101,
@@ -23255,7 +23406,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_roundscale_sh() {
+    fn test_mm_roundscale_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_roundscale_sh::<0>(a, b);
@@ -23264,7 +23415,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_roundscale_sh() {
+    fn test_mm_mask_roundscale_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -23277,7 +23428,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_roundscale_sh() {
+    fn test_mm_maskz_roundscale_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_maskz_roundscale_sh::<0>(0, a, b);
@@ -23289,7 +23440,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_roundscale_round_sh() {
+    fn test_mm_roundscale_round_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(a, b);
@@ -23298,7 +23449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_roundscale_round_sh() {
+    fn test_mm_mask_roundscale_round_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(3.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -23311,7 +23462,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_roundscale_round_sh() {
+    fn test_mm_maskz_roundscale_round_sh() {
         let a = _mm_setr_ph(2.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.1, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_maskz_roundscale_round_sh::<0, _MM_FROUND_NO_EXC>(0, a, b);
@@ -23323,7 +23474,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_scalef_ph() {
+    fn test_mm_scalef_ph() {
         let a = _mm_set1_ph(1.);
         let b = _mm_set1_ph(3.);
         let r = _mm_scalef_ph(a, b);
@@ -23332,7 +23483,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_scalef_ph() {
+    fn test_mm_mask_scalef_ph() {
         let a = _mm_set1_ph(1.);
         let b = _mm_set1_ph(3.);
         let src = _mm_set1_ph(2.);
@@ -23342,7 +23493,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_scalef_ph() {
+    fn test_mm_maskz_scalef_ph() {
         let a = _mm_set1_ph(1.);
         let b = _mm_set1_ph(3.);
         let r = _mm_maskz_scalef_ph(0b01010101, a, b);
@@ -23351,7 +23502,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_scalef_ph() {
+    fn test_mm256_scalef_ph() {
         let a = _mm256_set1_ph(1.);
         let b = _mm256_set1_ph(3.);
         let r = _mm256_scalef_ph(a, b);
@@ -23360,7 +23511,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_scalef_ph() {
+    fn test_mm256_mask_scalef_ph() {
         let a = _mm256_set1_ph(1.);
         let b = _mm256_set1_ph(3.);
         let src = _mm256_set1_ph(2.);
@@ -23372,7 +23523,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_scalef_ph() {
+    fn test_mm256_maskz_scalef_ph() {
         let a = _mm256_set1_ph(1.);
         let b = _mm256_set1_ph(3.);
         let r = _mm256_maskz_scalef_ph(0b0101010101010101, a, b);
@@ -23383,7 +23534,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_scalef_ph() {
+    fn test_mm512_scalef_ph() {
         let a = _mm512_set1_ph(1.);
         let b = _mm512_set1_ph(3.);
         let r = _mm512_scalef_ph(a, b);
@@ -23392,7 +23543,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_scalef_ph() {
+    fn test_mm512_mask_scalef_ph() {
         let a = _mm512_set1_ph(1.);
         let b = _mm512_set1_ph(3.);
         let src = _mm512_set1_ph(2.);
@@ -23405,7 +23556,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_scalef_ph() {
+    fn test_mm512_maskz_scalef_ph() {
         let a = _mm512_set1_ph(1.);
         let b = _mm512_set1_ph(3.);
         let r = _mm512_maskz_scalef_ph(0b01010101010101010101010101010101, a, b);
@@ -23417,7 +23568,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_scalef_round_ph() {
+    fn test_mm512_scalef_round_ph() {
         let a = _mm512_set1_ph(1.);
         let b = _mm512_set1_ph(3.);
         let r = _mm512_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -23426,7 +23577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_scalef_round_ph() {
+    fn test_mm512_mask_scalef_round_ph() {
         let a = _mm512_set1_ph(1.);
         let b = _mm512_set1_ph(3.);
         let src = _mm512_set1_ph(2.);
@@ -23444,7 +23595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_scalef_round_ph() {
+    fn test_mm512_maskz_scalef_round_ph() {
         let a = _mm512_set1_ph(1.);
         let b = _mm512_set1_ph(3.);
         let r = _mm512_maskz_scalef_round_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -23460,7 +23611,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_scalef_sh() {
+    fn test_mm_scalef_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_scalef_sh(a, b);
@@ -23469,7 +23620,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_scalef_sh() {
+    fn test_mm_mask_scalef_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -23482,7 +23633,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_scalef_sh() {
+    fn test_mm_maskz_scalef_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_maskz_scalef_sh(0, a, b);
@@ -23494,7 +23645,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_scalef_round_sh() {
+    fn test_mm_scalef_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_scalef_round_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -23503,7 +23654,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_scalef_round_sh() {
+    fn test_mm_mask_scalef_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -23520,7 +23671,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_scalef_round_sh() {
+    fn test_mm_maskz_scalef_round_sh() {
         let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(3.0, 20., 21., 22., 23., 24., 25., 26.);
         let r =
@@ -23534,7 +23685,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_reduce_ph() {
+    fn test_mm_reduce_ph() {
         let a = _mm_set1_ph(1.25);
         let r = _mm_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
         let e = _mm_set1_ph(0.25);
@@ -23542,7 +23693,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_reduce_ph() {
+    fn test_mm_mask_reduce_ph() {
         let a = _mm_set1_ph(1.25);
         let src = _mm_set1_ph(2.0);
         let r = _mm_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01010101, a);
@@ -23551,7 +23702,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_reduce_ph() {
+    fn test_mm_maskz_reduce_ph() {
         let a = _mm_set1_ph(1.25);
         let r = _mm_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01010101, a);
         let e = _mm_set_ph(0.0, 0.25, 0.0, 0.25, 0.0, 0.25, 0.0, 0.25);
@@ -23559,7 +23710,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_reduce_ph() {
+    fn test_mm256_reduce_ph() {
         let a = _mm256_set1_ph(1.25);
         let r = _mm256_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
         let e = _mm256_set1_ph(0.25);
@@ -23567,7 +23718,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_reduce_ph() {
+    fn test_mm256_mask_reduce_ph() {
         let a = _mm256_set1_ph(1.25);
         let src = _mm256_set1_ph(2.0);
         let r = _mm256_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0101010101010101, a);
@@ -23578,7 +23729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_reduce_ph() {
+    fn test_mm256_maskz_reduce_ph() {
         let a = _mm256_set1_ph(1.25);
         let r = _mm256_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -23588,7 +23739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_reduce_ph() {
+    fn test_mm512_reduce_ph() {
         let a = _mm512_set1_ph(1.25);
         let r = _mm512_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
         let e = _mm512_set1_ph(0.25);
@@ -23596,7 +23747,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_reduce_ph() {
+    fn test_mm512_mask_reduce_ph() {
         let a = _mm512_set1_ph(1.25);
         let src = _mm512_set1_ph(2.0);
         let r = _mm512_mask_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
@@ -23612,7 +23763,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_reduce_ph() {
+    fn test_mm512_maskz_reduce_ph() {
         let a = _mm512_set1_ph(1.25);
         let r = _mm512_maskz_reduce_ph::<{ 16 | _MM_FROUND_TO_ZERO }>(
             0b01010101010101010101010101010101,
@@ -23626,7 +23777,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_reduce_round_ph() {
+    fn test_mm512_reduce_round_ph() {
         let a = _mm512_set1_ph(1.25);
         let r = _mm512_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
         let e = _mm512_set1_ph(0.25);
@@ -23634,7 +23785,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_reduce_round_ph() {
+    fn test_mm512_mask_reduce_round_ph() {
         let a = _mm512_set1_ph(1.25);
         let src = _mm512_set1_ph(2.0);
         let r = _mm512_mask_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
@@ -23650,7 +23801,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_reduce_round_ph() {
+    fn test_mm512_maskz_reduce_round_ph() {
         let a = _mm512_set1_ph(1.25);
         let r = _mm512_maskz_reduce_round_ph::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
             0b01010101010101010101010101010101,
@@ -23664,7 +23815,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_reduce_sh() {
+    fn test_mm_reduce_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
@@ -23673,7 +23824,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_reduce_sh() {
+    fn test_mm_mask_reduce_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -23686,7 +23837,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_reduce_sh() {
+    fn test_mm_maskz_reduce_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_maskz_reduce_sh::<{ 16 | _MM_FROUND_TO_ZERO }>(0, a, b);
@@ -23698,7 +23849,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_reduce_round_sh() {
+    fn test_mm_reduce_round_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
         let r = _mm_reduce_round_sh::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
@@ -23707,7 +23858,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_reduce_round_sh() {
+    fn test_mm_mask_reduce_round_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
         let src = _mm_setr_ph(2.0, 30., 31., 32., 33., 34., 35., 36.);
@@ -23724,7 +23875,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_reduce_round_sh() {
+    fn test_mm_maskz_reduce_round_sh() {
         let a = _mm_setr_ph(3.0, 10., 11., 12., 13., 14., 15., 16.);
         let b = _mm_setr_ph(1.25, 20., 21., 22., 23., 24., 25., 26.);
         let r =
@@ -23738,56 +23889,56 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_reduce_add_ph() {
+    const fn test_mm_reduce_add_ph() {
         let a = _mm_set1_ph(2.0);
         let r = _mm_reduce_add_ph(a);
         assert_eq!(r, 16.0);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_reduce_add_ph() {
+    const fn test_mm256_reduce_add_ph() {
         let a = _mm256_set1_ph(2.0);
         let r = _mm256_reduce_add_ph(a);
         assert_eq!(r, 32.0);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_reduce_add_ph() {
+    const fn test_mm512_reduce_add_ph() {
         let a = _mm512_set1_ph(2.0);
         let r = _mm512_reduce_add_ph(a);
         assert_eq!(r, 64.0);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_reduce_mul_ph() {
+    const fn test_mm_reduce_mul_ph() {
         let a = _mm_set1_ph(2.0);
         let r = _mm_reduce_mul_ph(a);
         assert_eq!(r, 256.0);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_reduce_mul_ph() {
-        let a = _mm256_set1_ph(2.0);
+    const fn test_mm256_reduce_mul_ph() {
+        let a = _mm256_set1_ph(1.2);
         let r = _mm256_reduce_mul_ph(a);
-        assert_eq!(r, 65536.0);
+        assert_eq!(r, 18.5);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_reduce_mul_ph() {
-        let a = _mm512_set1_ph(2.0);
+    const fn test_mm512_reduce_mul_ph() {
+        let a = _mm512_set1_ph(1.2);
         let r = _mm512_reduce_mul_ph(a);
-        assert_eq!(r, 16777216.0);
+        assert_eq!(r, 342.3);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_reduce_max_ph() {
+    fn test_mm_reduce_max_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_reduce_max_ph(a);
         assert_eq!(r, 8.0);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_reduce_max_ph() {
+    fn test_mm256_reduce_max_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -23796,7 +23947,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_reduce_max_ph() {
+    fn test_mm512_reduce_max_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -23807,14 +23958,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_reduce_min_ph() {
+    fn test_mm_reduce_min_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_reduce_min_ph(a);
         assert_eq!(r, 1.0);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_reduce_min_ph() {
+    fn test_mm256_reduce_min_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -23823,7 +23974,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_reduce_min_ph() {
+    fn test_mm512_reduce_min_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -23834,7 +23985,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_fpclass_ph_mask() {
+    fn test_mm_fpclass_ph_mask() {
         let a = _mm_set_ph(
             1.,
             f16::INFINITY,
@@ -23850,7 +24001,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_fpclass_ph_mask() {
+    fn test_mm_mask_fpclass_ph_mask() {
         let a = _mm_set_ph(
             1.,
             f16::INFINITY,
@@ -23866,7 +24017,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_fpclass_ph_mask() {
+    fn test_mm256_fpclass_ph_mask() {
         let a = _mm256_set_ph(
             1.,
             f16::INFINITY,
@@ -23890,7 +24041,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_fpclass_ph_mask() {
+    fn test_mm256_mask_fpclass_ph_mask() {
         let a = _mm256_set_ph(
             1.,
             f16::INFINITY,
@@ -23914,7 +24065,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_fpclass_ph_mask() {
+    fn test_mm512_fpclass_ph_mask() {
         let a = _mm512_set_ph(
             1.,
             f16::INFINITY,
@@ -23954,7 +24105,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_fpclass_ph_mask() {
+    fn test_mm512_mask_fpclass_ph_mask() {
         let a = _mm512_set_ph(
             1.,
             f16::INFINITY,
@@ -23994,14 +24145,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_fpclass_sh_mask() {
+    fn test_mm_fpclass_sh_mask() {
         let a = _mm_set_sh(f16::INFINITY);
         let r = _mm_fpclass_sh_mask::<0x18>(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_mask_fpclass_sh_mask() {
+    fn test_mm_mask_fpclass_sh_mask() {
         let a = _mm_set_sh(f16::INFINITY);
         let r = _mm_mask_fpclass_sh_mask::<0x18>(0, a);
         assert_eq!(r, 0);
@@ -24010,7 +24161,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_blend_ph() {
+    const fn test_mm_mask_blend_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_set_ph(-1.0, -2.0, -3.0, -4.0, -5.0, -6.0, -7.0, -8.0);
         let r = _mm_mask_blend_ph(0b01010101, a, b);
@@ -24019,7 +24170,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_blend_ph() {
+    const fn test_mm256_mask_blend_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -24036,7 +24187,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_blend_ph() {
+    const fn test_mm512_mask_blend_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -24057,7 +24208,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_permutex2var_ph() {
+    fn test_mm_permutex2var_ph() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let b = _mm_setr_ph(9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let idx = _mm_setr_epi16(0, 2, 4, 6, 8, 10, 12, 14);
@@ -24067,7 +24218,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_permutex2var_ph() {
+    fn test_mm256_permutex2var_ph() {
         let a = _mm256_setr_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -24085,7 +24236,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_permutex2var_ph() {
+    fn test_mm512_permutex2var_ph() {
         let a = _mm512_setr_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -24110,7 +24261,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_permutexvar_ph() {
+    fn test_mm_permutexvar_ph() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let idx = _mm_set_epi16(0, 2, 4, 6, 1, 3, 5, 7);
         let r = _mm_permutexvar_ph(idx, a);
@@ -24119,7 +24270,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_permutexvar_ph() {
+    fn test_mm256_permutexvar_ph() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -24132,7 +24283,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_permutexvar_ph() {
+    fn test_mm512_permutexvar_ph() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -24152,7 +24303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtepi16_ph() {
+    fn test_mm_cvtepi16_ph() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm_cvtepi16_ph(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24160,7 +24311,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi16_ph() {
+    fn test_mm_mask_cvtepi16_ph() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm_mask_cvtepi16_ph(src, 0b01010101, a);
@@ -24169,7 +24320,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi16_ph() {
+    fn test_mm_maskz_cvtepi16_ph() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm_maskz_cvtepi16_ph(0b01010101, a);
         let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
@@ -24177,7 +24328,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtepi16_ph() {
+    fn test_mm256_cvtepi16_ph() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm256_cvtepi16_ph(a);
         let e = _mm256_set_ph(
@@ -24187,7 +24338,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi16_ph() {
+    fn test_mm256_mask_cvtepi16_ph() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let src = _mm256_set_ph(
             10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
@@ -24200,7 +24351,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi16_ph() {
+    fn test_mm256_maskz_cvtepi16_ph() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm256_maskz_cvtepi16_ph(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -24210,7 +24361,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtepi16_ph() {
+    fn test_mm512_cvtepi16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24225,7 +24376,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtepi16_ph() {
+    fn test_mm512_mask_cvtepi16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24243,7 +24394,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtepi16_ph() {
+    fn test_mm512_maskz_cvtepi16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24257,7 +24408,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundepi16_ph() {
+    fn test_mm512_cvt_roundepi16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24272,7 +24423,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundepi16_ph() {
+    fn test_mm512_mask_cvt_roundepi16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24294,7 +24445,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundepi16_ph() {
+    fn test_mm512_maskz_cvt_roundepi16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24311,7 +24462,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtepu16_ph() {
+    fn test_mm_cvtepu16_ph() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm_cvtepu16_ph(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24319,7 +24470,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu16_ph() {
+    fn test_mm_mask_cvtepu16_ph() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm_mask_cvtepu16_ph(src, 0b01010101, a);
@@ -24328,7 +24479,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu16_ph() {
+    fn test_mm_maskz_cvtepu16_ph() {
         let a = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm_maskz_cvtepu16_ph(0b01010101, a);
         let e = _mm_set_ph(0., 2., 0., 4., 0., 6., 0., 8.);
@@ -24336,7 +24487,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtepu16_ph() {
+    fn test_mm256_cvtepu16_ph() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm256_cvtepu16_ph(a);
         let e = _mm256_set_ph(
@@ -24346,7 +24497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu16_ph() {
+    fn test_mm256_mask_cvtepu16_ph() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let src = _mm256_set_ph(
             10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
@@ -24359,7 +24510,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu16_ph() {
+    fn test_mm256_maskz_cvtepu16_ph() {
         let a = _mm256_set_epi16(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm256_maskz_cvtepu16_ph(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -24369,7 +24520,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtepu16_ph() {
+    fn test_mm512_cvtepu16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24384,7 +24535,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtepu16_ph() {
+    fn test_mm512_mask_cvtepu16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24402,7 +24553,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtepu16_ph() {
+    fn test_mm512_maskz_cvtepu16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24416,7 +24567,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundepu16_ph() {
+    fn test_mm512_cvt_roundepu16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24431,7 +24582,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundepu16_ph() {
+    fn test_mm512_mask_cvt_roundepu16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24453,7 +24604,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundepu16_ph() {
+    fn test_mm512_maskz_cvt_roundepu16_ph() {
         let a = _mm512_set_epi16(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
             25, 26, 27, 28, 29, 30, 31, 32,
@@ -24470,7 +24621,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtepi32_ph() {
+    fn test_mm_cvtepi32_ph() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_cvtepi32_ph(a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
@@ -24478,7 +24629,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi32_ph() {
+    fn test_mm_mask_cvtepi32_ph() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm_mask_cvtepi32_ph(src, 0b0101, a);
@@ -24487,7 +24638,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi32_ph() {
+    fn test_mm_maskz_cvtepi32_ph() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_maskz_cvtepi32_ph(0b0101, a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
@@ -24495,7 +24646,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtepi32_ph() {
+    fn test_mm256_cvtepi32_ph() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_cvtepi32_ph(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24503,7 +24654,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi32_ph() {
+    fn test_mm256_mask_cvtepi32_ph() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm256_mask_cvtepi32_ph(src, 0b01010101, a);
@@ -24512,7 +24663,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi32_ph() {
+    fn test_mm256_maskz_cvtepi32_ph() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_maskz_cvtepi32_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
@@ -24520,7 +24671,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtepi32_ph() {
+    fn test_mm512_cvtepi32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_cvtepi32_ph(a);
         let e = _mm256_set_ph(
@@ -24529,8 +24680,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtepi32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_mask_cvtepi32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let src = _mm256_set_ph(
             10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
@@ -24542,8 +24693,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtepi32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_maskz_cvtepi32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -24552,8 +24703,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundepi32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_cvt_roundepi32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm256_set_ph(
@@ -24562,8 +24713,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundepi32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_mask_cvt_roundepi32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let src = _mm256_set_ph(
             10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
@@ -24579,8 +24730,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundepi32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_maskz_cvt_roundepi32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b0101010101010101,
@@ -24593,7 +24744,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvti32_sh() {
+    fn test_mm_cvti32_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvti32_sh(a, 10);
         let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24601,7 +24752,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvt_roundi32_sh() {
+    fn test_mm_cvt_roundi32_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundi32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
         let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24609,7 +24760,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtepu32_ph() {
+    fn test_mm_cvtepu32_ph() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_cvtepu32_ph(a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
@@ -24617,7 +24768,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu32_ph() {
+    fn test_mm_mask_cvtepu32_ph() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm_mask_cvtepu32_ph(src, 0b0101, a);
@@ -24626,7 +24777,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu32_ph() {
+    fn test_mm_maskz_cvtepu32_ph() {
         let a = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_maskz_cvtepu32_ph(0b0101, a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2., 0.0, 4.);
@@ -24634,7 +24785,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtepu32_ph() {
+    fn test_mm256_cvtepu32_ph() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_cvtepu32_ph(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24642,7 +24793,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu32_ph() {
+    fn test_mm256_mask_cvtepu32_ph() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm256_mask_cvtepu32_ph(src, 0b01010101, a);
@@ -24651,15 +24802,15 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu32_ph() {
+    fn test_mm256_maskz_cvtepu32_ph() {
         let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_maskz_cvtepu32_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtepu32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_cvtepu32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_cvtepu32_ph(a);
         let e = _mm256_set_ph(
@@ -24668,8 +24819,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtepu32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_mask_cvtepu32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let src = _mm256_set_ph(
             10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
@@ -24681,8 +24832,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtepu32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_maskz_cvtepu32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a);
         let e = _mm256_set_ph(
@@ -24691,8 +24842,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundepu32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_cvt_roundepu32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm256_set_ph(
@@ -24701,8 +24852,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundepu32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_mask_cvt_roundepu32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let src = _mm256_set_ph(
             10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
@@ -24719,8 +24870,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundepu32_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_maskz_cvt_roundepu32_ph() {
         let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b0101010101010101,
@@ -24733,7 +24884,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtu32_sh() {
+    fn test_mm_cvtu32_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtu32_sh(a, 10);
         let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24741,7 +24892,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvt_roundu32_sh() {
+    fn test_mm_cvt_roundu32_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundu32_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
         let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24749,7 +24900,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtepi64_ph() {
+    fn test_mm_cvtepi64_ph() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_cvtepi64_ph(a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
@@ -24757,7 +24908,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi64_ph() {
+    fn test_mm_mask_cvtepi64_ph() {
         let a = _mm_set_epi64x(1, 2);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm_mask_cvtepi64_ph(src, 0b01, a);
@@ -24766,7 +24917,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi64_ph() {
+    fn test_mm_maskz_cvtepi64_ph() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_maskz_cvtepi64_ph(0b01, a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.);
@@ -24774,7 +24925,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtepi64_ph() {
+    fn test_mm256_cvtepi64_ph() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_cvtepi64_ph(a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
@@ -24782,7 +24933,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi64_ph() {
+    fn test_mm256_mask_cvtepi64_ph() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm256_mask_cvtepi64_ph(src, 0b0101, a);
@@ -24791,7 +24942,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi64_ph() {
+    fn test_mm256_maskz_cvtepi64_ph() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_maskz_cvtepi64_ph(0b0101, a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
@@ -24799,7 +24950,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_cvtepi64_ph() {
+    fn test_mm512_cvtepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvtepi64_ph(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24807,7 +24958,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_mask_cvtepi64_ph() {
+    fn test_mm512_mask_cvtepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm512_mask_cvtepi64_ph(src, 0b01010101, a);
@@ -24816,7 +24967,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_maskz_cvtepi64_ph() {
+    fn test_mm512_maskz_cvtepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvtepi64_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
@@ -24824,7 +24975,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_cvt_roundepi64_ph() {
+    fn test_mm512_cvt_roundepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24832,7 +24983,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundepi64_ph() {
+    fn test_mm512_mask_cvt_roundepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm512_mask_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -24843,7 +24994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_maskz_cvt_roundepi64_ph() {
+    fn test_mm512_maskz_cvt_roundepi64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvt_roundepi64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01010101, a,
@@ -24853,7 +25004,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtepu64_ph() {
+    fn test_mm_cvtepu64_ph() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_cvtepu64_ph(a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
@@ -24861,7 +25012,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu64_ph() {
+    fn test_mm_mask_cvtepu64_ph() {
         let a = _mm_set_epi64x(1, 2);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm_mask_cvtepu64_ph(src, 0b01, a);
@@ -24870,7 +25021,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu64_ph() {
+    fn test_mm_maskz_cvtepu64_ph() {
         let a = _mm_set_epi64x(1, 2);
         let r = _mm_maskz_cvtepu64_ph(0b01, a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
@@ -24878,7 +25029,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtepu64_ph() {
+    fn test_mm256_cvtepu64_ph() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_cvtepu64_ph(a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
@@ -24886,7 +25037,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu64_ph() {
+    fn test_mm256_mask_cvtepu64_ph() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm256_mask_cvtepu64_ph(src, 0b0101, a);
@@ -24895,7 +25046,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu64_ph() {
+    fn test_mm256_maskz_cvtepu64_ph() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_maskz_cvtepu64_ph(0b0101, a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
@@ -24903,7 +25054,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_cvtepu64_ph() {
+    fn test_mm512_cvtepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvtepu64_ph(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24911,7 +25062,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_mask_cvtepu64_ph() {
+    fn test_mm512_mask_cvtepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm512_mask_cvtepu64_ph(src, 0b01010101, a);
@@ -24920,7 +25071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_maskz_cvtepu64_ph() {
+    fn test_mm512_maskz_cvtepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvtepu64_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
@@ -24928,7 +25079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_cvt_roundepu64_ph() {
+    fn test_mm512_cvt_roundepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24936,7 +25087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_mask_cvt_roundepu64_ph() {
+    fn test_mm512_mask_cvt_roundepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm512_mask_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -24947,7 +25098,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_maskz_cvt_roundepu64_ph() {
+    fn test_mm512_maskz_cvt_roundepu64_ph() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_cvt_roundepu64_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01010101, a,
@@ -24957,7 +25108,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtxps_ph() {
+    fn test_mm_cvtxps_ph() {
         let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvtxps_ph(a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
@@ -24965,7 +25116,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtxps_ph() {
+    fn test_mm_mask_cvtxps_ph() {
         let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm_mask_cvtxps_ph(src, 0b0101, a);
@@ -24974,7 +25125,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtxps_ph() {
+    fn test_mm_maskz_cvtxps_ph() {
         let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
         let r = _mm_maskz_cvtxps_ph(0b0101, a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
@@ -24982,7 +25133,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtxps_ph() {
+    fn test_mm256_cvtxps_ph() {
         let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_cvtxps_ph(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -24990,7 +25141,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtxps_ph() {
+    fn test_mm256_mask_cvtxps_ph() {
         let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm256_mask_cvtxps_ph(src, 0b01010101, a);
@@ -24999,15 +25150,15 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtxps_ph() {
+    fn test_mm256_maskz_cvtxps_ph() {
         let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_maskz_cvtxps_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
         assert_eq_m128h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtxps_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_cvtxps_ph() {
         let a = _mm512_set_ps(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25018,8 +25169,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtxps_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_mask_cvtxps_ph() {
         let a = _mm512_set_ps(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25033,8 +25184,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtxps_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_maskz_cvtxps_ph() {
         let a = _mm512_set_ps(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25045,8 +25196,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtx_roundps_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_cvtx_roundps_ph() {
         let a = _mm512_set_ps(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25057,8 +25208,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtx_roundps_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_mask_cvtx_roundps_ph() {
         let a = _mm512_set_ps(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25077,8 +25228,8 @@ mod tests {
         assert_eq_m256h(r, e);
     }
 
-    #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtx_roundps_ph() {
+    #[simd_test(enable = "avx512fp16,avx512vl")]
+    fn test_mm512_maskz_cvtx_roundps_ph() {
         let a = _mm512_set_ps(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25093,7 +25244,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtss_sh() {
+    fn test_mm_cvtss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvtss_sh(a, b);
@@ -25102,7 +25253,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtss_sh() {
+    fn test_mm_mask_cvtss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
@@ -25115,7 +25266,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtss_sh() {
+    fn test_mm_maskz_cvtss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let r = _mm_maskz_cvtss_sh(0, a, b);
@@ -25127,7 +25278,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvt_roundss_sh() {
+    fn test_mm_cvt_roundss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvt_roundss_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -25136,7 +25287,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvt_roundss_sh() {
+    fn test_mm_mask_cvt_roundss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
@@ -25153,7 +25304,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvt_roundss_sh() {
+    fn test_mm_maskz_cvt_roundss_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let r =
@@ -25167,7 +25318,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtpd_ph() {
+    fn test_mm_cvtpd_ph() {
         let a = _mm_set_pd(1.0, 2.0);
         let r = _mm_cvtpd_ph(a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
@@ -25175,7 +25326,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtpd_ph() {
+    fn test_mm_mask_cvtpd_ph() {
         let a = _mm_set_pd(1.0, 2.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm_mask_cvtpd_ph(src, 0b01, a);
@@ -25184,7 +25335,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtpd_ph() {
+    fn test_mm_maskz_cvtpd_ph() {
         let a = _mm_set_pd(1.0, 2.0);
         let r = _mm_maskz_cvtpd_ph(0b01, a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0);
@@ -25192,7 +25343,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtpd_ph() {
+    fn test_mm256_cvtpd_ph() {
         let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
         let r = _mm256_cvtpd_ph(a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
@@ -25200,7 +25351,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtpd_ph() {
+    fn test_mm256_mask_cvtpd_ph() {
         let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm256_mask_cvtpd_ph(src, 0b0101, a);
@@ -25209,7 +25360,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtpd_ph() {
+    fn test_mm256_maskz_cvtpd_ph() {
         let a = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
         let r = _mm256_maskz_cvtpd_ph(0b0101, a);
         let e = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 4.0);
@@ -25217,7 +25368,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_cvtpd_ph() {
+    fn test_mm512_cvtpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvtpd_ph(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -25225,7 +25376,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_mask_cvtpd_ph() {
+    fn test_mm512_mask_cvtpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm512_mask_cvtpd_ph(src, 0b01010101, a);
@@ -25234,7 +25385,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_maskz_cvtpd_ph() {
+    fn test_mm512_maskz_cvtpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvtpd_ph(0b01010101, a);
         let e = _mm_set_ph(0.0, 2., 0.0, 4., 0.0, 6., 0.0, 8.);
@@ -25242,7 +25393,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_cvt_roundpd_ph() {
+    fn test_mm512_cvt_roundpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -25250,7 +25401,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_mask_cvt_roundpd_ph() {
+    fn test_mm512_mask_cvt_roundpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm_set_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let r = _mm512_mask_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -25261,7 +25412,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm512_maskz_cvt_roundpd_ph() {
+    fn test_mm512_maskz_cvt_roundpd_ph() {
         let a = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvt_roundpd_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01010101, a,
@@ -25271,7 +25422,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtsd_sh() {
+    fn test_mm_cvtsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
         let r = _mm_cvtsd_sh(a, b);
@@ -25280,7 +25431,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtsd_sh() {
+    fn test_mm_mask_cvtsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
         let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
@@ -25293,7 +25444,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtsd_sh() {
+    fn test_mm_maskz_cvtsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
         let r = _mm_maskz_cvtsd_sh(0, a, b);
@@ -25305,7 +25456,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvt_roundsd_sh() {
+    fn test_mm_cvt_roundsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
         let r = _mm_cvt_roundsd_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -25314,7 +25465,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvt_roundsd_sh() {
+    fn test_mm_mask_cvt_roundsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
         let src = _mm_setr_ph(20., 21., 22., 23., 24., 25., 26., 27.);
@@ -25331,7 +25482,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvt_roundsd_sh() {
+    fn test_mm_maskz_cvt_roundsd_sh() {
         let a = _mm_setr_ph(10., 11., 12., 13., 14., 15., 16., 17.);
         let b = _mm_setr_pd(1.0, 2.0);
         let r =
@@ -25345,7 +25496,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtph_epi16() {
+    fn test_mm_cvtph_epi16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvttph_epi16(a);
         let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
@@ -25353,7 +25504,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtph_epi16() {
+    fn test_mm_mask_cvtph_epi16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
         let r = _mm_mask_cvttph_epi16(src, 0b01010101, a);
@@ -25362,7 +25513,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtph_epi16() {
+    fn test_mm_maskz_cvtph_epi16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_maskz_cvttph_epi16(0b01010101, a);
         let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
@@ -25370,7 +25521,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtph_epi16() {
+    fn test_mm256_cvtph_epi16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25380,7 +25531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtph_epi16() {
+    fn test_mm256_mask_cvtph_epi16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25393,7 +25544,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtph_epi16() {
+    fn test_mm256_maskz_cvtph_epi16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25403,7 +25554,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtph_epi16() {
+    fn test_mm512_cvtph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25418,7 +25569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtph_epi16() {
+    fn test_mm512_mask_cvtph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25437,7 +25588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtph_epi16() {
+    fn test_mm512_maskz_cvtph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25452,7 +25603,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundph_epi16() {
+    fn test_mm512_cvt_roundph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25467,7 +25618,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundph_epi16() {
+    fn test_mm512_mask_cvt_roundph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25490,7 +25641,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundph_epi16() {
+    fn test_mm512_maskz_cvt_roundph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25508,7 +25659,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtph_epu16() {
+    fn test_mm_cvtph_epu16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvttph_epu16(a);
         let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
@@ -25516,7 +25667,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtph_epu16() {
+    fn test_mm_mask_cvtph_epu16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
         let r = _mm_mask_cvttph_epu16(src, 0b01010101, a);
@@ -25525,7 +25676,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtph_epu16() {
+    fn test_mm_maskz_cvtph_epu16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_maskz_cvttph_epu16(0b01010101, a);
         let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
@@ -25533,7 +25684,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtph_epu16() {
+    fn test_mm256_cvtph_epu16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25543,7 +25694,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtph_epu16() {
+    fn test_mm256_mask_cvtph_epu16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25556,7 +25707,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtph_epu16() {
+    fn test_mm256_maskz_cvtph_epu16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25566,7 +25717,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtph_epu16() {
+    fn test_mm512_cvtph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25581,7 +25732,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtph_epu16() {
+    fn test_mm512_mask_cvtph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25600,7 +25751,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtph_epu16() {
+    fn test_mm512_maskz_cvtph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25615,7 +25766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundph_epu16() {
+    fn test_mm512_cvt_roundph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25630,7 +25781,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundph_epu16() {
+    fn test_mm512_mask_cvt_roundph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25653,7 +25804,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundph_epu16() {
+    fn test_mm512_maskz_cvt_roundph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25671,7 +25822,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvttph_epi16() {
+    fn test_mm_cvttph_epi16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvttph_epi16(a);
         let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
@@ -25679,7 +25830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvttph_epi16() {
+    fn test_mm_mask_cvttph_epi16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
         let r = _mm_mask_cvttph_epi16(src, 0b01010101, a);
@@ -25688,7 +25839,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvttph_epi16() {
+    fn test_mm_maskz_cvttph_epi16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_maskz_cvttph_epi16(0b01010101, a);
         let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
@@ -25696,7 +25847,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvttph_epi16() {
+    fn test_mm256_cvttph_epi16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25706,7 +25857,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvttph_epi16() {
+    fn test_mm256_mask_cvttph_epi16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25719,7 +25870,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttph_epi16() {
+    fn test_mm256_maskz_cvttph_epi16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25729,7 +25880,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvttph_epi16() {
+    fn test_mm512_cvttph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25744,7 +25895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvttph_epi16() {
+    fn test_mm512_mask_cvttph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25763,7 +25914,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvttph_epi16() {
+    fn test_mm512_maskz_cvttph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25778,7 +25929,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtt_roundph_epi16() {
+    fn test_mm512_cvtt_roundph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25793,7 +25944,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtt_roundph_epi16() {
+    fn test_mm512_mask_cvtt_roundph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25816,7 +25967,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtt_roundph_epi16() {
+    fn test_mm512_maskz_cvtt_roundph_epi16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25834,7 +25985,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvttph_epu16() {
+    fn test_mm_cvttph_epu16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvttph_epu16(a);
         let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
@@ -25842,7 +25993,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvttph_epu16() {
+    fn test_mm_mask_cvttph_epu16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm_set_epi16(10, 11, 12, 13, 14, 15, 16, 17);
         let r = _mm_mask_cvttph_epu16(src, 0b01010101, a);
@@ -25851,7 +26002,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvttph_epu16() {
+    fn test_mm_maskz_cvttph_epu16() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_maskz_cvttph_epu16(0b01010101, a);
         let e = _mm_set_epi16(0, 2, 0, 4, 0, 6, 0, 8);
@@ -25859,7 +26010,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvttph_epu16() {
+    fn test_mm256_cvttph_epu16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25869,7 +26020,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvttph_epu16() {
+    fn test_mm256_mask_cvttph_epu16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25882,7 +26033,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttph_epu16() {
+    fn test_mm256_maskz_cvttph_epu16() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -25892,7 +26043,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvttph_epu16() {
+    fn test_mm512_cvttph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25907,7 +26058,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvttph_epu16() {
+    fn test_mm512_mask_cvttph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25926,7 +26077,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvttph_epu16() {
+    fn test_mm512_maskz_cvttph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25941,7 +26092,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtt_roundph_epu16() {
+    fn test_mm512_cvtt_roundph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25956,7 +26107,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtt_roundph_epu16() {
+    fn test_mm512_mask_cvtt_roundph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25979,7 +26130,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtt_roundph_epu16() {
+    fn test_mm512_maskz_cvtt_roundph_epu16() {
         let a = _mm512_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -25997,7 +26148,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtph_epi32() {
+    fn test_mm_cvtph_epi32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvtph_epi32(a);
         let e = _mm_set_epi32(1, 2, 3, 4);
@@ -26005,7 +26156,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtph_epi32() {
+    fn test_mm_mask_cvtph_epi32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let src = _mm_set_epi32(10, 11, 12, 13);
         let r = _mm_mask_cvtph_epi32(src, 0b0101, a);
@@ -26014,7 +26165,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtph_epi32() {
+    fn test_mm_maskz_cvtph_epi32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_maskz_cvtph_epi32(0b0101, a);
         let e = _mm_set_epi32(0, 2, 0, 4);
@@ -26022,7 +26173,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtph_epi32() {
+    fn test_mm256_cvtph_epi32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_cvtph_epi32(a);
         let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26030,7 +26181,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtph_epi32() {
+    fn test_mm256_mask_cvtph_epi32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
         let r = _mm256_mask_cvtph_epi32(src, 0b01010101, a);
@@ -26039,7 +26190,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtph_epi32() {
+    fn test_mm256_maskz_cvtph_epi32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_maskz_cvtph_epi32(0b01010101, a);
         let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26047,7 +26198,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtph_epi32() {
+    fn test_mm512_cvtph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26057,7 +26208,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtph_epi32() {
+    fn test_mm512_mask_cvtph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26070,7 +26221,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtph_epi32() {
+    fn test_mm512_maskz_cvtph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26080,7 +26231,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundph_epi32() {
+    fn test_mm512_cvt_roundph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26090,7 +26241,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundph_epi32() {
+    fn test_mm512_mask_cvt_roundph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26107,7 +26258,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundph_epi32() {
+    fn test_mm512_maskz_cvt_roundph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26120,21 +26271,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtsh_i32() {
+    fn test_mm_cvtsh_i32() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtsh_i32(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvt_roundsh_i32() {
+    fn test_mm_cvt_roundsh_i32() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundsh_i32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtph_epu32() {
+    fn test_mm_cvtph_epu32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvtph_epu32(a);
         let e = _mm_set_epi32(1, 2, 3, 4);
@@ -26142,7 +26293,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtph_epu32() {
+    fn test_mm_mask_cvtph_epu32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let src = _mm_set_epi32(10, 11, 12, 13);
         let r = _mm_mask_cvtph_epu32(src, 0b0101, a);
@@ -26151,7 +26302,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtph_epu32() {
+    fn test_mm_maskz_cvtph_epu32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_maskz_cvtph_epu32(0b0101, a);
         let e = _mm_set_epi32(0, 2, 0, 4);
@@ -26159,7 +26310,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtph_epu32() {
+    fn test_mm256_cvtph_epu32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_cvtph_epu32(a);
         let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26167,7 +26318,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtph_epu32() {
+    fn test_mm256_mask_cvtph_epu32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
         let r = _mm256_mask_cvtph_epu32(src, 0b01010101, a);
@@ -26176,7 +26327,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtph_epu32() {
+    fn test_mm256_maskz_cvtph_epu32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_maskz_cvtph_epu32(0b01010101, a);
         let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26184,7 +26335,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtph_epu32() {
+    fn test_mm512_cvtph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26194,7 +26345,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtph_epu32() {
+    fn test_mm512_mask_cvtph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26207,7 +26358,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtph_epu32() {
+    fn test_mm512_maskz_cvtph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26217,7 +26368,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundph_epu32() {
+    fn test_mm512_cvt_roundph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26227,7 +26378,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundph_epu32() {
+    fn test_mm512_mask_cvt_roundph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26244,7 +26395,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundph_epu32() {
+    fn test_mm512_maskz_cvt_roundph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26257,21 +26408,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtsh_u32() {
+    fn test_mm_cvtsh_u32() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtsh_u32(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvt_roundsh_u32() {
+    fn test_mm_cvt_roundsh_u32() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundsh_u32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvttph_epi32() {
+    fn test_mm_cvttph_epi32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvttph_epi32(a);
         let e = _mm_set_epi32(1, 2, 3, 4);
@@ -26279,7 +26430,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvttph_epi32() {
+    fn test_mm_mask_cvttph_epi32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let src = _mm_set_epi32(10, 11, 12, 13);
         let r = _mm_mask_cvttph_epi32(src, 0b0101, a);
@@ -26288,7 +26439,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvttph_epi32() {
+    fn test_mm_maskz_cvttph_epi32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_maskz_cvttph_epi32(0b0101, a);
         let e = _mm_set_epi32(0, 2, 0, 4);
@@ -26296,7 +26447,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvttph_epi32() {
+    fn test_mm256_cvttph_epi32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_cvttph_epi32(a);
         let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26304,7 +26455,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvttph_epi32() {
+    fn test_mm256_mask_cvttph_epi32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
         let r = _mm256_mask_cvttph_epi32(src, 0b01010101, a);
@@ -26313,7 +26464,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttph_epi32() {
+    fn test_mm256_maskz_cvttph_epi32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_maskz_cvttph_epi32(0b01010101, a);
         let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26321,7 +26472,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvttph_epi32() {
+    fn test_mm512_cvttph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26331,7 +26482,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvttph_epi32() {
+    fn test_mm512_mask_cvttph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26344,7 +26495,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvttph_epi32() {
+    fn test_mm512_maskz_cvttph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26354,7 +26505,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtt_roundph_epi32() {
+    fn test_mm512_cvtt_roundph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26364,7 +26515,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtt_roundph_epi32() {
+    fn test_mm512_mask_cvtt_roundph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26377,7 +26528,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtt_roundph_epi32() {
+    fn test_mm512_maskz_cvtt_roundph_epi32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26387,21 +26538,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvttsh_i32() {
+    fn test_mm_cvttsh_i32() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvttsh_i32(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtt_roundsh_i32() {
+    fn test_mm_cvtt_roundsh_i32() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtt_roundsh_i32::<_MM_FROUND_NO_EXC>(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvttph_epu32() {
+    fn test_mm_cvttph_epu32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvttph_epu32(a);
         let e = _mm_set_epi32(1, 2, 3, 4);
@@ -26409,7 +26560,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvttph_epu32() {
+    fn test_mm_mask_cvttph_epu32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let src = _mm_set_epi32(10, 11, 12, 13);
         let r = _mm_mask_cvttph_epu32(src, 0b0101, a);
@@ -26418,7 +26569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvttph_epu32() {
+    fn test_mm_maskz_cvttph_epu32() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_maskz_cvttph_epu32(0b0101, a);
         let e = _mm_set_epi32(0, 2, 0, 4);
@@ -26426,7 +26577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvttph_epu32() {
+    fn test_mm256_cvttph_epu32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_cvttph_epu32(a);
         let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26434,7 +26585,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvttph_epu32() {
+    fn test_mm256_mask_cvttph_epu32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let src = _mm256_set_epi32(10, 11, 12, 13, 14, 15, 16, 17);
         let r = _mm256_mask_cvttph_epu32(src, 0b01010101, a);
@@ -26443,7 +26594,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttph_epu32() {
+    fn test_mm256_maskz_cvttph_epu32() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_maskz_cvttph_epu32(0b01010101, a);
         let e = _mm256_set_epi32(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26451,7 +26602,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvttph_epu32() {
+    fn test_mm512_cvttph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26461,7 +26612,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvttph_epu32() {
+    fn test_mm512_mask_cvttph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26474,7 +26625,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvttph_epu32() {
+    fn test_mm512_maskz_cvttph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26484,7 +26635,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtt_roundph_epu32() {
+    fn test_mm512_cvtt_roundph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26494,7 +26645,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtt_roundph_epu32() {
+    fn test_mm512_mask_cvtt_roundph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26507,7 +26658,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtt_roundph_epu32() {
+    fn test_mm512_maskz_cvtt_roundph_epu32() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -26517,21 +26668,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvttsh_u32() {
+    fn test_mm_cvttsh_u32() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvttsh_u32(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtt_roundsh_u32() {
+    fn test_mm_cvtt_roundsh_u32() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtt_roundsh_u32::<_MM_FROUND_NO_EXC>(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtph_epi64() {
+    fn test_mm_cvtph_epi64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_cvtph_epi64(a);
         let e = _mm_set_epi64x(1, 2);
@@ -26539,7 +26690,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtph_epi64() {
+    fn test_mm_mask_cvtph_epi64() {
         let src = _mm_set_epi64x(3, 4);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_mask_cvtph_epi64(src, 0b01, a);
@@ -26548,7 +26699,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtph_epi64() {
+    fn test_mm_maskz_cvtph_epi64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_maskz_cvtph_epi64(0b01, a);
         let e = _mm_set_epi64x(0, 2);
@@ -26556,7 +26707,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtph_epi64() {
+    fn test_mm256_cvtph_epi64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_cvtph_epi64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -26564,7 +26715,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtph_epi64() {
+    fn test_mm256_mask_cvtph_epi64() {
         let src = _mm256_set_epi64x(5, 6, 7, 8);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_mask_cvtph_epi64(src, 0b0101, a);
@@ -26573,7 +26724,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtph_epi64() {
+    fn test_mm256_maskz_cvtph_epi64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_maskz_cvtph_epi64(0b0101, a);
         let e = _mm256_set_epi64x(0, 2, 0, 4);
@@ -26581,7 +26732,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtph_epi64() {
+    fn test_mm512_cvtph_epi64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvtph_epi64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26589,7 +26740,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtph_epi64() {
+    fn test_mm512_mask_cvtph_epi64() {
         let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvtph_epi64(src, 0b01010101, a);
@@ -26598,7 +26749,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtph_epi64() {
+    fn test_mm512_maskz_cvtph_epi64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvtph_epi64(0b01010101, a);
         let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26606,7 +26757,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundph_epi64() {
+    fn test_mm512_cvt_roundph_epi64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26614,7 +26765,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundph_epi64() {
+    fn test_mm512_mask_cvt_roundph_epi64() {
         let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -26625,7 +26776,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundph_epi64() {
+    fn test_mm512_maskz_cvt_roundph_epi64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvt_roundph_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01010101, a,
@@ -26635,7 +26786,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtph_epu64() {
+    fn test_mm_cvtph_epu64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_cvtph_epu64(a);
         let e = _mm_set_epi64x(1, 2);
@@ -26643,7 +26794,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtph_epu64() {
+    fn test_mm_mask_cvtph_epu64() {
         let src = _mm_set_epi64x(3, 4);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_mask_cvtph_epu64(src, 0b01, a);
@@ -26652,7 +26803,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtph_epu64() {
+    fn test_mm_maskz_cvtph_epu64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_maskz_cvtph_epu64(0b01, a);
         let e = _mm_set_epi64x(0, 2);
@@ -26660,7 +26811,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtph_epu64() {
+    fn test_mm256_cvtph_epu64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_cvtph_epu64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -26668,7 +26819,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtph_epu64() {
+    fn test_mm256_mask_cvtph_epu64() {
         let src = _mm256_set_epi64x(5, 6, 7, 8);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_mask_cvtph_epu64(src, 0b0101, a);
@@ -26677,7 +26828,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtph_epu64() {
+    fn test_mm256_maskz_cvtph_epu64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_maskz_cvtph_epu64(0b0101, a);
         let e = _mm256_set_epi64x(0, 2, 0, 4);
@@ -26685,7 +26836,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtph_epu64() {
+    fn test_mm512_cvtph_epu64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvtph_epu64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26693,7 +26844,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtph_epu64() {
+    fn test_mm512_mask_cvtph_epu64() {
         let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvtph_epu64(src, 0b01010101, a);
@@ -26702,7 +26853,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtph_epu64() {
+    fn test_mm512_maskz_cvtph_epu64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvtph_epu64(0b01010101, a);
         let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26710,7 +26861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundph_epu64() {
+    fn test_mm512_cvt_roundph_epu64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26718,7 +26869,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundph_epu64() {
+    fn test_mm512_mask_cvt_roundph_epu64() {
         let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -26729,7 +26880,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundph_epu64() {
+    fn test_mm512_maskz_cvt_roundph_epu64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvt_roundph_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
             0b01010101, a,
@@ -26739,7 +26890,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvttph_epi64() {
+    fn test_mm_cvttph_epi64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_cvttph_epi64(a);
         let e = _mm_set_epi64x(1, 2);
@@ -26747,7 +26898,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvttph_epi64() {
+    fn test_mm_mask_cvttph_epi64() {
         let src = _mm_set_epi64x(3, 4);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_mask_cvttph_epi64(src, 0b01, a);
@@ -26756,7 +26907,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvttph_epi64() {
+    fn test_mm_maskz_cvttph_epi64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_maskz_cvttph_epi64(0b01, a);
         let e = _mm_set_epi64x(0, 2);
@@ -26764,7 +26915,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvttph_epi64() {
+    fn test_mm256_cvttph_epi64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_cvttph_epi64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -26772,7 +26923,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvttph_epi64() {
+    fn test_mm256_mask_cvttph_epi64() {
         let src = _mm256_set_epi64x(5, 6, 7, 8);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_mask_cvttph_epi64(src, 0b0101, a);
@@ -26781,7 +26932,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttph_epi64() {
+    fn test_mm256_maskz_cvttph_epi64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_maskz_cvttph_epi64(0b0101, a);
         let e = _mm256_set_epi64x(0, 2, 0, 4);
@@ -26789,7 +26940,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvttph_epi64() {
+    fn test_mm512_cvttph_epi64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvttph_epi64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26797,7 +26948,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvttph_epi64() {
+    fn test_mm512_mask_cvttph_epi64() {
         let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvttph_epi64(src, 0b01010101, a);
@@ -26806,7 +26957,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvttph_epi64() {
+    fn test_mm512_maskz_cvttph_epi64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvttph_epi64(0b01010101, a);
         let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26814,7 +26965,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtt_roundph_epi64() {
+    fn test_mm512_cvtt_roundph_epi64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26822,7 +26973,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtt_roundph_epi64() {
+    fn test_mm512_mask_cvtt_roundph_epi64() {
         let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
@@ -26831,7 +26982,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtt_roundph_epi64() {
+    fn test_mm512_maskz_cvtt_roundph_epi64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvtt_roundph_epi64::<_MM_FROUND_NO_EXC>(0b01010101, a);
         let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26839,7 +26990,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvttph_epu64() {
+    fn test_mm_cvttph_epu64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_cvttph_epu64(a);
         let e = _mm_set_epi64x(1, 2);
@@ -26847,7 +26998,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvttph_epu64() {
+    fn test_mm_mask_cvttph_epu64() {
         let src = _mm_set_epi64x(3, 4);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_mask_cvttph_epu64(src, 0b01, a);
@@ -26856,7 +27007,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvttph_epu64() {
+    fn test_mm_maskz_cvttph_epu64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_maskz_cvttph_epu64(0b01, a);
         let e = _mm_set_epi64x(0, 2);
@@ -26864,7 +27015,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvttph_epu64() {
+    fn test_mm256_cvttph_epu64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_cvttph_epu64(a);
         let e = _mm256_set_epi64x(1, 2, 3, 4);
@@ -26872,7 +27023,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvttph_epu64() {
+    fn test_mm256_mask_cvttph_epu64() {
         let src = _mm256_set_epi64x(5, 6, 7, 8);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_mask_cvttph_epu64(src, 0b0101, a);
@@ -26881,7 +27032,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttph_epu64() {
+    fn test_mm256_maskz_cvttph_epu64() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_maskz_cvttph_epu64(0b0101, a);
         let e = _mm256_set_epi64x(0, 2, 0, 4);
@@ -26889,7 +27040,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvttph_epu64() {
+    fn test_mm512_cvttph_epu64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvttph_epu64(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26897,7 +27048,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvttph_epu64() {
+    fn test_mm512_mask_cvttph_epu64() {
         let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvttph_epu64(src, 0b01010101, a);
@@ -26906,7 +27057,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvttph_epu64() {
+    fn test_mm512_maskz_cvttph_epu64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvttph_epu64(0b01010101, a);
         let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26914,7 +27065,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtt_roundph_epu64() {
+    fn test_mm512_cvtt_roundph_epu64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(a);
         let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
@@ -26922,7 +27073,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtt_roundph_epu64() {
+    fn test_mm512_mask_cvtt_roundph_epu64() {
         let src = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
@@ -26931,7 +27082,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtt_roundph_epu64() {
+    fn test_mm512_maskz_cvtt_roundph_epu64() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvtt_roundph_epu64::<_MM_FROUND_NO_EXC>(0b01010101, a);
         let e = _mm512_set_epi64(0, 2, 0, 4, 0, 6, 0, 8);
@@ -26939,7 +27090,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtxph_ps() {
+    fn test_mm_cvtxph_ps() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvtxph_ps(a);
         let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
@@ -26947,7 +27098,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtxph_ps() {
+    fn test_mm_mask_cvtxph_ps() {
         let src = _mm_set_ps(10.0, 11.0, 12.0, 13.0);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_mask_cvtxph_ps(src, 0b0101, a);
@@ -26956,7 +27107,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtxph_ps() {
+    fn test_mm_maskz_cvtxph_ps() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm_maskz_cvtxph_ps(0b0101, a);
         let e = _mm_set_ps(0.0, 2.0, 0.0, 4.0);
@@ -26964,7 +27115,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtxph_ps() {
+    fn test_mm256_cvtxph_ps() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_cvtxph_ps(a);
         let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -26972,7 +27123,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtxph_ps() {
+    fn test_mm256_mask_cvtxph_ps() {
         let src = _mm256_set_ps(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_mask_cvtxph_ps(src, 0b01010101, a);
@@ -26981,7 +27132,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtxph_ps() {
+    fn test_mm256_maskz_cvtxph_ps() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_maskz_cvtxph_ps(0b01010101, a);
         let e = _mm256_set_ps(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
@@ -26989,7 +27140,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtxph_ps() {
+    fn test_mm512_cvtxph_ps() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -27001,7 +27152,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtxph_ps() {
+    fn test_mm512_mask_cvtxph_ps() {
         let src = _mm512_set_ps(
             10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
             24.0, 25.0,
@@ -27018,7 +27169,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtxph_ps() {
+    fn test_mm512_maskz_cvtxph_ps() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -27030,7 +27181,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtx_roundph_ps() {
+    fn test_mm512_cvtx_roundph_ps() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -27042,7 +27193,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtx_roundph_ps() {
+    fn test_mm512_mask_cvtx_roundph_ps() {
         let src = _mm512_set_ps(
             10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0,
             24.0, 25.0,
@@ -27059,7 +27210,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtx_roundph_ps() {
+    fn test_mm512_maskz_cvtx_roundph_ps() {
         let a = _mm256_set_ph(
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -27071,7 +27222,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtsh_ss() {
+    fn test_mm_cvtsh_ss() {
         let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let r = _mm_cvtsh_ss(a, b);
@@ -27080,7 +27231,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_mask_cvtsh_ss() {
+    fn test_mm_mask_cvtsh_ss() {
         let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
         let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
@@ -27093,7 +27244,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_maskz_cvtsh_ss() {
+    fn test_mm_maskz_cvtsh_ss() {
         let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let r = _mm_maskz_cvtsh_ss(0, a, b);
@@ -27105,7 +27256,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvt_roundsh_ss() {
+    fn test_mm_cvt_roundsh_ss() {
         let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let r = _mm_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(a, b);
@@ -27114,7 +27265,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_mask_cvt_roundsh_ss() {
+    fn test_mm_mask_cvt_roundsh_ss() {
         let src = _mm_setr_ps(3.0, 11.0, 12.0, 13.0);
         let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
@@ -27127,7 +27278,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_maskz_cvt_roundsh_ss() {
+    fn test_mm_maskz_cvt_roundsh_ss() {
         let a = _mm_setr_ps(2.0, 20.0, 21.0, 22.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let r = _mm_maskz_cvt_roundsh_ss::<_MM_FROUND_NO_EXC>(0, a, b);
@@ -27139,7 +27290,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtph_pd() {
+    fn test_mm_cvtph_pd() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_cvtph_pd(a);
         let e = _mm_set_pd(1.0, 2.0);
@@ -27147,7 +27298,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_mask_cvtph_pd() {
+    fn test_mm_mask_cvtph_pd() {
         let src = _mm_set_pd(10.0, 11.0);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_mask_cvtph_pd(src, 0b01, a);
@@ -27156,7 +27307,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_maskz_cvtph_pd() {
+    fn test_mm_maskz_cvtph_pd() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 2.0);
         let r = _mm_maskz_cvtph_pd(0b01, a);
         let e = _mm_set_pd(0.0, 2.0);
@@ -27164,7 +27315,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_cvtph_pd() {
+    fn test_mm256_cvtph_pd() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_cvtph_pd(a);
         let e = _mm256_set_pd(1.0, 2.0, 3.0, 4.0);
@@ -27172,7 +27323,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_mask_cvtph_pd() {
+    fn test_mm256_mask_cvtph_pd() {
         let src = _mm256_set_pd(10.0, 11.0, 12.0, 13.0);
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_mask_cvtph_pd(src, 0b0101, a);
@@ -27181,7 +27332,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtph_pd() {
+    fn test_mm256_maskz_cvtph_pd() {
         let a = _mm_set_ph(0.0, 0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 4.0);
         let r = _mm256_maskz_cvtph_pd(0b0101, a);
         let e = _mm256_set_pd(0.0, 2.0, 0.0, 4.0);
@@ -27189,7 +27340,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtph_pd() {
+    fn test_mm512_cvtph_pd() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvtph_pd(a);
         let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -27197,7 +27348,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvtph_pd() {
+    fn test_mm512_mask_cvtph_pd() {
         let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvtph_pd(src, 0b01010101, a);
@@ -27206,7 +27357,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvtph_pd() {
+    fn test_mm512_maskz_cvtph_pd() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvtph_pd(0b01010101, a);
         let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
@@ -27214,7 +27365,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvt_roundph_pd() {
+    fn test_mm512_cvt_roundph_pd() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(a);
         let e = _mm512_set_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -27222,7 +27373,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_mask_cvt_roundph_pd() {
+    fn test_mm512_mask_cvt_roundph_pd() {
         let src = _mm512_set_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_mask_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(src, 0b01010101, a);
@@ -27231,7 +27382,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_maskz_cvt_roundph_pd() {
+    fn test_mm512_maskz_cvt_roundph_pd() {
         let a = _mm_set_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm512_maskz_cvt_roundph_pd::<_MM_FROUND_NO_EXC>(0b01010101, a);
         let e = _mm512_set_pd(0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0, 8.0);
@@ -27239,7 +27390,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtsh_sd() {
+    fn test_mm_cvtsh_sd() {
         let a = _mm_setr_pd(2.0, 20.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let r = _mm_cvtsh_sd(a, b);
@@ -27248,7 +27399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_mask_cvtsh_sd() {
+    fn test_mm_mask_cvtsh_sd() {
         let src = _mm_setr_pd(3.0, 11.0);
         let a = _mm_setr_pd(2.0, 20.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
@@ -27261,7 +27412,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_maskz_cvtsh_sd() {
+    fn test_mm_maskz_cvtsh_sd() {
         let a = _mm_setr_pd(2.0, 20.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let r = _mm_maskz_cvtsh_sd(0, a, b);
@@ -27273,7 +27424,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvt_roundsh_sd() {
+    fn test_mm_cvt_roundsh_sd() {
         let a = _mm_setr_pd(2.0, 20.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let r = _mm_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(a, b);
@@ -27282,7 +27433,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_mask_cvt_roundsh_sd() {
+    fn test_mm_mask_cvt_roundsh_sd() {
         let src = _mm_setr_pd(3.0, 11.0);
         let a = _mm_setr_pd(2.0, 20.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
@@ -27295,7 +27446,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_maskz_cvt_roundsh_sd() {
+    fn test_mm_maskz_cvt_roundsh_sd() {
         let a = _mm_setr_pd(2.0, 20.0);
         let b = _mm_setr_ph(1.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0);
         let r = _mm_maskz_cvt_roundsh_sd::<_MM_FROUND_NO_EXC>(0, a, b);
@@ -27307,14 +27458,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtsh_h() {
+    const fn test_mm_cvtsh_h() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtsh_h(a);
         assert_eq!(r, 1.0);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm256_cvtsh_h() {
+    const fn test_mm256_cvtsh_h() {
         let a = _mm256_setr_ph(
             1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         );
@@ -27323,7 +27474,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm512_cvtsh_h() {
+    const fn test_mm512_cvtsh_h() {
         let a = _mm512_setr_ph(
             1.0, 2.0, 3.0, 42.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
             17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0, 29.0, 30.0,
@@ -27334,14 +27485,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtsi128_si16() {
+    const fn test_mm_cvtsi128_si16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm_cvtsi128_si16(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtsi16_si128() {
+    const fn test_mm_cvtsi16_si128() {
         let a = 1;
         let r = _mm_cvtsi16_si128(a);
         let e = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
diff --git a/crates/core_arch/src/x86/avx512ifma.rs b/crates/core_arch/src/x86/avx512ifma.rs
index 7c9d07f690..5ce28565d1 100644
--- a/crates/core_arch/src/x86/avx512ifma.rs
+++ b/crates/core_arch/src/x86/avx512ifma.rs
@@ -372,7 +372,7 @@ mod tests {
     const K: __mmask8 = 0b01101101;
 
     #[simd_test(enable = "avx512ifma")]
-    unsafe fn test_mm512_madd52hi_epu64() {
+    fn test_mm512_madd52hi_epu64() {
         let a = _mm512_set1_epi64(10 << 40);
         let b = _mm512_set1_epi64((11 << 40) + 4);
         let c = _mm512_set1_epi64((12 << 40) + 3);
@@ -386,7 +386,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma")]
-    unsafe fn test_mm512_mask_madd52hi_epu64() {
+    fn test_mm512_mask_madd52hi_epu64() {
         let a = _mm512_set1_epi64(10 << 40);
         let b = _mm512_set1_epi64((11 << 40) + 4);
         let c = _mm512_set1_epi64((12 << 40) + 3);
@@ -401,7 +401,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma")]
-    unsafe fn test_mm512_maskz_madd52hi_epu64() {
+    fn test_mm512_maskz_madd52hi_epu64() {
         let a = _mm512_set1_epi64(10 << 40);
         let b = _mm512_set1_epi64((11 << 40) + 4);
         let c = _mm512_set1_epi64((12 << 40) + 3);
@@ -416,7 +416,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma")]
-    unsafe fn test_mm512_madd52lo_epu64() {
+    fn test_mm512_madd52lo_epu64() {
         let a = _mm512_set1_epi64(10 << 40);
         let b = _mm512_set1_epi64((11 << 40) + 4);
         let c = _mm512_set1_epi64((12 << 40) + 3);
@@ -430,7 +430,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma")]
-    unsafe fn test_mm512_mask_madd52lo_epu64() {
+    fn test_mm512_mask_madd52lo_epu64() {
         let a = _mm512_set1_epi64(10 << 40);
         let b = _mm512_set1_epi64((11 << 40) + 4);
         let c = _mm512_set1_epi64((12 << 40) + 3);
@@ -445,7 +445,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma")]
-    unsafe fn test_mm512_maskz_madd52lo_epu64() {
+    fn test_mm512_maskz_madd52lo_epu64() {
         let a = _mm512_set1_epi64(10 << 40);
         let b = _mm512_set1_epi64((11 << 40) + 4);
         let c = _mm512_set1_epi64((12 << 40) + 3);
@@ -460,7 +460,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxifma")]
-    unsafe fn test_mm256_madd52hi_avx_epu64() {
+    fn test_mm256_madd52hi_avx_epu64() {
         let a = _mm256_set1_epi64x(10 << 40);
         let b = _mm256_set1_epi64x((11 << 40) + 4);
         let c = _mm256_set1_epi64x((12 << 40) + 3);
@@ -474,7 +474,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm256_madd52hi_epu64() {
+    fn test_mm256_madd52hi_epu64() {
         let a = _mm256_set1_epi64x(10 << 40);
         let b = _mm256_set1_epi64x((11 << 40) + 4);
         let c = _mm256_set1_epi64x((12 << 40) + 3);
@@ -488,7 +488,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm256_mask_madd52hi_epu64() {
+    fn test_mm256_mask_madd52hi_epu64() {
         let a = _mm256_set1_epi64x(10 << 40);
         let b = _mm256_set1_epi64x((11 << 40) + 4);
         let c = _mm256_set1_epi64x((12 << 40) + 3);
@@ -503,7 +503,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm256_maskz_madd52hi_epu64() {
+    fn test_mm256_maskz_madd52hi_epu64() {
         let a = _mm256_set1_epi64x(10 << 40);
         let b = _mm256_set1_epi64x((11 << 40) + 4);
         let c = _mm256_set1_epi64x((12 << 40) + 3);
@@ -518,7 +518,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxifma")]
-    unsafe fn test_mm256_madd52lo_avx_epu64() {
+    fn test_mm256_madd52lo_avx_epu64() {
         let a = _mm256_set1_epi64x(10 << 40);
         let b = _mm256_set1_epi64x((11 << 40) + 4);
         let c = _mm256_set1_epi64x((12 << 40) + 3);
@@ -532,7 +532,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm256_madd52lo_epu64() {
+    fn test_mm256_madd52lo_epu64() {
         let a = _mm256_set1_epi64x(10 << 40);
         let b = _mm256_set1_epi64x((11 << 40) + 4);
         let c = _mm256_set1_epi64x((12 << 40) + 3);
@@ -546,7 +546,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm256_mask_madd52lo_epu64() {
+    fn test_mm256_mask_madd52lo_epu64() {
         let a = _mm256_set1_epi64x(10 << 40);
         let b = _mm256_set1_epi64x((11 << 40) + 4);
         let c = _mm256_set1_epi64x((12 << 40) + 3);
@@ -561,7 +561,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm256_maskz_madd52lo_epu64() {
+    fn test_mm256_maskz_madd52lo_epu64() {
         let a = _mm256_set1_epi64x(10 << 40);
         let b = _mm256_set1_epi64x((11 << 40) + 4);
         let c = _mm256_set1_epi64x((12 << 40) + 3);
@@ -576,7 +576,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxifma")]
-    unsafe fn test_mm_madd52hi_avx_epu64() {
+    fn test_mm_madd52hi_avx_epu64() {
         let a = _mm_set1_epi64x(10 << 40);
         let b = _mm_set1_epi64x((11 << 40) + 4);
         let c = _mm_set1_epi64x((12 << 40) + 3);
@@ -590,7 +590,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm_madd52hi_epu64() {
+    fn test_mm_madd52hi_epu64() {
         let a = _mm_set1_epi64x(10 << 40);
         let b = _mm_set1_epi64x((11 << 40) + 4);
         let c = _mm_set1_epi64x((12 << 40) + 3);
@@ -604,7 +604,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm_mask_madd52hi_epu64() {
+    fn test_mm_mask_madd52hi_epu64() {
         let a = _mm_set1_epi64x(10 << 40);
         let b = _mm_set1_epi64x((11 << 40) + 4);
         let c = _mm_set1_epi64x((12 << 40) + 3);
@@ -619,7 +619,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm_maskz_madd52hi_epu64() {
+    fn test_mm_maskz_madd52hi_epu64() {
         let a = _mm_set1_epi64x(10 << 40);
         let b = _mm_set1_epi64x((11 << 40) + 4);
         let c = _mm_set1_epi64x((12 << 40) + 3);
@@ -634,7 +634,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxifma")]
-    unsafe fn test_mm_madd52lo_avx_epu64() {
+    fn test_mm_madd52lo_avx_epu64() {
         let a = _mm_set1_epi64x(10 << 40);
         let b = _mm_set1_epi64x((11 << 40) + 4);
         let c = _mm_set1_epi64x((12 << 40) + 3);
@@ -648,7 +648,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm_madd52lo_epu64() {
+    fn test_mm_madd52lo_epu64() {
         let a = _mm_set1_epi64x(10 << 40);
         let b = _mm_set1_epi64x((11 << 40) + 4);
         let c = _mm_set1_epi64x((12 << 40) + 3);
@@ -662,7 +662,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm_mask_madd52lo_epu64() {
+    fn test_mm_mask_madd52lo_epu64() {
         let a = _mm_set1_epi64x(10 << 40);
         let b = _mm_set1_epi64x((11 << 40) + 4);
         let c = _mm_set1_epi64x((12 << 40) + 3);
@@ -677,7 +677,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512ifma,avx512vl")]
-    unsafe fn test_mm_maskz_madd52lo_epu64() {
+    fn test_mm_maskz_madd52lo_epu64() {
         let a = _mm_set1_epi64x(10 << 40);
         let b = _mm_set1_epi64x((11 << 40) + 4);
         let c = _mm_set1_epi64x((12 << 40) + 3);
diff --git a/crates/core_arch/src/x86/avx512vbmi.rs b/crates/core_arch/src/x86/avx512vbmi.rs
index 3527ccc9e4..d9ad14ef00 100644
--- a/crates/core_arch/src/x86/avx512vbmi.rs
+++ b/crates/core_arch/src/x86/avx512vbmi.rs
@@ -484,7 +484,7 @@ mod tests {
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_permutex2var_epi8() {
+    fn test_mm512_permutex2var_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -508,7 +508,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_mask_permutex2var_epi8() {
+    fn test_mm512_mask_permutex2var_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -539,7 +539,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_maskz_permutex2var_epi8() {
+    fn test_mm512_maskz_permutex2var_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -570,7 +570,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_mask2_permutex2var_epi8() {
+    fn test_mm512_mask2_permutex2var_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -601,7 +601,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_permutex2var_epi8() {
+    fn test_mm256_permutex2var_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -619,7 +619,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_mask_permutex2var_epi8() {
+    fn test_mm256_mask_permutex2var_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -639,7 +639,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_maskz_permutex2var_epi8() {
+    fn test_mm256_maskz_permutex2var_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -659,7 +659,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_mask2_permutex2var_epi8() {
+    fn test_mm256_mask2_permutex2var_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -679,7 +679,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_permutex2var_epi8() {
+    fn test_mm_permutex2var_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
@@ -692,7 +692,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_mask_permutex2var_epi8() {
+    fn test_mm_mask_permutex2var_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
@@ -707,7 +707,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_maskz_permutex2var_epi8() {
+    fn test_mm_maskz_permutex2var_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
@@ -722,7 +722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_mask2_permutex2var_epi8() {
+    fn test_mm_mask2_permutex2var_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let idx = _mm_set_epi8(1, 1 << 4, 2, 1 << 4, 3, 1 << 4, 4, 1 << 4, 5, 1 << 4, 6, 1 << 4, 7, 1 << 4, 8, 1 << 4);
@@ -737,7 +737,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_permutexvar_epi8() {
+    fn test_mm512_permutexvar_epi8() {
         let idx = _mm512_set1_epi8(1);
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -750,7 +750,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_mask_permutexvar_epi8() {
+    fn test_mm512_mask_permutexvar_epi8() {
         let idx = _mm512_set1_epi8(1);
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -770,7 +770,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_maskz_permutexvar_epi8() {
+    fn test_mm512_maskz_permutexvar_epi8() {
         let idx = _mm512_set1_epi8(1);
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -789,7 +789,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_permutexvar_epi8() {
+    fn test_mm256_permutexvar_epi8() {
         let idx = _mm256_set1_epi8(1);
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -800,7 +800,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_mask_permutexvar_epi8() {
+    fn test_mm256_mask_permutexvar_epi8() {
         let idx = _mm256_set1_epi8(1);
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -813,7 +813,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_maskz_permutexvar_epi8() {
+    fn test_mm256_maskz_permutexvar_epi8() {
         let idx = _mm256_set1_epi8(1);
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -826,7 +826,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_permutexvar_epi8() {
+    fn test_mm_permutexvar_epi8() {
         let idx = _mm_set1_epi8(1);
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_permutexvar_epi8(idx, a);
@@ -835,7 +835,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_mask_permutexvar_epi8() {
+    fn test_mm_mask_permutexvar_epi8() {
         let idx = _mm_set1_epi8(1);
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_mask_permutexvar_epi8(a, 0, idx, a);
@@ -846,7 +846,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_maskz_permutexvar_epi8() {
+    fn test_mm_maskz_permutexvar_epi8() {
         let idx = _mm_set1_epi8(1);
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_permutexvar_epi8(0, idx, a);
@@ -857,7 +857,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_multishift_epi64_epi8() {
+    fn test_mm512_multishift_epi64_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let r = _mm512_multishift_epi64_epi8(a, b);
@@ -866,7 +866,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_mask_multishift_epi64_epi8() {
+    fn test_mm512_mask_multishift_epi64_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let r = _mm512_mask_multishift_epi64_epi8(a, 0, a, b);
@@ -882,7 +882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi")]
-    unsafe fn test_mm512_maskz_multishift_epi64_epi8() {
+    fn test_mm512_maskz_multishift_epi64_epi8() {
         let a = _mm512_set1_epi8(1);
         let b = _mm512_set1_epi8(1);
         let r = _mm512_maskz_multishift_epi64_epi8(0, a, b);
@@ -897,7 +897,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_multishift_epi64_epi8() {
+    fn test_mm256_multishift_epi64_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let r = _mm256_multishift_epi64_epi8(a, b);
@@ -906,7 +906,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_mask_multishift_epi64_epi8() {
+    fn test_mm256_mask_multishift_epi64_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let r = _mm256_mask_multishift_epi64_epi8(a, 0, a, b);
@@ -917,7 +917,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm256_maskz_multishift_epi64_epi8() {
+    fn test_mm256_maskz_multishift_epi64_epi8() {
         let a = _mm256_set1_epi8(1);
         let b = _mm256_set1_epi8(1);
         let r = _mm256_maskz_multishift_epi64_epi8(0, a, b);
@@ -928,7 +928,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_multishift_epi64_epi8() {
+    fn test_mm_multishift_epi64_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let r = _mm_multishift_epi64_epi8(a, b);
@@ -937,7 +937,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_mask_multishift_epi64_epi8() {
+    fn test_mm_mask_multishift_epi64_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let r = _mm_mask_multishift_epi64_epi8(a, 0, a, b);
@@ -948,7 +948,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi,avx512vl")]
-    unsafe fn test_mm_maskz_multishift_epi64_epi8() {
+    fn test_mm_maskz_multishift_epi64_epi8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(1);
         let r = _mm_maskz_multishift_epi64_epi8(0, a, b);
diff --git a/crates/core_arch/src/x86/avx512vbmi2.rs b/crates/core_arch/src/x86/avx512vbmi2.rs
index e25fd4528d..78a50b90c8 100644
--- a/crates/core_arch/src/x86/avx512vbmi2.rs
+++ b/crates/core_arch/src/x86/avx512vbmi2.rs
@@ -499,7 +499,8 @@ pub fn _mm_maskz_expand_epi8(k: __mmask16, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
-pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_i64x8(),
@@ -516,7 +517,8 @@ pub fn _mm512_shldv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
-pub fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
         transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
@@ -530,7 +532,8 @@ pub fn _mm512_mask_shldv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i)
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
-pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shldv_epi64(a, b, c).as_i64x8();
         transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
@@ -544,7 +547,8 @@ pub fn _mm512_maskz_shldv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
-pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_i64x4(),
@@ -561,7 +565,8 @@ pub fn _mm256_shldv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
-pub fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
         transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
@@ -575,7 +580,8 @@ pub fn _mm256_mask_shldv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
-pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shldv_epi64(a, b, c).as_i64x4();
         transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
@@ -589,7 +595,8 @@ pub fn _mm256_maskz_shldv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
-pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_i64x2(),
@@ -606,7 +613,8 @@ pub fn _mm_shldv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
-pub fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
         transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
@@ -620,7 +628,8 @@ pub fn _mm_mask_shldv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvq))]
-pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shldv_epi64(a, b, c).as_i64x2();
         transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
@@ -634,7 +643,8 @@ pub fn _mm_maskz_shldv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
-pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_i32x16(),
@@ -651,7 +661,8 @@ pub fn _mm512_shldv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
-pub fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
         transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
@@ -665,7 +676,8 @@ pub fn _mm512_mask_shldv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i)
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
-pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shldv_epi32(a, b, c).as_i32x16();
         transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
@@ -679,7 +691,8 @@ pub fn _mm512_maskz_shldv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
-pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_i32x8(),
@@ -696,7 +709,8 @@ pub fn _mm256_shldv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
-pub fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
         transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
@@ -710,7 +724,8 @@ pub fn _mm256_mask_shldv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
-pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shldv_epi32(a, b, c).as_i32x8();
         transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
@@ -724,7 +739,8 @@ pub fn _mm256_maskz_shldv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
-pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_i32x4(),
@@ -741,7 +757,8 @@ pub fn _mm_shldv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
-pub fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
         transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
@@ -755,7 +772,8 @@ pub fn _mm_mask_shldv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvd))]
-pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shldv_epi32(a, b, c).as_i32x4();
         transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
@@ -769,7 +787,8 @@ pub fn _mm_maskz_shldv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
-pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_i16x32(),
@@ -786,7 +805,8 @@ pub fn _mm512_shldv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
-pub fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
         transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
@@ -800,7 +820,8 @@ pub fn _mm512_mask_shldv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i)
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
-pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shldv_epi16(a, b, c).as_i16x32();
         transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
@@ -814,7 +835,8 @@ pub fn _mm512_maskz_shldv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
-pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_i16x16(),
@@ -831,7 +853,8 @@ pub fn _mm256_shldv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
-pub fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
         transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
@@ -845,7 +868,8 @@ pub fn _mm256_mask_shldv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
-pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shldv_epi16(a, b, c).as_i16x16();
         transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
@@ -859,7 +883,8 @@ pub fn _mm256_maskz_shldv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
-pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shl(
             a.as_i16x8(),
@@ -876,7 +901,8 @@ pub fn _mm_shldv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
-pub fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
         transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
@@ -890,7 +916,8 @@ pub fn _mm_mask_shldv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldvw))]
-pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shldv_epi16(a, b, c).as_i16x8();
         transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
@@ -904,7 +931,8 @@ pub fn _mm_maskz_shldv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
-pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shr(
             b.as_i64x8(),
@@ -921,7 +949,8 @@ pub fn _mm512_shrdv_epi64(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
-pub fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
         transmute(simd_select_bitmask(k, shf, a.as_i64x8()))
@@ -935,7 +964,8 @@ pub fn _mm512_mask_shrdv_epi64(a: __m512i, k: __mmask8, b: __m512i, c: __m512i)
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
-pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shrdv_epi64(a, b, c).as_i64x8();
         transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
@@ -949,7 +979,8 @@ pub fn _mm512_maskz_shrdv_epi64(k: __mmask8, a: __m512i, b: __m512i, c: __m512i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
-pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shr(
             b.as_i64x4(),
@@ -966,7 +997,8 @@ pub fn _mm256_shrdv_epi64(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
-pub fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
         transmute(simd_select_bitmask(k, shf, a.as_i64x4()))
@@ -980,7 +1012,8 @@ pub fn _mm256_mask_shrdv_epi64(a: __m256i, k: __mmask8, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
-pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shrdv_epi64(a, b, c).as_i64x4();
         transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
@@ -994,7 +1027,8 @@ pub fn _mm256_maskz_shrdv_epi64(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
-pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shr(
             b.as_i64x2(),
@@ -1011,7 +1045,8 @@ pub fn _mm_shrdv_epi64(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
-pub fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
         transmute(simd_select_bitmask(k, shf, a.as_i64x2()))
@@ -1025,7 +1060,8 @@ pub fn _mm_mask_shrdv_epi64(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvq))]
-pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shrdv_epi64(a, b, c).as_i64x2();
         transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
@@ -1039,7 +1075,8 @@ pub fn _mm_maskz_shrdv_epi64(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
-pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shr(
             b.as_i32x16(),
@@ -1056,7 +1093,8 @@ pub fn _mm512_shrdv_epi32(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
-pub fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
         transmute(simd_select_bitmask(k, shf, a.as_i32x16()))
@@ -1070,7 +1108,8 @@ pub fn _mm512_mask_shrdv_epi32(a: __m512i, k: __mmask16, b: __m512i, c: __m512i)
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
-pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shrdv_epi32(a, b, c).as_i32x16();
         transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
@@ -1084,7 +1123,8 @@ pub fn _mm512_maskz_shrdv_epi32(k: __mmask16, a: __m512i, b: __m512i, c: __m512i
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
-pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shr(
             b.as_i32x8(),
@@ -1101,7 +1141,8 @@ pub fn _mm256_shrdv_epi32(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
-pub fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
         transmute(simd_select_bitmask(k, shf, a.as_i32x8()))
@@ -1115,7 +1156,8 @@ pub fn _mm256_mask_shrdv_epi32(a: __m256i, k: __mmask8, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
-pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shrdv_epi32(a, b, c).as_i32x8();
         transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
@@ -1129,7 +1171,8 @@ pub fn _mm256_maskz_shrdv_epi32(k: __mmask8, a: __m256i, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
-pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shr(
             b.as_i32x4(),
@@ -1146,7 +1189,8 @@ pub fn _mm_shrdv_epi32(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
-pub fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
         transmute(simd_select_bitmask(k, shf, a.as_i32x4()))
@@ -1160,7 +1204,8 @@ pub fn _mm_mask_shrdv_epi32(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvd))]
-pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shrdv_epi32(a, b, c).as_i32x4();
         transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
@@ -1174,7 +1219,8 @@ pub fn _mm_maskz_shrdv_epi32(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
-pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         transmute(simd_funnel_shr(
             b.as_i16x32(),
@@ -1191,7 +1237,8 @@ pub fn _mm512_shrdv_epi16(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
-pub fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
         transmute(simd_select_bitmask(k, shf, a.as_i16x32()))
@@ -1205,7 +1252,8 @@ pub fn _mm512_mask_shrdv_epi16(a: __m512i, k: __mmask32, b: __m512i, c: __m512i)
 #[target_feature(enable = "avx512vbmi2")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
-pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i) -> __m512i {
     unsafe {
         let shf = _mm512_shrdv_epi16(a, b, c).as_i16x32();
         transmute(simd_select_bitmask(k, shf, i16x32::ZERO))
@@ -1219,7 +1267,8 @@ pub fn _mm512_maskz_shrdv_epi16(k: __mmask32, a: __m512i, b: __m512i, c: __m512i
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
-pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         transmute(simd_funnel_shr(
             b.as_i16x16(),
@@ -1236,7 +1285,8 @@ pub fn _mm256_shrdv_epi16(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
-pub fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
         transmute(simd_select_bitmask(k, shf, a.as_i16x16()))
@@ -1250,7 +1300,8 @@ pub fn _mm256_mask_shrdv_epi16(a: __m256i, k: __mmask16, b: __m256i, c: __m256i)
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
-pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i) -> __m256i {
     unsafe {
         let shf = _mm256_shrdv_epi16(a, b, c).as_i16x16();
         transmute(simd_select_bitmask(k, shf, i16x16::ZERO))
@@ -1264,7 +1315,8 @@ pub fn _mm256_maskz_shrdv_epi16(k: __mmask16, a: __m256i, b: __m256i, c: __m256i
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
-pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         transmute(simd_funnel_shr(
             b.as_i16x8(),
@@ -1281,7 +1333,8 @@ pub fn _mm_shrdv_epi16(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
-pub fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
         transmute(simd_select_bitmask(k, shf, a.as_i16x8()))
@@ -1295,7 +1348,8 @@ pub fn _mm_mask_shrdv_epi16(a: __m128i, k: __mmask8, b: __m128i, c: __m128i) ->
 #[target_feature(enable = "avx512vbmi2,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshrdvw))]
-pub fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) -> __m128i {
     unsafe {
         let shf = _mm_shrdv_epi16(a, b, c).as_i16x8();
         transmute(simd_select_bitmask(k, shf, i16x8::ZERO))
@@ -1310,7 +1364,8 @@ pub fn _mm_maskz_shrdv_epi16(k: __mmask8, a: __m128i, b: __m128i, c: __m128i) ->
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_shldv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
 }
@@ -1323,7 +1378,8 @@ pub fn _mm512_shldi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shldi_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shldi_epi64<const IMM8: i32>(
     src: __m512i,
     k: __mmask8,
     a: __m512i,
@@ -1344,7 +1400,12 @@ pub fn _mm512_mask_shldi_epi64<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shldi_epi64<const IMM8: i32>(
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm512_shldi_epi64::<IMM8>(a, b).as_i64x8();
@@ -1360,7 +1421,8 @@ pub fn _mm512_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_shldv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
 }
@@ -1373,7 +1435,8 @@ pub fn _mm256_shldi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shldi_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shldi_epi64<const IMM8: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -1394,7 +1457,12 @@ pub fn _mm256_mask_shldi_epi64<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shldi_epi64<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm256_shldi_epi64::<IMM8>(a, b).as_i64x4();
@@ -1410,7 +1478,8 @@ pub fn _mm256_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_shldv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
 }
@@ -1423,7 +1492,8 @@ pub fn _mm_shldi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_shldi_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shldi_epi64<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m128i,
@@ -1444,7 +1514,12 @@ pub fn _mm_mask_shldi_epi64<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shldi_epi64<const IMM8: i32>(
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm_shldi_epi64::<IMM8>(a, b).as_i64x2();
@@ -1460,7 +1535,8 @@ pub fn _mm_maskz_shldi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_shldv_epi32(a, b, _mm512_set1_epi32(IMM8))
 }
@@ -1473,7 +1549,8 @@ pub fn _mm512_shldi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shldi_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shldi_epi32<const IMM8: i32>(
     src: __m512i,
     k: __mmask16,
     a: __m512i,
@@ -1494,7 +1571,12 @@ pub fn _mm512_mask_shldi_epi32<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shldi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shldi_epi32<const IMM8: i32>(
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm512_shldi_epi32::<IMM8>(a, b).as_i32x16();
@@ -1510,7 +1592,8 @@ pub fn _mm512_maskz_shldi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_shldv_epi32(a, b, _mm256_set1_epi32(IMM8))
 }
@@ -1523,7 +1606,8 @@ pub fn _mm256_shldi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shldi_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shldi_epi32<const IMM8: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -1544,7 +1628,12 @@ pub fn _mm256_mask_shldi_epi32<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shldi_epi32<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm256_shldi_epi32::<IMM8>(a, b).as_i32x8();
@@ -1560,7 +1649,8 @@ pub fn _mm256_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_shldv_epi32(a, b, _mm_set1_epi32(IMM8))
 }
@@ -1573,7 +1663,8 @@ pub fn _mm_shldi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_shldi_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shldi_epi32<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m128i,
@@ -1594,7 +1685,12 @@ pub fn _mm_mask_shldi_epi32<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shldi_epi32<const IMM8: i32>(
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm_shldi_epi32::<IMM8>(a, b).as_i32x4();
@@ -1610,7 +1706,8 @@ pub fn _mm_maskz_shldi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_shldv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
 }
@@ -1623,7 +1720,8 @@ pub fn _mm512_shldi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shldi_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shldi_epi16<const IMM8: i32>(
     src: __m512i,
     k: __mmask32,
     a: __m512i,
@@ -1644,7 +1742,12 @@ pub fn _mm512_mask_shldi_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shldi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shldi_epi16<const IMM8: i32>(
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm512_shldi_epi16::<IMM8>(a, b).as_i16x32();
@@ -1660,7 +1763,8 @@ pub fn _mm512_maskz_shldi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_shldv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
 }
@@ -1673,7 +1777,8 @@ pub fn _mm256_shldi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shldi_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shldi_epi16<const IMM8: i32>(
     src: __m256i,
     k: __mmask16,
     a: __m256i,
@@ -1694,7 +1799,12 @@ pub fn _mm256_mask_shldi_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shldi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shldi_epi16<const IMM8: i32>(
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm256_shldi_epi16::<IMM8>(a, b).as_i16x16();
@@ -1710,7 +1820,8 @@ pub fn _mm256_maskz_shldi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_shldv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
 }
@@ -1723,7 +1834,8 @@ pub fn _mm_shldi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_shldi_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shldi_epi16<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m128i,
@@ -1744,7 +1856,12 @@ pub fn _mm_mask_shldi_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))]
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_shldi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shldi_epi16<const IMM8: i32>(
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm_shldi_epi16::<IMM8>(a, b).as_i16x8();
@@ -1760,7 +1877,8 @@ pub fn _mm_maskz_shldi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_shrdv_epi64(a, b, _mm512_set1_epi64(IMM8 as i64))
 }
@@ -1773,7 +1891,8 @@ pub fn _mm512_shrdi_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
     src: __m512i,
     k: __mmask8,
     a: __m512i,
@@ -1794,7 +1913,12 @@ pub fn _mm512_mask_shrdi_epi64<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 255))] //should be vpshrdq
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(
+    k: __mmask8,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm512_shrdi_epi64::<IMM8>(a, b).as_i64x8();
@@ -1810,7 +1934,8 @@ pub fn _mm512_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_shrdv_epi64(a, b, _mm256_set1_epi64x(IMM8 as i64))
 }
@@ -1823,7 +1948,8 @@ pub fn _mm256_shrdi_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -1844,7 +1970,12 @@ pub fn _mm256_mask_shrdi_epi64<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm256_shrdi_epi64::<IMM8>(a, b).as_i64x4();
@@ -1860,7 +1991,8 @@ pub fn _mm256_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_shrdv_epi64(a, b, _mm_set1_epi64x(IMM8 as i64))
 }
@@ -1873,7 +2005,8 @@ pub fn _mm_shrdi_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_shrdi_epi64<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shrdi_epi64<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m128i,
@@ -1894,7 +2027,12 @@ pub fn _mm_mask_shrdi_epi64<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldq, IMM8 = 5))] //should be vpshrdq
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shrdi_epi64<const IMM8: i32>(
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm_shrdi_epi64::<IMM8>(a, b).as_i64x2();
@@ -1910,7 +2048,8 @@ pub fn _mm_maskz_shrdi_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_shrdv_epi32(a, b, _mm512_set1_epi32(IMM8))
 }
@@ -1923,7 +2062,8 @@ pub fn _mm512_shrdi_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
     src: __m512i,
     k: __mmask16,
     a: __m512i,
@@ -1944,7 +2084,12 @@ pub fn _mm512_mask_shrdi_epi32<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(
+    k: __mmask16,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm512_shrdi_epi32::<IMM8>(a, b).as_i32x16();
@@ -1960,7 +2105,8 @@ pub fn _mm512_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_shrdv_epi32(a, b, _mm256_set1_epi32(IMM8))
 }
@@ -1973,7 +2119,8 @@ pub fn _mm256_shrdi_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
     src: __m256i,
     k: __mmask8,
     a: __m256i,
@@ -1994,7 +2141,12 @@ pub fn _mm256_mask_shrdi_epi32<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(
+    k: __mmask8,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm256_shrdi_epi32::<IMM8>(a, b).as_i32x8();
@@ -2010,7 +2162,8 @@ pub fn _mm256_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_shrdv_epi32(a, b, _mm_set1_epi32(IMM8))
 }
@@ -2023,7 +2176,8 @@ pub fn _mm_shrdi_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_shrdi_epi32<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shrdi_epi32<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m128i,
@@ -2044,7 +2198,12 @@ pub fn _mm_mask_shrdi_epi32<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldd, IMM8 = 5))] //should be vpshldd
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shrdi_epi32<const IMM8: i32>(
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm_shrdi_epi32::<IMM8>(a, b).as_i32x4();
@@ -2060,7 +2219,8 @@ pub fn _mm_maskz_shrdi_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
 #[rustc_legacy_const_generics(2)]
-pub fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm512_shrdv_epi16(a, b, _mm512_set1_epi16(IMM8 as i16))
 }
@@ -2073,7 +2233,8 @@ pub fn _mm512_shrdi_epi16<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
 #[rustc_legacy_const_generics(4)]
-pub fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
     src: __m512i,
     k: __mmask32,
     a: __m512i,
@@ -2094,7 +2255,12 @@ pub fn _mm512_mask_shrdi_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
 #[rustc_legacy_const_generics(3)]
-pub fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(
+    k: __mmask32,
+    a: __m512i,
+    b: __m512i,
+) -> __m512i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm512_shrdi_epi16::<IMM8>(a, b).as_i16x32();
@@ -2110,7 +2276,8 @@ pub fn _mm512_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask32, a: __m512i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
 #[rustc_legacy_const_generics(2)]
-pub fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm256_shrdv_epi16(a, b, _mm256_set1_epi16(IMM8 as i16))
 }
@@ -2123,7 +2290,8 @@ pub fn _mm256_shrdi_epi16<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
 #[rustc_legacy_const_generics(4)]
-pub fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
     src: __m256i,
     k: __mmask16,
     a: __m256i,
@@ -2144,7 +2312,12 @@ pub fn _mm256_mask_shrdi_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
 #[rustc_legacy_const_generics(3)]
-pub fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(
+    k: __mmask16,
+    a: __m256i,
+    b: __m256i,
+) -> __m256i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm256_shrdi_epi16::<IMM8>(a, b).as_i16x16();
@@ -2160,7 +2333,8 @@ pub fn _mm256_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask16, a: __m256i, b: __
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
 #[rustc_legacy_const_generics(2)]
-pub fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     _mm_shrdv_epi16(a, b, _mm_set1_epi16(IMM8 as i16))
 }
@@ -2173,7 +2347,8 @@ pub fn _mm_shrdi_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
 #[rustc_legacy_const_generics(4)]
-pub fn _mm_mask_shrdi_epi16<const IMM8: i32>(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_shrdi_epi16<const IMM8: i32>(
     src: __m128i,
     k: __mmask8,
     a: __m128i,
@@ -2194,7 +2369,12 @@ pub fn _mm_mask_shrdi_epi16<const IMM8: i32>(
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpshldw, IMM8 = 5))] //should be vpshrdw
 #[rustc_legacy_const_generics(3)]
-pub fn _mm_maskz_shrdi_epi16<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_shrdi_epi16<const IMM8: i32>(
+    k: __mmask8,
+    a: __m128i,
+    b: __m128i,
+) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         let shf = _mm_shrdi_epi16::<IMM8>(a, b).as_i16x8();
@@ -2262,6 +2442,7 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
 
     use stdarch_test::simd_test;
 
@@ -2269,7 +2450,7 @@ mod tests {
     use crate::hint::black_box;
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_compress_epi16() {
+    fn test_mm512_mask_compress_epi16() {
         let src = _mm512_set1_epi16(200);
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -2284,7 +2465,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_compress_epi16() {
+    fn test_mm512_maskz_compress_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -2298,7 +2479,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_compress_epi16() {
+    fn test_mm256_mask_compress_epi16() {
         let src = _mm256_set1_epi16(200);
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_mask_compress_epi16(src, 0b01010101_01010101, a);
@@ -2309,7 +2490,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_compress_epi16() {
+    fn test_mm256_maskz_compress_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_compress_epi16(0b01010101_01010101, a);
         let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
@@ -2317,7 +2498,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_compress_epi16() {
+    fn test_mm_mask_compress_epi16() {
         let src = _mm_set1_epi16(200);
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_mask_compress_epi16(src, 0b01010101, a);
@@ -2326,7 +2507,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_compress_epi16() {
+    fn test_mm_maskz_compress_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_maskz_compress_epi16(0b01010101, a);
         let e = _mm_set_epi16(0, 0, 0, 0, 1, 3, 5, 7);
@@ -2334,7 +2515,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_compress_epi8() {
+    fn test_mm512_mask_compress_epi8() {
         let src = _mm512_set1_epi8(100);
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -2357,7 +2538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_compress_epi8() {
+    fn test_mm512_maskz_compress_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -2378,7 +2559,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_compress_epi8() {
+    fn test_mm256_mask_compress_epi8() {
         let src = _mm256_set1_epi8(100);
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -2393,7 +2574,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_compress_epi8() {
+    fn test_mm256_maskz_compress_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -2407,7 +2588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_compress_epi8() {
+    fn test_mm_mask_compress_epi8() {
         let src = _mm_set1_epi8(100);
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_mask_compress_epi8(src, 0b01010101_01010101, a);
@@ -2418,7 +2599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_compress_epi8() {
+    fn test_mm_maskz_compress_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_compress_epi8(0b01010101_01010101, a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
@@ -2426,7 +2607,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_expand_epi16() {
+    fn test_mm512_mask_expand_epi16() {
         let src = _mm512_set1_epi16(200);
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -2441,7 +2622,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_expand_epi16() {
+    fn test_mm512_maskz_expand_epi16() {
         #[rustfmt::skip]
         let a = _mm512_set_epi16(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -2453,7 +2634,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_expand_epi16() {
+    fn test_mm256_mask_expand_epi16() {
         let src = _mm256_set1_epi16(200);
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_mask_expand_epi16(src, 0b01010101_01010101, a);
@@ -2464,7 +2645,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_expand_epi16() {
+    fn test_mm256_maskz_expand_epi16() {
         let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_expand_epi16(0b01010101_01010101, a);
         let e = _mm256_set_epi16(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
@@ -2472,7 +2653,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_expand_epi16() {
+    fn test_mm_mask_expand_epi16() {
         let src = _mm_set1_epi16(200);
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_mask_expand_epi16(src, 0b01010101, a);
@@ -2481,7 +2662,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_expand_epi16() {
+    fn test_mm_maskz_expand_epi16() {
         let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_maskz_expand_epi16(0b01010101, a);
         let e = _mm_set_epi16(0, 4, 0, 5, 0, 6, 0, 7);
@@ -2489,7 +2670,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_expand_epi8() {
+    fn test_mm512_mask_expand_epi8() {
         let src = _mm512_set1_epi8(100);
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -2512,7 +2693,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_expand_epi8() {
+    fn test_mm512_maskz_expand_epi8() {
         #[rustfmt::skip]
         let a = _mm512_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
@@ -2533,7 +2714,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_expand_epi8() {
+    fn test_mm256_mask_expand_epi8() {
         let src = _mm256_set1_epi8(100);
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
@@ -2548,7 +2729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_expand_epi8() {
+    fn test_mm256_maskz_expand_epi8() {
         #[rustfmt::skip]
         let a = _mm256_set_epi8(0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,
                                 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
@@ -2562,7 +2743,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_expand_epi8() {
+    fn test_mm_mask_expand_epi8() {
         let src = _mm_set1_epi8(100);
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_mask_expand_epi8(src, 0b01010101_01010101, a);
@@ -2573,7 +2754,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_expand_epi8() {
+    fn test_mm_maskz_expand_epi8() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_expand_epi8(0b01010101_01010101, a);
         let e = _mm_set_epi8(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
@@ -2581,7 +2762,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shldv_epi64() {
+    const fn test_mm512_shldv_epi64() {
         let a = _mm512_set1_epi64(1);
         let b = _mm512_set1_epi64(1 << 63);
         let c = _mm512_set1_epi64(2);
@@ -2591,7 +2772,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shldv_epi64() {
+    const fn test_mm512_mask_shldv_epi64() {
         let a = _mm512_set1_epi64(1);
         let b = _mm512_set1_epi64(1 << 63);
         let c = _mm512_set1_epi64(2);
@@ -2603,7 +2784,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shldv_epi64() {
+    const fn test_mm512_maskz_shldv_epi64() {
         let a = _mm512_set1_epi64(1);
         let b = _mm512_set1_epi64(1 << 63);
         let c = _mm512_set1_epi64(2);
@@ -2615,7 +2796,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shldv_epi64() {
+    const fn test_mm256_shldv_epi64() {
         let a = _mm256_set1_epi64x(1);
         let b = _mm256_set1_epi64x(1 << 63);
         let c = _mm256_set1_epi64x(2);
@@ -2625,7 +2806,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shldv_epi64() {
+    const fn test_mm256_mask_shldv_epi64() {
         let a = _mm256_set1_epi64x(1);
         let b = _mm256_set1_epi64x(1 << 63);
         let c = _mm256_set1_epi64x(2);
@@ -2637,7 +2818,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shldv_epi64() {
+    const fn test_mm256_maskz_shldv_epi64() {
         let a = _mm256_set1_epi64x(1);
         let b = _mm256_set1_epi64x(1 << 63);
         let c = _mm256_set1_epi64x(2);
@@ -2649,7 +2830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shldv_epi64() {
+    const fn test_mm_shldv_epi64() {
         let a = _mm_set1_epi64x(1);
         let b = _mm_set1_epi64x(1 << 63);
         let c = _mm_set1_epi64x(2);
@@ -2659,7 +2840,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shldv_epi64() {
+    const fn test_mm_mask_shldv_epi64() {
         let a = _mm_set1_epi64x(1);
         let b = _mm_set1_epi64x(1 << 63);
         let c = _mm_set1_epi64x(2);
@@ -2671,7 +2852,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shldv_epi64() {
+    const fn test_mm_maskz_shldv_epi64() {
         let a = _mm_set1_epi64x(1);
         let b = _mm_set1_epi64x(1 << 63);
         let c = _mm_set1_epi64x(2);
@@ -2683,7 +2864,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shldv_epi32() {
+    const fn test_mm512_shldv_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_set1_epi32(1 << 31);
         let c = _mm512_set1_epi32(2);
@@ -2693,7 +2874,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shldv_epi32() {
+    const fn test_mm512_mask_shldv_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_set1_epi32(1 << 31);
         let c = _mm512_set1_epi32(2);
@@ -2705,7 +2886,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shldv_epi32() {
+    const fn test_mm512_maskz_shldv_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_set1_epi32(1 << 31);
         let c = _mm512_set1_epi32(2);
@@ -2717,7 +2898,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shldv_epi32() {
+    const fn test_mm256_shldv_epi32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set1_epi32(1 << 31);
         let c = _mm256_set1_epi32(2);
@@ -2727,7 +2908,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shldv_epi32() {
+    const fn test_mm256_mask_shldv_epi32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set1_epi32(1 << 31);
         let c = _mm256_set1_epi32(2);
@@ -2739,7 +2920,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shldv_epi32() {
+    const fn test_mm256_maskz_shldv_epi32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set1_epi32(1 << 31);
         let c = _mm256_set1_epi32(2);
@@ -2751,7 +2932,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shldv_epi32() {
+    const fn test_mm_shldv_epi32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set1_epi32(1 << 31);
         let c = _mm_set1_epi32(2);
@@ -2761,7 +2942,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shldv_epi32() {
+    const fn test_mm_mask_shldv_epi32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set1_epi32(1 << 31);
         let c = _mm_set1_epi32(2);
@@ -2773,7 +2954,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shldv_epi32() {
+    const fn test_mm_maskz_shldv_epi32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set1_epi32(1 << 31);
         let c = _mm_set1_epi32(2);
@@ -2785,7 +2966,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shldv_epi16() {
+    const fn test_mm512_shldv_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1 << 15);
         let c = _mm512_set1_epi16(2);
@@ -2795,7 +2976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shldv_epi16() {
+    const fn test_mm512_mask_shldv_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1 << 15);
         let c = _mm512_set1_epi16(2);
@@ -2807,7 +2988,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shldv_epi16() {
+    const fn test_mm512_maskz_shldv_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1 << 15);
         let c = _mm512_set1_epi16(2);
@@ -2819,7 +3000,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shldv_epi16() {
+    const fn test_mm256_shldv_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1 << 15);
         let c = _mm256_set1_epi16(2);
@@ -2829,7 +3010,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shldv_epi16() {
+    const fn test_mm256_mask_shldv_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1 << 15);
         let c = _mm256_set1_epi16(2);
@@ -2841,7 +3022,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shldv_epi16() {
+    const fn test_mm256_maskz_shldv_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1 << 15);
         let c = _mm256_set1_epi16(2);
@@ -2853,7 +3034,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shldv_epi16() {
+    const fn test_mm_shldv_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1 << 15);
         let c = _mm_set1_epi16(2);
@@ -2863,7 +3044,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shldv_epi16() {
+    const fn test_mm_mask_shldv_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1 << 15);
         let c = _mm_set1_epi16(2);
@@ -2875,7 +3056,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shldv_epi16() {
+    const fn test_mm_maskz_shldv_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1 << 15);
         let c = _mm_set1_epi16(2);
@@ -2887,7 +3068,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shrdv_epi64() {
+    const fn test_mm512_shrdv_epi64() {
         let a = _mm512_set1_epi64(2);
         let b = _mm512_set1_epi64(8);
         let c = _mm512_set1_epi64(1);
@@ -2897,7 +3078,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shrdv_epi64() {
+    const fn test_mm512_mask_shrdv_epi64() {
         let a = _mm512_set1_epi64(2);
         let b = _mm512_set1_epi64(8);
         let c = _mm512_set1_epi64(1);
@@ -2909,7 +3090,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shrdv_epi64() {
+    const fn test_mm512_maskz_shrdv_epi64() {
         let a = _mm512_set1_epi64(2);
         let b = _mm512_set1_epi64(8);
         let c = _mm512_set1_epi64(1);
@@ -2921,7 +3102,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shrdv_epi64() {
+    const fn test_mm256_shrdv_epi64() {
         let a = _mm256_set1_epi64x(2);
         let b = _mm256_set1_epi64x(8);
         let c = _mm256_set1_epi64x(1);
@@ -2931,7 +3112,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shrdv_epi64() {
+    const fn test_mm256_mask_shrdv_epi64() {
         let a = _mm256_set1_epi64x(2);
         let b = _mm256_set1_epi64x(8);
         let c = _mm256_set1_epi64x(1);
@@ -2943,7 +3124,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shrdv_epi64() {
+    const fn test_mm256_maskz_shrdv_epi64() {
         let a = _mm256_set1_epi64x(2);
         let b = _mm256_set1_epi64x(8);
         let c = _mm256_set1_epi64x(1);
@@ -2955,7 +3136,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shrdv_epi64() {
+    const fn test_mm_shrdv_epi64() {
         let a = _mm_set1_epi64x(2);
         let b = _mm_set1_epi64x(8);
         let c = _mm_set1_epi64x(1);
@@ -2965,7 +3146,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shrdv_epi64() {
+    const fn test_mm_mask_shrdv_epi64() {
         let a = _mm_set1_epi64x(2);
         let b = _mm_set1_epi64x(8);
         let c = _mm_set1_epi64x(1);
@@ -2977,7 +3158,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shrdv_epi64() {
+    const fn test_mm_maskz_shrdv_epi64() {
         let a = _mm_set1_epi64x(2);
         let b = _mm_set1_epi64x(8);
         let c = _mm_set1_epi64x(1);
@@ -2989,7 +3170,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shrdv_epi32() {
+    const fn test_mm512_shrdv_epi32() {
         let a = _mm512_set1_epi32(2);
         let b = _mm512_set1_epi32(8);
         let c = _mm512_set1_epi32(1);
@@ -2999,7 +3180,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shrdv_epi32() {
+    const fn test_mm512_mask_shrdv_epi32() {
         let a = _mm512_set1_epi32(2);
         let b = _mm512_set1_epi32(8);
         let c = _mm512_set1_epi32(1);
@@ -3011,7 +3192,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shrdv_epi32() {
+    const fn test_mm512_maskz_shrdv_epi32() {
         let a = _mm512_set1_epi32(2);
         let b = _mm512_set1_epi32(8);
         let c = _mm512_set1_epi32(1);
@@ -3023,7 +3204,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shrdv_epi32() {
+    const fn test_mm256_shrdv_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(8);
         let c = _mm256_set1_epi32(1);
@@ -3033,7 +3214,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shrdv_epi32() {
+    const fn test_mm256_mask_shrdv_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(8);
         let c = _mm256_set1_epi32(1);
@@ -3045,7 +3226,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shrdv_epi32() {
+    const fn test_mm256_maskz_shrdv_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(8);
         let c = _mm256_set1_epi32(1);
@@ -3057,7 +3238,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shrdv_epi32() {
+    const fn test_mm_shrdv_epi32() {
         let a = _mm_set1_epi32(2);
         let b = _mm_set1_epi32(8);
         let c = _mm_set1_epi32(1);
@@ -3067,7 +3248,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shrdv_epi32() {
+    const fn test_mm_mask_shrdv_epi32() {
         let a = _mm_set1_epi32(2);
         let b = _mm_set1_epi32(8);
         let c = _mm_set1_epi32(1);
@@ -3079,7 +3260,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shrdv_epi32() {
+    const fn test_mm_maskz_shrdv_epi32() {
         let a = _mm_set1_epi32(2);
         let b = _mm_set1_epi32(8);
         let c = _mm_set1_epi32(1);
@@ -3091,7 +3272,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shrdv_epi16() {
+    const fn test_mm512_shrdv_epi16() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(8);
         let c = _mm512_set1_epi16(1);
@@ -3101,7 +3282,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shrdv_epi16() {
+    const fn test_mm512_mask_shrdv_epi16() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(8);
         let c = _mm512_set1_epi16(1);
@@ -3113,7 +3294,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shrdv_epi16() {
+    const fn test_mm512_maskz_shrdv_epi16() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(8);
         let c = _mm512_set1_epi16(1);
@@ -3125,7 +3306,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shrdv_epi16() {
+    const fn test_mm256_shrdv_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(8);
         let c = _mm256_set1_epi16(1);
@@ -3135,7 +3316,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shrdv_epi16() {
+    const fn test_mm256_mask_shrdv_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(8);
         let c = _mm256_set1_epi16(1);
@@ -3147,7 +3328,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shrdv_epi16() {
+    const fn test_mm256_maskz_shrdv_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(8);
         let c = _mm256_set1_epi16(1);
@@ -3159,7 +3340,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shrdv_epi16() {
+    const fn test_mm_shrdv_epi16() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(8);
         let c = _mm_set1_epi16(1);
@@ -3169,7 +3350,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shrdv_epi16() {
+    const fn test_mm_mask_shrdv_epi16() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(8);
         let c = _mm_set1_epi16(1);
@@ -3181,7 +3362,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shrdv_epi16() {
+    const fn test_mm_maskz_shrdv_epi16() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(8);
         let c = _mm_set1_epi16(1);
@@ -3193,7 +3374,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shldi_epi64() {
+    const fn test_mm512_shldi_epi64() {
         let a = _mm512_set1_epi64(1);
         let b = _mm512_set1_epi64(1 << 63);
         let r = _mm512_shldi_epi64::<2>(a, b);
@@ -3202,7 +3383,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shldi_epi64() {
+    const fn test_mm512_mask_shldi_epi64() {
         let a = _mm512_set1_epi64(1);
         let b = _mm512_set1_epi64(1 << 63);
         let r = _mm512_mask_shldi_epi64::<2>(a, 0, a, b);
@@ -3213,7 +3394,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shldi_epi64() {
+    const fn test_mm512_maskz_shldi_epi64() {
         let a = _mm512_set1_epi64(1);
         let b = _mm512_set1_epi64(1 << 63);
         let r = _mm512_maskz_shldi_epi64::<2>(0, a, b);
@@ -3224,7 +3405,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shldi_epi64() {
+    const fn test_mm256_shldi_epi64() {
         let a = _mm256_set1_epi64x(1);
         let b = _mm256_set1_epi64x(1 << 63);
         let r = _mm256_shldi_epi64::<2>(a, b);
@@ -3233,7 +3414,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shldi_epi64() {
+    const fn test_mm256_mask_shldi_epi64() {
         let a = _mm256_set1_epi64x(1);
         let b = _mm256_set1_epi64x(1 << 63);
         let r = _mm256_mask_shldi_epi64::<2>(a, 0, a, b);
@@ -3244,7 +3425,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shldi_epi64() {
+    const fn test_mm256_maskz_shldi_epi64() {
         let a = _mm256_set1_epi64x(1);
         let b = _mm256_set1_epi64x(1 << 63);
         let r = _mm256_maskz_shldi_epi64::<2>(0, a, b);
@@ -3255,7 +3436,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shldi_epi64() {
+    const fn test_mm_shldi_epi64() {
         let a = _mm_set1_epi64x(1);
         let b = _mm_set1_epi64x(1 << 63);
         let r = _mm_shldi_epi64::<2>(a, b);
@@ -3264,7 +3445,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shldi_epi64() {
+    const fn test_mm_mask_shldi_epi64() {
         let a = _mm_set1_epi64x(1);
         let b = _mm_set1_epi64x(1 << 63);
         let r = _mm_mask_shldi_epi64::<2>(a, 0, a, b);
@@ -3275,7 +3456,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shldi_epi64() {
+    const fn test_mm_maskz_shldi_epi64() {
         let a = _mm_set1_epi64x(1);
         let b = _mm_set1_epi64x(1 << 63);
         let r = _mm_maskz_shldi_epi64::<2>(0, a, b);
@@ -3286,7 +3467,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shldi_epi32() {
+    const fn test_mm512_shldi_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_set1_epi32(1 << 31);
         let r = _mm512_shldi_epi32::<2>(a, b);
@@ -3295,7 +3476,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shldi_epi32() {
+    const fn test_mm512_mask_shldi_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_set1_epi32(1 << 31);
         let r = _mm512_mask_shldi_epi32::<2>(a, 0, a, b);
@@ -3306,7 +3487,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shldi_epi32() {
+    const fn test_mm512_maskz_shldi_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_set1_epi32(1 << 31);
         let r = _mm512_maskz_shldi_epi32::<2>(0, a, b);
@@ -3317,7 +3498,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shldi_epi32() {
+    const fn test_mm256_shldi_epi32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set1_epi32(1 << 31);
         let r = _mm256_shldi_epi32::<2>(a, b);
@@ -3326,7 +3507,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shldi_epi32() {
+    const fn test_mm256_mask_shldi_epi32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set1_epi32(1 << 31);
         let r = _mm256_mask_shldi_epi32::<2>(a, 0, a, b);
@@ -3337,7 +3518,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shldi_epi32() {
+    const fn test_mm256_maskz_shldi_epi32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set1_epi32(1 << 31);
         let r = _mm256_maskz_shldi_epi32::<2>(0, a, b);
@@ -3348,7 +3529,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shldi_epi32() {
+    const fn test_mm_shldi_epi32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set1_epi32(1 << 31);
         let r = _mm_shldi_epi32::<2>(a, b);
@@ -3357,7 +3538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shldi_epi32() {
+    const fn test_mm_mask_shldi_epi32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set1_epi32(1 << 31);
         let r = _mm_mask_shldi_epi32::<2>(a, 0, a, b);
@@ -3368,7 +3549,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shldi_epi32() {
+    const fn test_mm_maskz_shldi_epi32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set1_epi32(1 << 31);
         let r = _mm_maskz_shldi_epi32::<2>(0, a, b);
@@ -3379,7 +3560,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shldi_epi16() {
+    const fn test_mm512_shldi_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1 << 15);
         let r = _mm512_shldi_epi16::<2>(a, b);
@@ -3388,7 +3569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shldi_epi16() {
+    const fn test_mm512_mask_shldi_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1 << 15);
         let r = _mm512_mask_shldi_epi16::<2>(a, 0, a, b);
@@ -3399,7 +3580,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shldi_epi16() {
+    const fn test_mm512_maskz_shldi_epi16() {
         let a = _mm512_set1_epi16(1);
         let b = _mm512_set1_epi16(1 << 15);
         let r = _mm512_maskz_shldi_epi16::<2>(0, a, b);
@@ -3410,7 +3591,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shldi_epi16() {
+    const fn test_mm256_shldi_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1 << 15);
         let r = _mm256_shldi_epi16::<2>(a, b);
@@ -3419,7 +3600,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shldi_epi16() {
+    const fn test_mm256_mask_shldi_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1 << 15);
         let r = _mm256_mask_shldi_epi16::<2>(a, 0, a, b);
@@ -3430,7 +3611,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shldi_epi16() {
+    const fn test_mm256_maskz_shldi_epi16() {
         let a = _mm256_set1_epi16(1);
         let b = _mm256_set1_epi16(1 << 15);
         let r = _mm256_maskz_shldi_epi16::<2>(0, a, b);
@@ -3441,7 +3622,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shldi_epi16() {
+    const fn test_mm_shldi_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1 << 15);
         let r = _mm_shldi_epi16::<2>(a, b);
@@ -3450,7 +3631,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shldi_epi16() {
+    const fn test_mm_mask_shldi_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1 << 15);
         let r = _mm_mask_shldi_epi16::<2>(a, 0, a, b);
@@ -3461,7 +3642,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shldi_epi16() {
+    const fn test_mm_maskz_shldi_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(1 << 15);
         let r = _mm_maskz_shldi_epi16::<2>(0, a, b);
@@ -3472,7 +3653,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shrdi_epi64() {
+    const fn test_mm512_shrdi_epi64() {
         let a = _mm512_set1_epi64(2);
         let b = _mm512_set1_epi64(8);
         let r = _mm512_shrdi_epi64::<1>(a, b);
@@ -3481,7 +3662,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shrdi_epi64() {
+    const fn test_mm512_mask_shrdi_epi64() {
         let a = _mm512_set1_epi64(2);
         let b = _mm512_set1_epi64(8);
         let r = _mm512_mask_shrdi_epi64::<1>(a, 0, a, b);
@@ -3492,7 +3673,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shrdi_epi64() {
+    const fn test_mm512_maskz_shrdi_epi64() {
         let a = _mm512_set1_epi64(2);
         let b = _mm512_set1_epi64(8);
         let r = _mm512_maskz_shrdi_epi64::<1>(0, a, b);
@@ -3503,7 +3684,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shrdi_epi64() {
+    const fn test_mm256_shrdi_epi64() {
         let a = _mm256_set1_epi64x(2);
         let b = _mm256_set1_epi64x(8);
         let r = _mm256_shrdi_epi64::<1>(a, b);
@@ -3512,7 +3693,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shrdi_epi64() {
+    const fn test_mm256_mask_shrdi_epi64() {
         let a = _mm256_set1_epi64x(2);
         let b = _mm256_set1_epi64x(8);
         let r = _mm256_mask_shrdi_epi64::<1>(a, 0, a, b);
@@ -3523,7 +3704,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shrdi_epi64() {
+    const fn test_mm256_maskz_shrdi_epi64() {
         let a = _mm256_set1_epi64x(2);
         let b = _mm256_set1_epi64x(8);
         let r = _mm256_maskz_shrdi_epi64::<1>(0, a, b);
@@ -3534,7 +3715,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shrdi_epi64() {
+    const fn test_mm_shrdi_epi64() {
         let a = _mm_set1_epi64x(2);
         let b = _mm_set1_epi64x(8);
         let r = _mm_shrdi_epi64::<1>(a, b);
@@ -3543,7 +3724,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shrdi_epi64() {
+    const fn test_mm_mask_shrdi_epi64() {
         let a = _mm_set1_epi64x(2);
         let b = _mm_set1_epi64x(8);
         let r = _mm_mask_shrdi_epi64::<1>(a, 0, a, b);
@@ -3554,7 +3735,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shrdi_epi64() {
+    const fn test_mm_maskz_shrdi_epi64() {
         let a = _mm_set1_epi64x(2);
         let b = _mm_set1_epi64x(8);
         let r = _mm_maskz_shrdi_epi64::<1>(0, a, b);
@@ -3565,7 +3746,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shrdi_epi32() {
+    const fn test_mm512_shrdi_epi32() {
         let a = _mm512_set1_epi32(2);
         let b = _mm512_set1_epi32(8);
         let r = _mm512_shrdi_epi32::<1>(a, b);
@@ -3574,7 +3755,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shrdi_epi32() {
+    const fn test_mm512_mask_shrdi_epi32() {
         let a = _mm512_set1_epi32(2);
         let b = _mm512_set1_epi32(8);
         let r = _mm512_mask_shrdi_epi32::<1>(a, 0, a, b);
@@ -3585,7 +3766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shrdi_epi32() {
+    const fn test_mm512_maskz_shrdi_epi32() {
         let a = _mm512_set1_epi32(2);
         let b = _mm512_set1_epi32(8);
         let r = _mm512_maskz_shrdi_epi32::<1>(0, a, b);
@@ -3596,7 +3777,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shrdi_epi32() {
+    const fn test_mm256_shrdi_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(8);
         let r = _mm256_shrdi_epi32::<1>(a, b);
@@ -3605,7 +3786,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shrdi_epi32() {
+    const fn test_mm256_mask_shrdi_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(8);
         let r = _mm256_mask_shrdi_epi32::<1>(a, 0, a, b);
@@ -3616,7 +3797,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shrdi_epi32() {
+    const fn test_mm256_maskz_shrdi_epi32() {
         let a = _mm256_set1_epi32(2);
         let b = _mm256_set1_epi32(8);
         let r = _mm256_maskz_shrdi_epi32::<1>(0, a, b);
@@ -3627,7 +3808,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shrdi_epi32() {
+    const fn test_mm_shrdi_epi32() {
         let a = _mm_set1_epi32(2);
         let b = _mm_set1_epi32(8);
         let r = _mm_shrdi_epi32::<1>(a, b);
@@ -3636,7 +3817,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shrdi_epi32() {
+    const fn test_mm_mask_shrdi_epi32() {
         let a = _mm_set1_epi32(2);
         let b = _mm_set1_epi32(8);
         let r = _mm_mask_shrdi_epi32::<1>(a, 0, a, b);
@@ -3647,7 +3828,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shrdi_epi32() {
+    const fn test_mm_maskz_shrdi_epi32() {
         let a = _mm_set1_epi32(2);
         let b = _mm_set1_epi32(8);
         let r = _mm_maskz_shrdi_epi32::<1>(0, a, b);
@@ -3658,7 +3839,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_shrdi_epi16() {
+    const fn test_mm512_shrdi_epi16() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(8);
         let r = _mm512_shrdi_epi16::<1>(a, b);
@@ -3667,7 +3848,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_shrdi_epi16() {
+    const fn test_mm512_mask_shrdi_epi16() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(8);
         let r = _mm512_mask_shrdi_epi16::<1>(a, 0, a, b);
@@ -3678,7 +3859,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_shrdi_epi16() {
+    const fn test_mm512_maskz_shrdi_epi16() {
         let a = _mm512_set1_epi16(2);
         let b = _mm512_set1_epi16(8);
         let r = _mm512_maskz_shrdi_epi16::<1>(0, a, b);
@@ -3689,7 +3870,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_shrdi_epi16() {
+    const fn test_mm256_shrdi_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(8);
         let r = _mm256_shrdi_epi16::<1>(a, b);
@@ -3698,7 +3879,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_shrdi_epi16() {
+    const fn test_mm256_mask_shrdi_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(8);
         let r = _mm256_mask_shrdi_epi16::<1>(a, 0, a, b);
@@ -3709,7 +3890,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_shrdi_epi16() {
+    const fn test_mm256_maskz_shrdi_epi16() {
         let a = _mm256_set1_epi16(2);
         let b = _mm256_set1_epi16(8);
         let r = _mm256_maskz_shrdi_epi16::<1>(0, a, b);
@@ -3720,7 +3901,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_shrdi_epi16() {
+    const fn test_mm_shrdi_epi16() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(8);
         let r = _mm_shrdi_epi16::<1>(a, b);
@@ -3729,7 +3910,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_shrdi_epi16() {
+    const fn test_mm_mask_shrdi_epi16() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(8);
         let r = _mm_mask_shrdi_epi16::<1>(a, 0, a, b);
@@ -3740,7 +3921,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_shrdi_epi16() {
+    const fn test_mm_maskz_shrdi_epi16() {
         let a = _mm_set1_epi16(2);
         let b = _mm_set1_epi16(8);
         let r = _mm_maskz_shrdi_epi16::<1>(0, a, b);
@@ -3751,7 +3932,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_expandloadu_epi16() {
+    fn test_mm512_mask_expandloadu_epi16() {
         let src = _mm512_set1_epi16(42);
         let a = &[
             1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@@ -3759,7 +3940,7 @@ mod tests {
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010_11110000_00001111;
-        let r = _mm512_mask_expandloadu_epi16(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_expandloadu_epi16(src, m, black_box(p)) };
         let e = _mm512_set_epi16(
             16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
             42, 42, 42, 42, 42, 4, 3, 2, 1,
@@ -3768,14 +3949,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_expandloadu_epi16() {
+    fn test_mm512_maskz_expandloadu_epi16() {
         let a = &[
             1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31, 32,
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010_11110000_00001111;
-        let r = _mm512_maskz_expandloadu_epi16(m, black_box(p));
+        let r = unsafe { _mm512_maskz_expandloadu_epi16(m, black_box(p)) };
         let e = _mm512_set_epi16(
             16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
             0, 4, 3, 2, 1,
@@ -3784,49 +3965,49 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_expandloadu_epi16() {
+    fn test_mm256_mask_expandloadu_epi16() {
         let src = _mm256_set1_epi16(42);
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm256_mask_expandloadu_epi16(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_expandloadu_epi16(src, m, black_box(p)) };
         let e = _mm256_set_epi16(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_expandloadu_epi16() {
+    fn test_mm256_maskz_expandloadu_epi16() {
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm256_maskz_expandloadu_epi16(m, black_box(p));
+        let r = unsafe { _mm256_maskz_expandloadu_epi16(m, black_box(p)) };
         let e = _mm256_set_epi16(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_expandloadu_epi16() {
+    fn test_mm_mask_expandloadu_epi16() {
         let src = _mm_set1_epi16(42);
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm_mask_expandloadu_epi16(src, m, black_box(p));
+        let r = unsafe { _mm_mask_expandloadu_epi16(src, m, black_box(p)) };
         let e = _mm_set_epi16(4, 3, 2, 42, 1, 42, 42, 42);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_expandloadu_epi16() {
+    fn test_mm_maskz_expandloadu_epi16() {
         let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
         let p = a.as_ptr();
         let m = 0b11101000;
-        let r = _mm_maskz_expandloadu_epi16(m, black_box(p));
+        let r = unsafe { _mm_maskz_expandloadu_epi16(m, black_box(p)) };
         let e = _mm_set_epi16(4, 3, 2, 0, 1, 0, 0, 0);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_expandloadu_epi8() {
+    fn test_mm512_mask_expandloadu_epi8() {
         let src = _mm512_set1_epi8(42);
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@@ -3835,7 +4016,7 @@ mod tests {
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
-        let r = _mm512_mask_expandloadu_epi8(src, m, black_box(p));
+        let r = unsafe { _mm512_mask_expandloadu_epi8(src, m, black_box(p)) };
         let e = _mm512_set_epi8(
             32, 31, 30, 42, 29, 42, 42, 42, 28, 27, 42, 42, 26, 42, 25, 42, 24, 23, 22, 21, 42, 42,
             42, 42, 42, 42, 42, 42, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 42, 42, 42, 42,
@@ -3845,7 +4026,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_maskz_expandloadu_epi8() {
+    fn test_mm512_maskz_expandloadu_epi8() {
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
@@ -3853,7 +4034,7 @@ mod tests {
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010_11110000_00001111_11111111_00000000_10101010_01010101;
-        let r = _mm512_maskz_expandloadu_epi8(m, black_box(p));
+        let r = unsafe { _mm512_maskz_expandloadu_epi8(m, black_box(p)) };
         let e = _mm512_set_epi8(
             32, 31, 30, 0, 29, 0, 0, 0, 28, 27, 0, 0, 26, 0, 25, 0, 24, 23, 22, 21, 0, 0, 0, 0, 0,
             0, 0, 0, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0,
@@ -3863,7 +4044,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_expandloadu_epi8() {
+    fn test_mm256_mask_expandloadu_epi8() {
         let src = _mm256_set1_epi8(42);
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@@ -3871,7 +4052,7 @@ mod tests {
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010_11110000_00001111;
-        let r = _mm256_mask_expandloadu_epi8(src, m, black_box(p));
+        let r = unsafe { _mm256_mask_expandloadu_epi8(src, m, black_box(p)) };
         let e = _mm256_set_epi8(
             16, 15, 14, 42, 13, 42, 42, 42, 12, 11, 42, 42, 10, 42, 9, 42, 8, 7, 6, 5, 42, 42, 42,
             42, 42, 42, 42, 42, 4, 3, 2, 1,
@@ -3880,14 +4061,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_maskz_expandloadu_epi8() {
+    fn test_mm256_maskz_expandloadu_epi8() {
         let a = &[
             1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
             24, 25, 26, 27, 28, 29, 30, 31, 32,
         ];
         let p = a.as_ptr();
         let m = 0b11101000_11001010_11110000_00001111;
-        let r = _mm256_maskz_expandloadu_epi8(m, black_box(p));
+        let r = unsafe { _mm256_maskz_expandloadu_epi8(m, black_box(p)) };
         let e = _mm256_set_epi8(
             16, 15, 14, 0, 13, 0, 0, 0, 12, 11, 0, 0, 10, 0, 9, 0, 8, 7, 6, 5, 0, 0, 0, 0, 0, 0, 0,
             0, 4, 3, 2, 1,
@@ -3896,36 +4077,44 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_expandloadu_epi8() {
+    fn test_mm_mask_expandloadu_epi8() {
         let src = _mm_set1_epi8(42);
         let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm_mask_expandloadu_epi8(src, m, black_box(p));
+        let r = unsafe { _mm_mask_expandloadu_epi8(src, m, black_box(p)) };
         let e = _mm_set_epi8(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_maskz_expandloadu_epi8() {
+    fn test_mm_maskz_expandloadu_epi8() {
         let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
         let p = a.as_ptr();
         let m = 0b11101000_11001010;
-        let r = _mm_maskz_expandloadu_epi8(m, black_box(p));
+        let r = unsafe { _mm_maskz_expandloadu_epi8(m, black_box(p)) };
         let e = _mm_set_epi8(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_compressstoreu_epi16() {
+    fn test_mm512_mask_compressstoreu_epi16() {
         let a = _mm512_set_epi16(
             32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
             10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
         );
         let mut r = [0_i16; 32];
-        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i16; 32]);
-        _mm512_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
+        unsafe {
+            _mm512_mask_compressstoreu_epi16(
+                r.as_mut_ptr(),
+                0b11110000_11001010_11111111_00000000,
+                a,
+            );
+        }
         assert_eq!(
             &r,
             &[
@@ -3936,40 +4125,52 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_compressstoreu_epi16() {
+    fn test_mm256_mask_compressstoreu_epi16() {
         let a = _mm256_set_epi16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
         let mut r = [0_i16; 16];
-        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i16; 16]);
-        _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a);
+        unsafe {
+            _mm256_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000_11001010, a);
+        }
         assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_compressstoreu_epi16() {
+    fn test_mm_mask_compressstoreu_epi16() {
         let a = _mm_set_epi16(8, 7, 6, 5, 4, 3, 2, 1);
         let mut r = [0_i16; 8];
-        _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i16; 8]);
-        _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a);
+        unsafe {
+            _mm_mask_compressstoreu_epi16(r.as_mut_ptr(), 0b11110000, a);
+        }
         assert_eq!(&r, &[5, 6, 7, 8, 0, 0, 0, 0]);
     }
 
     #[simd_test(enable = "avx512vbmi2")]
-    unsafe fn test_mm512_mask_compressstoreu_epi8() {
+    fn test_mm512_mask_compressstoreu_epi8() {
         let a = _mm512_set_epi8(
             64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43,
             42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
             20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
         );
         let mut r = [0_i8; 64];
-        _mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm512_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i8; 64]);
-        _mm512_mask_compressstoreu_epi8(
-            r.as_mut_ptr(),
-            0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
-            a,
-        );
+        unsafe {
+            _mm512_mask_compressstoreu_epi8(
+                r.as_mut_ptr(),
+                0b11110000_11001010_11111111_00000000_10101010_01010101_11110000_00001111,
+                a,
+            );
+        }
         assert_eq!(
             &r,
             &[
@@ -3981,15 +4182,23 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm256_mask_compressstoreu_epi8() {
+    fn test_mm256_mask_compressstoreu_epi8() {
         let a = _mm256_set_epi8(
             32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
             10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
         );
         let mut r = [0_i8; 32];
-        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i8; 32]);
-        _mm256_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010_11111111_00000000, a);
+        unsafe {
+            _mm256_mask_compressstoreu_epi8(
+                r.as_mut_ptr(),
+                0b11110000_11001010_11111111_00000000,
+                a,
+            );
+        }
         assert_eq!(
             &r,
             &[
@@ -4000,12 +4209,16 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vbmi2,avx512vl")]
-    unsafe fn test_mm_mask_compressstoreu_epi8() {
+    fn test_mm_mask_compressstoreu_epi8() {
         let a = _mm_set_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
         let mut r = [0_i8; 16];
-        _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
+        unsafe {
+            _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0, a);
+        }
         assert_eq!(&r, &[0_i8; 16]);
-        _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a);
+        unsafe {
+            _mm_mask_compressstoreu_epi8(r.as_mut_ptr(), 0b11110000_11001010, a);
+        }
         assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
     }
 }
diff --git a/crates/core_arch/src/x86/avx512vnni.rs b/crates/core_arch/src/x86/avx512vnni.rs
index 93ea01cbb4..8cd8764f24 100644
--- a/crates/core_arch/src/x86/avx512vnni.rs
+++ b/crates/core_arch/src/x86/avx512vnni.rs
@@ -12,7 +12,7 @@ use stdarch_test::assert_instr;
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm512_dpwssd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
+    unsafe { transmute(vpdpwssd(src.as_i32x16(), a.as_i16x32(), b.as_i16x32())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -51,7 +51,7 @@ pub fn _mm512_maskz_dpwssd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i16x16(), b.as_i16x16())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@@ -62,7 +62,7 @@ pub fn _mm256_dpwssd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm256_dpwssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwssd256(src.as_i32x8(), a.as_i16x16(), b.as_i16x16())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -101,7 +101,7 @@ pub fn _mm256_maskz_dpwssd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i16x8(), b.as_i16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@@ -112,7 +112,7 @@ pub fn _mm_dpwssd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssd))]
 pub fn _mm_dpwssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwssd128(src.as_i32x4(), a.as_i16x8(), b.as_i16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -151,7 +151,7 @@ pub fn _mm_maskz_dpwssd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm512_dpwssds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
+    unsafe { transmute(vpdpwssds(src.as_i32x16(), a.as_i16x32(), b.as_i16x32())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -190,7 +190,7 @@ pub fn _mm512_maskz_dpwssds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i16x16(), b.as_i16x16())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@@ -201,7 +201,7 @@ pub fn _mm256_dpwssds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm256_dpwssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwssds256(src.as_i32x8(), a.as_i16x16(), b.as_i16x16())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -240,7 +240,7 @@ pub fn _mm256_maskz_dpwssds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i16x8(), b.as_i16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@@ -251,7 +251,7 @@ pub fn _mm_dpwssds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpwssds))]
 pub fn _mm_dpwssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwssds128(src.as_i32x4(), a.as_i16x8(), b.as_i16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding 16-bit integers in b, producing 2 intermediate signed 32-bit results. Sum these 2 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -290,7 +290,7 @@ pub fn _mm_maskz_dpwssds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm512_dpbusd_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
+    unsafe { transmute(vpdpbusd(src.as_i32x16(), a.as_u8x64(), b.as_i8x64())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -329,7 +329,7 @@ pub fn _mm512_maskz_dpbusd_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m5
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_u8x32(), b.as_i8x32())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@@ -340,7 +340,7 @@ pub fn _mm256_dpbusd_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm256_dpbusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbusd256(src.as_i32x8(), a.as_u8x32(), b.as_i8x32())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -379,7 +379,7 @@ pub fn _mm256_maskz_dpbusd_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m25
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_u8x16(), b.as_i8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst.
@@ -390,7 +390,7 @@ pub fn _mm_dpbusd_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusd))]
 pub fn _mm_dpbusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbusd128(src.as_i32x4(), a.as_u8x16(), b.as_i8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -429,7 +429,7 @@ pub fn _mm_maskz_dpbusd_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i)
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm512_dpbusds_epi32(src: __m512i, a: __m512i, b: __m512i) -> __m512i {
-    unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_i32x16(), b.as_i32x16())) }
+    unsafe { transmute(vpdpbusds(src.as_i32x16(), a.as_u8x64(), b.as_i8x64())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -468,7 +468,7 @@ pub fn _mm512_maskz_dpbusds_epi32(k: __mmask16, src: __m512i, a: __m512i, b: __m
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_u8x32(), b.as_i8x32())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@@ -479,7 +479,7 @@ pub fn _mm256_dpbusds_avx_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm256_dpbusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbusds256(src.as_i32x8(), a.as_u8x32(), b.as_i8x32())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -518,7 +518,7 @@ pub fn _mm256_maskz_dpbusds_epi32(k: __mmask8, src: __m256i, a: __m256i, b: __m2
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_u8x16(), b.as_i8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst.
@@ -529,7 +529,7 @@ pub fn _mm_dpbusds_avx_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpdpbusds))]
 pub fn _mm_dpbusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbusds128(src.as_i32x4(), a.as_u8x16(), b.as_i8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding signed 8-bit integers in b, producing 4 intermediate signed 16-bit results. Sum these 4 results with the corresponding 32-bit integer in src using signed saturation, and store the packed 32-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -570,7 +570,7 @@ pub fn _mm_maskz_dpbusds_epi32(k: __mmask8, src: __m128i, a: __m128i, b: __m128i
 #[cfg_attr(test, assert_instr(vpdpbssd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbssd_128(src.as_i32x4(), a.as_i8x16(), b.as_i8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@@ -583,7 +583,7 @@ pub fn _mm_dpbssd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbssd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbssd_256(src.as_i32x8(), a.as_i8x32(), b.as_i8x32())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@@ -596,7 +596,7 @@ pub fn _mm256_dpbssd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbssds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbssds_128(src.as_i32x4(), a.as_i8x16(), b.as_i8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding signed 8-bit
@@ -609,7 +609,7 @@ pub fn _mm_dpbssds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbssds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbssds_256(src.as_i32x8(), a.as_i8x32(), b.as_i8x32())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@@ -622,7 +622,7 @@ pub fn _mm256_dpbssds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbsud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbsud_128(src.as_i32x4(), a.as_i8x16(), b.as_u8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@@ -635,7 +635,7 @@ pub fn _mm_dpbsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbsud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbsud_256(src.as_i32x8(), a.as_i8x32(), b.as_u8x32())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@@ -648,7 +648,7 @@ pub fn _mm256_dpbsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbsuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbsuds_128(src.as_i32x4(), a.as_i8x16(), b.as_u8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in a with corresponding unsigned 8-bit
@@ -661,7 +661,7 @@ pub fn _mm_dpbsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbsuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbsuds_256(src.as_i32x8(), a.as_i8x32(), b.as_u8x32())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@@ -674,7 +674,7 @@ pub fn _mm256_dpbsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbuud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbuud_128(src.as_i32x4(), a.as_u8x16(), b.as_u8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@@ -687,7 +687,7 @@ pub fn _mm_dpbuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbuud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbuud_256(src.as_i32x8(), a.as_u8x32(), b.as_u8x32())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@@ -700,7 +700,7 @@ pub fn _mm256_dpbuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpbuuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpbuuds_128(src.as_i32x4(), a.as_u8x16(), b.as_u8x16())) }
 }
 
 /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in a with corresponding unsigned 8-bit
@@ -713,7 +713,7 @@ pub fn _mm_dpbuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpbuuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpbuuds_256(src.as_i32x8(), a.as_u8x32(), b.as_u8x32())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@@ -726,7 +726,7 @@ pub fn _mm256_dpbuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwsud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwsud_128(src.as_i32x4(), a.as_i16x8(), b.as_u16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@@ -739,7 +739,7 @@ pub fn _mm_dpwsud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwsud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwsud_256(src.as_i32x8(), a.as_i16x16(), b.as_u16x16())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@@ -752,7 +752,7 @@ pub fn _mm256_dpwsud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwsuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwsuds_128(src.as_i32x4(), a.as_i16x8(), b.as_u16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in a with corresponding unsigned 16-bit
@@ -765,7 +765,7 @@ pub fn _mm_dpwsuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwsuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwsuds_256(src.as_i32x8(), a.as_i16x16(), b.as_u16x16())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@@ -778,7 +778,7 @@ pub fn _mm256_dpwsuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwusd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwusd_128(src.as_i32x4(), a.as_u16x8(), b.as_i16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@@ -791,7 +791,7 @@ pub fn _mm_dpwusd_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwusd))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwusd_256(src.as_i32x8(), a.as_u16x16(), b.as_i16x16())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@@ -804,7 +804,7 @@ pub fn _mm256_dpwusd_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwusds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwusds_128(src.as_i32x4(), a.as_u16x8(), b.as_i16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding signed 16-bit
@@ -817,7 +817,7 @@ pub fn _mm_dpwusds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwusds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwusds_256(src.as_i32x8(), a.as_u16x16(), b.as_i16x16())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@@ -830,7 +830,7 @@ pub fn _mm256_dpwusds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwuud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwuud_128(src.as_i32x4(), a.as_u16x8(), b.as_u16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@@ -843,7 +843,7 @@ pub fn _mm_dpwuud_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwuud))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwuud_256(src.as_i32x8(), a.as_u16x16(), b.as_u16x16())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@@ -856,7 +856,7 @@ pub fn _mm256_dpwuud_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vpdpwuuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_i32x4(), b.as_i32x4())) }
+    unsafe { transmute(vpdpwuuds_128(src.as_i32x4(), a.as_u16x8(), b.as_u16x8())) }
 }
 
 /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in a with corresponding unsigned 16-bit
@@ -869,98 +869,98 @@ pub fn _mm_dpwuuds_epi32(src: __m128i, a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(vpdpwuuds))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_dpwuuds_epi32(src: __m256i, a: __m256i, b: __m256i) -> __m256i {
-    unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_i32x8(), b.as_i32x8())) }
+    unsafe { transmute(vpdpwuuds_256(src.as_i32x8(), a.as_u16x16(), b.as_u16x16())) }
 }
 
 #[allow(improper_ctypes)]
 unsafe extern "C" {
     #[link_name = "llvm.x86.avx512.vpdpwssd.512"]
-    fn vpdpwssd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
+    fn vpdpwssd(src: i32x16, a: i16x32, b: i16x32) -> i32x16;
     #[link_name = "llvm.x86.avx512.vpdpwssd.256"]
-    fn vpdpwssd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwssd256(src: i32x8, a: i16x16, b: i16x16) -> i32x8;
     #[link_name = "llvm.x86.avx512.vpdpwssd.128"]
-    fn vpdpwssd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwssd128(src: i32x4, a: i16x8, b: i16x8) -> i32x4;
 
     #[link_name = "llvm.x86.avx512.vpdpwssds.512"]
-    fn vpdpwssds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
+    fn vpdpwssds(src: i32x16, a: i16x32, b: i16x32) -> i32x16;
     #[link_name = "llvm.x86.avx512.vpdpwssds.256"]
-    fn vpdpwssds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwssds256(src: i32x8, a: i16x16, b: i16x16) -> i32x8;
     #[link_name = "llvm.x86.avx512.vpdpwssds.128"]
-    fn vpdpwssds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwssds128(src: i32x4, a: i16x8, b: i16x8) -> i32x4;
 
     #[link_name = "llvm.x86.avx512.vpdpbusd.512"]
-    fn vpdpbusd(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
+    fn vpdpbusd(src: i32x16, a: u8x64, b: i8x64) -> i32x16;
     #[link_name = "llvm.x86.avx512.vpdpbusd.256"]
-    fn vpdpbusd256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbusd256(src: i32x8, a: u8x32, b: i8x32) -> i32x8;
     #[link_name = "llvm.x86.avx512.vpdpbusd.128"]
-    fn vpdpbusd128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbusd128(src: i32x4, a: u8x16, b: i8x16) -> i32x4;
 
     #[link_name = "llvm.x86.avx512.vpdpbusds.512"]
-    fn vpdpbusds(src: i32x16, a: i32x16, b: i32x16) -> i32x16;
+    fn vpdpbusds(src: i32x16, a: u8x64, b: i8x64) -> i32x16;
     #[link_name = "llvm.x86.avx512.vpdpbusds.256"]
-    fn vpdpbusds256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbusds256(src: i32x8, a: u8x32, b: i8x32) -> i32x8;
     #[link_name = "llvm.x86.avx512.vpdpbusds.128"]
-    fn vpdpbusds128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbusds128(src: i32x4, a: u8x16, b: i8x16) -> i32x4;
 
     #[link_name = "llvm.x86.avx2.vpdpbssd.128"]
-    fn vpdpbssd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbssd_128(src: i32x4, a: i8x16, b: i8x16) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpbssd.256"]
-    fn vpdpbssd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbssd_256(src: i32x8, a: i8x32, b: i8x32) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpbssds.128"]
-    fn vpdpbssds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbssds_128(src: i32x4, a: i8x16, b: i8x16) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpbssds.256"]
-    fn vpdpbssds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbssds_256(src: i32x8, a: i8x32, b: i8x32) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpbsud.128"]
-    fn vpdpbsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbsud_128(src: i32x4, a: i8x16, b: u8x16) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpbsud.256"]
-    fn vpdpbsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbsud_256(src: i32x8, a: i8x32, b: u8x32) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpbsuds.128"]
-    fn vpdpbsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbsuds_128(src: i32x4, a: i8x16, b: u8x16) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpbsuds.256"]
-    fn vpdpbsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbsuds_256(src: i32x8, a: i8x32, b: u8x32) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpbuud.128"]
-    fn vpdpbuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbuud_128(src: i32x4, a: u8x16, b: u8x16) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpbuud.256"]
-    fn vpdpbuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbuud_256(src: i32x8, a: u8x32, b: u8x32) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpbuuds.128"]
-    fn vpdpbuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpbuuds_128(src: i32x4, a: u8x16, b: u8x16) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpbuuds.256"]
-    fn vpdpbuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpbuuds_256(src: i32x8, a: u8x32, b: u8x32) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpwsud.128"]
-    fn vpdpwsud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwsud_128(src: i32x4, a: i16x8, b: u16x8) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpwsud.256"]
-    fn vpdpwsud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwsud_256(src: i32x8, a: i16x16, b: u16x16) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpwsuds.128"]
-    fn vpdpwsuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwsuds_128(src: i32x4, a: i16x8, b: u16x8) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpwsuds.256"]
-    fn vpdpwsuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwsuds_256(src: i32x8, a: i16x16, b: u16x16) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpwusd.128"]
-    fn vpdpwusd_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwusd_128(src: i32x4, a: u16x8, b: i16x8) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpwusd.256"]
-    fn vpdpwusd_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwusd_256(src: i32x8, a: u16x16, b: i16x16) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpwusds.128"]
-    fn vpdpwusds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwusds_128(src: i32x4, a: u16x8, b: i16x8) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpwusds.256"]
-    fn vpdpwusds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwusds_256(src: i32x8, a: u16x16, b: i16x16) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpwuud.128"]
-    fn vpdpwuud_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwuud_128(src: i32x4, a: u16x8, b: u16x8) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpwuud.256"]
-    fn vpdpwuud_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwuud_256(src: i32x8, a: u16x16, b: u16x16) -> i32x8;
 
     #[link_name = "llvm.x86.avx2.vpdpwuuds.128"]
-    fn vpdpwuuds_128(src: i32x4, a: i32x4, b: i32x4) -> i32x4;
+    fn vpdpwuuds_128(src: i32x4, a: u16x8, b: u16x8) -> i32x4;
     #[link_name = "llvm.x86.avx2.vpdpwuuds.256"]
-    fn vpdpwuuds_256(src: i32x8, a: i32x8, b: i32x8) -> i32x8;
+    fn vpdpwuuds_256(src: i32x8, a: u16x16, b: u16x16) -> i32x8;
 }
 
 #[cfg(test)]
@@ -970,7 +970,7 @@ mod tests {
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_dpwssd_epi32() {
+    fn test_mm512_dpwssd_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
@@ -980,7 +980,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_mask_dpwssd_epi32() {
+    fn test_mm512_mask_dpwssd_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
@@ -992,7 +992,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_maskz_dpwssd_epi32() {
+    fn test_mm512_maskz_dpwssd_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
@@ -1004,7 +1004,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnni")]
-    unsafe fn test_mm256_dpwssd_avx_epi32() {
+    fn test_mm256_dpwssd_avx_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1014,7 +1014,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_dpwssd_epi32() {
+    fn test_mm256_dpwssd_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1024,7 +1024,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_mask_dpwssd_epi32() {
+    fn test_mm256_mask_dpwssd_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1036,7 +1036,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_maskz_dpwssd_epi32() {
+    fn test_mm256_maskz_dpwssd_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1048,7 +1048,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnni")]
-    unsafe fn test_mm_dpwssd_avx_epi32() {
+    fn test_mm_dpwssd_avx_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1058,7 +1058,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_dpwssd_epi32() {
+    fn test_mm_dpwssd_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1068,7 +1068,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_mask_dpwssd_epi32() {
+    fn test_mm_mask_dpwssd_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1080,7 +1080,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_maskz_dpwssd_epi32() {
+    fn test_mm_maskz_dpwssd_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1092,7 +1092,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_dpwssds_epi32() {
+    fn test_mm512_dpwssds_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
@@ -1102,7 +1102,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_mask_dpwssds_epi32() {
+    fn test_mm512_mask_dpwssds_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
@@ -1114,7 +1114,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_maskz_dpwssds_epi32() {
+    fn test_mm512_maskz_dpwssds_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 16 | 1 << 0);
@@ -1126,7 +1126,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnni")]
-    unsafe fn test_mm256_dpwssds_avx_epi32() {
+    fn test_mm256_dpwssds_avx_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1136,7 +1136,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_dpwssds_epi32() {
+    fn test_mm256_dpwssds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1146,7 +1146,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_mask_dpwssds_epi32() {
+    fn test_mm256_mask_dpwssds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1158,7 +1158,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_maskz_dpwssds_epi32() {
+    fn test_mm256_maskz_dpwssds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1170,7 +1170,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnni")]
-    unsafe fn test_mm_dpwssds_avx_epi32() {
+    fn test_mm_dpwssds_avx_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1180,7 +1180,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_dpwssds_epi32() {
+    fn test_mm_dpwssds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1190,7 +1190,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_mask_dpwssds_epi32() {
+    fn test_mm_mask_dpwssds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1202,7 +1202,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_maskz_dpwssds_epi32() {
+    fn test_mm_maskz_dpwssds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1214,7 +1214,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_dpbusd_epi32() {
+    fn test_mm512_dpbusd_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1224,7 +1224,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_mask_dpbusd_epi32() {
+    fn test_mm512_mask_dpbusd_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1236,7 +1236,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_maskz_dpbusd_epi32() {
+    fn test_mm512_maskz_dpbusd_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1248,7 +1248,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnni")]
-    unsafe fn test_mm256_dpbusd_avx_epi32() {
+    fn test_mm256_dpbusd_avx_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1258,7 +1258,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_dpbusd_epi32() {
+    fn test_mm256_dpbusd_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1268,7 +1268,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_mask_dpbusd_epi32() {
+    fn test_mm256_mask_dpbusd_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1280,7 +1280,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_maskz_dpbusd_epi32() {
+    fn test_mm256_maskz_dpbusd_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1292,7 +1292,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnni")]
-    unsafe fn test_mm_dpbusd_avx_epi32() {
+    fn test_mm_dpbusd_avx_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1302,7 +1302,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_dpbusd_epi32() {
+    fn test_mm_dpbusd_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1312,7 +1312,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_mask_dpbusd_epi32() {
+    fn test_mm_mask_dpbusd_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1324,7 +1324,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_maskz_dpbusd_epi32() {
+    fn test_mm_maskz_dpbusd_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1336,7 +1336,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_dpbusds_epi32() {
+    fn test_mm512_dpbusds_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1346,7 +1346,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_mask_dpbusds_epi32() {
+    fn test_mm512_mask_dpbusds_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1358,7 +1358,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni")]
-    unsafe fn test_mm512_maskz_dpbusds_epi32() {
+    fn test_mm512_maskz_dpbusds_epi32() {
         let src = _mm512_set1_epi32(1);
         let a = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm512_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1370,7 +1370,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnni")]
-    unsafe fn test_mm256_dpbusds_avx_epi32() {
+    fn test_mm256_dpbusds_avx_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1380,7 +1380,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_dpbusds_epi32() {
+    fn test_mm256_dpbusds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1390,7 +1390,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_mask_dpbusds_epi32() {
+    fn test_mm256_mask_dpbusds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1402,7 +1402,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm256_maskz_dpbusds_epi32() {
+    fn test_mm256_maskz_dpbusds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1414,7 +1414,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnni")]
-    unsafe fn test_mm_dpbusds_avx_epi32() {
+    fn test_mm_dpbusds_avx_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1424,7 +1424,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_dpbusds_epi32() {
+    fn test_mm_dpbusds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1434,7 +1434,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_mask_dpbusds_epi32() {
+    fn test_mm_mask_dpbusds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1446,7 +1446,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vnni,avx512vl")]
-    unsafe fn test_mm_maskz_dpbusds_epi32() {
+    fn test_mm_maskz_dpbusds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1458,7 +1458,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm_dpbssd_epi32() {
+    fn test_mm_dpbssd_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1468,7 +1468,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm256_dpbssd_epi32() {
+    fn test_mm256_dpbssd_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1478,7 +1478,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm_dpbssds_epi32() {
+    fn test_mm_dpbssds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1488,7 +1488,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm256_dpbssds_epi32() {
+    fn test_mm256_dpbssds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1498,7 +1498,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm_dpbsud_epi32() {
+    fn test_mm_dpbsud_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1508,7 +1508,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm256_dpbsud_epi32() {
+    fn test_mm256_dpbsud_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1518,7 +1518,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm_dpbsuds_epi32() {
+    fn test_mm_dpbsuds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1528,7 +1528,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm256_dpbsuds_epi32() {
+    fn test_mm256_dpbsuds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1538,7 +1538,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm_dpbuud_epi32() {
+    fn test_mm_dpbuud_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1548,7 +1548,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm256_dpbuud_epi32() {
+    fn test_mm256_dpbuud_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1558,7 +1558,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm_dpbuuds_epi32() {
+    fn test_mm_dpbuuds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1568,7 +1568,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint8")]
-    unsafe fn test_mm256_dpbuuds_epi32() {
+    fn test_mm256_dpbuuds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 24 | 1 << 16 | 1 << 8 | 1 << 0);
@@ -1578,7 +1578,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm_dpwsud_epi32() {
+    fn test_mm_dpwsud_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1588,7 +1588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm256_dpwsud_epi32() {
+    fn test_mm256_dpwsud_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1598,7 +1598,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm_dpwsuds_epi32() {
+    fn test_mm_dpwsuds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1608,7 +1608,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm256_dpwsuds_epi32() {
+    fn test_mm256_dpwsuds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1618,7 +1618,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm_dpwusd_epi32() {
+    fn test_mm_dpwusd_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1628,7 +1628,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm256_dpwusd_epi32() {
+    fn test_mm256_dpwusd_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1638,7 +1638,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm_dpwusds_epi32() {
+    fn test_mm_dpwusds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1648,7 +1648,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm256_dpwusds_epi32() {
+    fn test_mm256_dpwusds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1658,7 +1658,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm_dpwuud_epi32() {
+    fn test_mm_dpwuud_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1668,7 +1668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm256_dpwuud_epi32() {
+    fn test_mm256_dpwuud_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
@@ -1678,7 +1678,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm_dpwuuds_epi32() {
+    fn test_mm_dpwuuds_epi32() {
         let src = _mm_set1_epi32(1);
         let a = _mm_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm_set1_epi32(1 << 16 | 1 << 0);
@@ -1688,7 +1688,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avxvnniint16")]
-    unsafe fn test_mm256_dpwuuds_epi32() {
+    fn test_mm256_dpwuuds_epi32() {
         let src = _mm256_set1_epi32(1);
         let a = _mm256_set1_epi32(1 << 16 | 1 << 0);
         let b = _mm256_set1_epi32(1 << 16 | 1 << 0);
diff --git a/crates/core_arch/src/x86/avx512vp2intersect.rs b/crates/core_arch/src/x86/avx512vp2intersect.rs
new file mode 100644
index 0000000000..4dd7412e9e
--- /dev/null
+++ b/crates/core_arch/src/x86/avx512vp2intersect.rs
@@ -0,0 +1,244 @@
+//! Vector Pair Intersection to a Pair of Mask Registers (VP2INTERSECT)
+
+use crate::core_arch::{simd::*, x86::*};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Compute intersection of packed 32-bit integer vectors a and b,
+/// and store indication of match in the corresponding bit of two mask registers
+/// specified by k1 and k2. A match in corresponding elements of a and b is
+/// indicated by a set bit in the corresponding bit of the mask registers.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi32&expand=0)
+#[inline]
+#[target_feature(enable = "avx512vp2intersect,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
+#[cfg_attr(test, assert_instr(vp2intersectd))]
+pub unsafe fn _mm_2intersect_epi32(a: __m128i, b: __m128i, k1: *mut __mmask8, k2: *mut __mmask8) {
+    (*k1, *k2) = vp2intersectd_128(a.as_i32x4(), b.as_i32x4());
+}
+
+/// Compute intersection of packed 64-bit integer vectors a and b,
+/// and store indication of match in the corresponding bit of two mask registers
+/// specified by k1 and k2. A match in corresponding elements of a and b is
+/// indicated by a set bit in the corresponding bit of the mask registers.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_2intersect_epi64&expand=0)
+#[inline]
+#[target_feature(enable = "avx512vp2intersect,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
+#[cfg_attr(test, assert_instr(vp2intersectq))]
+pub unsafe fn _mm_2intersect_epi64(a: __m128i, b: __m128i, k1: *mut __mmask8, k2: *mut __mmask8) {
+    (*k1, *k2) = vp2intersectq_128(a.as_i64x2(), b.as_i64x2());
+}
+
+/// Compute intersection of packed 32-bit integer vectors a and b,
+/// and store indication of match in the corresponding bit of two mask registers
+/// specified by k1 and k2. A match in corresponding elements of a and b is
+/// indicated by a set bit in the corresponding bit of the mask registers.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi32&expand=0)
+#[inline]
+#[target_feature(enable = "avx512vp2intersect,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
+#[cfg_attr(test, assert_instr(vp2intersectd))]
+pub unsafe fn _mm256_2intersect_epi32(
+    a: __m256i,
+    b: __m256i,
+    k1: *mut __mmask8,
+    k2: *mut __mmask8,
+) {
+    (*k1, *k2) = vp2intersectd_256(a.as_i32x8(), b.as_i32x8());
+}
+
+/// Compute intersection of packed 64-bit integer vectors a and b,
+/// and store indication of match in the corresponding bit of two mask registers
+/// specified by k1 and k2. A match in corresponding elements of a and b is
+/// indicated by a set bit in the corresponding bit of the mask registers.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_2intersect_epi64&expand=0)
+#[inline]
+#[target_feature(enable = "avx512vp2intersect,avx512vl")]
+#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
+#[cfg_attr(test, assert_instr(vp2intersectq))]
+pub unsafe fn _mm256_2intersect_epi64(
+    a: __m256i,
+    b: __m256i,
+    k1: *mut __mmask8,
+    k2: *mut __mmask8,
+) {
+    (*k1, *k2) = vp2intersectq_256(a.as_i64x4(), b.as_i64x4());
+}
+
+/// Compute intersection of packed 32-bit integer vectors a and b,
+/// and store indication of match in the corresponding bit of two mask registers
+/// specified by k1 and k2. A match in corresponding elements of a and b is
+/// indicated by a set bit in the corresponding bit of the mask registers.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi32&expand=0)
+#[inline]
+#[target_feature(enable = "avx512vp2intersect,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
+#[cfg_attr(test, assert_instr(vp2intersectd))]
+pub unsafe fn _mm512_2intersect_epi32(
+    a: __m512i,
+    b: __m512i,
+    k1: *mut __mmask16,
+    k2: *mut __mmask16,
+) {
+    (*k1, *k2) = vp2intersectd_512(a.as_i32x16(), b.as_i32x16());
+}
+
+/// Compute intersection of packed 64-bit integer vectors a and b,
+/// and store indication of match in the corresponding bit of two mask registers
+/// specified by k1 and k2. A match in corresponding elements of a and b is
+/// indicated by a set bit in the corresponding bit of the mask registers.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_2intersect_epi64&expand=0)
+#[inline]
+#[target_feature(enable = "avx512vp2intersect,avx512f")]
+#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
+#[cfg_attr(test, assert_instr(vp2intersectq))]
+pub unsafe fn _mm512_2intersect_epi64(
+    a: __m512i,
+    b: __m512i,
+    k1: *mut __mmask8,
+    k2: *mut __mmask8,
+) {
+    (*k1, *k2) = vp2intersectq_512(a.as_i64x8(), b.as_i64x8());
+}
+
+#[allow(improper_ctypes)]
+unsafe extern "C" {
+    #[link_name = "llvm.x86.avx512.vp2intersect.d.128"]
+    fn vp2intersectd_128(a: i32x4, b: i32x4) -> (u8, u8);
+    #[link_name = "llvm.x86.avx512.vp2intersect.q.128"]
+    fn vp2intersectq_128(a: i64x2, b: i64x2) -> (u8, u8);
+
+    #[link_name = "llvm.x86.avx512.vp2intersect.d.256"]
+    fn vp2intersectd_256(a: i32x8, b: i32x8) -> (u8, u8);
+    #[link_name = "llvm.x86.avx512.vp2intersect.q.256"]
+    fn vp2intersectq_256(a: i64x4, b: i64x4) -> (u8, u8);
+
+    #[link_name = "llvm.x86.avx512.vp2intersect.d.512"]
+    fn vp2intersectd_512(a: i32x16, b: i32x16) -> (u16, u16);
+    #[link_name = "llvm.x86.avx512.vp2intersect.q.512"]
+    fn vp2intersectq_512(a: i64x8, b: i64x8) -> (u8, u8);
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::x86::*;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "avx512vp2intersect,avx512vl")]
+    unsafe fn test_mm_2intersect_epi32() {
+        let mut k1 = 0;
+        let mut k2 = 0;
+
+        let a = _mm_set_epi32(1, 2, 3, 4);
+        let b = _mm_set_epi32(3, 4, 5, 6);
+        _mm_2intersect_epi32(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b0011);
+        assert_eq!(k2, 0b1100);
+
+        let a = _mm_set_epi32(1, 2, 3, 4);
+        let b = _mm_set_epi32(2, 3, 4, 5);
+        _mm_2intersect_epi32(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b0111);
+        assert_eq!(k2, 0b1110);
+    }
+
+    #[simd_test(enable = "avx512vp2intersect,avx512vl")]
+    unsafe fn test_mm_2intersect_epi64() {
+        let mut k1 = 0;
+        let mut k2 = 0;
+
+        let a = _mm_set_epi64x(1, 2);
+        let b = _mm_set_epi64x(2, 3);
+        _mm_2intersect_epi64(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b01);
+        assert_eq!(k2, 0b10);
+
+        let a = _mm_set_epi64x(1, 2);
+        let b = _mm_set_epi64x(2, 2);
+        _mm_2intersect_epi64(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b01);
+        assert_eq!(k2, 0b11);
+    }
+
+    #[simd_test(enable = "avx512vp2intersect,avx512vl")]
+    unsafe fn test_mm256_2intersect_epi32() {
+        let mut k1 = 0;
+        let mut k2 = 0;
+
+        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
+        _mm256_2intersect_epi32(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b00001111);
+        assert_eq!(k2, 0b11110000);
+
+        let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = _mm256_set_epi32(2, 3, 4, 5, 6, 7, 8, 9);
+        _mm256_2intersect_epi32(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b01111111);
+        assert_eq!(k2, 0b11111110);
+    }
+
+    #[simd_test(enable = "avx512vp2intersect,avx512vl")]
+    unsafe fn test_mm256_2intersect_epi64() {
+        let mut k1 = 0;
+        let mut k2 = 0;
+
+        let a = _mm256_set_epi64x(1, 2, 3, 4);
+        let b = _mm256_set_epi64x(3, 4, 5, 6);
+        _mm256_2intersect_epi64(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b0011);
+        assert_eq!(k2, 0b1100);
+
+        let a = _mm256_set_epi64x(1, 2, 3, 4);
+        let b = _mm256_set_epi64x(2, 3, 4, 5);
+        _mm256_2intersect_epi64(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b0111);
+        assert_eq!(k2, 0b1110);
+    }
+
+    #[simd_test(enable = "avx512vp2intersect,avx512f")]
+    unsafe fn test_mm512_2intersect_epi32() {
+        let mut k1 = 0;
+        let mut k2 = 0;
+
+        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b = _mm512_set_epi32(
+            9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+        );
+        _mm512_2intersect_epi32(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b0000000011111111);
+        assert_eq!(k2, 0b1111111100000000);
+
+        let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b = _mm512_set_epi32(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17);
+        _mm512_2intersect_epi32(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b0111111111111111);
+        assert_eq!(k2, 0b1111111111111110);
+    }
+
+    #[simd_test(enable = "avx512vp2intersect,avx512f")]
+    unsafe fn test_mm512_2intersect_epi64() {
+        let mut k1 = 0;
+        let mut k2 = 0;
+
+        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = _mm512_set_epi64(5, 6, 7, 8, 9, 10, 11, 12);
+        _mm512_2intersect_epi64(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b00001111);
+        assert_eq!(k2, 0b11110000);
+
+        let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = _mm512_set_epi64(2, 3, 4, 5, 6, 7, 8, 9);
+        _mm512_2intersect_epi64(a, b, &mut k1, &mut k2);
+        assert_eq!(k1, 0b01111111);
+        assert_eq!(k2, 0b11111110);
+    }
+}
diff --git a/crates/core_arch/src/x86/avx512vpopcntdq.rs b/crates/core_arch/src/x86/avx512vpopcntdq.rs
index e47a14b24d..476640fab5 100644
--- a/crates/core_arch/src/x86/avx512vpopcntdq.rs
+++ b/crates/core_arch/src/x86/avx512vpopcntdq.rs
@@ -5,7 +5,7 @@
 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 
 use crate::core_arch::simd::*;
 use crate::core_arch::x86::__m128i;
@@ -26,7 +26,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "avx512vpopcntdq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntd))]
-pub fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
     unsafe { transmute(simd_ctpop(a.as_i32x16())) }
 }
 
@@ -40,7 +41,8 @@ pub fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vpopcntdq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntd))]
-pub fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -60,7 +62,8 @@ pub fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vpopcntdq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntd))]
-pub fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -77,7 +80,8 @@ pub fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m51
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntd))]
-pub fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
     unsafe { transmute(simd_ctpop(a.as_i32x8())) }
 }
 
@@ -91,7 +95,8 @@ pub fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntd))]
-pub fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -111,7 +116,8 @@ pub fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntd))]
-pub fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -128,7 +134,8 @@ pub fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntd))]
-pub fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
     unsafe { transmute(simd_ctpop(a.as_i32x4())) }
 }
 
@@ -142,7 +149,8 @@ pub fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntd))]
-pub fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -162,7 +170,8 @@ pub fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntd))]
-pub fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -179,7 +188,8 @@ pub fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vpopcntdq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntq))]
-pub fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
     unsafe { transmute(simd_ctpop(a.as_i64x8())) }
 }
 
@@ -193,7 +203,8 @@ pub fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vpopcntdq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntq))]
-pub fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -213,7 +224,8 @@ pub fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
 #[target_feature(enable = "avx512vpopcntdq")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntq))]
-pub fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -230,7 +242,8 @@ pub fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntq))]
-pub fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
     unsafe { transmute(simd_ctpop(a.as_i64x4())) }
 }
 
@@ -244,7 +257,8 @@ pub fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntq))]
-pub fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -264,7 +278,8 @@ pub fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntq))]
-pub fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -281,7 +296,8 @@ pub fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntq))]
-pub fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
     unsafe { transmute(simd_ctpop(a.as_i64x2())) }
 }
 
@@ -295,7 +311,8 @@ pub fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntq))]
-pub fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -315,7 +332,8 @@ pub fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
 #[target_feature(enable = "avx512vpopcntdq,avx512vl")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vpopcntq))]
-pub fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
     unsafe {
         transmute(simd_select_bitmask(
             k,
@@ -327,12 +345,13 @@ pub fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f")]
-    unsafe fn test_mm512_popcnt_epi32() {
+    const fn test_mm512_popcnt_epi32() {
         let test_data = _mm512_set_epi32(
             0,
             1,
@@ -358,7 +377,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f")]
-    unsafe fn test_mm512_mask_popcnt_epi32() {
+    const fn test_mm512_mask_popcnt_epi32() {
         let test_data = _mm512_set_epi32(
             0,
             1,
@@ -401,7 +420,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f")]
-    unsafe fn test_mm512_maskz_popcnt_epi32() {
+    const fn test_mm512_maskz_popcnt_epi32() {
         let test_data = _mm512_set_epi32(
             0,
             1,
@@ -427,7 +446,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
-    unsafe fn test_mm256_popcnt_epi32() {
+    const fn test_mm256_popcnt_epi32() {
         let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
         let actual_result = _mm256_popcnt_epi32(test_data);
         let reference_result = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28);
@@ -435,7 +454,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_popcnt_epi32() {
+    const fn test_mm256_mask_popcnt_epi32() {
         let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
         let mask = 0xF0;
         let actual_result = _mm256_mask_popcnt_epi32(test_data, mask, test_data);
@@ -444,7 +463,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_popcnt_epi32() {
+    const fn test_mm256_maskz_popcnt_epi32() {
         let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
         let mask = 0xF0;
         let actual_result = _mm256_maskz_popcnt_epi32(mask, test_data);
@@ -453,7 +472,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
-    unsafe fn test_mm_popcnt_epi32() {
+    const fn test_mm_popcnt_epi32() {
         let test_data = _mm_set_epi32(0, 1, -1, -100);
         let actual_result = _mm_popcnt_epi32(test_data);
         let reference_result = _mm_set_epi32(0, 1, 32, 28);
@@ -461,7 +480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
-    unsafe fn test_mm_mask_popcnt_epi32() {
+    const fn test_mm_mask_popcnt_epi32() {
         let test_data = _mm_set_epi32(0, 1, -1, -100);
         let mask = 0xE;
         let actual_result = _mm_mask_popcnt_epi32(test_data, mask, test_data);
@@ -470,7 +489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_popcnt_epi32() {
+    const fn test_mm_maskz_popcnt_epi32() {
         let test_data = _mm_set_epi32(0, 1, -1, -100);
         let mask = 0xE;
         let actual_result = _mm_maskz_popcnt_epi32(mask, test_data);
@@ -479,7 +498,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f")]
-    unsafe fn test_mm512_popcnt_epi64() {
+    const fn test_mm512_popcnt_epi64() {
         let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
         let actual_result = _mm512_popcnt_epi64(test_data);
         let reference_result = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60);
@@ -487,7 +506,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f")]
-    unsafe fn test_mm512_mask_popcnt_epi64() {
+    const fn test_mm512_mask_popcnt_epi64() {
         let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
         let mask = 0xF0;
         let actual_result = _mm512_mask_popcnt_epi64(test_data, mask, test_data);
@@ -497,7 +516,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512f")]
-    unsafe fn test_mm512_maskz_popcnt_epi64() {
+    const fn test_mm512_maskz_popcnt_epi64() {
         let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
         let mask = 0xF0;
         let actual_result = _mm512_maskz_popcnt_epi64(mask, test_data);
@@ -506,7 +525,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
-    unsafe fn test_mm256_popcnt_epi64() {
+    const fn test_mm256_popcnt_epi64() {
         let test_data = _mm256_set_epi64x(0, 1, -1, -100);
         let actual_result = _mm256_popcnt_epi64(test_data);
         let reference_result = _mm256_set_epi64x(0, 1, 64, 60);
@@ -514,7 +533,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
-    unsafe fn test_mm256_mask_popcnt_epi64() {
+    const fn test_mm256_mask_popcnt_epi64() {
         let test_data = _mm256_set_epi64x(0, 1, -1, -100);
         let mask = 0xE;
         let actual_result = _mm256_mask_popcnt_epi64(test_data, mask, test_data);
@@ -523,7 +542,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
-    unsafe fn test_mm256_maskz_popcnt_epi64() {
+    const fn test_mm256_maskz_popcnt_epi64() {
         let test_data = _mm256_set_epi64x(0, 1, -1, -100);
         let mask = 0xE;
         let actual_result = _mm256_maskz_popcnt_epi64(mask, test_data);
@@ -532,7 +551,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
-    unsafe fn test_mm_popcnt_epi64() {
+    const fn test_mm_popcnt_epi64() {
         let test_data = _mm_set_epi64x(0, 1);
         let actual_result = _mm_popcnt_epi64(test_data);
         let reference_result = _mm_set_epi64x(0, 1);
@@ -544,7 +563,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
-    unsafe fn test_mm_mask_popcnt_epi64() {
+    const fn test_mm_mask_popcnt_epi64() {
         let test_data = _mm_set_epi64x(0, -100);
         let mask = 0x2;
         let actual_result = _mm_mask_popcnt_epi64(test_data, mask, test_data);
@@ -558,7 +577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
-    unsafe fn test_mm_maskz_popcnt_epi64() {
+    const fn test_mm_maskz_popcnt_epi64() {
         let test_data = _mm_set_epi64x(0, 1);
         let mask = 0x2;
         let actual_result = _mm_maskz_popcnt_epi64(mask, test_data);
diff --git a/crates/core_arch/src/x86/avxneconvert.rs b/crates/core_arch/src/x86/avxneconvert.rs
index b92ec823ec..861213eb42 100644
--- a/crates/core_arch/src/x86/avxneconvert.rs
+++ b/crates/core_arch/src/x86/avxneconvert.rs
@@ -1,4 +1,3 @@
-use crate::arch::asm;
 use crate::core_arch::x86::*;
 
 #[cfg(test)]
@@ -87,7 +86,7 @@ pub unsafe fn _mm256_cvtneebf16_ps(a: *const __m256bh) -> __m256 {
 #[inline]
 #[target_feature(enable = "avxneconvert")]
 #[cfg_attr(test, assert_instr(vcvtneeph2ps))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub unsafe fn _mm_cvtneeph_ps(a: *const __m128h) -> __m128 {
     transmute(cvtneeph2ps_128(a))
 }
@@ -99,7 +98,7 @@ pub unsafe fn _mm_cvtneeph_ps(a: *const __m128h) -> __m128 {
 #[inline]
 #[target_feature(enable = "avxneconvert")]
 #[cfg_attr(test, assert_instr(vcvtneeph2ps))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub unsafe fn _mm256_cvtneeph_ps(a: *const __m256h) -> __m256 {
     transmute(cvtneeph2ps_256(a))
 }
@@ -135,7 +134,7 @@ pub unsafe fn _mm256_cvtneobf16_ps(a: *const __m256bh) -> __m256 {
 #[inline]
 #[target_feature(enable = "avxneconvert")]
 #[cfg_attr(test, assert_instr(vcvtneoph2ps))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub unsafe fn _mm_cvtneoph_ps(a: *const __m128h) -> __m128 {
     transmute(cvtneoph2ps_128(a))
 }
@@ -147,7 +146,7 @@ pub unsafe fn _mm_cvtneoph_ps(a: *const __m128h) -> __m128 {
 #[inline]
 #[target_feature(enable = "avxneconvert")]
 #[cfg_attr(test, assert_instr(vcvtneoph2ps))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 {
     transmute(cvtneoph2ps_256(a))
 }
@@ -161,16 +160,7 @@ pub unsafe fn _mm256_cvtneoph_ps(a: *const __m256h) -> __m256 {
 #[cfg_attr(test, assert_instr(vcvtneps2bf16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
-    unsafe {
-        let mut dst: __m128bh;
-        asm!(
-            "{{vex}}vcvtneps2bf16 {dst},{src}",
-            dst = lateout(xmm_reg) dst,
-            src = in(xmm_reg) a,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    unsafe { vcvtneps2bf16_128(a) }
 }
 
 /// Convert packed single precision (32-bit) floating-point elements in a to packed BF16 (16-bit) floating-point
@@ -182,16 +172,7 @@ pub fn _mm_cvtneps_avx_pbh(a: __m128) -> __m128bh {
 #[cfg_attr(test, assert_instr(vcvtneps2bf16))]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 pub fn _mm256_cvtneps_avx_pbh(a: __m256) -> __m128bh {
-    unsafe {
-        let mut dst: __m128bh;
-        asm!(
-            "{{vex}}vcvtneps2bf16 {dst},{src}",
-            dst = lateout(xmm_reg) dst,
-            src = in(ymm_reg) a,
-            options(pure, nomem, nostack, preserves_flags)
-        );
-        dst
-    }
+    unsafe { vcvtneps2bf16_256(a) }
 }
 
 #[allow(improper_ctypes)]
@@ -222,6 +203,11 @@ unsafe extern "C" {
     fn cvtneoph2ps_128(a: *const __m128h) -> __m128;
     #[link_name = "llvm.x86.vcvtneoph2ps256"]
     fn cvtneoph2ps_256(a: *const __m256h) -> __m256;
+
+    #[link_name = "llvm.x86.vcvtneps2bf16128"]
+    fn vcvtneps2bf16_128(a: __m128) -> __m128bh;
+    #[link_name = "llvm.x86.vcvtneps2bf16256"]
+    fn vcvtneps2bf16_256(a: __m256) -> __m128bh;
 }
 
 #[cfg(test)]
@@ -242,127 +228,127 @@ mod tests {
     const BF16_EIGHT: u16 = 0b0_10000010_0000000;
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm_bcstnebf16_ps() {
+    fn test_mm_bcstnebf16_ps() {
         let a = bf16::from_bits(BF16_ONE);
-        let r = _mm_bcstnebf16_ps(addr_of!(a));
+        let r = unsafe { _mm_bcstnebf16_ps(addr_of!(a)) };
         let e = _mm_set_ps(1., 1., 1., 1.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm256_bcstnebf16_ps() {
+    fn test_mm256_bcstnebf16_ps() {
         let a = bf16::from_bits(BF16_ONE);
-        let r = _mm256_bcstnebf16_ps(addr_of!(a));
+        let r = unsafe { _mm256_bcstnebf16_ps(addr_of!(a)) };
         let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm_bcstnesh_ps() {
+    fn test_mm_bcstnesh_ps() {
         let a = 1.0_f16;
-        let r = _mm_bcstnesh_ps(addr_of!(a));
+        let r = unsafe { _mm_bcstnesh_ps(addr_of!(a)) };
         let e = _mm_set_ps(1., 1., 1., 1.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm256_bcstnesh_ps() {
+    fn test_mm256_bcstnesh_ps() {
         let a = 1.0_f16;
-        let r = _mm256_bcstnesh_ps(addr_of!(a));
+        let r = unsafe { _mm256_bcstnesh_ps(addr_of!(a)) };
         let e = _mm256_set_ps(1., 1., 1., 1., 1., 1., 1., 1.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm_cvtneebf16_ps() {
+    fn test_mm_cvtneebf16_ps() {
         let a = __m128bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
         ]);
-        let r = _mm_cvtneebf16_ps(addr_of!(a));
+        let r = unsafe { _mm_cvtneebf16_ps(addr_of!(a)) };
         let e = _mm_setr_ps(1., 3., 5., 7.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm256_cvtneebf16_ps() {
+    fn test_mm256_cvtneebf16_ps() {
         let a = __m256bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
         ]);
-        let r = _mm256_cvtneebf16_ps(addr_of!(a));
+        let r = unsafe { _mm256_cvtneebf16_ps(addr_of!(a)) };
         let e = _mm256_setr_ps(1., 3., 5., 7., 1., 3., 5., 7.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm_cvtneeph_ps() {
+    fn test_mm_cvtneeph_ps() {
         let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
-        let r = _mm_cvtneeph_ps(addr_of!(a));
+        let r = unsafe { _mm_cvtneeph_ps(addr_of!(a)) };
         let e = _mm_setr_ps(1., 3., 5., 7.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm256_cvtneeph_ps() {
+    fn test_mm256_cvtneeph_ps() {
         let a = __m256h([
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         ]);
-        let r = _mm256_cvtneeph_ps(addr_of!(a));
+        let r = unsafe { _mm256_cvtneeph_ps(addr_of!(a)) };
         let e = _mm256_setr_ps(1., 3., 5., 7., 9., 11., 13., 15.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm_cvtneobf16_ps() {
+    fn test_mm_cvtneobf16_ps() {
         let a = __m128bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
         ]);
-        let r = _mm_cvtneobf16_ps(addr_of!(a));
+        let r = unsafe { _mm_cvtneobf16_ps(addr_of!(a)) };
         let e = _mm_setr_ps(2., 4., 6., 8.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm256_cvtneobf16_ps() {
+    fn test_mm256_cvtneobf16_ps() {
         let a = __m256bh([
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
         ]);
-        let r = _mm256_cvtneobf16_ps(addr_of!(a));
+        let r = unsafe { _mm256_cvtneobf16_ps(addr_of!(a)) };
         let e = _mm256_setr_ps(2., 4., 6., 8., 2., 4., 6., 8.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm_cvtneoph_ps() {
+    fn test_mm_cvtneoph_ps() {
         let a = __m128h([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
-        let r = _mm_cvtneoph_ps(addr_of!(a));
+        let r = unsafe { _mm_cvtneoph_ps(addr_of!(a)) };
         let e = _mm_setr_ps(2., 4., 6., 8.);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm256_cvtneoph_ps() {
+    fn test_mm256_cvtneoph_ps() {
         let a = __m256h([
             1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
         ]);
-        let r = _mm256_cvtneoph_ps(addr_of!(a));
+        let r = unsafe { _mm256_cvtneoph_ps(addr_of!(a)) };
         let e = _mm256_setr_ps(2., 4., 6., 8., 10., 12., 14., 16.);
         assert_eq_m256(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm_cvtneps_avx_pbh() {
+    fn test_mm_cvtneps_avx_pbh() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
-        let r: u16x4 = transmute_copy(&_mm_cvtneps_avx_pbh(a));
+        let r: u16x4 = unsafe { transmute_copy(&_mm_cvtneps_avx_pbh(a)) };
         let e = u16x4::new(BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR);
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "avxneconvert")]
-    unsafe fn test_mm256_cvtneps_avx_pbh() {
+    fn test_mm256_cvtneps_avx_pbh() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
-        let r: u16x8 = transmute(_mm256_cvtneps_avx_pbh(a));
+        let r: u16x8 = _mm256_cvtneps_avx_pbh(a).as_u16x8();
         let e = u16x8::new(
             BF16_ONE, BF16_TWO, BF16_THREE, BF16_FOUR, BF16_FIVE, BF16_SIX, BF16_SEVEN, BF16_EIGHT,
         );
diff --git a/crates/core_arch/src/x86/bmi1.rs b/crates/core_arch/src/x86/bmi1.rs
index eb7242944a..432051abd1 100644
--- a/crates/core_arch/src/x86/bmi1.rs
+++ b/crates/core_arch/src/x86/bmi1.rs
@@ -6,7 +6,7 @@
 //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
 //! available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 //! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
 
 #[cfg(test)]
@@ -46,7 +46,8 @@ pub fn _bextr2_u32(a: u32, control: u32) -> u32 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(andn))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _andn_u32(a: u32, b: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _andn_u32(a: u32, b: u32) -> u32 {
     !a & b
 }
 
@@ -57,7 +58,8 @@ pub fn _andn_u32(a: u32, b: u32) -> u32 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(blsi))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsi_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsi_u32(x: u32) -> u32 {
     x & x.wrapping_neg()
 }
 
@@ -68,7 +70,8 @@ pub fn _blsi_u32(x: u32) -> u32 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(blsmsk))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsmsk_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsmsk_u32(x: u32) -> u32 {
     x ^ (x.wrapping_sub(1_u32))
 }
 
@@ -81,7 +84,8 @@ pub fn _blsmsk_u32(x: u32) -> u32 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(blsr))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsr_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsr_u32(x: u32) -> u32 {
     x & (x.wrapping_sub(1))
 }
 
@@ -94,7 +98,8 @@ pub fn _blsr_u32(x: u32) -> u32 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(tzcnt))]
 #[stable(feature = "simd_x86_updates", since = "1.82.0")]
-pub fn _tzcnt_u16(x: u16) -> u16 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _tzcnt_u16(x: u16) -> u16 {
     x.trailing_zeros() as u16
 }
 
@@ -107,7 +112,8 @@ pub fn _tzcnt_u16(x: u16) -> u16 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(tzcnt))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _tzcnt_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _tzcnt_u32(x: u32) -> u32 {
     x.trailing_zeros()
 }
 
@@ -120,7 +126,8 @@ pub fn _tzcnt_u32(x: u32) -> u32 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(tzcnt))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_tzcnt_32(x: u32) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_tzcnt_32(x: u32) -> i32 {
     x.trailing_zeros() as i32
 }
 
@@ -131,18 +138,19 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_bextr_u32() {
+    fn test_bextr_u32() {
         let r = _bextr_u32(0b0101_0000u32, 4, 4);
         assert_eq!(r, 0b0000_0101u32);
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_andn_u32() {
+    const fn test_andn_u32() {
         assert_eq!(_andn_u32(0, 0), 0);
         assert_eq!(_andn_u32(0, 1), 1);
         assert_eq!(_andn_u32(1, 0), 0);
@@ -165,32 +173,32 @@ mod tests {
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_blsi_u32() {
+    const fn test_blsi_u32() {
         assert_eq!(_blsi_u32(0b1101_0000u32), 0b0001_0000u32);
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_blsmsk_u32() {
+    const fn test_blsmsk_u32() {
         let r = _blsmsk_u32(0b0011_0000u32);
         assert_eq!(r, 0b0001_1111u32);
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_blsr_u32() {
+    const fn test_blsr_u32() {
         // TODO: test the behavior when the input is `0`.
         let r = _blsr_u32(0b0011_0000u32);
         assert_eq!(r, 0b0010_0000u32);
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_tzcnt_u16() {
+    const fn test_tzcnt_u16() {
         assert_eq!(_tzcnt_u16(0b0000_0001u16), 0u16);
         assert_eq!(_tzcnt_u16(0b0000_0000u16), 16u16);
         assert_eq!(_tzcnt_u16(0b1001_0000u16), 4u16);
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_tzcnt_u32() {
+    const fn test_tzcnt_u32() {
         assert_eq!(_tzcnt_u32(0b0000_0001u32), 0u32);
         assert_eq!(_tzcnt_u32(0b0000_0000u32), 32u32);
         assert_eq!(_tzcnt_u32(0b1001_0000u32), 4u32);
diff --git a/crates/core_arch/src/x86/bmi2.rs b/crates/core_arch/src/x86/bmi2.rs
index 83cf650923..5320640d96 100644
--- a/crates/core_arch/src/x86/bmi2.rs
+++ b/crates/core_arch/src/x86/bmi2.rs
@@ -6,7 +6,7 @@
 //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
 //! available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 //! [wikipedia_bmi]:
 //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
 
@@ -25,7 +25,8 @@ use stdarch_test::assert_instr;
 #[cfg_attr(all(test, target_arch = "x86"), assert_instr(mul))]
 #[target_feature(enable = "bmi2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 {
     let result: u64 = (a as u64) * (b as u64);
     *hi = (result >> 32) as u32;
     result as u32
@@ -77,12 +78,13 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "bmi2")]
-    unsafe fn test_pext_u32() {
+    fn test_pext_u32() {
         let n = 0b1011_1110_1001_0011u32;
 
         let m0 = 0b0110_0011_1000_0101u32;
@@ -96,7 +98,7 @@ mod tests {
     }
 
     #[simd_test(enable = "bmi2")]
-    unsafe fn test_pdep_u32() {
+    fn test_pdep_u32() {
         let n = 0b1011_1110_1001_0011u32;
 
         let m0 = 0b0110_0011_1000_0101u32;
@@ -110,14 +112,14 @@ mod tests {
     }
 
     #[simd_test(enable = "bmi2")]
-    unsafe fn test_bzhi_u32() {
+    fn test_bzhi_u32() {
         let n = 0b1111_0010u32;
         let s = 0b0001_0010u32;
         assert_eq!(_bzhi_u32(n, 5), s);
     }
 
     #[simd_test(enable = "bmi2")]
-    unsafe fn test_mulx_u32() {
+    const fn test_mulx_u32() {
         let a: u32 = 4_294_967_200;
         let b: u32 = 2;
         let mut hi = 0;
diff --git a/crates/core_arch/src/x86/bswap.rs b/crates/core_arch/src/x86/bswap.rs
index ea07a7d622..f8e177f7c2 100644
--- a/crates/core_arch/src/x86/bswap.rs
+++ b/crates/core_arch/src/x86/bswap.rs
@@ -10,16 +10,20 @@ use stdarch_test::assert_instr;
 #[inline]
 #[cfg_attr(test, assert_instr(bswap))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _bswap(x: i32) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _bswap(x: i32) -> i32 {
     x.swap_bytes()
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
+    use stdarch_test::simd_test;
+
     use super::*;
 
-    #[test]
-    fn test_bswap() {
+    #[simd_test]
+    const fn test_bswap() {
         assert_eq!(_bswap(0x0EADBE0F), 0x0FBEAD0E);
         assert_eq!(_bswap(0x00000000), 0x00000000);
     }
diff --git a/crates/core_arch/src/x86/cpuid.rs b/crates/core_arch/src/x86/cpuid.rs
index 0634f10a99..3f13ea8369 100644
--- a/crates/core_arch/src/x86/cpuid.rs
+++ b/crates/core_arch/src/x86/cpuid.rs
@@ -28,12 +28,21 @@ pub struct CpuidResult {
 /// Returns the result of the `cpuid` instruction for a given `leaf` (`EAX`)
 /// and `sub_leaf` (`ECX`).
 ///
-/// The highest-supported leaf value is returned by the first tuple argument of
-/// [`__get_cpuid_max(0)`](fn.__get_cpuid_max.html). For leaves containing
-/// sub-leaves, the second tuple argument returns the highest-supported
-/// sub-leaf value.
+/// There are two types of information leaves - basic leaves (with `leaf < 0x8000000`)
+/// and extended leaves (with `leaf >= 0x80000000`). The highest supported basic and
+/// extended leaves can be obtained by calling CPUID with `0` and `0x80000000`,
+/// respectively, and reading the value in the `EAX` register. If the leaf supports
+/// more than one sub-leaf, then the procedure of obtaining the highest supported
+/// sub-leaf, as well as the behavior if a invalid sub-leaf value is passed, depends
+/// on the specific leaf.
 ///
-/// The [CPUID Wikipedia page][wiki_cpuid] contains how to query which
+/// If the `leaf` value is higher than the maximum supported basic or extended leaf
+/// for the processor, this returns the information for the highest supported basic
+/// information leaf (with the passed `sub_leaf` value). If the `leaf` value is less
+/// than or equal to the highest basic or extended leaf value, but the leaf is not
+/// supported on the processor, all zeros are returned.
+///
+/// The [CPUID Wikipedia page][wiki_cpuid] contains information on how to query which
 /// information using the `EAX` and `ECX` registers, and the interpretation of
 /// the results returned in `EAX`, `EBX`, `ECX`, and `EDX`.
 ///
@@ -45,11 +54,15 @@ pub struct CpuidResult {
 ///
 /// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
 /// [intel64_ref]: https://cdrdv2-public.intel.com/671110/325383-sdm-vol-2abcd.pdf
-/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+/// [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
 #[inline]
 #[cfg_attr(test, assert_instr(cpuid))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
+pub fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
+    if cfg!(target_env = "sgx") {
+        panic!("`__cpuid` cannot be used in SGX");
+    }
+
     let eax;
     let ebx;
     let ecx;
@@ -58,7 +71,7 @@ pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
     // LLVM sometimes reserves `ebx` for its internal use, we so we need to use
     // a scratch register for it instead.
     #[cfg(target_arch = "x86")]
-    {
+    unsafe {
         asm!(
             "mov {0}, ebx",
             "cpuid",
@@ -71,7 +84,7 @@ pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
         );
     }
     #[cfg(target_arch = "x86_64")]
-    {
+    unsafe {
         asm!(
             "mov {0:r}, rbx",
             "cpuid",
@@ -86,27 +99,26 @@ pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
     CpuidResult { eax, ebx, ecx, edx }
 }
 
+/// Calls CPUID with the provided `leaf` value, with `sub_leaf` set to 0.
 /// See [`__cpuid_count`](fn.__cpuid_count.html).
 #[inline]
 #[cfg_attr(test, assert_instr(cpuid))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn __cpuid(leaf: u32) -> CpuidResult {
+pub fn __cpuid(leaf: u32) -> CpuidResult {
     __cpuid_count(leaf, 0)
 }
 
-/// Returns the highest-supported `leaf` (`EAX`) and sub-leaf (`ECX`) `cpuid`
-/// values.
+/// Returns the EAX and EBX register after calling CPUID with the provided `leaf`,
+/// with `sub_leaf` set to 0.
 ///
-/// If `cpuid` is supported, and `leaf` is zero, then the first tuple argument
-/// contains the highest `leaf` value that `cpuid` supports. For `leaf`s
-/// containing sub-leafs, the second tuple argument contains the
-/// highest-supported sub-leaf value.
+/// If `leaf` if 0 or `0x80000000`, the first tuple argument contains the maximum
+/// supported basic or extended leaf, respectively.
 ///
 /// See also [`__cpuid`](fn.__cpuid.html) and
 /// [`__cpuid_count`](fn.__cpuid_count.html).
 #[inline]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {
+pub fn __get_cpuid_max(leaf: u32) -> (u32, u32) {
     let CpuidResult { eax, ebx, .. } = __cpuid(leaf);
     (eax, ebx)
 }
diff --git a/crates/core_arch/src/x86/f16c.rs b/crates/core_arch/src/x86/f16c.rs
index 519cc38294..a0bb992bb9 100644
--- a/crates/core_arch/src/x86/f16c.rs
+++ b/crates/core_arch/src/x86/f16c.rs
@@ -25,7 +25,8 @@ unsafe extern "unadjusted" {
 #[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtph2ps"))]
 #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
-pub fn _mm_cvtph_ps(a: __m128i) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtph_ps(a: __m128i) -> __m128 {
     unsafe {
         let a: f16x8 = transmute(a);
         let a: f16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -41,7 +42,8 @@ pub fn _mm_cvtph_ps(a: __m128i) -> __m128 {
 #[target_feature(enable = "f16c")]
 #[cfg_attr(test, assert_instr("vcvtph2ps"))]
 #[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
-pub fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
     unsafe {
         let a: f16x8 = transmute(a);
         simd_cast(a)
@@ -103,7 +105,8 @@ pub fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
 
 #[cfg(test)]
 mod tests {
-    use crate::{core_arch::x86::*, mem::transmute};
+    use crate::core_arch::assert_eq_const as assert_eq;
+    use crate::core_arch::x86::*;
     use stdarch_test::simd_test;
 
     const F16_ONE: i16 = 0x3c00;
@@ -116,7 +119,7 @@ mod tests {
     const F16_EIGHT: i16 = 0x4800;
 
     #[simd_test(enable = "f16c")]
-    unsafe fn test_mm_cvtph_ps() {
+    const fn test_mm_cvtph_ps() {
         let a = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
         let r = _mm_cvtph_ps(a);
         let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
@@ -124,7 +127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "f16c")]
-    unsafe fn test_mm256_cvtph_ps() {
+    const fn test_mm256_cvtph_ps() {
         let a = _mm_set_epi16(
             F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
         );
@@ -134,7 +137,7 @@ mod tests {
     }
 
     #[simd_test(enable = "f16c")]
-    unsafe fn test_mm_cvtps_ph() {
+    fn test_mm_cvtps_ph() {
         let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
@@ -142,7 +145,7 @@ mod tests {
     }
 
     #[simd_test(enable = "f16c")]
-    unsafe fn test_mm256_cvtps_ph() {
+    fn test_mm256_cvtps_ph() {
         let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm_set_epi16(
diff --git a/crates/core_arch/src/x86/fma.rs b/crates/core_arch/src/x86/fma.rs
index d3988422b9..b95bb331df 100644
--- a/crates/core_arch/src/x86/fma.rs
+++ b/crates/core_arch/src/x86/fma.rs
@@ -14,8 +14,8 @@
 //! Wikipedia's [FMA][wiki_fma] page provides a quick overview of the
 //! instructions available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
 //! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
 
 use crate::core_arch::x86::*;
@@ -33,7 +33,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_fma(a, b, c) }
 }
 
@@ -45,7 +46,8 @@ pub fn _mm_fmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_fma(a, b, c) }
 }
 
@@ -57,7 +59,8 @@ pub fn _mm256_fmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_fma(a, b, c) }
 }
 
@@ -69,7 +72,8 @@ pub fn _mm_fmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_fma(a, b, c) }
 }
 
@@ -83,7 +87,8 @@ pub fn _mm256_fmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         simd_insert!(
             a,
@@ -103,7 +108,8 @@ pub fn _mm_fmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         simd_insert!(
             a,
@@ -122,7 +128,8 @@ pub fn _mm_fmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -139,7 +146,8 @@ pub fn _mm_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -156,7 +164,8 @@ pub fn _mm256_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -173,7 +182,8 @@ pub fn _mm_fmaddsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmaddsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -189,7 +199,8 @@ pub fn _mm256_fmaddsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
@@ -201,7 +212,8 @@ pub fn _mm_fmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
@@ -213,7 +225,8 @@ pub fn _mm256_fmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub213ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
@@ -225,7 +238,8 @@ pub fn _mm_fmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub213ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_fma(a, b, simd_neg(c)) }
 }
 
@@ -239,7 +253,8 @@ pub fn _mm256_fmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         simd_insert!(
             a,
@@ -259,7 +274,8 @@ pub fn _mm_fmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         simd_insert!(
             a,
@@ -278,7 +294,8 @@ pub fn _mm_fmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -295,7 +312,8 @@ pub fn _mm_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -312,7 +330,8 @@ pub fn _mm256_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -329,7 +348,8 @@ pub fn _mm_fmsubadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfmsubadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe {
         let add = simd_fma(a, b, c);
         let sub = simd_fma(a, b, simd_neg(c));
@@ -345,7 +365,8 @@ pub fn _mm256_fmsubadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
@@ -357,7 +378,8 @@ pub fn _mm_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
@@ -369,7 +391,8 @@ pub fn _mm256_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
@@ -381,7 +404,8 @@ pub fn _mm_fnmadd_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_fma(simd_neg(a), b, c) }
 }
 
@@ -395,7 +419,8 @@ pub fn _mm256_fnmadd_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         simd_insert!(
             a,
@@ -415,7 +440,8 @@ pub fn _mm_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmadd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         simd_insert!(
             a,
@@ -434,7 +460,8 @@ pub fn _mm_fnmadd_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
@@ -447,7 +474,8 @@ pub fn _mm_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
     unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
@@ -460,7 +488,8 @@ pub fn _mm256_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
@@ -473,7 +502,8 @@ pub fn _mm_fnmsub_ps(a: __m128, b: __m128, c: __m128) -> __m128 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
     unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
 }
 
@@ -488,7 +518,8 @@ pub fn _mm256_fnmsub_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
     unsafe {
         simd_insert!(
             a,
@@ -509,7 +540,8 @@ pub fn _mm_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
 #[target_feature(enable = "fma")]
 #[cfg_attr(test, assert_instr(vfnmsub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
     unsafe {
         simd_insert!(
             a,
@@ -521,13 +553,14 @@ pub fn _mm_fnmsub_ss(a: __m128, b: __m128, c: __m128) -> __m128 {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
 
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmadd_pd() {
+    const fn test_mm_fmadd_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -536,7 +569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fmadd_pd() {
+    const fn test_mm256_fmadd_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 3., 7., 2.);
         let c = _mm256_setr_pd(4., 9., 1., 7.);
@@ -545,7 +578,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmadd_ps() {
+    const fn test_mm_fmadd_ps() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
@@ -554,7 +587,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fmadd_ps() {
+    const fn test_mm256_fmadd_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
         let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
         let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
@@ -563,7 +596,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmadd_sd() {
+    const fn test_mm_fmadd_sd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -572,7 +605,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmadd_ss() {
+    const fn test_mm_fmadd_ss() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
@@ -581,7 +614,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmaddsub_pd() {
+    const fn test_mm_fmaddsub_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -590,7 +623,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fmaddsub_pd() {
+    const fn test_mm256_fmaddsub_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 3., 7., 2.);
         let c = _mm256_setr_pd(4., 9., 1., 7.);
@@ -599,7 +632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmaddsub_ps() {
+    const fn test_mm_fmaddsub_ps() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
@@ -608,7 +641,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fmaddsub_ps() {
+    const fn test_mm256_fmaddsub_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
         let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
         let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
@@ -617,7 +650,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmsub_pd() {
+    const fn test_mm_fmsub_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -626,7 +659,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fmsub_pd() {
+    const fn test_mm256_fmsub_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 3., 7., 2.);
         let c = _mm256_setr_pd(4., 9., 1., 7.);
@@ -635,7 +668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmsub_ps() {
+    const fn test_mm_fmsub_ps() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
@@ -644,7 +677,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fmsub_ps() {
+    const fn test_mm256_fmsub_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
         let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
         let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
@@ -653,7 +686,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmsub_sd() {
+    const fn test_mm_fmsub_sd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -662,7 +695,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmsub_ss() {
+    const fn test_mm_fmsub_ss() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
@@ -671,7 +704,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmsubadd_pd() {
+    const fn test_mm_fmsubadd_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -680,7 +713,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fmsubadd_pd() {
+    const fn test_mm256_fmsubadd_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 3., 7., 2.);
         let c = _mm256_setr_pd(4., 9., 1., 7.);
@@ -689,7 +722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fmsubadd_ps() {
+    const fn test_mm_fmsubadd_ps() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
@@ -698,7 +731,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fmsubadd_ps() {
+    const fn test_mm256_fmsubadd_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
         let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
         let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
@@ -707,7 +740,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fnmadd_pd() {
+    const fn test_mm_fnmadd_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -716,7 +749,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fnmadd_pd() {
+    const fn test_mm256_fnmadd_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 3., 7., 2.);
         let c = _mm256_setr_pd(4., 9., 1., 7.);
@@ -725,7 +758,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fnmadd_ps() {
+    const fn test_mm_fnmadd_ps() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
@@ -734,7 +767,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fnmadd_ps() {
+    const fn test_mm256_fnmadd_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
         let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
         let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
@@ -743,7 +776,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fnmadd_sd() {
+    const fn test_mm_fnmadd_sd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -752,7 +785,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fnmadd_ss() {
+    const fn test_mm_fnmadd_ss() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
@@ -761,7 +794,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fnmsub_pd() {
+    const fn test_mm_fnmsub_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -770,7 +803,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fnmsub_pd() {
+    const fn test_mm256_fnmsub_pd() {
         let a = _mm256_setr_pd(1., 2., 3., 4.);
         let b = _mm256_setr_pd(5., 3., 7., 2.);
         let c = _mm256_setr_pd(4., 9., 1., 7.);
@@ -779,7 +812,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fnmsub_ps() {
+    const fn test_mm_fnmsub_ps() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
@@ -788,7 +821,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm256_fnmsub_ps() {
+    const fn test_mm256_fnmsub_ps() {
         let a = _mm256_setr_ps(1., 2., 3., 4., 0., 10., -1., -2.);
         let b = _mm256_setr_ps(5., 3., 7., 2., 4., -6., 0., 14.);
         let c = _mm256_setr_ps(4., 9., 1., 7., -5., 11., -2., -3.);
@@ -797,7 +830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fnmsub_sd() {
+    const fn test_mm_fnmsub_sd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(5., 3.);
         let c = _mm_setr_pd(4., 9.);
@@ -806,7 +839,7 @@ mod tests {
     }
 
     #[simd_test(enable = "fma")]
-    unsafe fn test_mm_fnmsub_ss() {
+    const fn test_mm_fnmsub_ss() {
         let a = _mm_setr_ps(1., 2., 3., 4.);
         let b = _mm_setr_ps(5., 3., 7., 2.);
         let c = _mm_setr_ps(4., 9., 1., 7.);
diff --git a/crates/core_arch/src/x86/fxsr.rs b/crates/core_arch/src/x86/fxsr.rs
index 71fd52ca14..08619efe7c 100644
--- a/crates/core_arch/src/x86/fxsr.rs
+++ b/crates/core_arch/src/x86/fxsr.rs
@@ -77,12 +77,14 @@ mod tests {
 
     #[simd_test(enable = "fxsr")]
     #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
-    unsafe fn test_fxsave() {
+    fn test_fxsave() {
         let mut a = FxsaveArea::new();
         let mut b = FxsaveArea::new();
 
-        fxsr::_fxsave(a.ptr());
-        fxsr::_fxrstor(a.ptr());
-        fxsr::_fxsave(b.ptr());
+        unsafe {
+            fxsr::_fxsave(a.ptr());
+            fxsr::_fxrstor(a.ptr());
+            fxsr::_fxsave(b.ptr());
+        }
     }
 }
diff --git a/crates/core_arch/src/x86/gfni.rs b/crates/core_arch/src/x86/gfni.rs
index 8c4d097fee..e9ee27a7b8 100644
--- a/crates/core_arch/src/x86/gfni.rs
+++ b/crates/core_arch/src/x86/gfni.rs
@@ -5,7 +5,7 @@
 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 
 use crate::core_arch::simd::i8x16;
 use crate::core_arch::simd::i8x32;
@@ -898,25 +898,25 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512f")]
-    unsafe fn test_mm512_gf2p8mul_epi8() {
+    fn test_mm512_gf2p8mul_epi8() {
         let (left, right, expected) = generate_byte_mul_test_data();
 
         for i in 0..NUM_TEST_WORDS_512 {
-            let left = load_m512i_word(&left, i);
-            let right = load_m512i_word(&right, i);
-            let expected = load_m512i_word(&expected, i);
+            let left = unsafe { load_m512i_word(&left, i) };
+            let right = unsafe { load_m512i_word(&right, i) };
+            let expected = unsafe { load_m512i_word(&expected, i) };
             let result = _mm512_gf2p8mul_epi8(left, right);
             assert_eq_m512i(result, expected);
         }
     }
 
     #[simd_test(enable = "gfni,avx512bw")]
-    unsafe fn test_mm512_maskz_gf2p8mul_epi8() {
+    fn test_mm512_maskz_gf2p8mul_epi8() {
         let (left, right, _expected) = generate_byte_mul_test_data();
 
         for i in 0..NUM_TEST_WORDS_512 {
-            let left = load_m512i_word(&left, i);
-            let right = load_m512i_word(&right, i);
+            let left = unsafe { load_m512i_word(&left, i) };
+            let right = unsafe { load_m512i_word(&right, i) };
             let result_zero = _mm512_maskz_gf2p8mul_epi8(0, left, right);
             assert_eq_m512i(result_zero, _mm512_setzero_si512());
             let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
@@ -930,12 +930,12 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw")]
-    unsafe fn test_mm512_mask_gf2p8mul_epi8() {
+    fn test_mm512_mask_gf2p8mul_epi8() {
         let (left, right, _expected) = generate_byte_mul_test_data();
 
         for i in 0..NUM_TEST_WORDS_512 {
-            let left = load_m512i_word(&left, i);
-            let right = load_m512i_word(&right, i);
+            let left = unsafe { load_m512i_word(&left, i) };
+            let right = unsafe { load_m512i_word(&right, i) };
             let result_left = _mm512_mask_gf2p8mul_epi8(left, 0, left, right);
             assert_eq_m512i(result_left, left);
             let mask_bytes: __mmask64 = 0x0F_0F_0F_0F_FF_FF_00_00;
@@ -948,25 +948,25 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx")]
-    unsafe fn test_mm256_gf2p8mul_epi8() {
+    fn test_mm256_gf2p8mul_epi8() {
         let (left, right, expected) = generate_byte_mul_test_data();
 
         for i in 0..NUM_TEST_WORDS_256 {
-            let left = load_m256i_word(&left, i);
-            let right = load_m256i_word(&right, i);
-            let expected = load_m256i_word(&expected, i);
+            let left = unsafe { load_m256i_word(&left, i) };
+            let right = unsafe { load_m256i_word(&right, i) };
+            let expected = unsafe { load_m256i_word(&expected, i) };
             let result = _mm256_gf2p8mul_epi8(left, right);
             assert_eq_m256i(result, expected);
         }
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_gf2p8mul_epi8() {
+    fn test_mm256_maskz_gf2p8mul_epi8() {
         let (left, right, _expected) = generate_byte_mul_test_data();
 
         for i in 0..NUM_TEST_WORDS_256 {
-            let left = load_m256i_word(&left, i);
-            let right = load_m256i_word(&right, i);
+            let left = unsafe { load_m256i_word(&left, i) };
+            let right = unsafe { load_m256i_word(&right, i) };
             let result_zero = _mm256_maskz_gf2p8mul_epi8(0, left, right);
             assert_eq_m256i(result_zero, _mm256_setzero_si256());
             let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
@@ -980,12 +980,12 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_gf2p8mul_epi8() {
+    fn test_mm256_mask_gf2p8mul_epi8() {
         let (left, right, _expected) = generate_byte_mul_test_data();
 
         for i in 0..NUM_TEST_WORDS_256 {
-            let left = load_m256i_word(&left, i);
-            let right = load_m256i_word(&right, i);
+            let left = unsafe { load_m256i_word(&left, i) };
+            let right = unsafe { load_m256i_word(&right, i) };
             let result_left = _mm256_mask_gf2p8mul_epi8(left, 0, left, right);
             assert_eq_m256i(result_left, left);
             let mask_bytes: __mmask32 = 0x0F_F0_FF_00;
@@ -998,25 +998,25 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni")]
-    unsafe fn test_mm_gf2p8mul_epi8() {
+    fn test_mm_gf2p8mul_epi8() {
         let (left, right, expected) = generate_byte_mul_test_data();
 
         for i in 0..NUM_TEST_WORDS_128 {
-            let left = load_m128i_word(&left, i);
-            let right = load_m128i_word(&right, i);
-            let expected = load_m128i_word(&expected, i);
+            let left = unsafe { load_m128i_word(&left, i) };
+            let right = unsafe { load_m128i_word(&right, i) };
+            let expected = unsafe { load_m128i_word(&expected, i) };
             let result = _mm_gf2p8mul_epi8(left, right);
             assert_eq_m128i(result, expected);
         }
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_gf2p8mul_epi8() {
+    fn test_mm_maskz_gf2p8mul_epi8() {
         let (left, right, _expected) = generate_byte_mul_test_data();
 
         for i in 0..NUM_TEST_WORDS_128 {
-            let left = load_m128i_word(&left, i);
-            let right = load_m128i_word(&right, i);
+            let left = unsafe { load_m128i_word(&left, i) };
+            let right = unsafe { load_m128i_word(&right, i) };
             let result_zero = _mm_maskz_gf2p8mul_epi8(0, left, right);
             assert_eq_m128i(result_zero, _mm_setzero_si128());
             let mask_bytes: __mmask16 = 0x0F_F0;
@@ -1030,12 +1030,12 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_gf2p8mul_epi8() {
+    fn test_mm_mask_gf2p8mul_epi8() {
         let (left, right, _expected) = generate_byte_mul_test_data();
 
         for i in 0..NUM_TEST_WORDS_128 {
-            let left = load_m128i_word(&left, i);
-            let right = load_m128i_word(&right, i);
+            let left = unsafe { load_m128i_word(&left, i) };
+            let right = unsafe { load_m128i_word(&right, i) };
             let result_left = _mm_mask_gf2p8mul_epi8(left, 0, left, right);
             assert_eq_m128i(result_left, left);
             let mask_bytes: __mmask16 = 0x0F_F0;
@@ -1048,7 +1048,7 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512f")]
-    unsafe fn test_mm512_gf2p8affine_epi64_epi8() {
+    fn test_mm512_gf2p8affine_epi64_epi8() {
         let identity: i64 = 0x01_02_04_08_10_20_40_80;
         const IDENTITY_BYTE: i32 = 0;
         let constant: i64 = 0;
@@ -1061,20 +1061,20 @@ mod tests {
         let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_512 {
-            let data = load_m512i_word(&bytes, i);
+            let data = unsafe { load_m512i_word(&bytes, i) };
             let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
             assert_eq_m512i(result, data);
             let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
             assert_eq_m512i(result, constant_reference);
-            let data = load_m512i_word(&more_bytes, i);
+            let data = unsafe { load_m512i_word(&more_bytes, i) };
             let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
             assert_eq_m512i(result, data);
             let result = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
             assert_eq_m512i(result, constant_reference);
 
-            let matrix = load_m512i_word(&matrices, i);
-            let vector = load_m512i_word(&vectors, i);
-            let reference = load_m512i_word(&references, i);
+            let matrix = unsafe { load_m512i_word(&matrices, i) };
+            let vector = unsafe { load_m512i_word(&vectors, i) };
+            let reference = unsafe { load_m512i_word(&references, i) };
 
             let result = _mm512_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
             assert_eq_m512i(result, reference);
@@ -1082,13 +1082,13 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw")]
-    unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
+    fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_512 {
-            let matrix = load_m512i_word(&matrices, i);
-            let vector = load_m512i_word(&vectors, i);
+            let matrix = unsafe { load_m512i_word(&matrices, i) };
+            let vector = unsafe { load_m512i_word(&vectors, i) };
             let result_zero =
                 _mm512_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
             assert_eq_m512i(result_zero, _mm512_setzero_si512());
@@ -1104,13 +1104,13 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw")]
-    unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() {
+    fn test_mm512_mask_gf2p8affine_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_512 {
-            let left = load_m512i_word(&vectors, i);
-            let right = load_m512i_word(&matrices, i);
+            let left = unsafe { load_m512i_word(&vectors, i) };
+            let right = unsafe { load_m512i_word(&matrices, i) };
             let result_left =
                 _mm512_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
             assert_eq_m512i(result_left, left);
@@ -1125,7 +1125,7 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx")]
-    unsafe fn test_mm256_gf2p8affine_epi64_epi8() {
+    fn test_mm256_gf2p8affine_epi64_epi8() {
         let identity: i64 = 0x01_02_04_08_10_20_40_80;
         const IDENTITY_BYTE: i32 = 0;
         let constant: i64 = 0;
@@ -1138,20 +1138,20 @@ mod tests {
         let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_256 {
-            let data = load_m256i_word(&bytes, i);
+            let data = unsafe { load_m256i_word(&bytes, i) };
             let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
             assert_eq_m256i(result, data);
             let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
             assert_eq_m256i(result, constant_reference);
-            let data = load_m256i_word(&more_bytes, i);
+            let data = unsafe { load_m256i_word(&more_bytes, i) };
             let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
             assert_eq_m256i(result, data);
             let result = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
             assert_eq_m256i(result, constant_reference);
 
-            let matrix = load_m256i_word(&matrices, i);
-            let vector = load_m256i_word(&vectors, i);
-            let reference = load_m256i_word(&references, i);
+            let matrix = unsafe { load_m256i_word(&matrices, i) };
+            let vector = unsafe { load_m256i_word(&vectors, i) };
+            let reference = unsafe { load_m256i_word(&references, i) };
 
             let result = _mm256_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
             assert_eq_m256i(result, reference);
@@ -1159,13 +1159,13 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
+    fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_256 {
-            let matrix = load_m256i_word(&matrices, i);
-            let vector = load_m256i_word(&vectors, i);
+            let matrix = unsafe { load_m256i_word(&matrices, i) };
+            let vector = unsafe { load_m256i_word(&vectors, i) };
             let result_zero =
                 _mm256_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
             assert_eq_m256i(result_zero, _mm256_setzero_si256());
@@ -1181,13 +1181,13 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() {
+    fn test_mm256_mask_gf2p8affine_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_256 {
-            let left = load_m256i_word(&vectors, i);
-            let right = load_m256i_word(&matrices, i);
+            let left = unsafe { load_m256i_word(&vectors, i) };
+            let right = unsafe { load_m256i_word(&matrices, i) };
             let result_left =
                 _mm256_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
             assert_eq_m256i(result_left, left);
@@ -1202,7 +1202,7 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni")]
-    unsafe fn test_mm_gf2p8affine_epi64_epi8() {
+    fn test_mm_gf2p8affine_epi64_epi8() {
         let identity: i64 = 0x01_02_04_08_10_20_40_80;
         const IDENTITY_BYTE: i32 = 0;
         let constant: i64 = 0;
@@ -1215,20 +1215,20 @@ mod tests {
         let (matrices, vectors, references) = generate_affine_mul_test_data(IDENTITY_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_128 {
-            let data = load_m128i_word(&bytes, i);
+            let data = unsafe { load_m128i_word(&bytes, i) };
             let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
             assert_eq_m128i(result, data);
             let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
             assert_eq_m128i(result, constant_reference);
-            let data = load_m128i_word(&more_bytes, i);
+            let data = unsafe { load_m128i_word(&more_bytes, i) };
             let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(data, identity);
             assert_eq_m128i(result, data);
             let result = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(data, constant);
             assert_eq_m128i(result, constant_reference);
 
-            let matrix = load_m128i_word(&matrices, i);
-            let vector = load_m128i_word(&vectors, i);
-            let reference = load_m128i_word(&references, i);
+            let matrix = unsafe { load_m128i_word(&matrices, i) };
+            let vector = unsafe { load_m128i_word(&vectors, i) };
+            let reference = unsafe { load_m128i_word(&references, i) };
 
             let result = _mm_gf2p8affine_epi64_epi8::<IDENTITY_BYTE>(vector, matrix);
             assert_eq_m128i(result, reference);
@@ -1236,13 +1236,13 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() {
+    fn test_mm_maskz_gf2p8affine_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_128 {
-            let matrix = load_m128i_word(&matrices, i);
-            let vector = load_m128i_word(&vectors, i);
+            let matrix = unsafe { load_m128i_word(&matrices, i) };
+            let vector = unsafe { load_m128i_word(&vectors, i) };
             let result_zero = _mm_maskz_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
             assert_eq_m128i(result_zero, _mm_setzero_si128());
             let mask_bytes: __mmask16 = 0x0F_F0;
@@ -1257,13 +1257,13 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() {
+    fn test_mm_mask_gf2p8affine_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_128 {
-            let left = load_m128i_word(&vectors, i);
-            let right = load_m128i_word(&matrices, i);
+            let left = unsafe { load_m128i_word(&vectors, i) };
+            let right = unsafe { load_m128i_word(&matrices, i) };
             let result_left =
                 _mm_mask_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
             assert_eq_m128i(result_left, left);
@@ -1278,7 +1278,7 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512f")]
-    unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() {
+    fn test_mm512_gf2p8affineinv_epi64_epi8() {
         let identity: i64 = 0x01_02_04_08_10_20_40_80;
         const IDENTITY_BYTE: i32 = 0;
         const CONSTANT_BYTE: i32 = 0x63;
@@ -1288,8 +1288,8 @@ mod tests {
         let (inputs, results) = generate_inv_tests_data();
 
         for i in 0..NUM_BYTES_WORDS_512 {
-            let input = load_m512i_word(&inputs, i);
-            let reference = load_m512i_word(&results, i);
+            let input = unsafe { load_m512i_word(&inputs, i) };
+            let reference = unsafe { load_m512i_word(&results, i) };
             let result = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
             let remultiplied = _mm512_gf2p8mul_epi8(result, input);
             assert_eq_m512i(remultiplied, reference);
@@ -1300,8 +1300,8 @@ mod tests {
             generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_512 {
-            let vector = load_m512i_word(&vectors, i);
-            let matrix = load_m512i_word(&matrices, i);
+            let vector = unsafe { load_m512i_word(&vectors, i) };
+            let matrix = unsafe { load_m512i_word(&matrices, i) };
 
             let inv_vec = _mm512_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
             let reference = _mm512_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
@@ -1314,21 +1314,21 @@ mod tests {
         let sbox_matrix = _mm512_set1_epi64(AES_S_BOX_MATRIX);
 
         for i in 0..NUM_BYTES_WORDS_512 {
-            let reference = load_m512i_word(&AES_S_BOX, i);
-            let input = load_m512i_word(&inputs, i);
+            let reference = unsafe { load_m512i_word(&AES_S_BOX, i) };
+            let input = unsafe { load_m512i_word(&inputs, i) };
             let result = _mm512_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
             assert_eq_m512i(result, reference);
         }
     }
 
     #[simd_test(enable = "gfni,avx512bw")]
-    unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
+    fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_512 {
-            let matrix = load_m512i_word(&matrices, i);
-            let vector = load_m512i_word(&vectors, i);
+            let matrix = unsafe { load_m512i_word(&matrices, i) };
+            let vector = unsafe { load_m512i_word(&vectors, i) };
             let result_zero =
                 _mm512_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
             assert_eq_m512i(result_zero, _mm512_setzero_si512());
@@ -1344,13 +1344,13 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw")]
-    unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
+    fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_512 {
-            let left = load_m512i_word(&vectors, i);
-            let right = load_m512i_word(&matrices, i);
+            let left = unsafe { load_m512i_word(&vectors, i) };
+            let right = unsafe { load_m512i_word(&matrices, i) };
             let result_left =
                 _mm512_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
             assert_eq_m512i(result_left, left);
@@ -1366,7 +1366,7 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx")]
-    unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() {
+    fn test_mm256_gf2p8affineinv_epi64_epi8() {
         let identity: i64 = 0x01_02_04_08_10_20_40_80;
         const IDENTITY_BYTE: i32 = 0;
         const CONSTANT_BYTE: i32 = 0x63;
@@ -1376,8 +1376,8 @@ mod tests {
         let (inputs, results) = generate_inv_tests_data();
 
         for i in 0..NUM_BYTES_WORDS_256 {
-            let input = load_m256i_word(&inputs, i);
-            let reference = load_m256i_word(&results, i);
+            let input = unsafe { load_m256i_word(&inputs, i) };
+            let reference = unsafe { load_m256i_word(&results, i) };
             let result = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
             let remultiplied = _mm256_gf2p8mul_epi8(result, input);
             assert_eq_m256i(remultiplied, reference);
@@ -1388,8 +1388,8 @@ mod tests {
             generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_256 {
-            let vector = load_m256i_word(&vectors, i);
-            let matrix = load_m256i_word(&matrices, i);
+            let vector = unsafe { load_m256i_word(&vectors, i) };
+            let matrix = unsafe { load_m256i_word(&matrices, i) };
 
             let inv_vec = _mm256_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
             let reference = _mm256_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
@@ -1402,21 +1402,21 @@ mod tests {
         let sbox_matrix = _mm256_set1_epi64x(AES_S_BOX_MATRIX);
 
         for i in 0..NUM_BYTES_WORDS_256 {
-            let reference = load_m256i_word(&AES_S_BOX, i);
-            let input = load_m256i_word(&inputs, i);
+            let reference = unsafe { load_m256i_word(&AES_S_BOX, i) };
+            let input = unsafe { load_m256i_word(&inputs, i) };
             let result = _mm256_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
             assert_eq_m256i(result, reference);
         }
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
+    fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_256 {
-            let matrix = load_m256i_word(&matrices, i);
-            let vector = load_m256i_word(&vectors, i);
+            let matrix = unsafe { load_m256i_word(&matrices, i) };
+            let vector = unsafe { load_m256i_word(&vectors, i) };
             let result_zero =
                 _mm256_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
             assert_eq_m256i(result_zero, _mm256_setzero_si256());
@@ -1432,13 +1432,13 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
+    fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_256 {
-            let left = load_m256i_word(&vectors, i);
-            let right = load_m256i_word(&matrices, i);
+            let left = unsafe { load_m256i_word(&vectors, i) };
+            let right = unsafe { load_m256i_word(&matrices, i) };
             let result_left =
                 _mm256_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
             assert_eq_m256i(result_left, left);
@@ -1454,7 +1454,7 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni")]
-    unsafe fn test_mm_gf2p8affineinv_epi64_epi8() {
+    fn test_mm_gf2p8affineinv_epi64_epi8() {
         let identity: i64 = 0x01_02_04_08_10_20_40_80;
         const IDENTITY_BYTE: i32 = 0;
         const CONSTANT_BYTE: i32 = 0x63;
@@ -1464,8 +1464,8 @@ mod tests {
         let (inputs, results) = generate_inv_tests_data();
 
         for i in 0..NUM_BYTES_WORDS_128 {
-            let input = load_m128i_word(&inputs, i);
-            let reference = load_m128i_word(&results, i);
+            let input = unsafe { load_m128i_word(&inputs, i) };
+            let reference = unsafe { load_m128i_word(&results, i) };
             let result = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(input, identity);
             let remultiplied = _mm_gf2p8mul_epi8(result, input);
             assert_eq_m128i(remultiplied, reference);
@@ -1476,8 +1476,8 @@ mod tests {
             generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_128 {
-            let vector = load_m128i_word(&vectors, i);
-            let matrix = load_m128i_word(&matrices, i);
+            let vector = unsafe { load_m128i_word(&vectors, i) };
+            let matrix = unsafe { load_m128i_word(&matrices, i) };
 
             let inv_vec = _mm_gf2p8affineinv_epi64_epi8::<IDENTITY_BYTE>(vector, identity);
             let reference = _mm_gf2p8affine_epi64_epi8::<CONSTANT_BYTE>(inv_vec, matrix);
@@ -1490,21 +1490,21 @@ mod tests {
         let sbox_matrix = _mm_set1_epi64x(AES_S_BOX_MATRIX);
 
         for i in 0..NUM_BYTES_WORDS_128 {
-            let reference = load_m128i_word(&AES_S_BOX, i);
-            let input = load_m128i_word(&inputs, i);
+            let reference = unsafe { load_m128i_word(&AES_S_BOX, i) };
+            let input = unsafe { load_m128i_word(&inputs, i) };
             let result = _mm_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(input, sbox_matrix);
             assert_eq_m128i(result, reference);
         }
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
+    fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_128 {
-            let matrix = load_m128i_word(&matrices, i);
-            let vector = load_m128i_word(&vectors, i);
+            let matrix = unsafe { load_m128i_word(&matrices, i) };
+            let vector = unsafe { load_m128i_word(&vectors, i) };
             let result_zero =
                 _mm_maskz_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(0, vector, matrix);
             assert_eq_m128i(result_zero, _mm_setzero_si128());
@@ -1520,13 +1520,13 @@ mod tests {
     }
 
     #[simd_test(enable = "gfni,avx512bw,avx512vl")]
-    unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
+    fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
         const CONSTANT_BYTE: i32 = 0x63;
         let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
 
         for i in 0..NUM_TEST_WORDS_128 {
-            let left = load_m128i_word(&vectors, i);
-            let right = load_m128i_word(&matrices, i);
+            let left = unsafe { load_m128i_word(&vectors, i) };
+            let right = unsafe { load_m128i_word(&matrices, i) };
             let result_left =
                 _mm_mask_gf2p8affineinv_epi64_epi8::<CONSTANT_BYTE>(left, 0, left, right);
             assert_eq_m128i(result_left, left);
diff --git a/crates/core_arch/src/x86/kl.rs b/crates/core_arch/src/x86/kl.rs
index 26e5a46c62..7cb52847f5 100644
--- a/crates/core_arch/src/x86/kl.rs
+++ b/crates/core_arch/src/x86/kl.rs
@@ -352,45 +352,47 @@ mod tests {
     use stdarch_test::simd_test;
 
     #[target_feature(enable = "kl")]
-    unsafe fn encodekey128() -> [u8; 48] {
+    fn encodekey128() -> [u8; 48] {
         let mut handle = [0; 48];
-        let _ = _mm_encodekey128_u32(0, _mm_setzero_si128(), handle.as_mut_ptr());
+        let _ = unsafe { _mm_encodekey128_u32(0, _mm_setzero_si128(), handle.as_mut_ptr()) };
         handle
     }
 
     #[target_feature(enable = "kl")]
-    unsafe fn encodekey256() -> [u8; 64] {
+    fn encodekey256() -> [u8; 64] {
         let mut handle = [0; 64];
-        let _ = _mm_encodekey256_u32(
-            0,
-            _mm_setzero_si128(),
-            _mm_setzero_si128(),
-            handle.as_mut_ptr(),
-        );
+        let _ = unsafe {
+            _mm_encodekey256_u32(
+                0,
+                _mm_setzero_si128(),
+                _mm_setzero_si128(),
+                handle.as_mut_ptr(),
+            )
+        };
         handle
     }
 
     #[simd_test(enable = "kl")]
-    unsafe fn test_mm_encodekey128_u32() {
+    fn test_mm_encodekey128_u32() {
         encodekey128();
     }
 
     #[simd_test(enable = "kl")]
-    unsafe fn test_mm_encodekey256_u32() {
+    fn test_mm_encodekey256_u32() {
         encodekey256();
     }
 
     #[simd_test(enable = "kl")]
-    unsafe fn test_mm_aesenc128kl_u8() {
+    fn test_mm_aesenc128kl_u8() {
         let mut buffer = _mm_setzero_si128();
         let key = encodekey128();
 
         for _ in 0..100 {
-            let status = _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr());
+            let status = unsafe { _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr()) };
             assert_eq!(status, 0);
         }
         for _ in 0..100 {
-            let status = _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr());
+            let status = unsafe { _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr()) };
             assert_eq!(status, 0);
         }
 
@@ -398,16 +400,16 @@ mod tests {
     }
 
     #[simd_test(enable = "kl")]
-    unsafe fn test_mm_aesdec128kl_u8() {
+    fn test_mm_aesdec128kl_u8() {
         let mut buffer = _mm_setzero_si128();
         let key = encodekey128();
 
         for _ in 0..100 {
-            let status = _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr());
+            let status = unsafe { _mm_aesdec128kl_u8(&mut buffer, buffer, key.as_ptr()) };
             assert_eq!(status, 0);
         }
         for _ in 0..100 {
-            let status = _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr());
+            let status = unsafe { _mm_aesenc128kl_u8(&mut buffer, buffer, key.as_ptr()) };
             assert_eq!(status, 0);
         }
 
@@ -415,16 +417,16 @@ mod tests {
     }
 
     #[simd_test(enable = "kl")]
-    unsafe fn test_mm_aesenc256kl_u8() {
+    fn test_mm_aesenc256kl_u8() {
         let mut buffer = _mm_setzero_si128();
         let key = encodekey256();
 
         for _ in 0..100 {
-            let status = _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr());
+            let status = unsafe { _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr()) };
             assert_eq!(status, 0);
         }
         for _ in 0..100 {
-            let status = _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr());
+            let status = unsafe { _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr()) };
             assert_eq!(status, 0);
         }
 
@@ -432,16 +434,16 @@ mod tests {
     }
 
     #[simd_test(enable = "kl")]
-    unsafe fn test_mm_aesdec256kl_u8() {
+    fn test_mm_aesdec256kl_u8() {
         let mut buffer = _mm_setzero_si128();
         let key = encodekey256();
 
         for _ in 0..100 {
-            let status = _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr());
+            let status = unsafe { _mm_aesdec256kl_u8(&mut buffer, buffer, key.as_ptr()) };
             assert_eq!(status, 0);
         }
         for _ in 0..100 {
-            let status = _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr());
+            let status = unsafe { _mm_aesenc256kl_u8(&mut buffer, buffer, key.as_ptr()) };
             assert_eq!(status, 0);
         }
 
@@ -449,16 +451,20 @@ mod tests {
     }
 
     #[simd_test(enable = "widekl")]
-    unsafe fn test_mm_aesencwide128kl_u8() {
+    fn test_mm_aesencwide128kl_u8() {
         let mut buffer = [_mm_setzero_si128(); 8];
         let key = encodekey128();
 
         for _ in 0..100 {
-            let status = _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
+            let status = unsafe {
+                _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
+            };
             assert_eq!(status, 0);
         }
         for _ in 0..100 {
-            let status = _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
+            let status = unsafe {
+                _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
+            };
             assert_eq!(status, 0);
         }
 
@@ -468,16 +474,20 @@ mod tests {
     }
 
     #[simd_test(enable = "widekl")]
-    unsafe fn test_mm_aesdecwide128kl_u8() {
+    fn test_mm_aesdecwide128kl_u8() {
         let mut buffer = [_mm_setzero_si128(); 8];
         let key = encodekey128();
 
         for _ in 0..100 {
-            let status = _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
+            let status = unsafe {
+                _mm_aesdecwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
+            };
             assert_eq!(status, 0);
         }
         for _ in 0..100 {
-            let status = _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
+            let status = unsafe {
+                _mm_aesencwide128kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
+            };
             assert_eq!(status, 0);
         }
 
@@ -487,16 +497,20 @@ mod tests {
     }
 
     #[simd_test(enable = "widekl")]
-    unsafe fn test_mm_aesencwide256kl_u8() {
+    fn test_mm_aesencwide256kl_u8() {
         let mut buffer = [_mm_setzero_si128(); 8];
         let key = encodekey256();
 
         for _ in 0..100 {
-            let status = _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
+            let status = unsafe {
+                _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
+            };
             assert_eq!(status, 0);
         }
         for _ in 0..100 {
-            let status = _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
+            let status = unsafe {
+                _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
+            };
             assert_eq!(status, 0);
         }
 
@@ -506,16 +520,20 @@ mod tests {
     }
 
     #[simd_test(enable = "widekl")]
-    unsafe fn test_mm_aesdecwide256kl_u8() {
+    fn test_mm_aesdecwide256kl_u8() {
         let mut buffer = [_mm_setzero_si128(); 8];
         let key = encodekey256();
 
         for _ in 0..100 {
-            let status = _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
+            let status = unsafe {
+                _mm_aesdecwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
+            };
             assert_eq!(status, 0);
         }
         for _ in 0..100 {
-            let status = _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr());
+            let status = unsafe {
+                _mm_aesencwide256kl_u8(buffer.as_mut_ptr(), buffer.as_ptr(), key.as_ptr())
+            };
             assert_eq!(status, 0);
         }
 
diff --git a/crates/core_arch/src/x86/mod.rs b/crates/core_arch/src/x86/mod.rs
index 79a593e647..f5a8acbd8f 100644
--- a/crates/core_arch/src/x86/mod.rs
+++ b/crates/core_arch/src/x86/mod.rs
@@ -401,7 +401,7 @@ types! {
 }
 
 types! {
-    #![unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+    #![stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 
     /// 128-bit wide set of 8 `f16` types, x86-specific
     ///
@@ -520,14 +520,14 @@ macro_rules! as_transmute {
     ($from:ty => $as_from:ident, $($as_to:ident -> $to:ident),* $(,)?) => {
         impl $from {$(
             #[inline]
-            pub(crate) fn $as_to(self) -> crate::core_arch::simd::$to {
+            pub(crate) const fn $as_to(self) -> crate::core_arch::simd::$to {
                 unsafe { transmute(self) }
             }
         )*}
         $(
             impl crate::core_arch::simd::$to {
                 #[inline]
-                pub(crate) fn $as_from(self) -> $from {
+                pub(crate) const fn $as_from(self) -> $from {
                     unsafe { transmute(self) }
                 }
             }
@@ -768,9 +768,17 @@ mod avxneconvert;
 pub use self::avxneconvert::*;
 
 mod avx512fp16;
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub use self::avx512fp16::*;
 
 mod kl;
 #[stable(feature = "keylocker_x86", since = "1.89.0")]
 pub use self::kl::*;
+
+mod movrs;
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub use self::movrs::*;
+
+mod avx512vp2intersect;
+#[unstable(feature = "stdarch_x86_avx512vp2intersect", issue = "111137")]
+pub use self::avx512vp2intersect::*;
diff --git a/crates/core_arch/src/x86/movrs.rs b/crates/core_arch/src/x86/movrs.rs
new file mode 100644
index 0000000000..d5f4d146c4
--- /dev/null
+++ b/crates/core_arch/src/x86/movrs.rs
@@ -0,0 +1,23 @@
+//! Read-shared move intrinsics
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.x86.prefetchrs"]
+    fn prefetchrs(p: *const u8);
+}
+
+/// Prefetches the cache line that contains address `p`, with an indication that the source memory
+/// location is likely to become read-shared by multiple processors, i.e., read in the future by at
+/// least one other processor before it is written, assuming it is ever written in the future.
+///
+/// Note: this intrinsic is safe to use even though it takes a raw pointer argument. In general, this
+/// cannot change the behavior of the program, including not trapping on invalid pointers.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(prefetchrst2))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub fn _m_prefetchrs(p: *const u8) {
+    unsafe { prefetchrs(p) }
+}
diff --git a/crates/core_arch/src/x86/pclmulqdq.rs b/crates/core_arch/src/x86/pclmulqdq.rs
index cce6a51e2c..0f2769257f 100644
--- a/crates/core_arch/src/x86/pclmulqdq.rs
+++ b/crates/core_arch/src/x86/pclmulqdq.rs
@@ -3,7 +3,7 @@
 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref] (p. 4-241).
 //!
-//! [intel64_ref]: http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 
 use crate::core_arch::x86::__m128i;
 
@@ -45,7 +45,7 @@ mod tests {
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "pclmulqdq")]
-    unsafe fn test_mm_clmulepi64_si128() {
+    fn test_mm_clmulepi64_si128() {
         // Constants taken from https://software.intel.com/sites/default/files/managed/72/cc/clmul-wp-rev-2.02-2014-04-20.pdf
         let a = _mm_set_epi64x(0x7b5b546573745665, 0x63746f725d53475d);
         let b = _mm_set_epi64x(0x4869285368617929, 0x5b477565726f6e5d);
diff --git a/crates/core_arch/src/x86/rdtsc.rs b/crates/core_arch/src/x86/rdtsc.rs
index 3b348153d6..89292d78af 100644
--- a/crates/core_arch/src/x86/rdtsc.rs
+++ b/crates/core_arch/src/x86/rdtsc.rs
@@ -64,16 +64,16 @@ mod tests {
     use crate::core_arch::x86::*;
     use stdarch_test::simd_test;
 
-    #[simd_test(enable = "sse2")]
-    unsafe fn test_rdtsc() {
-        let r = _rdtsc();
+    #[test]
+    fn test_rdtsc() {
+        let r = unsafe { _rdtsc() };
         assert_ne!(r, 0); // The chances of this being 0 are infinitesimal
     }
 
-    #[simd_test(enable = "sse2")]
-    unsafe fn test_rdtscp() {
+    #[test]
+    fn test_rdtscp() {
         let mut aux = 0;
-        let r = __rdtscp(&mut aux);
+        let r = unsafe { __rdtscp(&mut aux) };
         assert_ne!(r, 0); // The chances of this being 0 are infinitesimal
     }
 }
diff --git a/crates/core_arch/src/x86/rtm.rs b/crates/core_arch/src/x86/rtm.rs
index b807305d6a..c88bd6592d 100644
--- a/crates/core_arch/src/x86/rtm.rs
+++ b/crates/core_arch/src/x86/rtm.rs
@@ -9,7 +9,7 @@
 //! Intel's [programming considerations][intel_consid] details what sorts of instructions within a
 //! transaction are likely to cause an abort.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 //! [wikipedia_rtm]: https://en.wikipedia.org/wiki/Transactional_Synchronization_Extensions#Restricted_Transactional_Memory
 //! [intel_consid]: https://software.intel.com/en-us/cpp-compiler-developer-guide-and-reference-intel-transactional-synchronization-extensions-intel-tsx-programming-considerations
 
@@ -120,13 +120,15 @@ mod tests {
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "rtm")]
-    unsafe fn test_xbegin() {
+    fn test_xbegin() {
         let mut x = 0;
         for _ in 0..10 {
-            let code = _xbegin();
+            let code = unsafe { _xbegin() };
             if code == _XBEGIN_STARTED {
                 x += 1;
-                _xend();
+                unsafe {
+                    _xend();
+                }
                 assert_eq!(x, 1);
                 break;
             }
@@ -135,19 +137,23 @@ mod tests {
     }
 
     #[simd_test(enable = "rtm")]
-    unsafe fn test_xabort() {
+    fn test_xabort() {
         const ABORT_CODE: u32 = 42;
         // aborting outside a transactional region does nothing
-        _xabort::<ABORT_CODE>();
+        unsafe {
+            _xabort::<ABORT_CODE>();
+        }
 
         for _ in 0..10 {
             let mut x = 0;
-            let code = rtm::_xbegin();
+            let code = unsafe { _xbegin() };
             if code == _XBEGIN_STARTED {
                 x += 1;
-                rtm::_xabort::<ABORT_CODE>();
+                unsafe {
+                    _xabort::<ABORT_CODE>();
+                }
             } else if code & _XABORT_EXPLICIT != 0 {
-                let test_abort_code = rtm::_xabort_code(code);
+                let test_abort_code = _xabort_code(code);
                 assert_eq!(test_abort_code, ABORT_CODE);
             }
             assert_eq!(x, 0);
@@ -155,14 +161,16 @@ mod tests {
     }
 
     #[simd_test(enable = "rtm")]
-    unsafe fn test_xtest() {
-        assert_eq!(_xtest(), 0);
+    fn test_xtest() {
+        assert_eq!(unsafe { _xtest() }, 0);
 
         for _ in 0..10 {
-            let code = rtm::_xbegin();
+            let code = unsafe { _xbegin() };
             if code == _XBEGIN_STARTED {
-                let in_tx = _xtest();
-                rtm::_xend();
+                let in_tx = unsafe { _xtest() };
+                unsafe {
+                    _xend();
+                }
 
                 // putting the assert inside the transaction would abort the transaction on fail
                 // without any output/panic/etc
diff --git a/crates/core_arch/src/x86/sha.rs b/crates/core_arch/src/x86/sha.rs
index 9ad1a9f14c..f8a3295d19 100644
--- a/crates/core_arch/src/x86/sha.rs
+++ b/crates/core_arch/src/x86/sha.rs
@@ -286,7 +286,7 @@ mod tests {
 
     #[simd_test(enable = "sha")]
     #[allow(overflowing_literals)]
-    unsafe fn test_mm_sha1msg1_epu32() {
+    fn test_mm_sha1msg1_epu32() {
         let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
         let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
         let expected = _mm_set_epi64x(0x98829f34f74ad457, 0xda2b1a44d0b5ad3c);
@@ -296,7 +296,7 @@ mod tests {
 
     #[simd_test(enable = "sha")]
     #[allow(overflowing_literals)]
-    unsafe fn test_mm_sha1msg2_epu32() {
+    fn test_mm_sha1msg2_epu32() {
         let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
         let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
         let expected = _mm_set_epi64x(0xf714b202d863d47d, 0x90c30d946b3d3b35);
@@ -306,7 +306,7 @@ mod tests {
 
     #[simd_test(enable = "sha")]
     #[allow(overflowing_literals)]
-    unsafe fn test_mm_sha1nexte_epu32() {
+    fn test_mm_sha1nexte_epu32() {
         let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
         let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
         let expected = _mm_set_epi64x(0x2589d5be923f82a4, 0x59f111f13956c25b);
@@ -316,7 +316,7 @@ mod tests {
 
     #[simd_test(enable = "sha")]
     #[allow(overflowing_literals)]
-    unsafe fn test_mm_sha1rnds4_epu32() {
+    fn test_mm_sha1rnds4_epu32() {
         let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
         let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
         let expected = _mm_set_epi64x(0x32b13cd8322f5268, 0xc54420862bd9246f);
@@ -338,7 +338,7 @@ mod tests {
 
     #[simd_test(enable = "sha")]
     #[allow(overflowing_literals)]
-    unsafe fn test_mm_sha256msg1_epu32() {
+    fn test_mm_sha256msg1_epu32() {
         let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
         let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
         let expected = _mm_set_epi64x(0xeb84973fd5cda67d, 0x2857b88f406b09ee);
@@ -348,7 +348,7 @@ mod tests {
 
     #[simd_test(enable = "sha")]
     #[allow(overflowing_literals)]
-    unsafe fn test_mm_sha256msg2_epu32() {
+    fn test_mm_sha256msg2_epu32() {
         let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
         let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
         let expected = _mm_set_epi64x(0xb58777ce887fd851, 0x15d1ec8b73ac8450);
@@ -358,7 +358,7 @@ mod tests {
 
     #[simd_test(enable = "sha")]
     #[allow(overflowing_literals)]
-    unsafe fn test_mm_sha256rnds2_epu32() {
+    fn test_mm_sha256rnds2_epu32() {
         let a = _mm_set_epi64x(0xe9b5dba5b5c0fbcf, 0x71374491428a2f98);
         let b = _mm_set_epi64x(0xab1c5ed5923f82a4, 0x59f111f13956c25b);
         let k = _mm_set_epi64x(0, 0x12835b01d807aa98);
@@ -381,7 +381,7 @@ mod tests {
     ];
 
     #[simd_test(enable = "sha512,avx")]
-    unsafe fn test_mm256_sha512msg1_epi64() {
+    fn test_mm256_sha512msg1_epi64() {
         fn s0(word: u64) -> u64 {
             word.rotate_right(1) ^ word.rotate_right(8) ^ (word >> 7)
         }
@@ -389,8 +389,8 @@ mod tests {
         let A = &DATA_64[0..4];
         let B = &DATA_64[4..6];
 
-        let a = _mm256_loadu_si256(A.as_ptr().cast());
-        let b = _mm_loadu_si128(B.as_ptr().cast());
+        let a = unsafe { _mm256_loadu_si256(A.as_ptr().cast()) };
+        let b = unsafe { _mm_loadu_si128(B.as_ptr().cast()) };
 
         let r = _mm256_sha512msg1_epi64(a, b);
 
@@ -405,7 +405,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sha512,avx")]
-    unsafe fn test_mm256_sha512msg2_epi64() {
+    fn test_mm256_sha512msg2_epi64() {
         fn s1(word: u64) -> u64 {
             word.rotate_right(19) ^ word.rotate_right(61) ^ (word >> 6)
         }
@@ -413,8 +413,8 @@ mod tests {
         let A = &DATA_64[0..4];
         let B = &DATA_64[4..8];
 
-        let a = _mm256_loadu_si256(A.as_ptr().cast());
-        let b = _mm256_loadu_si256(B.as_ptr().cast());
+        let a = unsafe { _mm256_loadu_si256(A.as_ptr().cast()) };
+        let b = unsafe { _mm256_loadu_si256(B.as_ptr().cast()) };
 
         let r = _mm256_sha512msg2_epi64(a, b);
 
@@ -431,7 +431,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sha512,avx")]
-    unsafe fn test_mm256_sha512rnds2_epi64() {
+    fn test_mm256_sha512rnds2_epi64() {
         fn cap_sigma0(word: u64) -> u64 {
             word.rotate_right(28) ^ word.rotate_right(34) ^ word.rotate_right(39)
         }
@@ -452,9 +452,9 @@ mod tests {
         let B = &DATA_64[4..8];
         let K = &DATA_64[8..10];
 
-        let a = _mm256_loadu_si256(A.as_ptr().cast());
-        let b = _mm256_loadu_si256(B.as_ptr().cast());
-        let k = _mm_loadu_si128(K.as_ptr().cast());
+        let a = unsafe { _mm256_loadu_si256(A.as_ptr().cast()) };
+        let b = unsafe { _mm256_loadu_si256(B.as_ptr().cast()) };
+        let k = unsafe { _mm_loadu_si128(K.as_ptr().cast()) };
 
         let r = _mm256_sha512rnds2_epi64(a, b, k);
 
@@ -482,7 +482,7 @@ mod tests {
     ];
 
     #[simd_test(enable = "sm3,avx")]
-    unsafe fn test_mm_sm3msg1_epi32() {
+    fn test_mm_sm3msg1_epi32() {
         fn p1(x: u32) -> u32 {
             x ^ x.rotate_left(15) ^ x.rotate_left(23)
         }
@@ -490,9 +490,9 @@ mod tests {
         let B = &DATA_32[4..8];
         let C = &DATA_32[8..12];
 
-        let a = _mm_loadu_si128(A.as_ptr().cast());
-        let b = _mm_loadu_si128(B.as_ptr().cast());
-        let c = _mm_loadu_si128(C.as_ptr().cast());
+        let a = unsafe { _mm_loadu_si128(A.as_ptr().cast()) };
+        let b = unsafe { _mm_loadu_si128(B.as_ptr().cast()) };
+        let c = unsafe { _mm_loadu_si128(C.as_ptr().cast()) };
 
         let r = _mm_sm3msg1_epi32(a, b, c);
 
@@ -507,14 +507,14 @@ mod tests {
     }
 
     #[simd_test(enable = "sm3,avx")]
-    unsafe fn test_mm_sm3msg2_epi32() {
+    fn test_mm_sm3msg2_epi32() {
         let A = &DATA_32[0..4];
         let B = &DATA_32[4..8];
         let C = &DATA_32[8..12];
 
-        let a = _mm_loadu_si128(A.as_ptr().cast());
-        let b = _mm_loadu_si128(B.as_ptr().cast());
-        let c = _mm_loadu_si128(C.as_ptr().cast());
+        let a = unsafe { _mm_loadu_si128(A.as_ptr().cast()) };
+        let b = unsafe { _mm_loadu_si128(B.as_ptr().cast()) };
+        let c = unsafe { _mm_loadu_si128(C.as_ptr().cast()) };
 
         let r = _mm_sm3msg2_epi32(a, b, c);
 
@@ -535,7 +535,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sm3,avx")]
-    unsafe fn test_mm_sm3rnds2_epi32() {
+    fn test_mm_sm3rnds2_epi32() {
         fn p0(x: u32) -> u32 {
             x ^ x.rotate_left(9) ^ x.rotate_left(17)
         }
@@ -560,9 +560,9 @@ mod tests {
         let B = &DATA_32[4..8];
         let C = &DATA_32[8..12];
 
-        let a = _mm_loadu_si128(A.as_ptr().cast());
-        let b = _mm_loadu_si128(B.as_ptr().cast());
-        let c = _mm_loadu_si128(C.as_ptr().cast());
+        let a = unsafe { _mm_loadu_si128(A.as_ptr().cast()) };
+        let b = unsafe { _mm_loadu_si128(B.as_ptr().cast()) };
+        let c = unsafe { _mm_loadu_si128(C.as_ptr().cast()) };
 
         let r = _mm_sm3rnds2_epi32::<{ ROUND as i32 }>(a, b, c);
 
@@ -641,7 +641,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sm4,avx")]
-    unsafe fn test_mm_sm4key4_epi32() {
+    fn test_mm_sm4key4_epi32() {
         fn l_key(x: u32) -> u32 {
             x ^ x.rotate_left(13) ^ x.rotate_left(23)
         }
@@ -652,8 +652,8 @@ mod tests {
         let A = &DATA_32[0..4];
         let B = &DATA_32[4..8];
 
-        let a = _mm_loadu_si128(A.as_ptr().cast());
-        let b = _mm_loadu_si128(B.as_ptr().cast());
+        let a = unsafe { _mm_loadu_si128(A.as_ptr().cast()) };
+        let b = unsafe { _mm_loadu_si128(B.as_ptr().cast()) };
 
         let r = _mm_sm4key4_epi32(a, b);
 
@@ -667,11 +667,11 @@ mod tests {
     }
 
     #[simd_test(enable = "sm4,avx")]
-    unsafe fn test_mm256_sm4key4_epi32() {
-        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
-        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
-        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
-        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());
+    fn test_mm256_sm4key4_epi32() {
+        let a_low = unsafe { _mm_loadu_si128(DATA_32.as_ptr().cast()) };
+        let a_high = unsafe { _mm_loadu_si128(DATA_32[4..].as_ptr().cast()) };
+        let b_low = unsafe { _mm_loadu_si128(DATA_32[8..].as_ptr().cast()) };
+        let b_high = unsafe { _mm_loadu_si128(DATA_32[12..].as_ptr().cast()) };
 
         let a = _mm256_set_m128i(a_high, a_low);
         let b = _mm256_set_m128i(b_high, b_low);
@@ -686,7 +686,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sm4,avx")]
-    unsafe fn test_mm_sm4rnds4_epi32() {
+    fn test_mm_sm4rnds4_epi32() {
         fn l_rnd(x: u32) -> u32 {
             x ^ x.rotate_left(2) ^ x.rotate_left(10) ^ x.rotate_left(18) ^ x.rotate_left(24)
         }
@@ -697,8 +697,8 @@ mod tests {
         let A = &DATA_32[0..4];
         let B = &DATA_32[4..8];
 
-        let a = _mm_loadu_si128(A.as_ptr().cast());
-        let b = _mm_loadu_si128(B.as_ptr().cast());
+        let a = unsafe { _mm_loadu_si128(A.as_ptr().cast()) };
+        let b = unsafe { _mm_loadu_si128(B.as_ptr().cast()) };
 
         let r = _mm_sm4rnds4_epi32(a, b);
 
@@ -712,11 +712,11 @@ mod tests {
     }
 
     #[simd_test(enable = "sm4,avx")]
-    unsafe fn test_mm256_sm4rnds4_epi32() {
-        let a_low = _mm_loadu_si128(DATA_32.as_ptr().cast());
-        let a_high = _mm_loadu_si128(DATA_32[4..].as_ptr().cast());
-        let b_low = _mm_loadu_si128(DATA_32[8..].as_ptr().cast());
-        let b_high = _mm_loadu_si128(DATA_32[12..].as_ptr().cast());
+    fn test_mm256_sm4rnds4_epi32() {
+        let a_low = unsafe { _mm_loadu_si128(DATA_32.as_ptr().cast()) };
+        let a_high = unsafe { _mm_loadu_si128(DATA_32[4..].as_ptr().cast()) };
+        let b_low = unsafe { _mm_loadu_si128(DATA_32[8..].as_ptr().cast()) };
+        let b_high = unsafe { _mm_loadu_si128(DATA_32[12..].as_ptr().cast()) };
 
         let a = _mm256_set_m128i(a_high, a_low);
         let b = _mm256_set_m128i(b_high, b_low);
diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs
index 86f743e76d..8673872901 100644
--- a/crates/core_arch/src/x86/sse.rs
+++ b/crates/core_arch/src/x86/sse.rs
@@ -18,7 +18,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(addss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) + _mm_cvtss_f32(b)) }
 }
 
@@ -30,7 +31,8 @@ pub fn _mm_add_ss(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(addps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_add(a, b) }
 }
 
@@ -42,7 +44,8 @@ pub fn _mm_add_ps(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(subss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) - _mm_cvtss_f32(b)) }
 }
 
@@ -54,7 +57,8 @@ pub fn _mm_sub_ss(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(subps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_sub(a, b) }
 }
 
@@ -66,7 +70,8 @@ pub fn _mm_sub_ps(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(mulss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) * _mm_cvtss_f32(b)) }
 }
 
@@ -78,7 +83,8 @@ pub fn _mm_mul_ss(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(mulps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_mul(a, b) }
 }
 
@@ -90,7 +96,8 @@ pub fn _mm_mul_ps(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(divss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_insert!(a, 0, _mm_cvtss_f32(a) / _mm_cvtss_f32(b)) }
 }
 
@@ -102,7 +109,8 @@ pub fn _mm_div_ss(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(divps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_div_ps(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_div(a, b) }
 }
 
@@ -200,7 +208,7 @@ pub fn _mm_min_ss(a: __m128, b: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(minps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_min_ps(a: __m128, b: __m128) -> __m128 {
-    // See the `test_mm_min_ps` test why this can't be implemented using `simd_fmin`.
+    // See the `test_mm_min_ps` test why this can't be implemented using `simd_minimum_number_nsz`.
     unsafe { minps(a, b) }
 }
 
@@ -226,7 +234,7 @@ pub fn _mm_max_ss(a: __m128, b: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(maxps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
-    // See the `test_mm_min_ps` test why this can't be implemented using `simd_fmax`.
+    // See the `test_mm_min_ps` test why this can't be implemented using `simd_maximum_number_nsz`.
     unsafe { maxps(a, b) }
 }
 
@@ -241,7 +249,8 @@ pub fn _mm_max_ps(a: __m128, b: __m128) -> __m128 {
     assert_instr(andps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
     unsafe {
         let a: __m128i = mem::transmute(a);
         let b: __m128i = mem::transmute(b);
@@ -264,7 +273,8 @@ pub fn _mm_and_ps(a: __m128, b: __m128) -> __m128 {
     assert_instr(andnps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
     unsafe {
         let a: __m128i = mem::transmute(a);
         let b: __m128i = mem::transmute(b);
@@ -284,7 +294,8 @@ pub fn _mm_andnot_ps(a: __m128, b: __m128) -> __m128 {
     assert_instr(orps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
     unsafe {
         let a: __m128i = mem::transmute(a);
         let b: __m128i = mem::transmute(b);
@@ -304,7 +315,8 @@ pub fn _mm_or_ps(a: __m128, b: __m128) -> __m128 {
     assert_instr(xorps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_xor_ps(a: __m128, b: __m128) -> __m128 {
     unsafe {
         let a: __m128i = mem::transmute(a);
         let b: __m128i = mem::transmute(b);
@@ -866,7 +878,8 @@ pub fn _mm_cvtt_ss2si(a: __m128) -> i32 {
 // No point in using assert_instrs. In Unix x86_64 calling convention this is a
 // no-op, and on msvc it's just a `mov`.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtss_f32(a: __m128) -> f32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtss_f32(a: __m128) -> f32 {
     unsafe { simd_extract!(a, 0) }
 }
 
@@ -881,7 +894,8 @@ pub fn _mm_cvtss_f32(a: __m128) -> f32 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtsi2ss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi32_ss(a: __m128, b: i32) -> __m128 {
     unsafe { simd_insert!(a, 0, b as f32) }
 }
 
@@ -904,7 +918,8 @@ pub fn _mm_cvt_si2ss(a: __m128, b: i32) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_ss(a: f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_ss(a: f32) -> __m128 {
     __m128([a, 0.0, 0.0, 0.0])
 }
 
@@ -915,8 +930,9 @@ pub fn _mm_set_ss(a: f32) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(shufps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set1_ps(a: f32) -> __m128 {
-    __m128([a, a, a, a])
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set1_ps(a: f32) -> __m128 {
+    f32x4::splat(a).as_m128()
 }
 
 /// Alias for [`_mm_set1_ps`](fn._mm_set1_ps.html)
@@ -926,7 +942,8 @@ pub fn _mm_set1_ps(a: f32) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(shufps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_ps1(a: f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_ps1(a: f32) -> __m128 {
     _mm_set1_ps(a)
 }
 
@@ -951,9 +968,10 @@ pub fn _mm_set_ps1(a: f32) -> __m128 {
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_set_ps)
 #[inline]
 #[target_feature(enable = "sse")]
-#[cfg_attr(test, assert_instr(unpcklps))]
+// This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
     __m128([d, c, b, a])
 }
 
@@ -979,7 +997,8 @@ pub fn _mm_set_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
     assert_instr(movaps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
     __m128([a, b, c, d])
 }
 
@@ -990,7 +1009,8 @@ pub fn _mm_setr_ps(a: f32, b: f32, c: f32, d: f32) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(xorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_setzero_ps() -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setzero_ps() -> __m128 {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -1021,7 +1041,8 @@ pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
 #[cfg_attr(test, assert_instr(shufps, MASK = 3))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(MASK, 8);
     unsafe {
         simd_shuffle!(
@@ -1045,7 +1066,8 @@ pub fn _mm_shuffle_ps<const MASK: i32>(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(unpckhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_shuffle!(a, b, [2, 6, 3, 7]) }
 }
 
@@ -1057,7 +1079,8 @@ pub fn _mm_unpackhi_ps(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(unpcklps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_shuffle!(a, b, [0, 4, 1, 5]) }
 }
 
@@ -1069,7 +1092,8 @@ pub fn _mm_unpacklo_ps(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movhlps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
     // TODO; figure why this is a different instruction on msvc?
     unsafe { simd_shuffle!(a, b, [6, 7, 2, 3]) }
 }
@@ -1082,7 +1106,8 @@ pub fn _mm_movehl_ps(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movlhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_shuffle!(a, b, [0, 1, 4, 5]) }
 }
 
@@ -1096,12 +1121,13 @@ pub fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movmskps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_movemask_ps(a: __m128) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movemask_ps(a: __m128) -> i32 {
     // Propagate the highest bit to the rest, because simd_bitmask
     // requires all-1 or all-0.
     unsafe {
         let mask: i32x4 = simd_lt(transmute(a), i32x4::ZERO);
-        simd_bitmask::<i32x4, u8>(mask).into()
+        simd_bitmask::<i32x4, u8>(mask) as i32
     }
 }
 
@@ -1115,7 +1141,8 @@ pub fn _mm_movemask_ps(a: __m128) -> i32 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
     __m128([*p, 0.0, 0.0, 0.0])
 }
 
@@ -1130,7 +1157,8 @@ pub unsafe fn _mm_load_ss(p: *const f32) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
     let a = *p;
     __m128([a, a, a, a])
 }
@@ -1142,7 +1170,8 @@ pub unsafe fn _mm_load1_ps(p: *const f32) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
     _mm_load1_ps(p)
 }
 
@@ -1166,7 +1195,8 @@ pub unsafe fn _mm_load_ps1(p: *const f32) -> __m128 {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
     *(p as *const __m128)
 }
 
@@ -1183,7 +1213,8 @@ pub unsafe fn _mm_load_ps(p: *const f32) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movups))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
     // Note: Using `*p` would require `f32` alignment, but `movups` has no
     // alignment restrictions.
     let mut dst = _mm_undefined_ps();
@@ -1223,7 +1254,8 @@ pub unsafe fn _mm_loadu_ps(p: *const f32) -> __m128 {
     assert_instr(movaps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
     let a = _mm_load_ps(p);
     simd_shuffle!(a, a, [3, 2, 1, 0])
 }
@@ -1237,7 +1269,8 @@ pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
     *p = simd_extract!(a, 0);
 }
 
@@ -1267,7 +1300,8 @@ pub unsafe fn _mm_store_ss(p: *mut f32, a: __m128) {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
     let b: __m128 = simd_shuffle!(a, a, [0, 0, 0, 0]);
     *(p as *mut __m128) = b;
 }
@@ -1282,7 +1316,8 @@ pub unsafe fn _mm_store1_ps(p: *mut f32, a: __m128) {
     assert_instr(movaps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
     _mm_store1_ps(p, a);
 }
 
@@ -1305,7 +1340,8 @@ pub unsafe fn _mm_store_ps1(p: *mut f32, a: __m128) {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
     *(p as *mut __m128) = a;
 }
 
@@ -1320,7 +1356,8 @@ pub unsafe fn _mm_store_ps(p: *mut f32, a: __m128) {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movups))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
     ptr::copy_nonoverlapping(
         ptr::addr_of!(a) as *const u8,
         p as *mut u8,
@@ -1352,7 +1389,8 @@ pub unsafe fn _mm_storeu_ps(p: *mut f32, a: __m128) {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
     let b: __m128 = simd_shuffle!(a, a, [3, 2, 1, 0]);
     *(p as *mut __m128) = b;
 }
@@ -1370,7 +1408,8 @@ pub unsafe fn _mm_storer_ps(p: *mut f32, a: __m128) {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(movss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_move_ss(a: __m128, b: __m128) -> __m128 {
     unsafe { simd_shuffle!(a, b, [4, 1, 2, 3]) }
 }
 
@@ -1908,7 +1947,7 @@ pub fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
     }
 }
 
-/// Returns vector of type __m128 with indeterminate elements.with indetermination elements.
+/// Returns vector of type __m128 with indeterminate elements.
 /// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
 /// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
 /// In practice, this is typically equivalent to [`mem::zeroed`].
@@ -1917,7 +1956,8 @@ pub fn _mm_prefetch<const STRATEGY: i32>(p: *const i8) {
 #[inline]
 #[target_feature(enable = "sse")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_undefined_ps() -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_undefined_ps() -> __m128 {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -1928,7 +1968,8 @@ pub fn _mm_undefined_ps() -> __m128 {
 #[allow(non_snake_case)]
 #[target_feature(enable = "sse")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _MM_TRANSPOSE4_PS(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _MM_TRANSPOSE4_PS(
     row0: &mut __m128,
     row1: &mut __m128,
     row2: &mut __m128,
@@ -2037,7 +2078,8 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
 
 #[cfg(test)]
 mod tests {
-    use crate::{hint::black_box, mem::transmute, ptr};
+    use crate::core_arch::assert_eq_const as assert_eq;
+    use crate::{hint::black_box, ptr};
     use std::boxed;
     use stdarch_test::simd_test;
 
@@ -2046,7 +2088,7 @@ mod tests {
     const NAN: f32 = f32::NAN;
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_add_ps() {
+    const fn test_mm_add_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_add_ps(a, b);
@@ -2054,7 +2096,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_add_ss() {
+    const fn test_mm_add_ss() {
         let a = _mm_set_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_set_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_add_ss(a, b);
@@ -2062,7 +2104,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_sub_ps() {
+    const fn test_mm_sub_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_sub_ps(a, b);
@@ -2070,7 +2112,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_sub_ss() {
+    const fn test_mm_sub_ss() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_sub_ss(a, b);
@@ -2078,7 +2120,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_mul_ps() {
+    const fn test_mm_mul_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_mul_ps(a, b);
@@ -2086,7 +2128,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_mul_ss() {
+    const fn test_mm_mul_ss() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_mul_ss(a, b);
@@ -2094,7 +2136,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_div_ps() {
+    const fn test_mm_div_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 2.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.2, -5.0);
         let r = _mm_div_ps(a, b);
@@ -2102,7 +2144,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_div_ss() {
+    const fn test_mm_div_ss() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_div_ss(a, b);
@@ -2110,7 +2152,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_sqrt_ss() {
+    fn test_mm_sqrt_ss() {
         let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
         let r = _mm_sqrt_ss(a);
         let e = _mm_setr_ps(2.0, 13.0, 16.0, 100.0);
@@ -2118,7 +2160,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_sqrt_ps() {
+    fn test_mm_sqrt_ps() {
         let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
         let r = _mm_sqrt_ps(a);
         let e = _mm_setr_ps(2.0, 3.6055512, 4.0, 10.0);
@@ -2126,7 +2168,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_rcp_ss() {
+    fn test_mm_rcp_ss() {
         let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
         let r = _mm_rcp_ss(a);
         let e = _mm_setr_ps(0.24993896, 13.0, 16.0, 100.0);
@@ -2138,7 +2180,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_rcp_ps() {
+    fn test_mm_rcp_ps() {
         let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
         let r = _mm_rcp_ps(a);
         let e = _mm_setr_ps(0.24993896, 0.0769043, 0.06248474, 0.0099983215);
@@ -2149,7 +2191,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_rsqrt_ss() {
+    fn test_mm_rsqrt_ss() {
         let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
         let r = _mm_rsqrt_ss(a);
         let e = _mm_setr_ps(0.49987793, 13.0, 16.0, 100.0);
@@ -2160,7 +2202,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_rsqrt_ps() {
+    fn test_mm_rsqrt_ps() {
         let a = _mm_setr_ps(4.0, 13.0, 16.0, 100.0);
         let r = _mm_rsqrt_ps(a);
         let e = _mm_setr_ps(0.49987793, 0.2772827, 0.24993896, 0.099990845);
@@ -2171,7 +2213,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_min_ss() {
+    fn test_mm_min_ss() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_min_ss(a, b);
@@ -2179,30 +2221,30 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_min_ps() {
+    fn test_mm_min_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_min_ps(a, b);
         assert_eq_m128(r, _mm_setr_ps(-100.0, 5.0, 0.0, -10.0));
 
-        // `_mm_min_ps` can **not** be implemented using the `simd_min` rust intrinsic. `simd_min`
-        // is lowered by the llvm codegen backend to `llvm.minnum.v*` llvm intrinsic. This intrinsic
-        // doesn't specify how -0.0 is handled. Unfortunately it happens to behave different from
-        // the `minps` x86 instruction on x86. The `llvm.minnum.v*` llvm intrinsic equals
-        // `r1` to `a` and `r2` to `b`.
+        // `_mm_min_ps` can **not** be implemented using the `simd_minimum_number_nsz` rust
+        // intrinsic. That intrinsic is lowered by the llvm codegen backend to `llvm.minimumnum.v*`
+        // llvm intrinsic with the `nsz` attribute. The `nsz` attribute means -0.0 is handled
+        // non-deterministically. The `minps` x86 instruction however has a deterministic semantics
+        // for signed zeros.
         let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
         let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
-        let r1: [u8; 16] = transmute(_mm_min_ps(a, b));
-        let r2: [u8; 16] = transmute(_mm_min_ps(b, a));
-        let a: [u8; 16] = transmute(a);
-        let b: [u8; 16] = transmute(b);
+        let r1 = _mm_min_ps(a, b).as_f32x4().to_bits();
+        let r2 = _mm_min_ps(b, a).as_f32x4().to_bits();
+        let a = a.as_f32x4().to_bits();
+        let b = b.as_f32x4().to_bits();
         assert_eq!(r1, b);
         assert_eq!(r2, a);
         assert_ne!(a, b); // sanity check that -0.0 is actually present
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_max_ss() {
+    fn test_mm_max_ss() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_max_ss(a, b);
@@ -2210,7 +2252,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_max_ps() {
+    fn test_mm_max_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_max_ps(a, b);
@@ -2219,67 +2261,67 @@ mod tests {
         // Check SSE-specific semantics for -0.0 handling.
         let a = _mm_setr_ps(-0.0, 0.0, 0.0, 0.0);
         let b = _mm_setr_ps(0.0, 0.0, 0.0, 0.0);
-        let r1: [u8; 16] = transmute(_mm_max_ps(a, b));
-        let r2: [u8; 16] = transmute(_mm_max_ps(b, a));
-        let a: [u8; 16] = transmute(a);
-        let b: [u8; 16] = transmute(b);
+        let r1 = _mm_max_ps(a, b).as_f32x4().to_bits();
+        let r2 = _mm_max_ps(b, a).as_f32x4().to_bits();
+        let a = a.as_f32x4().to_bits();
+        let b = b.as_f32x4().to_bits();
         assert_eq!(r1, b);
         assert_eq!(r2, a);
         assert_ne!(a, b); // sanity check that -0.0 is actually present
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_and_ps() {
-        let a = transmute(u32x4::splat(0b0011));
-        let b = transmute(u32x4::splat(0b0101));
+    const fn test_mm_and_ps() {
+        let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
+        let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
         let r = _mm_and_ps(*black_box(&a), *black_box(&b));
-        let e = transmute(u32x4::splat(0b0001));
+        let e = f32x4::from_bits(u32x4::splat(0b0001)).as_m128();
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_andnot_ps() {
-        let a = transmute(u32x4::splat(0b0011));
-        let b = transmute(u32x4::splat(0b0101));
+    const fn test_mm_andnot_ps() {
+        let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
+        let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
         let r = _mm_andnot_ps(*black_box(&a), *black_box(&b));
-        let e = transmute(u32x4::splat(0b0100));
+        let e = f32x4::from_bits(u32x4::splat(0b0100)).as_m128();
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_or_ps() {
-        let a = transmute(u32x4::splat(0b0011));
-        let b = transmute(u32x4::splat(0b0101));
+    const fn test_mm_or_ps() {
+        let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
+        let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
         let r = _mm_or_ps(*black_box(&a), *black_box(&b));
-        let e = transmute(u32x4::splat(0b0111));
+        let e = f32x4::from_bits(u32x4::splat(0b0111)).as_m128();
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_xor_ps() {
-        let a = transmute(u32x4::splat(0b0011));
-        let b = transmute(u32x4::splat(0b0101));
+    const fn test_mm_xor_ps() {
+        let a = f32x4::from_bits(u32x4::splat(0b0011)).as_m128();
+        let b = f32x4::from_bits(u32x4::splat(0b0101)).as_m128();
         let r = _mm_xor_ps(*black_box(&a), *black_box(&b));
-        let e = transmute(u32x4::splat(0b0110));
+        let e = f32x4::from_bits(u32x4::splat(0b0110)).as_m128();
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpeq_ss() {
+    fn test_mm_cmpeq_ss() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(-1.0, 5.0, 6.0, 7.0);
-        let r: u32x4 = transmute(_mm_cmpeq_ss(a, b));
-        let e: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0), 2.0, 3.0, 4.0));
+        let r = _mm_cmpeq_ss(a, b).as_f32x4().to_bits();
+        let e = f32x4::new(f32::from_bits(0), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(r, e);
 
         let b2 = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
-        let r2: u32x4 = transmute(_mm_cmpeq_ss(a, b2));
-        let e2: u32x4 = transmute(_mm_setr_ps(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0));
+        let r2 = _mm_cmpeq_ss(a, b2).as_f32x4().to_bits();
+        let e2 = f32x4::new(f32::from_bits(0xffffffff), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(r2, e2);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmplt_ss() {
+    fn test_mm_cmplt_ss() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
         let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@@ -2289,21 +2331,21 @@ mod tests {
         let c1 = 0u32; // a.extract(0) < c.extract(0)
         let d1 = !0u32; // a.extract(0) < d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmplt_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmplt_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmplt_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmplt_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmplt_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmplt_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmple_ss() {
+    fn test_mm_cmple_ss() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
         let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@@ -2313,21 +2355,21 @@ mod tests {
         let c1 = !0u32; // a.extract(0) <= c.extract(0)
         let d1 = !0u32; // a.extract(0) <= d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmple_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmple_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmple_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmple_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmple_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmple_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpgt_ss() {
+    fn test_mm_cmpgt_ss() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
         let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@@ -2337,21 +2379,21 @@ mod tests {
         let c1 = 0u32; // a.extract(0) > c.extract(0)
         let d1 = 0u32; // a.extract(0) > d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmpgt_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpgt_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmpgt_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpgt_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmpgt_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpgt_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpge_ss() {
+    fn test_mm_cmpge_ss() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
         let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@@ -2361,21 +2403,21 @@ mod tests {
         let c1 = !0u32; // a.extract(0) >= c.extract(0)
         let d1 = 0u32; // a.extract(0) >= d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmpge_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpge_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmpge_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpge_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmpge_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpge_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpneq_ss() {
+    fn test_mm_cmpneq_ss() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
         let c = _mm_setr_ps(1.0, 5.0, 6.0, 7.0);
@@ -2385,21 +2427,21 @@ mod tests {
         let c1 = 0u32; // a.extract(0) != c.extract(0)
         let d1 = !0u32; // a.extract(0) != d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmpneq_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpneq_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmpneq_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpneq_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmpneq_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpneq_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnlt_ss() {
+    fn test_mm_cmpnlt_ss() {
         // TODO: this test is exactly the same as for `_mm_cmpge_ss`, but there
         // must be a difference. It may have to do with behavior in the
         // presence of NaNs (signaling or quiet). If so, we should add tests
@@ -2414,21 +2456,21 @@ mod tests {
         let c1 = !0u32; // a.extract(0) >= c.extract(0)
         let d1 = 0u32; // a.extract(0) >= d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmpnlt_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpnlt_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmpnlt_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpnlt_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmpnlt_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpnlt_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnle_ss() {
+    fn test_mm_cmpnle_ss() {
         // TODO: this test is exactly the same as for `_mm_cmpgt_ss`, but there
         // must be a difference. It may have to do with behavior in the
         // presence
@@ -2443,21 +2485,21 @@ mod tests {
         let c1 = 0u32; // a.extract(0) > c.extract(0)
         let d1 = 0u32; // a.extract(0) > d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmpnle_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpnle_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmpnle_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpnle_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmpnle_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpnle_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpngt_ss() {
+    fn test_mm_cmpngt_ss() {
         // TODO: this test is exactly the same as for `_mm_cmple_ss`, but there
         // must be a difference. It may have to do with behavior in the
         // presence of NaNs (signaling or quiet). If so, we should add tests
@@ -2472,21 +2514,21 @@ mod tests {
         let c1 = !0u32; // a.extract(0) <= c.extract(0)
         let d1 = !0u32; // a.extract(0) <= d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmpngt_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpngt_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmpngt_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpngt_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmpngt_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpngt_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnge_ss() {
+    fn test_mm_cmpnge_ss() {
         // TODO: this test is exactly the same as for `_mm_cmplt_ss`, but there
         // must be a difference. It may have to do with behavior in the
         // presence of NaNs (signaling or quiet). If so, we should add tests
@@ -2501,21 +2543,21 @@ mod tests {
         let c1 = 0u32; // a.extract(0) < c.extract(0)
         let d1 = !0u32; // a.extract(0) < d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmpnge_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpnge_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmpnge_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpnge_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmpnge_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpnge_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpord_ss() {
+    fn test_mm_cmpord_ss() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
         let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
@@ -2525,21 +2567,21 @@ mod tests {
         let c1 = 0u32; // a.extract(0) ord c.extract(0)
         let d1 = !0u32; // a.extract(0) ord d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmpord_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpord_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmpord_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpord_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmpord_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpord_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpunord_ss() {
+    fn test_mm_cmpunord_ss() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(0.0, 5.0, 6.0, 7.0);
         let c = _mm_setr_ps(NAN, 5.0, 6.0, 7.0);
@@ -2549,165 +2591,165 @@ mod tests {
         let c1 = !0u32; // a.extract(0) unord c.extract(0)
         let d1 = 0u32; // a.extract(0) unord d.extract(0)
 
-        let rb: u32x4 = transmute(_mm_cmpunord_ss(a, b));
-        let eb: u32x4 = transmute(_mm_setr_ps(f32::from_bits(b1), 2.0, 3.0, 4.0));
+        let rb = _mm_cmpunord_ss(a, b).as_f32x4().to_bits();
+        let eb = f32x4::new(f32::from_bits(b1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rb, eb);
 
-        let rc: u32x4 = transmute(_mm_cmpunord_ss(a, c));
-        let ec: u32x4 = transmute(_mm_setr_ps(f32::from_bits(c1), 2.0, 3.0, 4.0));
+        let rc = _mm_cmpunord_ss(a, c).as_f32x4().to_bits();
+        let ec = f32x4::new(f32::from_bits(c1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rc, ec);
 
-        let rd: u32x4 = transmute(_mm_cmpunord_ss(a, d));
-        let ed: u32x4 = transmute(_mm_setr_ps(f32::from_bits(d1), 2.0, 3.0, 4.0));
+        let rd = _mm_cmpunord_ss(a, d).as_f32x4().to_bits();
+        let ed = f32x4::new(f32::from_bits(d1), 2.0, 3.0, 4.0).to_bits();
         assert_eq!(rd, ed);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpeq_ps() {
+    fn test_mm_cmpeq_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(fls, fls, tru, fls);
-        let r: u32x4 = transmute(_mm_cmpeq_ps(a, b));
+        let r = _mm_cmpeq_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmplt_ps() {
+    fn test_mm_cmplt_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(tru, fls, fls, fls);
-        let r: u32x4 = transmute(_mm_cmplt_ps(a, b));
+        let r = _mm_cmplt_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmple_ps() {
+    fn test_mm_cmple_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, 4.0);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(tru, fls, tru, fls);
-        let r: u32x4 = transmute(_mm_cmple_ps(a, b));
+        let r = _mm_cmple_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpgt_ps() {
+    fn test_mm_cmpgt_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(fls, tru, fls, fls);
-        let r: u32x4 = transmute(_mm_cmpgt_ps(a, b));
+        let r = _mm_cmpgt_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpge_ps() {
+    fn test_mm_cmpge_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, 42.0);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(fls, tru, tru, fls);
-        let r: u32x4 = transmute(_mm_cmpge_ps(a, b));
+        let r = _mm_cmpge_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpneq_ps() {
+    fn test_mm_cmpneq_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, NAN);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(tru, tru, fls, tru);
-        let r: u32x4 = transmute(_mm_cmpneq_ps(a, b));
+        let r = _mm_cmpneq_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnlt_ps() {
+    fn test_mm_cmpnlt_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(fls, tru, tru, tru);
-        let r: u32x4 = transmute(_mm_cmpnlt_ps(a, b));
+        let r = _mm_cmpnlt_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnle_ps() {
+    fn test_mm_cmpnle_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(fls, tru, fls, tru);
-        let r: u32x4 = transmute(_mm_cmpnle_ps(a, b));
+        let r = _mm_cmpnle_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpngt_ps() {
+    fn test_mm_cmpngt_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(tru, fls, tru, tru);
-        let r: u32x4 = transmute(_mm_cmpngt_ps(a, b));
+        let r = _mm_cmpngt_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpnge_ps() {
+    fn test_mm_cmpnge_ps() {
         let a = _mm_setr_ps(10.0, 50.0, 1.0, NAN);
         let b = _mm_setr_ps(15.0, 20.0, 1.0, 5.0);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(tru, fls, fls, tru);
-        let r: u32x4 = transmute(_mm_cmpnge_ps(a, b));
+        let r = _mm_cmpnge_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpord_ps() {
+    fn test_mm_cmpord_ps() {
         let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
         let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(tru, fls, fls, fls);
-        let r: u32x4 = transmute(_mm_cmpord_ps(a, b));
+        let r = _mm_cmpord_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cmpunord_ps() {
+    fn test_mm_cmpunord_ps() {
         let a = _mm_setr_ps(10.0, 50.0, NAN, NAN);
         let b = _mm_setr_ps(15.0, NAN, 1.0, NAN);
         let tru = !0u32;
         let fls = 0u32;
 
         let e = u32x4::new(fls, tru, tru, tru);
-        let r: u32x4 = transmute(_mm_cmpunord_ps(a, b));
+        let r = _mm_cmpunord_ps(a, b).as_f32x4().to_bits();
         assert_eq!(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_comieq_ss() {
+    fn test_mm_comieq_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2728,7 +2770,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_comilt_ss() {
+    fn test_mm_comilt_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2749,7 +2791,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_comile_ss() {
+    fn test_mm_comile_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2770,18 +2812,36 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_comigt_ss() {
+    fn test_mm_comigt_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
-        let ee = &[1i32, 0, 1, 0];
+        let ee = &[0i32, 0, 1, 0];
 
         for i in 0..4 {
             let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
             let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
 
-            let r = _mm_comige_ss(a, b);
+            let r = _mm_comigt_ss(a, b);
+
+            assert_eq!(
+                ee[i], r,
+                "_mm_comigt_ss({:?}, {:?}) = {}, expected: {} (i={})",
+                a, b, r, ee[i], i
+            );
+        }
+    }
+
+    #[simd_test(enable = "sse")]
+    fn test_mm_comige_ss() {
+        let aa = &[3.0f32, 23.0, 12.0, NAN];
+        let bb = &[3.0f32, 1.5, 47.5, NAN];
+        let ee = &[1i32, 1, 0, 0];
 
+        for i in 0..4 {
+            let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0);
+            let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0);
+            let r = _mm_comige_ss(a, b);
             assert_eq!(
                 ee[i], r,
                 "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})",
@@ -2791,7 +2851,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_comineq_ss() {
+    fn test_mm_comineq_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2812,7 +2872,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_ucomieq_ss() {
+    fn test_mm_ucomieq_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2833,7 +2893,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_ucomilt_ss() {
+    fn test_mm_ucomilt_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2854,7 +2914,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_ucomile_ss() {
+    fn test_mm_ucomile_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2875,7 +2935,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_ucomigt_ss() {
+    fn test_mm_ucomigt_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2896,7 +2956,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_ucomige_ss() {
+    fn test_mm_ucomige_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2917,7 +2977,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_ucomineq_ss() {
+    fn test_mm_ucomineq_ss() {
         let aa = &[3.0f32, 12.0, 23.0, NAN];
         let bb = &[3.0f32, 47.5, 1.5, NAN];
 
@@ -2937,92 +2997,123 @@ mod tests {
         }
     }
 
+    macro_rules! test_mm_cvtss_si32_impl {
+        ($alias:ident) => {
+            let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
+            let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
+            for i in 0..inputs.len() {
+                let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
+                let e = result[i];
+                let r = $alias(x);
+                assert_eq!(e, r, "TestCase #{} f({:?}) = {}, expected: {}", i, x, r, e);
+            }
+        };
+    }
+
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cvtss_si32() {
-        let inputs = &[42.0f32, -3.1, 4.0e10, 4.0e-20, NAN, 2147483500.1];
-        let result = &[42i32, -3, i32::MIN, 0, i32::MIN, 2147483520];
-        for i in 0..inputs.len() {
-            let x = _mm_setr_ps(inputs[i], 1.0, 3.0, 4.0);
-            let e = result[i];
-            let r = _mm_cvtss_si32(x);
-            assert_eq!(
-                e, r,
-                "TestCase #{} _mm_cvtss_si32({:?}) = {}, expected: {}",
-                i, x, r, e
-            );
-        }
+    fn test_mm_cvtss_si32() {
+        test_mm_cvtss_si32_impl!(_mm_cvtss_si32);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cvttss_si32() {
-        let inputs = &[
-            (42.0f32, 42i32),
-            (-31.4, -31),
-            (-33.5, -33),
-            (-34.5, -34),
-            (10.999, 10),
-            (-5.99, -5),
-            (4.0e10, i32::MIN),
-            (4.0e-10, 0),
-            (NAN, i32::MIN),
-            (2147483500.1, 2147483520),
-        ];
-        for (i, &(xi, e)) in inputs.iter().enumerate() {
-            let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
-            let r = _mm_cvttss_si32(x);
-            assert_eq!(
-                e, r,
-                "TestCase #{} _mm_cvttss_si32({:?}) = {}, expected: {}",
-                i, x, r, e
-            );
-        }
+    fn test_mm_cvt_ss2si() {
+        test_mm_cvtss_si32_impl!(_mm_cvt_ss2si);
+    }
+
+    macro_rules! test_cvttss_si32_impl {
+        ($alias:ident) => {
+            let inputs = &[
+                (42.0f32, 42i32),
+                (-31.4, -31),
+                (-33.5, -33),
+                (-34.5, -34),
+                (10.999, 10),
+                (-5.99, -5),
+                (4.0e10, i32::MIN),
+                (4.0e-10, 0),
+                (NAN, i32::MIN),
+                (2147483500.1, 2147483520),
+            ];
+            for (i, &(xi, e)) in inputs.iter().enumerate() {
+                let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
+                let r = $alias(x);
+                assert_eq!(e, r, "TestCase #{} f({:?}) = {}, expected: {}", i, x, r, e);
+            }
+        };
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cvtsi32_ss() {
-        let inputs = &[
-            (4555i32, 4555.0f32),
-            (322223333, 322223330.0),
-            (-432, -432.0),
-            (-322223333, -322223330.0),
-        ];
+    fn test_mm_cvttss_si32() {
+        test_cvttss_si32_impl!(_mm_cvttss_si32);
+    }
+
+    #[simd_test(enable = "sse")]
+    fn test_mm_cvtt_ss2si() {
+        test_cvttss_si32_impl!(_mm_cvtt_ss2si);
+    }
 
-        for &(x, f) in inputs.iter() {
+    macro_rules! test_mm_cvtsi32_ss_impl {
+        ($alias:ident) => {
             let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
-            let r = _mm_cvtsi32_ss(a, x);
-            let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
-            assert_eq_m128(e, r);
-        }
+
+            let r = $alias(a, 4555);
+            assert_eq_m128(_mm_setr_ps(4555.0, 6.0, 7.0, 8.0), r);
+
+            let r = $alias(a, 322223333);
+            assert_eq_m128(_mm_setr_ps(322223333.0, 6.0, 7.0, 8.0), r);
+
+            let r = $alias(a, -432);
+            assert_eq_m128(_mm_setr_ps(-432.0, 6.0, 7.0, 8.0), r);
+
+            let r = $alias(a, -322223333);
+            assert_eq_m128(_mm_setr_ps(-322223333.0, 6.0, 7.0, 8.0), r);
+        };
+    }
+
+    #[simd_test(enable = "sse")]
+    const fn test_mm_cvtsi32_ss() {
+        test_mm_cvtsi32_ss_impl!(_mm_cvtsi32_ss);
+    }
+
+    #[simd_test(enable = "sse")]
+    fn test_mm_cvt_si2ss() {
+        test_mm_cvtsi32_ss_impl!(_mm_cvt_si2ss);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cvtss_f32() {
+    const fn test_mm_cvtss_f32() {
         let a = _mm_setr_ps(312.0134, 5.0, 6.0, 7.0);
         assert_eq!(_mm_cvtss_f32(a), 312.0134);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_set_ss() {
+    const fn test_mm_set_ss() {
         let r = _mm_set_ss(black_box(4.25));
         assert_eq_m128(r, _mm_setr_ps(4.25, 0.0, 0.0, 0.0));
     }
 
+    macro_rules! test_mm_set1_ps_impl {
+        ($alias:ident) => {
+            let r = $alias(black_box(4.25));
+            assert_eq!(get_m128(r, 0), 4.25);
+            assert_eq!(get_m128(r, 1), 4.25);
+            assert_eq!(get_m128(r, 2), 4.25);
+            assert_eq!(get_m128(r, 3), 4.25);
+        };
+    }
+
+    #[simd_test(enable = "sse")]
+    const fn test_mm_set1_ps() {
+        test_mm_set1_ps_impl!(_mm_set1_ps);
+    }
+
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_set1_ps() {
-        let r1 = _mm_set1_ps(black_box(4.25));
-        let r2 = _mm_set_ps1(black_box(4.25));
-        assert_eq!(get_m128(r1, 0), 4.25);
-        assert_eq!(get_m128(r1, 1), 4.25);
-        assert_eq!(get_m128(r1, 2), 4.25);
-        assert_eq!(get_m128(r1, 3), 4.25);
-        assert_eq!(get_m128(r2, 0), 4.25);
-        assert_eq!(get_m128(r2, 1), 4.25);
-        assert_eq!(get_m128(r2, 2), 4.25);
-        assert_eq!(get_m128(r2, 3), 4.25);
+    const fn test_mm_set_ps1() {
+        test_mm_set1_ps_impl!(_mm_set_ps1);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_set_ps() {
+    const fn test_mm_set_ps() {
         let r = _mm_set_ps(
             black_box(1.0),
             black_box(2.0),
@@ -3036,7 +3127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_setr_ps() {
+    const fn test_mm_setr_ps() {
         let r = _mm_setr_ps(
             black_box(1.0),
             black_box(2.0),
@@ -3047,20 +3138,21 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_setzero_ps() {
+    const fn test_mm_setzero_ps() {
         let r = *black_box(&_mm_setzero_ps());
         assert_eq_m128(r, _mm_set1_ps(0.0));
     }
 
-    #[simd_test(enable = "sse")]
-    unsafe fn test_MM_SHUFFLE() {
+    #[simd_test]
+    #[allow(non_snake_case)]
+    const fn test_MM_SHUFFLE() {
         assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
         assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
         assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_shuffle_ps() {
+    const fn test_mm_shuffle_ps() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
         let r = _mm_shuffle_ps::<0b00_01_01_11>(a, b);
@@ -3068,7 +3160,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_unpackhi_ps() {
+    const fn test_mm_unpackhi_ps() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
         let r = _mm_unpackhi_ps(a, b);
@@ -3076,7 +3168,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_unpacklo_ps() {
+    const fn test_mm_unpacklo_ps() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
         let r = _mm_unpacklo_ps(a, b);
@@ -3084,7 +3176,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_movehl_ps() {
+    const fn test_mm_movehl_ps() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
         let r = _mm_movehl_ps(a, b);
@@ -3092,7 +3184,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_movelh_ps() {
+    const fn test_mm_movelh_ps() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
         let r = _mm_movelh_ps(a, b);
@@ -3100,187 +3192,152 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_load_ss() {
+    const fn test_mm_load_ss() {
         let a = 42.0f32;
-        let r = _mm_load_ss(ptr::addr_of!(a));
+        let r = unsafe { _mm_load_ss(ptr::addr_of!(a)) };
         assert_eq_m128(r, _mm_setr_ps(42.0, 0.0, 0.0, 0.0));
     }
 
-    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_load1_ps() {
-        let a = 42.0f32;
-        let r = _mm_load1_ps(ptr::addr_of!(a));
-        assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
+    macro_rules! test_mm_load1_ps_impl {
+        ($alias:ident) => {
+            let a = 42.0f32;
+            let r = unsafe { $alias(ptr::addr_of!(a)) };
+            assert_eq_m128(r, _mm_setr_ps(42.0, 42.0, 42.0, 42.0));
+        };
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_load_ps() {
-        let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+    const fn test_mm_load1_ps() {
+        test_mm_load1_ps_impl!(_mm_load1_ps);
+    }
 
-        let mut p = vals.as_ptr();
-        let mut fixup = 0.0f32;
+    #[simd_test(enable = "sse")]
+    const fn test_mm_load_ps1() {
+        test_mm_load1_ps_impl!(_mm_load_ps1);
+    }
 
-        // Make sure p is aligned, otherwise we might get a
-        // (signal: 11, SIGSEGV: invalid memory reference)
+    #[simd_test(enable = "sse")]
+    const fn test_mm_load_ps() {
+        let vals = Memory {
+            data: [1.0f32, 2.0, 3.0, 4.0],
+        };
 
-        let unalignment = (p as usize) & 0xf;
-        if unalignment != 0 {
-            let delta = (16 - unalignment) >> 2;
-            fixup = delta as f32;
-            p = p.add(delta);
-        }
+        // guaranteed to be aligned to 16 bytes
+        let p = vals.data.as_ptr();
 
-        let r = _mm_load_ps(p);
-        let e = _mm_add_ps(_mm_setr_ps(1.0, 2.0, 3.0, 4.0), _mm_set1_ps(fixup));
+        let r = unsafe { _mm_load_ps(p) };
+        let e = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_loadu_ps() {
+    const fn test_mm_loadu_ps() {
         let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
-        let p = vals.as_ptr().add(3);
-        let r = _mm_loadu_ps(black_box(p));
+        let p = unsafe { vals.as_ptr().add(3) };
+        let r = unsafe { _mm_loadu_ps(black_box(p)) };
         assert_eq_m128(r, _mm_setr_ps(4.0, 5.0, 6.0, 7.0));
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_loadr_ps() {
-        let vals = &[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
-
-        let mut p = vals.as_ptr();
-        let mut fixup = 0.0f32;
+    const fn test_mm_loadr_ps() {
+        let vals = Memory {
+            data: [1.0f32, 2.0, 3.0, 4.0],
+        };
 
-        // Make sure p is aligned, otherwise we might get a
-        // (signal: 11, SIGSEGV: invalid memory reference)
+        // guaranteed to be aligned to 16 bytes
+        let p = vals.data.as_ptr();
 
-        let unalignment = (p as usize) & 0xf;
-        if unalignment != 0 {
-            let delta = (16 - unalignment) >> 2;
-            fixup = delta as f32;
-            p = p.add(delta);
-        }
-
-        let r = _mm_loadr_ps(p);
-        let e = _mm_add_ps(_mm_setr_ps(4.0, 3.0, 2.0, 1.0), _mm_set1_ps(fixup));
+        let r = unsafe { _mm_loadr_ps(p) };
+        let e = _mm_setr_ps(4.0, 3.0, 2.0, 1.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_store_ss() {
+    const fn test_mm_store_ss() {
         let mut vals = [0.0f32; 8];
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-        _mm_store_ss(vals.as_mut_ptr().add(1), a);
+        unsafe {
+            _mm_store_ss(vals.as_mut_ptr().add(1), a);
+        }
 
         assert_eq!(vals[0], 0.0);
         assert_eq!(vals[1], 1.0);
         assert_eq!(vals[2], 0.0);
     }
 
-    #[simd_test(enable = "sse")]
-    unsafe fn test_mm_store1_ps() {
-        let mut vals = [0.0f32; 8];
-        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
-
-        let mut ofs = 0;
-        let mut p = vals.as_mut_ptr();
-
-        if (p as usize) & 0xf != 0 {
-            ofs = (16 - ((p as usize) & 0xf)) >> 2;
-            p = p.add(ofs);
-        }
+    macro_rules! test_mm_store1_ps_impl {
+        ($alias:ident) => {
+            let mut vals = Memory { data: [0.0f32; 4] };
+            let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
+            let p = vals.data.as_mut_ptr();
+            unsafe { $alias(p, *black_box(&a)) };
+            assert_eq!(vals.data, [1.0, 1.0, 1.0, 1.0]);
+        };
+    }
 
-        _mm_store1_ps(p, *black_box(&a));
+    #[simd_test(enable = "sse")]
+    const fn test_mm_store1_ps() {
+        test_mm_store1_ps_impl!(_mm_store1_ps);
+    }
 
-        if ofs > 0 {
-            assert_eq!(vals[ofs - 1], 0.0);
-        }
-        assert_eq!(vals[ofs + 0], 1.0);
-        assert_eq!(vals[ofs + 1], 1.0);
-        assert_eq!(vals[ofs + 2], 1.0);
-        assert_eq!(vals[ofs + 3], 1.0);
-        assert_eq!(vals[ofs + 4], 0.0);
+    #[simd_test(enable = "sse")]
+    const fn test_mm_store_ps1() {
+        test_mm_store1_ps_impl!(_mm_store_ps1);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_store_ps() {
-        let mut vals = [0.0f32; 8];
+    const fn test_mm_store_ps() {
+        let mut vals = Memory { data: [0.0f32; 4] };
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
 
-        let mut ofs = 0;
-        let mut p = vals.as_mut_ptr();
+        // guaranteed to be aligned to 16 bytes
+        let p = vals.data.as_mut_ptr();
 
-        // Align p to 16-byte boundary
-        if (p as usize) & 0xf != 0 {
-            ofs = (16 - ((p as usize) & 0xf)) >> 2;
-            p = p.add(ofs);
+        unsafe {
+            _mm_store_ps(p, *black_box(&a));
         }
 
-        _mm_store_ps(p, *black_box(&a));
-
-        if ofs > 0 {
-            assert_eq!(vals[ofs - 1], 0.0);
-        }
-        assert_eq!(vals[ofs + 0], 1.0);
-        assert_eq!(vals[ofs + 1], 2.0);
-        assert_eq!(vals[ofs + 2], 3.0);
-        assert_eq!(vals[ofs + 3], 4.0);
-        assert_eq!(vals[ofs + 4], 0.0);
+        assert_eq!(vals.data, [1.0, 2.0, 3.0, 4.0]);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_storer_ps() {
-        let mut vals = [0.0f32; 8];
+    const fn test_mm_storer_ps() {
+        let mut vals = Memory { data: [0.0f32; 4] };
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
 
-        let mut ofs = 0;
-        let mut p = vals.as_mut_ptr();
+        // guaranteed to be aligned to 16 bytes
+        let p = vals.data.as_mut_ptr();
 
-        // Align p to 16-byte boundary
-        if (p as usize) & 0xf != 0 {
-            ofs = (16 - ((p as usize) & 0xf)) >> 2;
-            p = p.add(ofs);
+        unsafe {
+            _mm_storer_ps(p, *black_box(&a));
         }
 
-        _mm_storer_ps(p, *black_box(&a));
-
-        if ofs > 0 {
-            assert_eq!(vals[ofs - 1], 0.0);
-        }
-        assert_eq!(vals[ofs + 0], 4.0);
-        assert_eq!(vals[ofs + 1], 3.0);
-        assert_eq!(vals[ofs + 2], 2.0);
-        assert_eq!(vals[ofs + 3], 1.0);
-        assert_eq!(vals[ofs + 4], 0.0);
+        assert_eq!(vals.data, [4.0, 3.0, 2.0, 1.0]);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_storeu_ps() {
-        let mut vals = [0.0f32; 8];
+    const fn test_mm_storeu_ps() {
+        #[repr(align(16))]
+        struct Memory8 {
+            data: [f32; 8],
+        }
+
+        // guaranteed to be aligned to 16 bytes
+        let mut vals = Memory8 { data: [0.0f32; 8] };
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
 
-        let mut ofs = 0;
-        let mut p = vals.as_mut_ptr();
+        // guaranteed to be *not* aligned to 16 bytes
+        let p = unsafe { vals.data.as_mut_ptr().offset(1) };
 
-        // Make sure p is **not** aligned to 16-byte boundary
-        if (p as usize) & 0xf == 0 {
-            ofs = 1;
-            p = p.add(1);
+        unsafe {
+            _mm_storeu_ps(p, *black_box(&a));
         }
 
-        _mm_storeu_ps(p, *black_box(&a));
-
-        if ofs > 0 {
-            assert_eq!(vals[ofs - 1], 0.0);
-        }
-        assert_eq!(vals[ofs + 0], 1.0);
-        assert_eq!(vals[ofs + 1], 2.0);
-        assert_eq!(vals[ofs + 2], 3.0);
-        assert_eq!(vals[ofs + 3], 4.0);
-        assert_eq!(vals[ofs + 4], 0.0);
+        assert_eq!(vals.data, [0.0, 1.0, 2.0, 3.0, 4.0, 0.0, 0.0, 0.0]);
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_move_ss() {
+    const fn test_mm_move_ss() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
 
@@ -3290,7 +3347,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_movemask_ps() {
+    const fn test_mm_movemask_ps() {
         let r = _mm_movemask_ps(_mm_setr_ps(-1.0, 5.0, -5.0, 0.0));
         assert_eq!(r, 0b0101);
 
@@ -3301,12 +3358,12 @@ mod tests {
     #[simd_test(enable = "sse")]
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_sfence() {
+    fn test_mm_sfence() {
         _mm_sfence();
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_MM_TRANSPOSE4_PS() {
+    const fn test_MM_TRANSPOSE4_PS() {
         let mut a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let mut b = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
         let mut c = _mm_setr_ps(9.0, 10.0, 11.0, 12.0);
@@ -3329,11 +3386,13 @@ mod tests {
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     // (non-temporal store)
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_stream_ps() {
+    fn test_mm_stream_ps() {
         let a = _mm_set1_ps(7.0);
         let mut mem = Memory { data: [-1.0; 4] };
 
-        _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
+        unsafe {
+            _mm_stream_ps(ptr::addr_of_mut!(mem.data[0]), a);
+        }
         _mm_sfence();
         for i in 0..4 {
             assert_eq!(mem.data[i], get_m128(a, i));
diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs
index 11335856fb..1f97f3c69d 100644
--- a/crates/core_arch/src/x86/sse2.rs
+++ b/crates/core_arch/src/x86/sse2.rs
@@ -76,7 +76,8 @@ pub fn _mm_mfence() {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_add(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -87,7 +88,8 @@ pub fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_add(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -98,7 +100,8 @@ pub fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_add(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -109,7 +112,8 @@ pub fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_add(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -120,7 +124,8 @@ pub fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -131,7 +136,8 @@ pub fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -142,7 +148,8 @@ pub fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddusb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) }
 }
 
@@ -153,7 +160,8 @@ pub fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(paddusw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) }
 }
 
@@ -164,7 +172,8 @@ pub fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pavgb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let a = simd_cast::<_, u16x16>(a.as_u8x16());
         let b = simd_cast::<_, u16x16>(b.as_u8x16());
@@ -180,7 +189,8 @@ pub fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pavgw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let a = simd_cast::<_, u32x8>(a.as_u16x8());
         let b = simd_cast::<_, u32x8>(b.as_u16x8());
@@ -201,12 +211,19 @@ pub fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pmaddwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let r: i32x8 = simd_mul(simd_cast(a.as_i16x8()), simd_cast(b.as_i16x8()));
-        let even: i32x4 = simd_shuffle!(r, r, [0, 2, 4, 6]);
-        let odd: i32x4 = simd_shuffle!(r, r, [1, 3, 5, 7]);
-        simd_add(even, odd).as_m128i()
-    }
+    // It's a trick used in the Adler-32 algorithm to perform a widening addition.
+    //
+    // ```rust
+    // #[target_feature(enable = "sse2")]
+    // unsafe fn widening_add(mad: __m128i) -> __m128i {
+    //     _mm_madd_epi16(mad, _mm_set1_epi16(1))
+    // }
+    // ```
+    //
+    // If we implement this using generic vector intrinsics, the optimizer
+    // will eliminate this pattern, and `pmaddwd` will no longer be emitted.
+    // For this reason, we use x86 intrinsics.
+    unsafe { transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) }
 }
 
 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -217,12 +234,9 @@ pub fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmaxsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_i16x8();
-        let b = b.as_i16x8();
-        transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imax(a.as_i16x8(), b.as_i16x8()).as_m128i() }
 }
 
 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
@@ -233,12 +247,9 @@ pub fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmaxub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_u8x16();
-        let b = b.as_u8x16();
-        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imax(a.as_u8x16(), b.as_u8x16()).as_m128i() }
 }
 
 /// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -249,12 +260,9 @@ pub fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pminsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_i16x8();
-        let b = b.as_i16x8();
-        transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imin(a.as_i16x8(), b.as_i16x8()).as_m128i() }
 }
 
 /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
@@ -265,12 +273,9 @@ pub fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pminub))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_u8x16();
-        let b = b.as_u8x16();
-        transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imin(a.as_u8x16(), b.as_u8x16()).as_m128i() }
 }
 
 /// Multiplies the packed 16-bit integers in `a` and `b`.
@@ -283,7 +288,8 @@ pub fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmulhw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let a = simd_cast::<_, i32x8>(a.as_i16x8());
         let b = simd_cast::<_, i32x8>(b.as_i16x8());
@@ -302,7 +308,8 @@ pub fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmulhuw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let a = simd_cast::<_, u32x8>(a.as_u16x8());
         let b = simd_cast::<_, u32x8>(b.as_u16x8());
@@ -321,7 +328,8 @@ pub fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmullw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -335,11 +343,12 @@ pub fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmuludq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let a = a.as_u64x2();
         let b = b.as_u64x2();
-        let mask = u64x2::splat(u32::MAX.into());
+        let mask = u64x2::splat(u32::MAX as u64);
         transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
     }
 }
@@ -367,7 +376,8 @@ pub fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -378,7 +388,8 @@ pub fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -389,7 +400,8 @@ pub fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -400,7 +412,8 @@ pub fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -412,7 +425,8 @@ pub fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -424,7 +438,8 @@ pub fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -436,7 +451,8 @@ pub fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubusb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) }
 }
 
@@ -448,7 +464,8 @@ pub fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(psubusw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) }
 }
 
@@ -460,7 +477,8 @@ pub fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe { _mm_slli_si128_impl::<IMM8>(a) }
 }
@@ -469,7 +487,8 @@ pub fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
 /// `_mm_slli_si128` intrinsic into a compile-time constant.
 #[inline]
 #[target_feature(enable = "sse2")]
-unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+const unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
     const fn mask(shift: i32, i: u32) -> u32 {
         let shift = shift as u32 & 0xff;
         if shift > 15 { i } else { 16 - shift + i }
@@ -506,7 +525,8 @@ unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         _mm_slli_si128_impl::<IMM8>(a)
@@ -521,7 +541,8 @@ pub fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
     unsafe {
         static_assert_uimm_bits!(IMM8, 8);
         _mm_srli_si128_impl::<IMM8>(a)
@@ -536,7 +557,8 @@ pub fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 16 {
@@ -567,7 +589,8 @@ pub fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 32 {
@@ -598,7 +621,8 @@ pub fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 64 {
@@ -630,7 +654,8 @@ pub fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe { transmute(simd_shr(a.as_i16x8(), i16x8::splat(IMM8.min(15) as i16))) }
 }
@@ -656,7 +681,8 @@ pub fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe { transmute(simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31)))) }
 }
@@ -681,7 +707,8 @@ pub fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe { _mm_srli_si128_impl::<IMM8>(a) }
 }
@@ -690,7 +717,8 @@ pub fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
 /// `_mm_srli_si128` intrinsic into a compile-time constant.
 #[inline]
 #[target_feature(enable = "sse2")]
-unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+const unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
     const fn mask(shift: i32, i: u32) -> u32 {
         if (shift as u32) > 15 {
             i + 16
@@ -732,7 +760,8 @@ unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 16 {
@@ -764,7 +793,8 @@ pub fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 32 {
@@ -796,7 +826,8 @@ pub fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         if IMM8 >= 64 {
@@ -827,7 +858,8 @@ pub fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(andps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
     unsafe { simd_and(a, b) }
 }
 
@@ -839,7 +871,8 @@ pub fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(andnps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
     unsafe { simd_and(simd_xor(_mm_set1_epi8(-1), a), b) }
 }
 
@@ -851,7 +884,8 @@ pub fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(orps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
     unsafe { simd_or(a, b) }
 }
 
@@ -863,7 +897,8 @@ pub fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(xorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
     unsafe { simd_xor(a, b) }
 }
 
@@ -874,7 +909,8 @@ pub fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpeqb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -885,7 +921,8 @@ pub fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpeqw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -896,7 +933,8 @@ pub fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpeqd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -907,7 +945,8 @@ pub fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -918,7 +957,8 @@ pub fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -929,7 +969,8 @@ pub fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -940,7 +981,8 @@ pub fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) }
 }
 
@@ -951,7 +993,8 @@ pub fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) }
 }
 
@@ -962,7 +1005,8 @@ pub fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pcmpgtd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -974,7 +1018,8 @@ pub fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtdq2pd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
     unsafe {
         let a = a.as_i32x4();
         simd_cast::<i32x2, __m128d>(simd_shuffle!(a, a, [0, 1]))
@@ -989,7 +1034,8 @@ pub fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsi2sd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
     unsafe { simd_insert!(a, 0, b as f64) }
 }
 
@@ -1001,7 +1047,8 @@ pub fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtdq2ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
     unsafe { transmute(simd_cast::<_, f32x4>(a.as_i32x4())) }
 }
 
@@ -1024,7 +1071,8 @@ pub fn _mm_cvtps_epi32(a: __m128) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi32_si128(a: i32) -> __m128i {
     unsafe { transmute(i32x4::new(a, 0, 0, 0)) }
 }
 
@@ -1034,7 +1082,8 @@ pub fn _mm_cvtsi32_si128(a: i32) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
     unsafe { simd_extract!(a.as_i32x4(), 0) }
 }
 
@@ -1046,7 +1095,8 @@ pub fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
     unsafe { transmute(i64x2::new(e0, e1)) }
 }
 
@@ -1057,7 +1107,8 @@ pub fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
     unsafe { transmute(i32x4::new(e0, e1, e2, e3)) }
 }
 
@@ -1068,7 +1119,8 @@ pub fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_epi16(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_epi16(
     e7: i16,
     e6: i16,
     e5: i16,
@@ -1088,7 +1140,8 @@ pub fn _mm_set_epi16(
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_epi8(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_epi8(
     e15: i8,
     e14: i8,
     e13: i8,
@@ -1121,8 +1174,9 @@ pub fn _mm_set_epi8(
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set1_epi64x(a: i64) -> __m128i {
-    _mm_set_epi64x(a, a)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set1_epi64x(a: i64) -> __m128i {
+    i64x2::splat(a).as_m128i()
 }
 
 /// Broadcasts 32-bit integer `a` to all elements.
@@ -1132,8 +1186,9 @@ pub fn _mm_set1_epi64x(a: i64) -> __m128i {
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set1_epi32(a: i32) -> __m128i {
-    _mm_set_epi32(a, a, a, a)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set1_epi32(a: i32) -> __m128i {
+    i32x4::splat(a).as_m128i()
 }
 
 /// Broadcasts 16-bit integer `a` to all elements.
@@ -1143,8 +1198,9 @@ pub fn _mm_set1_epi32(a: i32) -> __m128i {
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set1_epi16(a: i16) -> __m128i {
-    _mm_set_epi16(a, a, a, a, a, a, a, a)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set1_epi16(a: i16) -> __m128i {
+    i16x8::splat(a).as_m128i()
 }
 
 /// Broadcasts 8-bit integer `a` to all elements.
@@ -1154,8 +1210,9 @@ pub fn _mm_set1_epi16(a: i16) -> __m128i {
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set1_epi8(a: i8) -> __m128i {
-    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set1_epi8(a: i8) -> __m128i {
+    i8x16::splat(a).as_m128i()
 }
 
 /// Sets packed 32-bit integers with the supplied values in reverse order.
@@ -1165,7 +1222,8 @@ pub fn _mm_set1_epi8(a: i8) -> __m128i {
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
     _mm_set_epi32(e0, e1, e2, e3)
 }
 
@@ -1176,7 +1234,8 @@ pub fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_setr_epi16(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setr_epi16(
     e7: i16,
     e6: i16,
     e5: i16,
@@ -1196,7 +1255,8 @@ pub fn _mm_setr_epi16(
 #[target_feature(enable = "sse2")]
 // no particular instruction to test
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_setr_epi8(
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setr_epi8(
     e15: i8,
     e14: i8,
     e13: i8,
@@ -1227,7 +1287,8 @@ pub fn _mm_setr_epi8(
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(xorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_setzero_si128() -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setzero_si128() -> __m128i {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -1237,7 +1298,8 @@ pub fn _mm_setzero_si128() -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
     _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
 }
 
@@ -1253,7 +1315,8 @@ pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
     assert_instr(movaps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
     *mem_addr
 }
 
@@ -1266,7 +1329,8 @@ pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movups))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
     let mut dst: __m128i = _mm_undefined_si128();
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
@@ -1315,7 +1379,8 @@ pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8)
     assert_instr(movaps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
     *mem_addr = a;
 }
 
@@ -1328,7 +1393,8 @@ pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
     mem_addr.write_unaligned(a);
 }
 
@@ -1340,7 +1406,8 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
     ptr::copy_nonoverlapping(ptr::addr_of!(a) as *const u8, mem_addr as *mut u8, 8);
 }
 
@@ -1409,14 +1476,15 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
 // FIXME movd on msvc, movd on i686
 #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_move_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_move_epi64(a: __m128i) -> __m128i {
     unsafe {
         let r: i64x2 = simd_shuffle!(a.as_i64x2(), i64x2::ZERO, [0, 2]);
         transmute(r)
     }
 }
 
-/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
+/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using signed saturation.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16)
@@ -1424,11 +1492,29 @@ pub fn _mm_move_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(packsswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = simd_splat(i8::MAX as i16);
+        let min = simd_splat(i8::MIN as i16);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min)
+            .as_m128i()
+            .as_i8x16();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x8(), max), min)
+            .as_m128i()
+            .as_i8x16();
+
+        // Shuffle the low i8 of each i16 from two concatenated vectors into
+        // the low bits of the result register.
+        const IDXS: [u32; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
+        let result: i8x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m128i()
+    }
 }
 
-/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using signed saturation.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32)
@@ -1436,11 +1522,25 @@ pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(packssdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = simd_splat(i16::MAX as i32);
+        let min = simd_splat(i16::MIN as i32);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min);
+        let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min);
+
+        let clamped_a: i16x4 = simd_cast(clamped_a);
+        let clamped_b: i16x4 = simd_cast(clamped_b);
+
+        let a: i64 = transmute(clamped_a);
+        let b: i64 = transmute(clamped_b);
+        i64x2::new(a, b).as_m128i()
+    }
 }
 
-/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
+/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers
 /// using unsigned saturation.
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16)
@@ -1448,8 +1548,28 @@ pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(packuswb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = simd_splat(u8::MAX as i16);
+        let min = simd_splat(u8::MIN as i16);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min)
+            .as_m128i()
+            .as_i8x16();
+        let clamped_b = simd_imax(simd_imin(b.as_i16x8(), max), min)
+            .as_m128i()
+            .as_i8x16();
+
+        // Shuffle the low bytes of each i16 from two concatenated vectors into
+        // the low bits of the result register.
+        // Without `simd_shuffle`, this intrinsic will cause the AVX-512BW
+        // `_mm_mask_packus_epi16` and `_mm_maskz_packus_epi16` tests to fail.
+        const IDXS: [u32; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30];
+        let result: i8x16 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m128i()
+    }
 }
 
 /// Returns the `imm8` element of `a`.
@@ -1460,7 +1580,8 @@ pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
     static_assert_uimm_bits!(IMM8, 3);
     unsafe { simd_extract!(a.as_u16x8(), IMM8 as u32, u16) as i32 }
 }
@@ -1473,7 +1594,8 @@ pub fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
 #[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
     static_assert_uimm_bits!(IMM8, 3);
     unsafe { transmute(simd_insert!(a.as_i16x8(), IMM8 as u32, i as i16)) }
 }
@@ -1485,7 +1607,8 @@ pub fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(pmovmskb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movemask_epi8(a: __m128i) -> i32 {
     unsafe {
         let z = i8x16::ZERO;
         let m: i8x16 = simd_lt(a.as_i8x16(), z);
@@ -1501,7 +1624,8 @@ pub fn _mm_movemask_epi8(a: __m128i) -> i32 {
 #[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         let a = a.as_i32x4();
@@ -1531,7 +1655,8 @@ pub fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         let a = a.as_i16x8();
@@ -1565,7 +1690,8 @@ pub fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         let a = a.as_i16x8();
@@ -1594,7 +1720,8 @@ pub fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpckhbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         transmute::<i8x16, _>(simd_shuffle!(
             a.as_i8x16(),
@@ -1611,7 +1738,8 @@ pub fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpckhwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
         transmute::<i16x8, _>(x)
@@ -1625,7 +1753,8 @@ pub fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(unpckhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) }
 }
 
@@ -1636,7 +1765,8 @@ pub fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(unpckhpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [1, 3])) }
 }
 
@@ -1647,7 +1777,8 @@ pub fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpcklbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         transmute::<i8x16, _>(simd_shuffle!(
             a.as_i8x16(),
@@ -1664,7 +1795,8 @@ pub fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(punpcklwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let x = simd_shuffle!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
         transmute::<i16x8, _>(x)
@@ -1678,7 +1810,8 @@ pub fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(unpcklps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i32x4, _>(simd_shuffle!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) }
 }
 
@@ -1689,7 +1822,8 @@ pub fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movlhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute::<i64x2, _>(simd_shuffle!(a.as_i64x2(), b.as_i64x2(), [0, 2])) }
 }
 
@@ -1701,7 +1835,8 @@ pub fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(addsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) }
 }
 
@@ -1713,7 +1848,8 @@ pub fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(addpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_add(a, b) }
 }
 
@@ -1725,7 +1861,8 @@ pub fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(divsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) }
 }
 
@@ -1737,7 +1874,8 @@ pub fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(divpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_div(a, b) }
 }
 
@@ -1797,7 +1935,8 @@ pub fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(mulsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) }
 }
 
@@ -1809,7 +1948,8 @@ pub fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(mulpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_mul(a, b) }
 }
 
@@ -1844,7 +1984,8 @@ pub fn _mm_sqrt_pd(a: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(subsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_insert!(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) }
 }
 
@@ -1856,7 +1997,8 @@ pub fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(subpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_sub(a, b) }
 }
 
@@ -1868,7 +2010,8 @@ pub fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(andps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let a: __m128i = transmute(a);
         let b: __m128i = transmute(b);
@@ -1883,7 +2026,8 @@ pub fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(andnps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let a: __m128i = transmute(a);
         let b: __m128i = transmute(b);
@@ -1898,7 +2042,8 @@ pub fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(orps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let a: __m128i = transmute(a);
         let b: __m128i = transmute(b);
@@ -1913,7 +2058,8 @@ pub fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(xorps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let a: __m128i = transmute(a);
         let b: __m128i = transmute(b);
@@ -2341,7 +2487,8 @@ pub fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtpd2ps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
     unsafe {
         let r = simd_cast::<_, f32x2>(a.as_f64x2());
         let zero = f32x2::ZERO;
@@ -2358,7 +2505,8 @@ pub fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtps2pd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtps_pd(a: __m128) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtps_pd(a: __m128) -> __m128d {
     unsafe {
         let a = a.as_f32x4();
         transmute(simd_cast::<f32x2, f64x2>(simd_shuffle!(a, a, [0, 1])))
@@ -2409,7 +2557,8 @@ pub fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsd_f64(a: __m128d) -> f64 {
     unsafe { simd_extract!(a, 0) }
 }
 
@@ -2423,7 +2572,8 @@ pub fn _mm_cvtsd_f64(a: __m128d) -> f64 {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtss2sd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
     unsafe {
         let elt: f32 = simd_extract!(b, 0);
         simd_insert!(a, 0, elt as f64)
@@ -2473,7 +2623,8 @@ pub fn _mm_cvttps_epi32(a: __m128) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_sd(a: f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_sd(a: f64) -> __m128d {
     _mm_set_pd(0.0, a)
 }
 
@@ -2484,7 +2635,8 @@ pub fn _mm_set_sd(a: f64) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set1_pd(a: f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set1_pd(a: f64) -> __m128d {
     _mm_set_pd(a, a)
 }
 
@@ -2495,7 +2647,8 @@ pub fn _mm_set1_pd(a: f64) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_pd1(a: f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_pd1(a: f64) -> __m128d {
     _mm_set_pd(a, a)
 }
 
@@ -2506,7 +2659,8 @@ pub fn _mm_set_pd1(a: f64) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_set_pd(a: f64, b: f64) -> __m128d {
     __m128d([b, a])
 }
 
@@ -2517,7 +2671,8 @@ pub fn _mm_set_pd(a: f64, b: f64) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
     _mm_set_pd(b, a)
 }
 
@@ -2529,7 +2684,8 @@ pub fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(xorp))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_setzero_pd() -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_setzero_pd() -> __m128d {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -2543,12 +2699,13 @@ pub fn _mm_setzero_pd() -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movmskpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_movemask_pd(a: __m128d) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movemask_pd(a: __m128d) -> i32 {
     // Propagate the highest bit to the rest, because simd_bitmask
     // requires all-1 or all-0.
     unsafe {
         let mask: i64x2 = simd_lt(transmute(a), i64x2::ZERO);
-        simd_bitmask::<i64x2, u8>(mask).into()
+        simd_bitmask::<i64x2, u8>(mask) as i32
     }
 }
 
@@ -2566,7 +2723,8 @@ pub fn _mm_movemask_pd(a: __m128d) -> i32 {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
     *(mem_addr as *const __m128d)
 }
 
@@ -2578,7 +2736,8 @@ pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
     _mm_setr_pd(*mem_addr, 0.)
 }
 
@@ -2591,7 +2750,8 @@ pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
     _mm_setr_pd(simd_extract!(a, 0), *mem_addr)
 }
 
@@ -2604,7 +2764,8 @@ pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movlps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
     _mm_setr_pd(*mem_addr, simd_extract!(a, 1))
 }
 
@@ -2646,7 +2807,8 @@ pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movlps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
     *mem_addr = simd_extract!(a, 0)
 }
 
@@ -2663,7 +2825,8 @@ pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
     *(mem_addr as *mut __m128d) = a;
 }
 
@@ -2676,7 +2839,8 @@ pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
     mem_addr.cast::<__m128d>().write_unaligned(a);
 }
 
@@ -2688,7 +2852,8 @@ pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86_updates", since = "1.82.0")]
-pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
     ptr::write_unaligned(mem_addr as *mut i16, simd_extract(a.as_i16x8(), 0))
 }
 
@@ -2700,7 +2865,8 @@ pub unsafe fn _mm_storeu_si16(mem_addr: *mut u8, a: __m128i) {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86_updates", since = "1.82.0")]
-pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
     ptr::write_unaligned(mem_addr as *mut i32, simd_extract(a.as_i32x4(), 0))
 }
 
@@ -2712,7 +2878,8 @@ pub unsafe fn _mm_storeu_si32(mem_addr: *mut u8, a: __m128i) {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86_updates", since = "1.82.0")]
-pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
     ptr::write_unaligned(mem_addr as *mut i64, simd_extract(a.as_i64x2(), 0))
 }
 
@@ -2725,7 +2892,8 @@ pub unsafe fn _mm_storeu_si64(mem_addr: *mut u8, a: __m128i) {
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
     let b: __m128d = simd_shuffle!(a, a, [0, 0]);
     *(mem_addr as *mut __m128d) = b;
 }
@@ -2739,7 +2907,8 @@ pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
     let b: __m128d = simd_shuffle!(a, a, [0, 0]);
     *(mem_addr as *mut __m128d) = b;
 }
@@ -2754,7 +2923,8 @@ pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 #[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
     let b: __m128d = simd_shuffle!(a, a, [1, 0]);
     *(mem_addr as *mut __m128d) = b;
 }
@@ -2767,7 +2937,8 @@ pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
     *mem_addr = simd_extract!(a, 1);
 }
 
@@ -2779,7 +2950,8 @@ pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movlps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
     *mem_addr = simd_extract!(a, 0);
 }
 
@@ -2791,7 +2963,8 @@ pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
 #[target_feature(enable = "sse2")]
 // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
     let d = *mem_addr;
     _mm_setr_pd(d, d)
 }
@@ -2804,7 +2977,8 @@ pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
 #[target_feature(enable = "sse2")]
 // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
     _mm_load1_pd(mem_addr)
 }
 
@@ -2820,7 +2994,8 @@ pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
     assert_instr(movaps)
 )]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
     let a = _mm_load_pd(mem_addr);
     simd_shuffle!(a, a, [1, 0])
 }
@@ -2834,7 +3009,8 @@ pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movups))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
     let mut dst = _mm_undefined_pd();
     ptr::copy_nonoverlapping(
         mem_addr as *const u8,
@@ -2852,7 +3028,8 @@ pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86_updates", since = "1.82.0")]
-pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
     transmute(i16x8::new(
         ptr::read_unaligned(mem_addr as *const i16),
         0,
@@ -2873,7 +3050,8 @@ pub unsafe fn _mm_loadu_si16(mem_addr: *const u8) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86_updates", since = "1.82.0")]
-pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
     transmute(i32x4::new(
         ptr::read_unaligned(mem_addr as *const i32),
         0,
@@ -2890,7 +3068,8 @@ pub unsafe fn _mm_loadu_si32(mem_addr: *const u8) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
-pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
     transmute(i64x2::new(ptr::read_unaligned(mem_addr as *const i64), 0))
 }
 
@@ -2904,7 +3083,8 @@ pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
 #[cfg_attr(test, assert_instr(shufps, MASK = 2))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
     static_assert_uimm_bits!(MASK, 8);
     unsafe { simd_shuffle!(a, b, [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) }
 }
@@ -2918,7 +3098,8 @@ pub fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { _mm_setr_pd(simd_extract!(b, 0), simd_extract!(a, 1)) }
 }
 
@@ -2929,7 +3110,8 @@ pub fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castpd_ps(a: __m128d) -> __m128 {
     unsafe { transmute(a) }
 }
 
@@ -2940,7 +3122,8 @@ pub fn _mm_castpd_ps(a: __m128d) -> __m128 {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castpd_si128(a: __m128d) -> __m128i {
     unsafe { transmute(a) }
 }
 
@@ -2951,7 +3134,8 @@ pub fn _mm_castpd_si128(a: __m128d) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_castps_pd(a: __m128) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castps_pd(a: __m128) -> __m128d {
     unsafe { transmute(a) }
 }
 
@@ -2962,7 +3146,8 @@ pub fn _mm_castps_pd(a: __m128) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_castps_si128(a: __m128) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castps_si128(a: __m128) -> __m128i {
     unsafe { transmute(a) }
 }
 
@@ -2973,7 +3158,8 @@ pub fn _mm_castps_si128(a: __m128) -> __m128i {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castsi128_pd(a: __m128i) -> __m128d {
     unsafe { transmute(a) }
 }
 
@@ -2984,7 +3170,8 @@ pub fn _mm_castsi128_pd(a: __m128i) -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_castsi128_ps(a: __m128i) -> __m128 {
     unsafe { transmute(a) }
 }
 
@@ -2997,7 +3184,8 @@ pub fn _mm_castsi128_ps(a: __m128i) -> __m128 {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_undefined_pd() -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_undefined_pd() -> __m128d {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -3010,7 +3198,8 @@ pub fn _mm_undefined_pd() -> __m128d {
 #[inline]
 #[target_feature(enable = "sse2")]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_undefined_si128() -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_undefined_si128() -> __m128i {
     const { unsafe { mem::zeroed() } }
 }
 
@@ -3025,7 +3214,8 @@ pub fn _mm_undefined_si128() -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(unpckhpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_shuffle!(a, b, [1, 3]) }
 }
 
@@ -3040,7 +3230,8 @@ pub fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movlhps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe { simd_shuffle!(a, b, [0, 2]) }
 }
 
@@ -3054,6 +3245,8 @@ unsafe extern "C" {
     fn lfence();
     #[link_name = "llvm.x86.sse2.mfence"]
     fn mfence();
+    #[link_name = "llvm.x86.sse2.pmadd.wd"]
+    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
     #[link_name = "llvm.x86.sse2.psad.bw"]
     fn psadbw(a: u8x16, b: u8x16) -> u64x2;
     #[link_name = "llvm.x86.sse2.psll.w"]
@@ -3076,12 +3269,6 @@ unsafe extern "C" {
     fn cvtps2dq(a: __m128) -> i32x4;
     #[link_name = "llvm.x86.sse2.maskmov.dqu"]
     fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
-    #[link_name = "llvm.x86.sse2.packsswb.128"]
-    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
-    #[link_name = "llvm.x86.sse2.packssdw.128"]
-    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
-    #[link_name = "llvm.x86.sse2.packuswb.128"]
-    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
     #[link_name = "llvm.x86.sse2.max.sd"]
     fn maxsd(a: __m128d, b: __m128d) -> __m128d;
     #[link_name = "llvm.x86.sse2.max.pd"]
@@ -3134,15 +3321,12 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use crate::{
         core_arch::{simd::*, x86::*},
         hint::black_box,
     };
-    use std::{
-        boxed, f32, f64,
-        mem::{self, transmute},
-        ptr,
-    };
+    use std::{boxed, f32, f64, mem, ptr};
     use stdarch_test::simd_test;
 
     const NAN: f64 = f64::NAN;
@@ -3153,27 +3337,29 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_clflush() {
+    fn test_mm_clflush() {
         let x = 0_u8;
-        _mm_clflush(ptr::addr_of!(x));
+        unsafe {
+            _mm_clflush(ptr::addr_of!(x));
+        }
     }
 
     #[simd_test(enable = "sse2")]
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_lfence() {
+    fn test_mm_lfence() {
         _mm_lfence();
     }
 
     #[simd_test(enable = "sse2")]
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_mfence() {
+    fn test_mm_mfence() {
         _mm_mfence();
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_add_epi8() {
+    const fn test_mm_add_epi8() {
         let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let b = _mm_setr_epi8(
@@ -3188,7 +3374,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_add_epi8_overflow() {
+    fn test_mm_add_epi8_overflow() {
         let a = _mm_set1_epi8(0x7F);
         let b = _mm_set1_epi8(1);
         let r = _mm_add_epi8(a, b);
@@ -3196,7 +3382,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_add_epi16() {
+    const fn test_mm_add_epi16() {
         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_add_epi16(a, b);
@@ -3205,7 +3391,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_add_epi32() {
+    const fn test_mm_add_epi32() {
         let a = _mm_setr_epi32(0, 1, 2, 3);
         let b = _mm_setr_epi32(4, 5, 6, 7);
         let r = _mm_add_epi32(a, b);
@@ -3214,7 +3400,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_add_epi64() {
+    const fn test_mm_add_epi64() {
         let a = _mm_setr_epi64x(0, 1);
         let b = _mm_setr_epi64x(2, 3);
         let r = _mm_add_epi64(a, b);
@@ -3223,7 +3409,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epi8() {
+    const fn test_mm_adds_epi8() {
         let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let b = _mm_setr_epi8(
@@ -3238,7 +3424,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epi8_saturate_positive() {
+    fn test_mm_adds_epi8_saturate_positive() {
         let a = _mm_set1_epi8(0x7F);
         let b = _mm_set1_epi8(1);
         let r = _mm_adds_epi8(a, b);
@@ -3246,7 +3432,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epi8_saturate_negative() {
+    fn test_mm_adds_epi8_saturate_negative() {
         let a = _mm_set1_epi8(-0x80);
         let b = _mm_set1_epi8(-1);
         let r = _mm_adds_epi8(a, b);
@@ -3254,7 +3440,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epi16() {
+    const fn test_mm_adds_epi16() {
         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_adds_epi16(a, b);
@@ -3263,7 +3449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epi16_saturate_positive() {
+    fn test_mm_adds_epi16_saturate_positive() {
         let a = _mm_set1_epi16(0x7FFF);
         let b = _mm_set1_epi16(1);
         let r = _mm_adds_epi16(a, b);
@@ -3271,7 +3457,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epi16_saturate_negative() {
+    fn test_mm_adds_epi16_saturate_negative() {
         let a = _mm_set1_epi16(-0x8000);
         let b = _mm_set1_epi16(-1);
         let r = _mm_adds_epi16(a, b);
@@ -3279,7 +3465,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epu8() {
+    const fn test_mm_adds_epu8() {
         let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         #[rustfmt::skip]
         let b = _mm_setr_epi8(
@@ -3294,7 +3480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epu8_saturate() {
+    fn test_mm_adds_epu8_saturate() {
         let a = _mm_set1_epi8(!0);
         let b = _mm_set1_epi8(1);
         let r = _mm_adds_epu8(a, b);
@@ -3302,7 +3488,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epu16() {
+    const fn test_mm_adds_epu16() {
         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_adds_epu16(a, b);
@@ -3311,7 +3497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_adds_epu16_saturate() {
+    fn test_mm_adds_epu16_saturate() {
         let a = _mm_set1_epi16(!0);
         let b = _mm_set1_epi16(1);
         let r = _mm_adds_epu16(a, b);
@@ -3319,21 +3505,21 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_avg_epu8() {
+    const fn test_mm_avg_epu8() {
         let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
         let r = _mm_avg_epu8(a, b);
         assert_eq_m128i(r, _mm_set1_epi8(6));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_avg_epu16() {
+    const fn test_mm_avg_epu16() {
         let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
         let r = _mm_avg_epu16(a, b);
         assert_eq_m128i(r, _mm_set1_epi16(6));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_madd_epi16() {
+    fn test_mm_madd_epi16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm_madd_epi16(a, b);
@@ -3368,7 +3554,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_max_epi16() {
+    const fn test_mm_max_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(-1);
         let r = _mm_max_epi16(a, b);
@@ -3376,7 +3562,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_max_epu8() {
+    const fn test_mm_max_epu8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(!0);
         let r = _mm_max_epu8(a, b);
@@ -3384,7 +3570,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_min_epi16() {
+    const fn test_mm_min_epi16() {
         let a = _mm_set1_epi16(1);
         let b = _mm_set1_epi16(-1);
         let r = _mm_min_epi16(a, b);
@@ -3392,7 +3578,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_min_epu8() {
+    const fn test_mm_min_epu8() {
         let a = _mm_set1_epi8(1);
         let b = _mm_set1_epi8(!0);
         let r = _mm_min_epu8(a, b);
@@ -3400,28 +3586,28 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_mulhi_epi16() {
+    const fn test_mm_mulhi_epi16() {
         let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
         let r = _mm_mulhi_epi16(a, b);
         assert_eq_m128i(r, _mm_set1_epi16(-16));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_mulhi_epu16() {
+    const fn test_mm_mulhi_epu16() {
         let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
         let r = _mm_mulhi_epu16(a, b);
         assert_eq_m128i(r, _mm_set1_epi16(15));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_mullo_epi16() {
+    const fn test_mm_mullo_epi16() {
         let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
         let r = _mm_mullo_epi16(a, b);
         assert_eq_m128i(r, _mm_set1_epi16(-17960));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_mul_epu32() {
+    const fn test_mm_mul_epu32() {
         let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
         let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
         let r = _mm_mul_epu32(a, b);
@@ -3430,7 +3616,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sad_epu8() {
+    fn test_mm_sad_epu8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
@@ -3445,42 +3631,42 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sub_epi8() {
+    const fn test_mm_sub_epi8() {
         let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
         let r = _mm_sub_epi8(a, b);
         assert_eq_m128i(r, _mm_set1_epi8(-1));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sub_epi16() {
+    const fn test_mm_sub_epi16() {
         let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
         let r = _mm_sub_epi16(a, b);
         assert_eq_m128i(r, _mm_set1_epi16(-1));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sub_epi32() {
+    const fn test_mm_sub_epi32() {
         let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
         let r = _mm_sub_epi32(a, b);
         assert_eq_m128i(r, _mm_set1_epi32(-1));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sub_epi64() {
+    const fn test_mm_sub_epi64() {
         let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
         let r = _mm_sub_epi64(a, b);
         assert_eq_m128i(r, _mm_set1_epi64x(-1));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epi8() {
+    const fn test_mm_subs_epi8() {
         let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
         let r = _mm_subs_epi8(a, b);
         assert_eq_m128i(r, _mm_set1_epi8(3));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epi8_saturate_positive() {
+    fn test_mm_subs_epi8_saturate_positive() {
         let a = _mm_set1_epi8(0x7F);
         let b = _mm_set1_epi8(-1);
         let r = _mm_subs_epi8(a, b);
@@ -3488,7 +3674,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epi8_saturate_negative() {
+    fn test_mm_subs_epi8_saturate_negative() {
         let a = _mm_set1_epi8(-0x80);
         let b = _mm_set1_epi8(1);
         let r = _mm_subs_epi8(a, b);
@@ -3496,14 +3682,14 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epi16() {
+    const fn test_mm_subs_epi16() {
         let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
         let r = _mm_subs_epi16(a, b);
         assert_eq_m128i(r, _mm_set1_epi16(3));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epi16_saturate_positive() {
+    fn test_mm_subs_epi16_saturate_positive() {
         let a = _mm_set1_epi16(0x7FFF);
         let b = _mm_set1_epi16(-1);
         let r = _mm_subs_epi16(a, b);
@@ -3511,7 +3697,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epi16_saturate_negative() {
+    fn test_mm_subs_epi16_saturate_negative() {
         let a = _mm_set1_epi16(-0x8000);
         let b = _mm_set1_epi16(1);
         let r = _mm_subs_epi16(a, b);
@@ -3519,14 +3705,14 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epu8() {
+    const fn test_mm_subs_epu8() {
         let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
         let r = _mm_subs_epu8(a, b);
         assert_eq_m128i(r, _mm_set1_epi8(3));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epu8_saturate() {
+    fn test_mm_subs_epu8_saturate() {
         let a = _mm_set1_epi8(0);
         let b = _mm_set1_epi8(1);
         let r = _mm_subs_epu8(a, b);
@@ -3534,14 +3720,14 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epu16() {
+    const fn test_mm_subs_epu16() {
         let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
         let r = _mm_subs_epu16(a, b);
         assert_eq_m128i(r, _mm_set1_epi16(3));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_subs_epu16_saturate() {
+    fn test_mm_subs_epu16_saturate() {
         let a = _mm_set1_epi16(0);
         let b = _mm_set1_epi16(1);
         let r = _mm_subs_epu16(a, b);
@@ -3549,7 +3735,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_slli_si128() {
+    const fn test_mm_slli_si128() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
@@ -3575,7 +3761,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_slli_epi16() {
+    const fn test_mm_slli_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_slli_epi16::<4>(a);
         assert_eq_m128i(
@@ -3587,7 +3773,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sll_epi16() {
+    fn test_mm_sll_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_sll_epi16(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(
@@ -3603,7 +3789,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_slli_epi32() {
+    const fn test_mm_slli_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_slli_epi32::<4>(a);
         assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
@@ -3612,7 +3798,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sll_epi32() {
+    fn test_mm_sll_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_sll_epi32(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_setr_epi32(0xEEEE0, -0xEEEE0, 0xFFFF0, -0xFFFF0));
@@ -3625,7 +3811,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_slli_epi64() {
+    const fn test_mm_slli_epi64() {
         let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
         let r = _mm_slli_epi64::<4>(a);
         assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
@@ -3634,7 +3820,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sll_epi64() {
+    fn test_mm_sll_epi64() {
         let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
         let r = _mm_sll_epi64(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFFF0, -0xFFFFFFFF0));
@@ -3647,7 +3833,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_srai_epi16() {
+    const fn test_mm_srai_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_srai_epi16::<4>(a);
         assert_eq_m128i(
@@ -3659,7 +3845,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sra_epi16() {
+    fn test_mm_sra_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_sra_epi16(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(
@@ -3675,7 +3861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_srai_epi32() {
+    const fn test_mm_srai_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_srai_epi32::<4>(a);
         assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
@@ -3684,7 +3870,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sra_epi32() {
+    fn test_mm_sra_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_sra_epi32(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_setr_epi32(0xEEE, -0xEEF, 0xFFF, -0x1000));
@@ -3697,7 +3883,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_srli_si128() {
+    const fn test_mm_srli_si128() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
@@ -3726,7 +3912,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_srli_epi16() {
+    const fn test_mm_srli_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_srli_epi16::<4>(a);
         assert_eq_m128i(
@@ -3738,7 +3924,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_srl_epi16() {
+    fn test_mm_srl_epi16() {
         let a = _mm_setr_epi16(0xCC, -0xCC, 0xDD, -0xDD, 0xEE, -0xEE, 0xFF, -0xFF);
         let r = _mm_srl_epi16(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(
@@ -3754,7 +3940,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_srli_epi32() {
+    const fn test_mm_srli_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_srli_epi32::<4>(a);
         assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
@@ -3763,7 +3949,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_srl_epi32() {
+    fn test_mm_srl_epi32() {
         let a = _mm_setr_epi32(0xEEEE, -0xEEEE, 0xFFFF, -0xFFFF);
         let r = _mm_srl_epi32(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_setr_epi32(0xEEE, 0xFFFF111, 0xFFF, 0xFFFF000));
@@ -3776,7 +3962,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_srli_epi64() {
+    const fn test_mm_srli_epi64() {
         let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
         let r = _mm_srli_epi64::<4>(a);
         assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
@@ -3785,7 +3971,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_srl_epi64() {
+    fn test_mm_srl_epi64() {
         let a = _mm_set_epi64x(0xFFFFFFFF, -0xFFFFFFFF);
         let r = _mm_srl_epi64(a, _mm_set_epi64x(0, 4));
         assert_eq_m128i(r, _mm_set_epi64x(0xFFFFFFF, 0xFFFFFFFF0000000));
@@ -3798,7 +3984,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_and_si128() {
+    const fn test_mm_and_si128() {
         let a = _mm_set1_epi8(5);
         let b = _mm_set1_epi8(3);
         let r = _mm_and_si128(a, b);
@@ -3806,7 +3992,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_andnot_si128() {
+    const fn test_mm_andnot_si128() {
         let a = _mm_set1_epi8(5);
         let b = _mm_set1_epi8(3);
         let r = _mm_andnot_si128(a, b);
@@ -3814,7 +4000,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_or_si128() {
+    const fn test_mm_or_si128() {
         let a = _mm_set1_epi8(5);
         let b = _mm_set1_epi8(3);
         let r = _mm_or_si128(a, b);
@@ -3822,7 +4008,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_xor_si128() {
+    const fn test_mm_xor_si128() {
         let a = _mm_set1_epi8(5);
         let b = _mm_set1_epi8(3);
         let r = _mm_xor_si128(a, b);
@@ -3830,7 +4016,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpeq_epi8() {
+    const fn test_mm_cmpeq_epi8() {
         let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm_cmpeq_epi8(a, b);
@@ -3844,7 +4030,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpeq_epi16() {
+    const fn test_mm_cmpeq_epi16() {
         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
         let r = _mm_cmpeq_epi16(a, b);
@@ -3852,7 +4038,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpeq_epi32() {
+    const fn test_mm_cmpeq_epi32() {
         let a = _mm_setr_epi32(0, 1, 2, 3);
         let b = _mm_setr_epi32(3, 2, 2, 0);
         let r = _mm_cmpeq_epi32(a, b);
@@ -3860,7 +4046,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpgt_epi8() {
+    const fn test_mm_cmpgt_epi8() {
         let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
         let b = _mm_set1_epi8(0);
         let r = _mm_cmpgt_epi8(a, b);
@@ -3869,7 +4055,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpgt_epi16() {
+    const fn test_mm_cmpgt_epi16() {
         let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
         let b = _mm_set1_epi16(0);
         let r = _mm_cmpgt_epi16(a, b);
@@ -3878,7 +4064,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpgt_epi32() {
+    const fn test_mm_cmpgt_epi32() {
         let a = _mm_set_epi32(5, 0, 0, 0);
         let b = _mm_set1_epi32(0);
         let r = _mm_cmpgt_epi32(a, b);
@@ -3886,7 +4072,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmplt_epi8() {
+    const fn test_mm_cmplt_epi8() {
         let a = _mm_set1_epi8(0);
         let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm_cmplt_epi8(a, b);
@@ -3895,7 +4081,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmplt_epi16() {
+    const fn test_mm_cmplt_epi16() {
         let a = _mm_set1_epi16(0);
         let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm_cmplt_epi16(a, b);
@@ -3904,7 +4090,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmplt_epi32() {
+    const fn test_mm_cmplt_epi32() {
         let a = _mm_set1_epi32(0);
         let b = _mm_set_epi32(5, 0, 0, 0);
         let r = _mm_cmplt_epi32(a, b);
@@ -3912,65 +4098,65 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtepi32_pd() {
+    const fn test_mm_cvtepi32_pd() {
         let a = _mm_set_epi32(35, 25, 15, 5);
         let r = _mm_cvtepi32_pd(a);
         assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsi32_sd() {
+    const fn test_mm_cvtsi32_sd() {
         let a = _mm_set1_pd(3.5);
         let r = _mm_cvtsi32_sd(a, 5);
         assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtepi32_ps() {
+    const fn test_mm_cvtepi32_ps() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let r = _mm_cvtepi32_ps(a);
         assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtps_epi32() {
+    fn test_mm_cvtps_epi32() {
         let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let r = _mm_cvtps_epi32(a);
         assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsi32_si128() {
+    const fn test_mm_cvtsi32_si128() {
         let r = _mm_cvtsi32_si128(5);
         assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsi128_si32() {
+    const fn test_mm_cvtsi128_si32() {
         let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
         assert_eq!(r, 5);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set_epi64x() {
+    const fn test_mm_set_epi64x() {
         let r = _mm_set_epi64x(0, 1);
         assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set_epi32() {
+    const fn test_mm_set_epi32() {
         let r = _mm_set_epi32(0, 1, 2, 3);
         assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set_epi16() {
+    const fn test_mm_set_epi16() {
         let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set_epi8() {
+    const fn test_mm_set_epi8() {
         #[rustfmt::skip]
         let r = _mm_set_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -3984,43 +4170,43 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set1_epi64x() {
+    const fn test_mm_set1_epi64x() {
         let r = _mm_set1_epi64x(1);
         assert_eq_m128i(r, _mm_set1_epi64x(1));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set1_epi32() {
+    const fn test_mm_set1_epi32() {
         let r = _mm_set1_epi32(1);
         assert_eq_m128i(r, _mm_set1_epi32(1));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set1_epi16() {
+    const fn test_mm_set1_epi16() {
         let r = _mm_set1_epi16(1);
         assert_eq_m128i(r, _mm_set1_epi16(1));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set1_epi8() {
+    const fn test_mm_set1_epi8() {
         let r = _mm_set1_epi8(1);
         assert_eq_m128i(r, _mm_set1_epi8(1));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_setr_epi32() {
+    const fn test_mm_setr_epi32() {
         let r = _mm_setr_epi32(0, 1, 2, 3);
         assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_setr_epi16() {
+    const fn test_mm_setr_epi16() {
         let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_setr_epi8() {
+    const fn test_mm_setr_epi8() {
         #[rustfmt::skip]
         let r = _mm_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@@ -4034,29 +4220,29 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_setzero_si128() {
+    const fn test_mm_setzero_si128() {
         let r = _mm_setzero_si128();
         assert_eq_m128i(r, _mm_set1_epi64x(0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_loadl_epi64() {
+    const fn test_mm_loadl_epi64() {
         let a = _mm_setr_epi64x(6, 5);
-        let r = _mm_loadl_epi64(ptr::addr_of!(a));
+        let r = unsafe { _mm_loadl_epi64(ptr::addr_of!(a)) };
         assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_load_si128() {
+    const fn test_mm_load_si128() {
         let a = _mm_set_epi64x(5, 6);
-        let r = _mm_load_si128(ptr::addr_of!(a) as *const _);
+        let r = unsafe { _mm_load_si128(ptr::addr_of!(a) as *const _) };
         assert_eq_m128i(a, r);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_loadu_si128() {
+    const fn test_mm_loadu_si128() {
         let a = _mm_set_epi64x(5, 6);
-        let r = _mm_loadu_si128(ptr::addr_of!(a) as *const _);
+        let r = unsafe { _mm_loadu_si128(ptr::addr_of!(a) as *const _) };
         assert_eq_m128i(a, r);
     }
 
@@ -4064,7 +4250,7 @@ mod tests {
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     // (non-temporal store)
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_maskmoveu_si128() {
+    fn test_mm_maskmoveu_si128() {
         let a = _mm_set1_epi8(9);
         #[rustfmt::skip]
         let mask = _mm_set_epi8(
@@ -4072,33 +4258,41 @@ mod tests {
             0, 0, 0, 0, 0, 0, 0, 0,
         );
         let mut r = _mm_set1_epi8(0);
-        _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
+        unsafe {
+            _mm_maskmoveu_si128(a, mask, ptr::addr_of_mut!(r) as *mut i8);
+        }
         _mm_sfence();
         let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_store_si128() {
+    const fn test_mm_store_si128() {
         let a = _mm_set1_epi8(9);
         let mut r = _mm_set1_epi8(0);
-        _mm_store_si128(&mut r, a);
+        unsafe {
+            _mm_store_si128(&mut r, a);
+        }
         assert_eq_m128i(r, a);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_storeu_si128() {
+    const fn test_mm_storeu_si128() {
         let a = _mm_set1_epi8(9);
         let mut r = _mm_set1_epi8(0);
-        _mm_storeu_si128(&mut r, a);
+        unsafe {
+            _mm_storeu_si128(&mut r, a);
+        }
         assert_eq_m128i(r, a);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_storel_epi64() {
+    const fn test_mm_storel_epi64() {
         let a = _mm_setr_epi64x(2, 9);
         let mut r = _mm_set1_epi8(0);
-        _mm_storel_epi64(&mut r, a);
+        unsafe {
+            _mm_storel_epi64(&mut r, a);
+        }
         assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
     }
 
@@ -4106,10 +4300,12 @@ mod tests {
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     // (non-temporal store)
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_stream_si128() {
+    fn test_mm_stream_si128() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let mut r = _mm_undefined_si128();
-        _mm_stream_si128(ptr::addr_of_mut!(r), a);
+        unsafe {
+            _mm_stream_si128(ptr::addr_of_mut!(r), a);
+        }
         _mm_sfence();
         assert_eq_m128i(r, a);
     }
@@ -4118,23 +4314,25 @@ mod tests {
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     // (non-temporal store)
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_stream_si32() {
+    fn test_mm_stream_si32() {
         let a: i32 = 7;
         let mut mem = boxed::Box::<i32>::new(-1);
-        _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
+        unsafe {
+            _mm_stream_si32(ptr::addr_of_mut!(*mem), a);
+        }
         _mm_sfence();
         assert_eq!(a, *mem);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_move_epi64() {
+    const fn test_mm_move_epi64() {
         let a = _mm_setr_epi64x(5, 6);
         let r = _mm_move_epi64(a);
         assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_packs_epi16() {
+    const fn test_mm_packs_epi16() {
         let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
         let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
         let r = _mm_packs_epi16(a, b);
@@ -4148,7 +4346,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_packs_epi32() {
+    const fn test_mm_packs_epi32() {
         let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
         let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
         let r = _mm_packs_epi32(a, b);
@@ -4159,7 +4357,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_packus_epi16() {
+    const fn test_mm_packus_epi16() {
         let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
         let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
         let r = _mm_packus_epi16(a, b);
@@ -4170,7 +4368,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_extract_epi16() {
+    const fn test_mm_extract_epi16() {
         let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
         let r1 = _mm_extract_epi16::<0>(a);
         let r2 = _mm_extract_epi16::<3>(a);
@@ -4179,7 +4377,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_insert_epi16() {
+    const fn test_mm_insert_epi16() {
         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm_insert_epi16::<0>(a, 9);
         let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
@@ -4187,7 +4385,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_movemask_epi8() {
+    const fn test_mm_movemask_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
@@ -4200,7 +4398,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_shuffle_epi32() {
+    const fn test_mm_shuffle_epi32() {
         let a = _mm_setr_epi32(5, 10, 15, 20);
         let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
         let e = _mm_setr_epi32(20, 10, 10, 5);
@@ -4208,7 +4406,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_shufflehi_epi16() {
+    const fn test_mm_shufflehi_epi16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
         let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
         let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
@@ -4216,7 +4414,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_shufflelo_epi16() {
+    const fn test_mm_shufflelo_epi16() {
         let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
         let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
         let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
@@ -4224,7 +4422,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpackhi_epi8() {
+    const fn test_mm_unpackhi_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4243,7 +4441,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpackhi_epi16() {
+    const fn test_mm_unpackhi_epi16() {
         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_unpackhi_epi16(a, b);
@@ -4252,7 +4450,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpackhi_epi32() {
+    const fn test_mm_unpackhi_epi32() {
         let a = _mm_setr_epi32(0, 1, 2, 3);
         let b = _mm_setr_epi32(4, 5, 6, 7);
         let r = _mm_unpackhi_epi32(a, b);
@@ -4261,7 +4459,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpackhi_epi64() {
+    const fn test_mm_unpackhi_epi64() {
         let a = _mm_setr_epi64x(0, 1);
         let b = _mm_setr_epi64x(2, 3);
         let r = _mm_unpackhi_epi64(a, b);
@@ -4270,7 +4468,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpacklo_epi8() {
+    const fn test_mm_unpacklo_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -4290,7 +4488,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpacklo_epi16() {
+    const fn test_mm_unpacklo_epi16() {
         let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_unpacklo_epi16(a, b);
@@ -4299,7 +4497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpacklo_epi32() {
+    const fn test_mm_unpacklo_epi32() {
         let a = _mm_setr_epi32(0, 1, 2, 3);
         let b = _mm_setr_epi32(4, 5, 6, 7);
         let r = _mm_unpacklo_epi32(a, b);
@@ -4308,7 +4506,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpacklo_epi64() {
+    const fn test_mm_unpacklo_epi64() {
         let a = _mm_setr_epi64x(0, 1);
         let b = _mm_setr_epi64x(2, 3);
         let r = _mm_unpacklo_epi64(a, b);
@@ -4317,7 +4515,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_add_sd() {
+    const fn test_mm_add_sd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_add_sd(a, b);
@@ -4325,7 +4523,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_add_pd() {
+    const fn test_mm_add_pd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_add_pd(a, b);
@@ -4333,7 +4531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_div_sd() {
+    const fn test_mm_div_sd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_div_sd(a, b);
@@ -4341,7 +4539,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_div_pd() {
+    const fn test_mm_div_pd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_div_pd(a, b);
@@ -4349,7 +4547,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_max_sd() {
+    fn test_mm_max_sd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_max_sd(a, b);
@@ -4357,7 +4555,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_max_pd() {
+    fn test_mm_max_pd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_max_pd(a, b);
@@ -4366,17 +4564,18 @@ mod tests {
         // Check SSE(2)-specific semantics for -0.0 handling.
         let a = _mm_setr_pd(-0.0, 0.0);
         let b = _mm_setr_pd(0.0, 0.0);
-        let r1: [u8; 16] = transmute(_mm_max_pd(a, b));
-        let r2: [u8; 16] = transmute(_mm_max_pd(b, a));
-        let a: [u8; 16] = transmute(a);
-        let b: [u8; 16] = transmute(b);
-        assert_eq!(r1, b);
-        assert_eq!(r2, a);
-        assert_ne!(a, b); // sanity check that -0.0 is actually present
+        // Cast to __m128i to compare exact bit patterns
+        let r1 = _mm_castpd_si128(_mm_max_pd(a, b));
+        let r2 = _mm_castpd_si128(_mm_max_pd(b, a));
+        let a = _mm_castpd_si128(a);
+        let b = _mm_castpd_si128(b);
+        assert_eq_m128i(r1, b);
+        assert_eq_m128i(r2, a);
+        assert_ne!(a.as_u8x16(), b.as_u8x16()); // sanity check that -0.0 is actually present
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_min_sd() {
+    fn test_mm_min_sd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_min_sd(a, b);
@@ -4384,7 +4583,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_min_pd() {
+    fn test_mm_min_pd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_min_pd(a, b);
@@ -4393,17 +4592,18 @@ mod tests {
         // Check SSE(2)-specific semantics for -0.0 handling.
         let a = _mm_setr_pd(-0.0, 0.0);
         let b = _mm_setr_pd(0.0, 0.0);
-        let r1: [u8; 16] = transmute(_mm_min_pd(a, b));
-        let r2: [u8; 16] = transmute(_mm_min_pd(b, a));
-        let a: [u8; 16] = transmute(a);
-        let b: [u8; 16] = transmute(b);
-        assert_eq!(r1, b);
-        assert_eq!(r2, a);
-        assert_ne!(a, b); // sanity check that -0.0 is actually present
+        // Cast to __m128i to compare exact bit patterns
+        let r1 = _mm_castpd_si128(_mm_min_pd(a, b));
+        let r2 = _mm_castpd_si128(_mm_min_pd(b, a));
+        let a = _mm_castpd_si128(a);
+        let b = _mm_castpd_si128(b);
+        assert_eq_m128i(r1, b);
+        assert_eq_m128i(r2, a);
+        assert_ne!(a.as_u8x16(), b.as_u8x16()); // sanity check that -0.0 is actually present
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_mul_sd() {
+    const fn test_mm_mul_sd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_mul_sd(a, b);
@@ -4411,7 +4611,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_mul_pd() {
+    const fn test_mm_mul_pd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_mul_pd(a, b);
@@ -4419,7 +4619,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sqrt_sd() {
+    fn test_mm_sqrt_sd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_sqrt_sd(a, b);
@@ -4427,13 +4627,13 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sqrt_pd() {
+    fn test_mm_sqrt_pd() {
         let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
         assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sub_sd() {
+    const fn test_mm_sub_sd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_sub_sd(a, b);
@@ -4441,7 +4641,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_sub_pd() {
+    const fn test_mm_sub_pd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(5.0, 10.0);
         let r = _mm_sub_pd(a, b);
@@ -4449,235 +4649,235 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_and_pd() {
-        let a = transmute(u64x2::splat(5));
-        let b = transmute(u64x2::splat(3));
+    const fn test_mm_and_pd() {
+        let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
+        let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
         let r = _mm_and_pd(a, b);
-        let e = transmute(u64x2::splat(1));
+        let e = f64x2::from_bits(u64x2::splat(1)).as_m128d();
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_andnot_pd() {
-        let a = transmute(u64x2::splat(5));
-        let b = transmute(u64x2::splat(3));
+    const fn test_mm_andnot_pd() {
+        let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
+        let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
         let r = _mm_andnot_pd(a, b);
-        let e = transmute(u64x2::splat(2));
+        let e = f64x2::from_bits(u64x2::splat(2)).as_m128d();
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_or_pd() {
-        let a = transmute(u64x2::splat(5));
-        let b = transmute(u64x2::splat(3));
+    const fn test_mm_or_pd() {
+        let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
+        let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
         let r = _mm_or_pd(a, b);
-        let e = transmute(u64x2::splat(7));
+        let e = f64x2::from_bits(u64x2::splat(7)).as_m128d();
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_xor_pd() {
-        let a = transmute(u64x2::splat(5));
-        let b = transmute(u64x2::splat(3));
+    const fn test_mm_xor_pd() {
+        let a = f64x2::from_bits(u64x2::splat(5)).as_m128d();
+        let b = f64x2::from_bits(u64x2::splat(3)).as_m128d();
         let r = _mm_xor_pd(a, b);
-        let e = transmute(u64x2::splat(6));
+        let e = f64x2::from_bits(u64x2::splat(6)).as_m128d();
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpeq_sd() {
+    fn test_mm_cmpeq_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpeq_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmplt_sd() {
+    fn test_mm_cmplt_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
         let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmplt_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmple_sd() {
+    fn test_mm_cmple_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmple_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpgt_sd() {
+    fn test_mm_cmpgt_sd() {
         let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpgt_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpge_sd() {
+    fn test_mm_cmpge_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpge_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpord_sd() {
+    fn test_mm_cmpord_sd() {
         let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
         let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpord_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpunord_sd() {
+    fn test_mm_cmpunord_sd() {
         let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
         let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpunord_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpneq_sd() {
+    fn test_mm_cmpneq_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
         let e = _mm_setr_epi64x(!0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpneq_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpnlt_sd() {
+    fn test_mm_cmpnlt_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
         let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpnlt_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpnle_sd() {
+    fn test_mm_cmpnle_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpnle_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpngt_sd() {
+    fn test_mm_cmpngt_sd() {
         let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpngt_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpnge_sd() {
+    fn test_mm_cmpnge_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(0, 2.0f64.to_bits() as i64);
-        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpnge_sd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpeq_pd() {
+    fn test_mm_cmpeq_pd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(!0, 0);
-        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpeq_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmplt_pd() {
+    fn test_mm_cmplt_pd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(0, !0);
-        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmplt_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmple_pd() {
+    fn test_mm_cmple_pd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(!0, !0);
-        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmple_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpgt_pd() {
+    fn test_mm_cmpgt_pd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(0, 0);
-        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpgt_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpge_pd() {
+    fn test_mm_cmpge_pd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(!0, 0);
-        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpge_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpord_pd() {
+    fn test_mm_cmpord_pd() {
         let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
         let e = _mm_setr_epi64x(0, !0);
-        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpord_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpunord_pd() {
+    fn test_mm_cmpunord_pd() {
         let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
         let e = _mm_setr_epi64x(!0, 0);
-        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpunord_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpneq_pd() {
+    fn test_mm_cmpneq_pd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
         let e = _mm_setr_epi64x(!0, !0);
-        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpneq_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpnlt_pd() {
+    fn test_mm_cmpnlt_pd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
         let e = _mm_setr_epi64x(0, 0);
-        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpnlt_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpnle_pd() {
+    fn test_mm_cmpnle_pd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(0, 0);
-        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpnle_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpngt_pd() {
+    fn test_mm_cmpngt_pd() {
         let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(0, !0);
-        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpngt_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cmpnge_pd() {
+    fn test_mm_cmpnge_pd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         let e = _mm_setr_epi64x(0, !0);
-        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
+        let r = _mm_castpd_si128(_mm_cmpnge_pd(a, b));
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_comieq_sd() {
+    fn test_mm_comieq_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_comieq_sd(a, b) != 0);
 
@@ -4686,37 +4886,37 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_comilt_sd() {
+    fn test_mm_comilt_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_comilt_sd(a, b) == 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_comile_sd() {
+    fn test_mm_comile_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_comile_sd(a, b) != 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_comigt_sd() {
+    fn test_mm_comigt_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_comigt_sd(a, b) == 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_comige_sd() {
+    fn test_mm_comige_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_comige_sd(a, b) != 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_comineq_sd() {
+    fn test_mm_comineq_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_comineq_sd(a, b) == 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_ucomieq_sd() {
+    fn test_mm_ucomieq_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_ucomieq_sd(a, b) != 0);
 
@@ -4725,37 +4925,37 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_ucomilt_sd() {
+    fn test_mm_ucomilt_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_ucomilt_sd(a, b) == 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_ucomile_sd() {
+    fn test_mm_ucomile_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_ucomile_sd(a, b) != 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_ucomigt_sd() {
+    fn test_mm_ucomigt_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_ucomigt_sd(a, b) == 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_ucomige_sd() {
+    fn test_mm_ucomige_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_ucomige_sd(a, b) != 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_ucomineq_sd() {
+    fn test_mm_ucomineq_sd() {
         let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
         assert!(_mm_ucomineq_sd(a, b) == 0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_movemask_pd() {
+    const fn test_mm_movemask_pd() {
         let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
         assert_eq!(r, 0b01);
 
@@ -4769,40 +4969,40 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_load_pd() {
+    const fn test_mm_load_pd() {
         let mem = Memory {
             data: [1.0f64, 2.0, 3.0, 4.0],
         };
         let vals = &mem.data;
         let d = vals.as_ptr();
 
-        let r = _mm_load_pd(d);
+        let r = unsafe { _mm_load_pd(d) };
         assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_load_sd() {
+    const fn test_mm_load_sd() {
         let a = 1.;
         let expected = _mm_setr_pd(a, 0.);
-        let r = _mm_load_sd(&a);
+        let r = unsafe { _mm_load_sd(&a) };
         assert_eq_m128d(r, expected);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_loadh_pd() {
+    const fn test_mm_loadh_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = 3.;
         let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
-        let r = _mm_loadh_pd(a, &b);
+        let r = unsafe { _mm_loadh_pd(a, &b) };
         assert_eq_m128d(r, expected);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_loadl_pd() {
+    const fn test_mm_loadl_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = 3.;
         let expected = _mm_setr_pd(3., get_m128d(a, 1));
-        let r = _mm_loadl_pd(a, &b);
+        let r = unsafe { _mm_loadl_pd(a, &b) };
         assert_eq_m128d(r, expected);
     }
 
@@ -4810,7 +5010,7 @@ mod tests {
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     // (non-temporal store)
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_stream_pd() {
+    fn test_mm_stream_pd() {
         #[repr(align(128))]
         struct Memory {
             pub data: [f64; 2],
@@ -4818,7 +5018,9 @@ mod tests {
         let a = _mm_set1_pd(7.0);
         let mut mem = Memory { data: [-1.0; 2] };
 
-        _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
+        unsafe {
+            _mm_stream_pd(ptr::addr_of_mut!(mem.data[0]), a);
+        }
         _mm_sfence();
         for i in 0..2 {
             assert_eq!(mem.data[i], get_m128d(a, i));
@@ -4826,183 +5028,191 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_store_sd() {
+    const fn test_mm_store_sd() {
         let mut dest = 0.;
         let a = _mm_setr_pd(1., 2.);
-        _mm_store_sd(&mut dest, a);
+        unsafe {
+            _mm_store_sd(&mut dest, a);
+        }
         assert_eq!(dest, _mm_cvtsd_f64(a));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_store_pd() {
+    const fn test_mm_store_pd() {
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
         let a = _mm_setr_pd(1.0, 2.0);
         let d = vals.as_mut_ptr();
 
-        _mm_store_pd(d, *black_box(&a));
+        unsafe {
+            _mm_store_pd(d, *black_box(&a));
+        }
         assert_eq!(vals[0], 1.0);
         assert_eq!(vals[1], 2.0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_storeu_pd() {
+    const fn test_mm_storeu_pd() {
+        // guaranteed to be aligned to 16 bytes
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
         let a = _mm_setr_pd(1.0, 2.0);
 
-        let mut ofs = 0;
-        let mut p = vals.as_mut_ptr();
-
-        // Make sure p is **not** aligned to 16-byte boundary
-        if (p as usize) & 0xf == 0 {
-            ofs = 1;
-            p = p.add(1);
+        // so p is *not* aligned to 16 bytes
+        unsafe {
+            let p = vals.as_mut_ptr().offset(1);
+            _mm_storeu_pd(p, *black_box(&a));
         }
 
-        _mm_storeu_pd(p, *black_box(&a));
-
-        if ofs > 0 {
-            assert_eq!(vals[ofs - 1], 0.0);
-        }
-        assert_eq!(vals[ofs + 0], 1.0);
-        assert_eq!(vals[ofs + 1], 2.0);
+        assert_eq!(*vals, [0.0, 1.0, 2.0, 0.0]);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_storeu_si16() {
+    const fn test_mm_storeu_si16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let mut r = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
-        _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
+        unsafe {
+            _mm_storeu_si16(ptr::addr_of_mut!(r).cast(), a);
+        }
         let e = _mm_setr_epi16(1, 10, 11, 12, 13, 14, 15, 16);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_storeu_si32() {
+    const fn test_mm_storeu_si32() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let mut r = _mm_setr_epi32(5, 6, 7, 8);
-        _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
+        unsafe {
+            _mm_storeu_si32(ptr::addr_of_mut!(r).cast(), a);
+        }
         let e = _mm_setr_epi32(1, 6, 7, 8);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_storeu_si64() {
+    const fn test_mm_storeu_si64() {
         let a = _mm_setr_epi64x(1, 2);
         let mut r = _mm_setr_epi64x(3, 4);
-        _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
+        unsafe {
+            _mm_storeu_si64(ptr::addr_of_mut!(r).cast(), a);
+        }
         let e = _mm_setr_epi64x(1, 4);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_store1_pd() {
+    const fn test_mm_store1_pd() {
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
         let a = _mm_setr_pd(1.0, 2.0);
         let d = vals.as_mut_ptr();
 
-        _mm_store1_pd(d, *black_box(&a));
+        unsafe {
+            _mm_store1_pd(d, *black_box(&a));
+        }
         assert_eq!(vals[0], 1.0);
         assert_eq!(vals[1], 1.0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_store_pd1() {
+    const fn test_mm_store_pd1() {
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
         let a = _mm_setr_pd(1.0, 2.0);
         let d = vals.as_mut_ptr();
 
-        _mm_store_pd1(d, *black_box(&a));
+        unsafe {
+            _mm_store_pd1(d, *black_box(&a));
+        }
         assert_eq!(vals[0], 1.0);
         assert_eq!(vals[1], 1.0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_storer_pd() {
+    const fn test_mm_storer_pd() {
         let mut mem = Memory { data: [0.0f64; 4] };
         let vals = &mut mem.data;
         let a = _mm_setr_pd(1.0, 2.0);
         let d = vals.as_mut_ptr();
 
-        _mm_storer_pd(d, *black_box(&a));
+        unsafe {
+            _mm_storer_pd(d, *black_box(&a));
+        }
         assert_eq!(vals[0], 2.0);
         assert_eq!(vals[1], 1.0);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_storeh_pd() {
+    const fn test_mm_storeh_pd() {
         let mut dest = 0.;
         let a = _mm_setr_pd(1., 2.);
-        _mm_storeh_pd(&mut dest, a);
+        unsafe {
+            _mm_storeh_pd(&mut dest, a);
+        }
         assert_eq!(dest, get_m128d(a, 1));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_storel_pd() {
+    const fn test_mm_storel_pd() {
         let mut dest = 0.;
         let a = _mm_setr_pd(1., 2.);
-        _mm_storel_pd(&mut dest, a);
+        unsafe {
+            _mm_storel_pd(&mut dest, a);
+        }
         assert_eq!(dest, _mm_cvtsd_f64(a));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_loadr_pd() {
+    const fn test_mm_loadr_pd() {
         let mut mem = Memory {
             data: [1.0f64, 2.0, 3.0, 4.0],
         };
         let vals = &mut mem.data;
         let d = vals.as_ptr();
 
-        let r = _mm_loadr_pd(d);
+        let r = unsafe { _mm_loadr_pd(d) };
         assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_loadu_pd() {
+    const fn test_mm_loadu_pd() {
+        // guaranteed to be aligned to 16 bytes
         let mut mem = Memory {
             data: [1.0f64, 2.0, 3.0, 4.0],
         };
         let vals = &mut mem.data;
-        let mut d = vals.as_ptr();
 
-        // make sure d is not aligned to 16-byte boundary
-        let mut offset = 0;
-        if (d as usize) & 0xf == 0 {
-            offset = 1;
-            d = d.add(offset);
-        }
+        // so this will *not* be aligned to 16 bytes
+        let d = unsafe { vals.as_ptr().offset(1) };
 
-        let r = _mm_loadu_pd(d);
-        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
+        let r = unsafe { _mm_loadu_pd(d) };
+        let e = _mm_setr_pd(2.0, 3.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_loadu_si16() {
+    const fn test_mm_loadu_si16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
-        let r = _mm_loadu_si16(ptr::addr_of!(a) as *const _);
+        let r = unsafe { _mm_loadu_si16(ptr::addr_of!(a) as *const _) };
         assert_eq_m128i(r, _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_loadu_si32() {
+    const fn test_mm_loadu_si32() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
-        let r = _mm_loadu_si32(ptr::addr_of!(a) as *const _);
+        let r = unsafe { _mm_loadu_si32(ptr::addr_of!(a) as *const _) };
         assert_eq_m128i(r, _mm_setr_epi32(1, 0, 0, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_loadu_si64() {
+    const fn test_mm_loadu_si64() {
         let a = _mm_setr_epi64x(5, 6);
-        let r = _mm_loadu_si64(ptr::addr_of!(a) as *const _);
+        let r = unsafe { _mm_loadu_si64(ptr::addr_of!(a) as *const _) };
         assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtpd_ps() {
+    const fn test_mm_cvtpd_ps() {
         let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
         assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));
 
@@ -5017,7 +5227,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtps_pd() {
+    const fn test_mm_cvtps_pd() {
         let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
         assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));
 
@@ -5031,7 +5241,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtpd_epi32() {
+    fn test_mm_cvtpd_epi32() {
         let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
         assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));
 
@@ -5049,7 +5259,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsd_si32() {
+    fn test_mm_cvtsd_si32() {
         let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
         assert_eq!(r, -2);
 
@@ -5061,7 +5271,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsd_ss() {
+    fn test_mm_cvtsd_ss() {
         let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
         let b = _mm_setr_pd(2.0, -5.0);
 
@@ -5086,13 +5296,13 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsd_f64() {
+    const fn test_mm_cvtsd_f64() {
         let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
         assert_eq!(r, -1.1);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtss_sd() {
+    const fn test_mm_cvtss_sd() {
         let a = _mm_setr_pd(-1.1, 2.2);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
 
@@ -5107,7 +5317,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvttpd_epi32() {
+    fn test_mm_cvttpd_epi32() {
         let a = _mm_setr_pd(-1.1, 2.2);
         let r = _mm_cvttpd_epi32(a);
         assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));
@@ -5118,7 +5328,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvttsd_si32() {
+    fn test_mm_cvttsd_si32() {
         let a = _mm_setr_pd(-1.1, 2.2);
         let r = _mm_cvttsd_si32(a);
         assert_eq!(r, -1);
@@ -5129,7 +5339,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvttps_epi32() {
+    fn test_mm_cvttps_epi32() {
         let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
         let r = _mm_cvttps_epi32(a);
         assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));
@@ -5140,57 +5350,57 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set_sd() {
+    const fn test_mm_set_sd() {
         let r = _mm_set_sd(-1.0_f64);
         assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set1_pd() {
+    const fn test_mm_set1_pd() {
         let r = _mm_set1_pd(-1.0_f64);
         assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set_pd1() {
+    const fn test_mm_set_pd1() {
         let r = _mm_set_pd1(-2.0_f64);
         assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_set_pd() {
+    const fn test_mm_set_pd() {
         let r = _mm_set_pd(1.0_f64, 5.0_f64);
         assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_setr_pd() {
+    const fn test_mm_setr_pd() {
         let r = _mm_setr_pd(1.0_f64, -5.0_f64);
         assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_setzero_pd() {
+    const fn test_mm_setzero_pd() {
         let r = _mm_setzero_pd();
         assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_load1_pd() {
+    const fn test_mm_load1_pd() {
         let d = -5.0;
-        let r = _mm_load1_pd(&d);
+        let r = unsafe { _mm_load1_pd(&d) };
         assert_eq_m128d(r, _mm_setr_pd(d, d));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_load_pd1() {
+    const fn test_mm_load_pd1() {
         let d = -5.0;
-        let r = _mm_load_pd1(&d);
+        let r = unsafe { _mm_load_pd1(&d) };
         assert_eq_m128d(r, _mm_setr_pd(d, d));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpackhi_pd() {
+    const fn test_mm_unpackhi_pd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(3.0, 4.0);
         let r = _mm_unpackhi_pd(a, b);
@@ -5198,7 +5408,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_unpacklo_pd() {
+    const fn test_mm_unpacklo_pd() {
         let a = _mm_setr_pd(1.0, 2.0);
         let b = _mm_setr_pd(3.0, 4.0);
         let r = _mm_unpacklo_pd(a, b);
@@ -5206,7 +5416,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_shuffle_pd() {
+    const fn test_mm_shuffle_pd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(3., 4.);
         let expected = _mm_setr_pd(1., 3.);
@@ -5215,7 +5425,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_move_sd() {
+    const fn test_mm_move_sd() {
         let a = _mm_setr_pd(1., 2.);
         let b = _mm_setr_pd(3., 4.);
         let expected = _mm_setr_pd(3., 2.);
@@ -5224,7 +5434,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_castpd_ps() {
+    const fn test_mm_castpd_ps() {
         let a = _mm_set1_pd(0.);
         let expected = _mm_set1_ps(0.);
         let r = _mm_castpd_ps(a);
@@ -5232,7 +5442,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_castpd_si128() {
+    const fn test_mm_castpd_si128() {
         let a = _mm_set1_pd(0.);
         let expected = _mm_set1_epi64x(0);
         let r = _mm_castpd_si128(a);
@@ -5240,7 +5450,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_castps_pd() {
+    const fn test_mm_castps_pd() {
         let a = _mm_set1_ps(0.);
         let expected = _mm_set1_pd(0.);
         let r = _mm_castps_pd(a);
@@ -5248,7 +5458,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_castps_si128() {
+    const fn test_mm_castps_si128() {
         let a = _mm_set1_ps(0.);
         let expected = _mm_set1_epi32(0);
         let r = _mm_castps_si128(a);
@@ -5256,7 +5466,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_castsi128_pd() {
+    const fn test_mm_castsi128_pd() {
         let a = _mm_set1_epi64x(0);
         let expected = _mm_set1_pd(0.);
         let r = _mm_castsi128_pd(a);
@@ -5264,7 +5474,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_castsi128_ps() {
+    const fn test_mm_castsi128_ps() {
         let a = _mm_set1_epi32(0);
         let expected = _mm_set1_ps(0.);
         let r = _mm_castsi128_ps(a);
diff --git a/crates/core_arch/src/x86/sse3.rs b/crates/core_arch/src/x86/sse3.rs
index 79be7a7e9b..e4c7570254 100644
--- a/crates/core_arch/src/x86/sse3.rs
+++ b/crates/core_arch/src/x86/sse3.rs
@@ -14,7 +14,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(addsubps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
     unsafe {
         let a = a.as_f32x4();
         let b = b.as_f32x4();
@@ -32,7 +33,8 @@ pub fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(addsubpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let a = a.as_f64x2();
         let b = b.as_f64x2();
@@ -50,7 +52,8 @@ pub fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(haddpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let even = simd_shuffle!(a, b, [0, 2]);
         let odd = simd_shuffle!(a, b, [1, 3]);
@@ -66,7 +69,8 @@ pub fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(haddps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
     unsafe {
         let even = simd_shuffle!(a, b, [0, 2, 4, 6]);
         let odd = simd_shuffle!(a, b, [1, 3, 5, 7]);
@@ -82,7 +86,8 @@ pub fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(hsubpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
     unsafe {
         let even = simd_shuffle!(a, b, [0, 2]);
         let odd = simd_shuffle!(a, b, [1, 3]);
@@ -98,7 +103,8 @@ pub fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(hsubps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
     unsafe {
         let even = simd_shuffle!(a, b, [0, 2, 4, 6]);
         let odd = simd_shuffle!(a, b, [1, 3, 5, 7]);
@@ -127,7 +133,8 @@ pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(movddup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_movedup_pd(a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movedup_pd(a: __m128d) -> __m128d {
     unsafe { simd_shuffle!(a, a, [0, 0]) }
 }
 
@@ -139,7 +146,8 @@ pub fn _mm_movedup_pd(a: __m128d) -> __m128d {
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(movddup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
     _mm_load1_pd(mem_addr)
 }
 
@@ -151,7 +159,8 @@ pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(movshdup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_movehdup_ps(a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_movehdup_ps(a: __m128) -> __m128 {
     unsafe { simd_shuffle!(a, a, [1, 1, 3, 3]) }
 }
 
@@ -163,7 +172,8 @@ pub fn _mm_movehdup_ps(a: __m128) -> __m128 {
 #[target_feature(enable = "sse3")]
 #[cfg_attr(test, assert_instr(movsldup))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_moveldup_ps(a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_moveldup_ps(a: __m128) -> __m128 {
     unsafe { simd_shuffle!(a, a, [0, 0, 2, 2]) }
 }
 
@@ -175,12 +185,13 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_addsub_ps() {
+    const fn test_mm_addsub_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_addsub_ps(a, b);
@@ -188,7 +199,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_addsub_pd() {
+    const fn test_mm_addsub_pd() {
         let a = _mm_setr_pd(-1.0, 5.0);
         let b = _mm_setr_pd(-100.0, 20.0);
         let r = _mm_addsub_pd(a, b);
@@ -196,7 +207,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_hadd_pd() {
+    const fn test_mm_hadd_pd() {
         let a = _mm_setr_pd(-1.0, 5.0);
         let b = _mm_setr_pd(-100.0, 20.0);
         let r = _mm_hadd_pd(a, b);
@@ -204,7 +215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_hadd_ps() {
+    const fn test_mm_hadd_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_hadd_ps(a, b);
@@ -212,7 +223,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_hsub_pd() {
+    const fn test_mm_hsub_pd() {
         let a = _mm_setr_pd(-1.0, 5.0);
         let b = _mm_setr_pd(-100.0, 20.0);
         let r = _mm_hsub_pd(a, b);
@@ -220,7 +231,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_hsub_ps() {
+    const fn test_mm_hsub_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let b = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
         let r = _mm_hsub_ps(a, b);
@@ -228,7 +239,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_lddqu_si128() {
+    fn test_mm_lddqu_si128() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             1, 2, 3, 4,
@@ -236,35 +247,35 @@ mod tests {
             9, 10, 11, 12,
             13, 14, 15, 16,
         );
-        let r = _mm_lddqu_si128(&a);
+        let r = unsafe { _mm_lddqu_si128(&a) };
         assert_eq_m128i(a, r);
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_movedup_pd() {
+    const fn test_mm_movedup_pd() {
         let a = _mm_setr_pd(-1.0, 5.0);
         let r = _mm_movedup_pd(a);
         assert_eq_m128d(r, _mm_setr_pd(-1.0, -1.0));
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_movehdup_ps() {
+    const fn test_mm_movehdup_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let r = _mm_movehdup_ps(a);
         assert_eq_m128(r, _mm_setr_ps(5.0, 5.0, -10.0, -10.0));
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_moveldup_ps() {
+    const fn test_mm_moveldup_ps() {
         let a = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
         let r = _mm_moveldup_ps(a);
         assert_eq_m128(r, _mm_setr_ps(-1.0, -1.0, 0.0, 0.0));
     }
 
     #[simd_test(enable = "sse3")]
-    unsafe fn test_mm_loaddup_pd() {
+    const fn test_mm_loaddup_pd() {
         let d = -5.0;
-        let r = _mm_loaddup_pd(&d);
+        let r = unsafe { _mm_loaddup_pd(&d) };
         assert_eq_m128d(r, _mm_setr_pd(d, d));
     }
 }
diff --git a/crates/core_arch/src/x86/sse41.rs b/crates/core_arch/src/x86/sse41.rs
index f457c74aa9..4ebf7d3bd3 100644
--- a/crates/core_arch/src/x86/sse41.rs
+++ b/crates/core_arch/src/x86/sse41.rs
@@ -59,7 +59,8 @@ pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTI
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pblendvb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
     unsafe {
         let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::ZERO);
         transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
@@ -78,7 +79,8 @@ pub fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     unsafe {
         transmute::<i16x8, _>(simd_shuffle!(
@@ -106,7 +108,8 @@ pub fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendvpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
     unsafe {
         let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::ZERO);
         transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
@@ -121,7 +124,8 @@ pub fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(blendvps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
     unsafe {
         let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::ZERO);
         transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
@@ -140,7 +144,8 @@ pub fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
     static_assert_uimm_bits!(IMM2, 2);
     unsafe {
         transmute::<f64x2, _>(simd_shuffle!(
@@ -160,7 +165,8 @@ pub fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
 #[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
     static_assert_uimm_bits!(IMM4, 4);
     unsafe {
         transmute::<f32x4, _>(simd_shuffle!(
@@ -207,7 +213,8 @@ pub fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(extractps, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
     static_assert_uimm_bits!(IMM8, 2);
     unsafe { simd_extract!(a, IMM8 as u32, f32).to_bits() as i32 }
 }
@@ -223,7 +230,8 @@ pub fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
 #[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
     static_assert_uimm_bits!(IMM8, 4);
     unsafe { simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32 }
 }
@@ -236,7 +244,8 @@ pub fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
 #[cfg_attr(test, assert_instr(extractps, IMM8 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
     static_assert_uimm_bits!(IMM8, 2);
     unsafe { simd_extract!(a.as_i32x4(), IMM8 as u32, i32) }
 }
@@ -284,7 +293,8 @@ pub fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
 #[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
     static_assert_uimm_bits!(IMM8, 4);
     unsafe { transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8)) }
 }
@@ -298,7 +308,8 @@ pub fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
 #[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
     static_assert_uimm_bits!(IMM8, 2);
     unsafe { transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i)) }
 }
@@ -311,12 +322,9 @@ pub fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_i8x16();
-        let b = b.as_i8x16();
-        transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imax(a.as_i8x16(), b.as_i8x16()).as_m128i() }
 }
 
 /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
@@ -327,12 +335,9 @@ pub fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxuw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_u16x8();
-        let b = b.as_u16x8();
-        transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imax(a.as_u16x8(), b.as_u16x8()).as_m128i() }
 }
 
 /// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
@@ -343,12 +348,9 @@ pub fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_i32x4();
-        let b = b.as_i32x4();
-        transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imax(a.as_i32x4(), b.as_i32x4()).as_m128i() }
 }
 
 /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
@@ -359,12 +361,9 @@ pub fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmaxud))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_u32x4();
-        let b = b.as_u32x4();
-        transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imax(a.as_u32x4(), b.as_u32x4()).as_m128i() }
 }
 
 /// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
@@ -375,12 +374,9 @@ pub fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pminsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_i8x16();
-        let b = b.as_i8x16();
-        transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imin(a.as_i8x16(), b.as_i8x16()).as_m128i() }
 }
 
 /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
@@ -391,12 +387,9 @@ pub fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pminuw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_u16x8();
-        let b = b.as_u16x8();
-        transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imin(a.as_u16x8(), b.as_u16x8()).as_m128i() }
 }
 
 /// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
@@ -407,12 +400,9 @@ pub fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pminsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_i32x4();
-        let b = b.as_i32x4();
-        transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imin(a.as_i32x4(), b.as_i32x4()).as_m128i() }
 }
 
 /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
@@ -423,15 +413,12 @@ pub fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pminud))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe {
-        let a = a.as_u32x4();
-        let b = b.as_u32x4();
-        transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
-    }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe { simd_imin(a.as_u32x4(), b.as_u32x4()).as_m128i() }
 }
 
-/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
+/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers
 /// using unsigned saturation
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32)
@@ -439,8 +426,26 @@ pub fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(packusdw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) }
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
+    unsafe {
+        let max = simd_splat(u16::MAX as i32);
+        let min = simd_splat(u16::MIN as i32);
+
+        let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min)
+            .as_m128i()
+            .as_i16x8();
+        let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min)
+            .as_m128i()
+            .as_i16x8();
+
+        // Shuffle the low u16 of each i32 from two concatenated vectors into
+        // the low bits of the result register.
+        const IDXS: [u32; 8] = [0, 2, 4, 6, 8, 10, 12, 14];
+        let result: i16x8 = simd_shuffle!(clamped_a, clamped_b, IDXS);
+
+        result.as_m128i()
+    }
 }
 
 /// Compares packed 64-bit integers in `a` and `b` for equality
@@ -450,7 +455,8 @@ pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pcmpeqq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -461,7 +467,8 @@ pub fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i8x16();
         let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -476,7 +483,8 @@ pub fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxbd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i8x16();
         let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -492,7 +500,8 @@ pub fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxbq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i8x16();
         let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
@@ -507,7 +516,8 @@ pub fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i16x8();
         let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -522,7 +532,8 @@ pub fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxwq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i16x8();
         let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
@@ -537,7 +548,8 @@ pub fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovsxdq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i32x4();
         let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
@@ -552,7 +564,8 @@ pub fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxbw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_u8x16();
         let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@@ -567,7 +580,8 @@ pub fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxbd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_u8x16();
         let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -582,7 +596,8 @@ pub fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxbq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_u8x16();
         let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
@@ -598,7 +613,8 @@ pub fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxwd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_u16x8();
         let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
@@ -614,7 +630,8 @@ pub fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxwq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_u16x8();
         let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
@@ -630,7 +647,8 @@ pub fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmovzxdq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_u32x4();
         let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
@@ -687,7 +705,8 @@ pub fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_floor_pd(a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_floor_pd(a: __m128d) -> __m128d {
     unsafe { simd_floor(a) }
 }
 
@@ -700,7 +719,8 @@ pub fn _mm_floor_pd(a: __m128d) -> __m128d {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_floor_ps(a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_floor_ps(a: __m128) -> __m128 {
     unsafe { simd_floor(a) }
 }
 
@@ -743,7 +763,8 @@ pub fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundpd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_ceil_pd(a: __m128d) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_ceil_pd(a: __m128d) -> __m128d {
     unsafe { simd_ceil(a) }
 }
 
@@ -756,7 +777,8 @@ pub fn _mm_ceil_pd(a: __m128d) -> __m128d {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(roundps))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_ceil_ps(a: __m128) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_ceil_ps(a: __m128) -> __m128 {
     unsafe { simd_ceil(a) }
 }
 
@@ -919,7 +941,8 @@ pub fn _mm_minpos_epu16(a: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmuldq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe {
         let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
         let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
@@ -939,7 +962,8 @@ pub fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(pmulld))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_mul(a.as_i32x4(), b.as_i32x4())) }
 }
 
@@ -1005,7 +1029,8 @@ pub fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
     unsafe {
         let r = simd_reduce_or(simd_and(a.as_i64x2(), mask.as_i64x2()));
         (0i64 == r) as i32
@@ -1031,7 +1056,8 @@ pub fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
     unsafe {
         let r = simd_reduce_or(simd_and(
             simd_xor(a.as_i64x2(), i64x2::splat(!0)),
@@ -1083,7 +1109,8 @@ pub fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
 #[target_feature(enable = "sse4.1")]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
     _mm_testz_si128(a, mask)
 }
 
@@ -1105,7 +1132,8 @@ pub fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
 #[cfg_attr(test, assert_instr(pcmpeqd))]
 #[cfg_attr(test, assert_instr(ptest))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_test_all_ones(a: __m128i) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_test_all_ones(a: __m128i) -> i32 {
     _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
 }
 
@@ -1156,8 +1184,6 @@ pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i {
 unsafe extern "C" {
     #[link_name = "llvm.x86.sse41.insertps"]
     fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
-    #[link_name = "llvm.x86.sse41.packusdw"]
-    fn packusdw(a: i32x4, b: i32x4) -> u16x8;
     #[link_name = "llvm.x86.sse41.dppd"]
     fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
     #[link_name = "llvm.x86.sse41.dpps"]
@@ -1180,12 +1206,13 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use crate::core_arch::x86::*;
     use std::mem;
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_blendv_epi8() {
+    const fn test_mm_blendv_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -1208,27 +1235,27 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_blendv_pd() {
+    const fn test_mm_blendv_pd() {
         let a = _mm_set1_pd(0.0);
         let b = _mm_set1_pd(1.0);
-        let mask = transmute(_mm_setr_epi64x(0, -1));
+        let mask = _mm_castsi128_pd(_mm_setr_epi64x(0, -1));
         let r = _mm_blendv_pd(a, b, mask);
         let e = _mm_setr_pd(0.0, 1.0);
         assert_eq_m128d(r, e);
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_blendv_ps() {
+    const fn test_mm_blendv_ps() {
         let a = _mm_set1_ps(0.0);
         let b = _mm_set1_ps(1.0);
-        let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
+        let mask = _mm_castsi128_ps(_mm_setr_epi32(0, -1, 0, -1));
         let r = _mm_blendv_ps(a, b, mask);
         let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
         assert_eq_m128(r, e);
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_blend_pd() {
+    const fn test_mm_blend_pd() {
         let a = _mm_set1_pd(0.0);
         let b = _mm_set1_pd(1.0);
         let r = _mm_blend_pd::<0b10>(a, b);
@@ -1237,7 +1264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_blend_ps() {
+    const fn test_mm_blend_ps() {
         let a = _mm_set1_ps(0.0);
         let b = _mm_set1_ps(1.0);
         let r = _mm_blend_ps::<0b1010>(a, b);
@@ -1246,7 +1273,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_blend_epi16() {
+    const fn test_mm_blend_epi16() {
         let a = _mm_set1_epi16(0);
         let b = _mm_set1_epi16(1);
         let r = _mm_blend_epi16::<0b1010_1100>(a, b);
@@ -1255,7 +1282,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_extract_ps() {
+    const fn test_mm_extract_ps() {
         let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
         let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
         assert_eq!(r, 1.0);
@@ -1264,7 +1291,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_extract_epi8() {
+    const fn test_mm_extract_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             -1, 1, 2, 3, 4, 5, 6, 7,
@@ -1277,7 +1304,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_extract_epi32() {
+    const fn test_mm_extract_epi32() {
         let a = _mm_setr_epi32(0, 1, 2, 3);
         let r = _mm_extract_epi32::<1>(a);
         assert_eq!(r, 1);
@@ -1286,7 +1313,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_insert_ps() {
+    fn test_mm_insert_ps() {
         let a = _mm_set1_ps(1.0);
         let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
         let r = _mm_insert_ps::<0b11_00_1100>(a, b);
@@ -1302,7 +1329,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_insert_epi8() {
+    const fn test_mm_insert_epi8() {
         let a = _mm_set1_epi8(0);
         let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
         let r = _mm_insert_epi8::<1>(a, 32);
@@ -1313,7 +1340,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_insert_epi32() {
+    const fn test_mm_insert_epi32() {
         let a = _mm_set1_epi32(0);
         let e = _mm_setr_epi32(0, 32, 0, 0);
         let r = _mm_insert_epi32::<1>(a, 32);
@@ -1324,7 +1351,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_max_epi8() {
+    const fn test_mm_max_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             1, 4, 5, 8, 9, 12, 13, 16,
@@ -1345,7 +1372,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_max_epu16() {
+    const fn test_mm_max_epu16() {
         let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm_max_epu16(a, b);
@@ -1354,7 +1381,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_max_epi32() {
+    const fn test_mm_max_epi32() {
         let a = _mm_setr_epi32(1, 4, 5, 8);
         let b = _mm_setr_epi32(2, 3, 6, 7);
         let r = _mm_max_epi32(a, b);
@@ -1363,7 +1390,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_max_epu32() {
+    const fn test_mm_max_epu32() {
         let a = _mm_setr_epi32(1, 4, 5, 8);
         let b = _mm_setr_epi32(2, 3, 6, 7);
         let r = _mm_max_epu32(a, b);
@@ -1372,7 +1399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_min_epi8() {
+    const fn test_mm_min_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             1, 4, 5, 8, 9, 12, 13, 16,
@@ -1411,7 +1438,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_min_epu16() {
+    const fn test_mm_min_epu16() {
         let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm_min_epu16(a, b);
@@ -1420,7 +1447,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_min_epi32() {
+    const fn test_mm_min_epi32() {
         let a = _mm_setr_epi32(1, 4, 5, 8);
         let b = _mm_setr_epi32(2, 3, 6, 7);
         let r = _mm_min_epi32(a, b);
@@ -1435,7 +1462,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_min_epu32() {
+    const fn test_mm_min_epu32() {
         let a = _mm_setr_epi32(1, 4, 5, 8);
         let b = _mm_setr_epi32(2, 3, 6, 7);
         let r = _mm_min_epu32(a, b);
@@ -1444,7 +1471,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_packus_epi32() {
+    const fn test_mm_packus_epi32() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let b = _mm_setr_epi32(-1, -2, -3, -4);
         let r = _mm_packus_epi32(a, b);
@@ -1453,7 +1480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cmpeq_epi64() {
+    const fn test_mm_cmpeq_epi64() {
         let a = _mm_setr_epi64x(0, 1);
         let b = _mm_setr_epi64x(0, 0);
         let r = _mm_cmpeq_epi64(a, b);
@@ -1462,7 +1489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepi8_epi16() {
+    const fn test_mm_cvtepi8_epi16() {
         let a = _mm_set1_epi8(10);
         let r = _mm_cvtepi8_epi16(a);
         let e = _mm_set1_epi16(10);
@@ -1474,7 +1501,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepi8_epi32() {
+    const fn test_mm_cvtepi8_epi32() {
         let a = _mm_set1_epi8(10);
         let r = _mm_cvtepi8_epi32(a);
         let e = _mm_set1_epi32(10);
@@ -1486,7 +1513,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepi8_epi64() {
+    const fn test_mm_cvtepi8_epi64() {
         let a = _mm_set1_epi8(10);
         let r = _mm_cvtepi8_epi64(a);
         let e = _mm_set1_epi64x(10);
@@ -1498,7 +1525,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepi16_epi32() {
+    const fn test_mm_cvtepi16_epi32() {
         let a = _mm_set1_epi16(10);
         let r = _mm_cvtepi16_epi32(a);
         let e = _mm_set1_epi32(10);
@@ -1510,7 +1537,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepi16_epi64() {
+    const fn test_mm_cvtepi16_epi64() {
         let a = _mm_set1_epi16(10);
         let r = _mm_cvtepi16_epi64(a);
         let e = _mm_set1_epi64x(10);
@@ -1522,7 +1549,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepi32_epi64() {
+    const fn test_mm_cvtepi32_epi64() {
         let a = _mm_set1_epi32(10);
         let r = _mm_cvtepi32_epi64(a);
         let e = _mm_set1_epi64x(10);
@@ -1534,7 +1561,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepu8_epi16() {
+    const fn test_mm_cvtepu8_epi16() {
         let a = _mm_set1_epi8(10);
         let r = _mm_cvtepu8_epi16(a);
         let e = _mm_set1_epi16(10);
@@ -1542,7 +1569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepu8_epi32() {
+    const fn test_mm_cvtepu8_epi32() {
         let a = _mm_set1_epi8(10);
         let r = _mm_cvtepu8_epi32(a);
         let e = _mm_set1_epi32(10);
@@ -1550,7 +1577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepu8_epi64() {
+    const fn test_mm_cvtepu8_epi64() {
         let a = _mm_set1_epi8(10);
         let r = _mm_cvtepu8_epi64(a);
         let e = _mm_set1_epi64x(10);
@@ -1558,7 +1585,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepu16_epi32() {
+    const fn test_mm_cvtepu16_epi32() {
         let a = _mm_set1_epi16(10);
         let r = _mm_cvtepu16_epi32(a);
         let e = _mm_set1_epi32(10);
@@ -1566,7 +1593,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepu16_epi64() {
+    const fn test_mm_cvtepu16_epi64() {
         let a = _mm_set1_epi16(10);
         let r = _mm_cvtepu16_epi64(a);
         let e = _mm_set1_epi64x(10);
@@ -1574,7 +1601,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_cvtepu32_epi64() {
+    const fn test_mm_cvtepu32_epi64() {
         let a = _mm_set1_epi32(10);
         let r = _mm_cvtepu32_epi64(a);
         let e = _mm_set1_epi64x(10);
@@ -1582,7 +1609,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_dp_pd() {
+    fn test_mm_dp_pd() {
         let a = _mm_setr_pd(2.0, 3.0);
         let b = _mm_setr_pd(1.0, 4.0);
         let e = _mm_setr_pd(14.0, 0.0);
@@ -1590,7 +1617,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_dp_ps() {
+    fn test_mm_dp_ps() {
         let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
         let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
         let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
@@ -1598,7 +1625,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_floor_pd() {
+    const fn test_mm_floor_pd() {
         let a = _mm_setr_pd(2.5, 4.5);
         let r = _mm_floor_pd(a);
         let e = _mm_setr_pd(2.0, 4.0);
@@ -1606,7 +1633,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_floor_ps() {
+    const fn test_mm_floor_ps() {
         let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
         let r = _mm_floor_ps(a);
         let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
@@ -1614,7 +1641,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_floor_sd() {
+    fn test_mm_floor_sd() {
         let a = _mm_setr_pd(2.5, 4.5);
         let b = _mm_setr_pd(-1.5, -3.5);
         let r = _mm_floor_sd(a, b);
@@ -1623,7 +1650,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_floor_ss() {
+    fn test_mm_floor_ss() {
         let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
         let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
         let r = _mm_floor_ss(a, b);
@@ -1632,7 +1659,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_ceil_pd() {
+    const fn test_mm_ceil_pd() {
         let a = _mm_setr_pd(1.5, 3.5);
         let r = _mm_ceil_pd(a);
         let e = _mm_setr_pd(2.0, 4.0);
@@ -1640,7 +1667,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_ceil_ps() {
+    const fn test_mm_ceil_ps() {
         let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
         let r = _mm_ceil_ps(a);
         let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
@@ -1648,7 +1675,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_ceil_sd() {
+    fn test_mm_ceil_sd() {
         let a = _mm_setr_pd(1.5, 3.5);
         let b = _mm_setr_pd(-2.5, -4.5);
         let r = _mm_ceil_sd(a, b);
@@ -1657,7 +1684,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_ceil_ss() {
+    fn test_mm_ceil_ss() {
         let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
         let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
         let r = _mm_ceil_ss(a, b);
@@ -1666,7 +1693,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_round_pd() {
+    fn test_mm_round_pd() {
         let a = _mm_setr_pd(1.25, 3.75);
         let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
         let e = _mm_setr_pd(1.0, 4.0);
@@ -1674,7 +1701,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_round_ps() {
+    fn test_mm_round_ps() {
         let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
         let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
         let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
@@ -1682,7 +1709,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_round_sd() {
+    fn test_mm_round_sd() {
         let a = _mm_setr_pd(1.5, 3.5);
         let b = _mm_setr_pd(-2.5, -4.5);
         let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
@@ -1709,7 +1736,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_round_ss() {
+    fn test_mm_round_ss() {
         let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
         let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
         let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
@@ -1736,7 +1763,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_minpos_epu16_1() {
+    fn test_mm_minpos_epu16_1() {
         let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
         let r = _mm_minpos_epu16(a);
         let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
@@ -1744,7 +1771,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_minpos_epu16_2() {
+    fn test_mm_minpos_epu16_2() {
         let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
         let r = _mm_minpos_epu16(a);
         let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
@@ -1752,7 +1779,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_minpos_epu16_3() {
+    fn test_mm_minpos_epu16_3() {
         // Case where the minimum value is repeated
         let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
         let r = _mm_minpos_epu16(a);
@@ -1761,7 +1788,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_mul_epi32() {
+    const fn test_mm_mul_epi32() {
         {
             let a = _mm_setr_epi32(1, 1, 1, 1);
             let b = _mm_setr_epi32(1, 2, 3, 4);
@@ -1782,7 +1809,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_mullo_epi32() {
+    const fn test_mm_mullo_epi32() {
         {
             let a = _mm_setr_epi32(1, 1, 1, 1);
             let b = _mm_setr_epi32(1, 2, 3, 4);
@@ -1803,7 +1830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_minpos_epu16() {
+    fn test_mm_minpos_epu16() {
         let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
         let r = _mm_minpos_epu16(a);
         let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
@@ -1811,7 +1838,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_mpsadbw_epu8() {
+    fn test_mm_mpsadbw_epu8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             0, 1, 2, 3, 4, 5, 6, 7,
@@ -1840,7 +1867,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_testz_si128() {
+    const fn test_mm_testz_si128() {
         let a = _mm_set1_epi8(1);
         let mask = _mm_set1_epi8(0);
         let r = _mm_testz_si128(a, mask);
@@ -1856,7 +1883,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_testc_si128() {
+    const fn test_mm_testc_si128() {
         let a = _mm_set1_epi8(-1);
         let mask = _mm_set1_epi8(0);
         let r = _mm_testc_si128(a, mask);
@@ -1872,7 +1899,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_testnzc_si128() {
+    fn test_mm_testnzc_si128() {
         let a = _mm_set1_epi8(0);
         let mask = _mm_set1_epi8(1);
         let r = _mm_testnzc_si128(a, mask);
@@ -1892,7 +1919,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_test_all_zeros() {
+    const fn test_mm_test_all_zeros() {
         let a = _mm_set1_epi8(1);
         let mask = _mm_set1_epi8(0);
         let r = _mm_test_all_zeros(a, mask);
@@ -1908,7 +1935,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_test_all_ones() {
+    const fn test_mm_test_all_ones() {
         let a = _mm_set1_epi8(-1);
         let r = _mm_test_all_ones(a);
         assert_eq!(r, 1);
@@ -1918,7 +1945,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_test_mix_ones_zeros() {
+    fn test_mm_test_mix_ones_zeros() {
         let a = _mm_set1_epi8(0);
         let mask = _mm_set1_epi8(1);
         let r = _mm_test_mix_ones_zeros(a, mask);
@@ -1938,9 +1965,9 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_stream_load_si128() {
+    fn test_mm_stream_load_si128() {
         let a = _mm_set_epi64x(5, 6);
-        let r = _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _);
+        let r = unsafe { _mm_stream_load_si128(core::ptr::addr_of!(a) as *const _) };
         assert_eq_m128i(a, r);
     }
 }
diff --git a/crates/core_arch/src/x86/sse42.rs b/crates/core_arch/src/x86/sse42.rs
index 83c51f2b70..55e2259263 100644
--- a/crates/core_arch/src/x86/sse42.rs
+++ b/crates/core_arch/src/x86/sse42.rs
@@ -563,7 +563,8 @@ pub fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
 #[target_feature(enable = "sse4.2")]
 #[cfg_attr(test, assert_instr(pcmpgtq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
     unsafe { transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) }
 }
 
@@ -609,26 +610,27 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
+    use crate::core_arch::simd::*;
     use crate::core_arch::x86::*;
     use std::ptr;
 
     // Currently one cannot `load` a &[u8] that is less than 16
     // in length. This makes loading strings less than 16 in length
     // a bit difficult. Rather than `load` and mutate the __m128i,
-    // it is easier to memcpy the given string to a local slice with
-    // length 16 and `load` the local slice.
-    #[target_feature(enable = "sse4.2")]
-    unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
+    // it is easier to memcpy the given string to a zero-padded
+    // 16-byte array and transmute it to `__m128i`.
+    fn str_to_m128i(s: &[u8]) -> __m128i {
         assert!(s.len() <= 16);
-        let slice = &mut [0u8; 16];
-        ptr::copy_nonoverlapping(s.as_ptr(), slice.as_mut_ptr(), s.len());
-        _mm_loadu_si128(slice.as_ptr() as *const _)
+        let mut array = [0u8; 16];
+        array[..s.len()].copy_from_slice(s);
+        u8x16::from_array(array).as_m128i()
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpistrm() {
+    fn test_mm_cmpistrm() {
         let a = str_to_m128i(b"Hello! Good-Bye!");
         let b = str_to_m128i(b"hello! good-bye!");
         let i = _mm_cmpistrm::<_SIDD_UNIT_MASK>(a, b);
@@ -641,7 +643,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpistri() {
+    fn test_mm_cmpistri() {
         let a = str_to_m128i(b"Hello");
         let b = str_to_m128i(b"   Hello        ");
         let i = _mm_cmpistri::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
@@ -649,7 +651,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpistrz() {
+    fn test_mm_cmpistrz() {
         let a = str_to_m128i(b"");
         let b = str_to_m128i(b"Hello");
         let i = _mm_cmpistrz::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
@@ -657,7 +659,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpistrc() {
+    fn test_mm_cmpistrc() {
         let a = str_to_m128i(b"                ");
         let b = str_to_m128i(b"       !        ");
         let i = _mm_cmpistrc::<_SIDD_UNIT_MASK>(a, b);
@@ -665,7 +667,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpistrs() {
+    fn test_mm_cmpistrs() {
         let a = str_to_m128i(b"Hello");
         let b = str_to_m128i(b"");
         let i = _mm_cmpistrs::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
@@ -673,7 +675,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpistro() {
+    fn test_mm_cmpistro() {
         #[rustfmt::skip]
         let a_bytes = _mm_setr_epi8(
             0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
@@ -691,7 +693,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpistra() {
+    fn test_mm_cmpistra() {
         let a = str_to_m128i(b"");
         let b = str_to_m128i(b"Hello!!!!!!!!!!!");
         let i = _mm_cmpistra::<_SIDD_UNIT_MASK>(a, b);
@@ -699,7 +701,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpestrm() {
+    fn test_mm_cmpestrm() {
         let a = str_to_m128i(b"Hello!");
         let b = str_to_m128i(b"Hello.");
         let i = _mm_cmpestrm::<_SIDD_UNIT_MASK>(a, 5, b, 5);
@@ -712,7 +714,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpestri() {
+    fn test_mm_cmpestri() {
         let a = str_to_m128i(b"bar - garbage");
         let b = str_to_m128i(b"foobar");
         let i = _mm_cmpestri::<_SIDD_CMP_EQUAL_ORDERED>(a, 3, b, 6);
@@ -720,7 +722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpestrz() {
+    fn test_mm_cmpestrz() {
         let a = str_to_m128i(b"");
         let b = str_to_m128i(b"Hello");
         let i = _mm_cmpestrz::<_SIDD_CMP_EQUAL_ORDERED>(a, 16, b, 6);
@@ -728,7 +730,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpestrc() {
+    fn test_mm_cmpestrc() {
         let va = str_to_m128i(b"!!!!!!!!");
         let vb = str_to_m128i(b"        ");
         let i = _mm_cmpestrc::<_SIDD_UNIT_MASK>(va, 7, vb, 7);
@@ -736,7 +738,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpestrs() {
+    fn test_mm_cmpestrs() {
         #[rustfmt::skip]
         let a_bytes = _mm_setr_epi8(
             0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
@@ -749,7 +751,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpestro() {
+    fn test_mm_cmpestro() {
         let a = str_to_m128i(b"Hello");
         let b = str_to_m128i(b"World");
         let i = _mm_cmpestro::<_SIDD_UBYTE_OPS>(a, 5, b, 5);
@@ -757,7 +759,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpestra() {
+    fn test_mm_cmpestra() {
         let a = str_to_m128i(b"Cannot match a");
         let b = str_to_m128i(b"Null after 14");
         let i = _mm_cmpestra::<{ _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK }>(a, 14, b, 16);
@@ -765,7 +767,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_crc32_u8() {
+    fn test_mm_crc32_u8() {
         let crc = 0x2aa1e72b;
         let v = 0x2a;
         let i = _mm_crc32_u8(crc, v);
@@ -773,7 +775,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_crc32_u16() {
+    fn test_mm_crc32_u16() {
         let crc = 0x8ecec3b5;
         let v = 0x22b;
         let i = _mm_crc32_u16(crc, v);
@@ -781,7 +783,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_crc32_u32() {
+    fn test_mm_crc32_u32() {
         let crc = 0xae2912c8;
         let v = 0x845fed;
         let i = _mm_crc32_u32(crc, v);
@@ -789,7 +791,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_cmpgt_epi64() {
+    const fn test_mm_cmpgt_epi64() {
         let a = _mm_setr_epi64x(0, 0x2a);
         let b = _mm_set1_epi64x(0x00);
         let i = _mm_cmpgt_epi64(a, b);
diff --git a/crates/core_arch/src/x86/sse4a.rs b/crates/core_arch/src/x86/sse4a.rs
index 7978d018e4..f36b879a03 100644
--- a/crates/core_arch/src/x86/sse4a.rs
+++ b/crates/core_arch/src/x86/sse4a.rs
@@ -151,7 +151,7 @@ mod tests {
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "sse4a")]
-    unsafe fn test_mm_extract_si64() {
+    fn test_mm_extract_si64() {
         let b = 0b0110_0000_0000_i64;
         //        ^^^^ bit range extracted
         let x = _mm_setr_epi64x(b, 0);
@@ -164,7 +164,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4a")]
-    unsafe fn test_mm_extracti_si64() {
+    fn test_mm_extracti_si64() {
         let a = _mm_setr_epi64x(0x0123456789abcdef, 0);
         let r = _mm_extracti_si64::<8, 8>(a);
         let e = _mm_setr_epi64x(0xcd, 0);
@@ -172,7 +172,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4a")]
-    unsafe fn test_mm_insert_si64() {
+    fn test_mm_insert_si64() {
         let i = 0b0110_i64;
         //        ^^^^ bit range inserted
         let z = 0b1010_1010_1010i64;
@@ -189,7 +189,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4a")]
-    unsafe fn test_mm_inserti_si64() {
+    fn test_mm_inserti_si64() {
         let a = _mm_setr_epi64x(0x0123456789abcdef, 0);
         let b = _mm_setr_epi64x(0x0011223344556677, 0);
         let r = _mm_inserti_si64::<8, 8>(a, b);
@@ -206,7 +206,7 @@ mod tests {
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     // (non-temporal store)
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_stream_sd() {
+    fn test_mm_stream_sd() {
         let mut mem = MemoryF64 {
             data: [1.0_f64, 2.0],
         };
@@ -216,7 +216,9 @@ mod tests {
 
             let x = _mm_setr_pd(3.0, 4.0);
 
-            _mm_stream_sd(d, x);
+            unsafe {
+                _mm_stream_sd(d, x);
+            }
             _mm_sfence();
         }
         assert_eq!(mem.data[0], 3.0);
@@ -232,7 +234,7 @@ mod tests {
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     // (non-temporal store)
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_stream_ss() {
+    fn test_mm_stream_ss() {
         let mut mem = MemoryF32 {
             data: [1.0_f32, 2.0, 3.0, 4.0],
         };
@@ -242,7 +244,9 @@ mod tests {
 
             let x = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
 
-            _mm_stream_ss(d, x);
+            unsafe {
+                _mm_stream_ss(d, x);
+            }
             _mm_sfence();
         }
         assert_eq!(mem.data[0], 5.0);
diff --git a/crates/core_arch/src/x86/ssse3.rs b/crates/core_arch/src/x86/ssse3.rs
index ac067bd4b5..1d7a97944a 100644
--- a/crates/core_arch/src/x86/ssse3.rs
+++ b/crates/core_arch/src/x86/ssse3.rs
@@ -16,7 +16,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsb))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_abs_epi8(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i8x16();
         let zero = i8x16::ZERO;
@@ -34,7 +35,8 @@ pub fn _mm_abs_epi8(a: __m128i) -> __m128i {
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_abs_epi16(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i16x8();
         let zero = i16x8::ZERO;
@@ -52,7 +54,8 @@ pub fn _mm_abs_epi16(a: __m128i) -> __m128i {
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(pabsd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_abs_epi32(a: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_abs_epi32(a: __m128i) -> __m128i {
     unsafe {
         let a = a.as_i32x4();
         let zero = i32x4::ZERO;
@@ -104,7 +107,8 @@ pub fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
     static_assert_uimm_bits!(IMM8, 8);
     // If palignr is shifting the pair of vectors more than the size of two
     // lanes, emit zero.
@@ -163,7 +167,8 @@ pub fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phaddw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
     let a = a.as_i16x8();
     let b = b.as_i16x8();
     unsafe {
@@ -183,7 +188,13 @@ pub fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(phaddsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(phaddsw128(a.as_i16x8(), b.as_i16x8())) }
+    let a = a.as_i16x8();
+    let b = b.as_i16x8();
+    unsafe {
+        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_saturating_add(even, odd).as_m128i()
+    }
 }
 
 /// Horizontally adds the adjacent pairs of values contained in 2 packed
@@ -194,7 +205,8 @@ pub fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phaddd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
     let a = a.as_i32x4();
     let b = b.as_i32x4();
     unsafe {
@@ -212,7 +224,8 @@ pub fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phsubw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
     let a = a.as_i16x8();
     let b = b.as_i16x8();
     unsafe {
@@ -233,7 +246,13 @@ pub fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[cfg_attr(test, assert_instr(phsubsw))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
-    unsafe { transmute(phsubsw128(a.as_i16x8(), b.as_i16x8())) }
+    let a = a.as_i16x8();
+    let b = b.as_i16x8();
+    unsafe {
+        let even: i16x8 = simd_shuffle!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]);
+        let odd: i16x8 = simd_shuffle!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]);
+        simd_saturating_sub(even, odd).as_m128i()
+    }
 }
 
 /// Horizontally subtract the adjacent pairs of values contained in 2
@@ -244,7 +263,8 @@ pub fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
 #[target_feature(enable = "ssse3")]
 #[cfg_attr(test, assert_instr(phsubd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
     let a = a.as_i32x4();
     let b = b.as_i32x4();
     unsafe {
@@ -329,12 +349,6 @@ unsafe extern "C" {
     #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
     fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
 
-    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
-    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
-
-    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
-    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
-
     #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
     fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
 
@@ -353,30 +367,31 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_abs_epi8() {
+    const fn test_mm_abs_epi8() {
         let r = _mm_abs_epi8(_mm_set1_epi8(-5));
         assert_eq_m128i(r, _mm_set1_epi8(5));
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_abs_epi16() {
+    const fn test_mm_abs_epi16() {
         let r = _mm_abs_epi16(_mm_set1_epi16(-5));
         assert_eq_m128i(r, _mm_set1_epi16(5));
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_abs_epi32() {
+    const fn test_mm_abs_epi32() {
         let r = _mm_abs_epi32(_mm_set1_epi32(-5));
         assert_eq_m128i(r, _mm_set1_epi32(5));
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_shuffle_epi8() {
+    fn test_mm_shuffle_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -400,7 +415,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_alignr_epi8() {
+    const fn test_mm_alignr_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -440,7 +455,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_hadd_epi16() {
+    const fn test_mm_hadd_epi16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
         let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
@@ -465,7 +480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_hadds_epi16() {
+    fn test_mm_hadds_epi16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
         let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
@@ -490,7 +505,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_hadd_epi32() {
+    const fn test_mm_hadd_epi32() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let b = _mm_setr_epi32(4, 128, 4, 3);
         let expected = _mm_setr_epi32(3, 7, 132, 7);
@@ -506,7 +521,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_hsub_epi16() {
+    const fn test_mm_hsub_epi16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
         let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
@@ -531,7 +546,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_hsubs_epi16() {
+    fn test_mm_hsubs_epi16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
         let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
@@ -556,7 +571,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_hsub_epi32() {
+    const fn test_mm_hsub_epi32() {
         let a = _mm_setr_epi32(1, 2, 3, 4);
         let b = _mm_setr_epi32(4, 128, 4, 3);
         let expected = _mm_setr_epi32(-1, -1, -124, 1);
@@ -572,7 +587,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_maddubs_epi16() {
+    fn test_mm_maddubs_epi16() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -612,7 +627,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_mulhrs_epi16() {
+    fn test_mm_mulhrs_epi16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
         let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
@@ -628,7 +643,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_sign_epi8() {
+    fn test_mm_sign_epi8() {
         #[rustfmt::skip]
         let a = _mm_setr_epi8(
             1, 2, 3, 4, 5, 6, 7, 8,
@@ -649,7 +664,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_sign_epi16() {
+    fn test_mm_sign_epi16() {
         let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
         let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
         let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
@@ -658,7 +673,7 @@ mod tests {
     }
 
     #[simd_test(enable = "ssse3")]
-    unsafe fn test_mm_sign_epi32() {
+    fn test_mm_sign_epi32() {
         let a = _mm_setr_epi32(-1, 2, 3, 4);
         let b = _mm_setr_epi32(1, -1, 1, 0);
         let expected = _mm_setr_epi32(-1, -2, 3, 0);
diff --git a/crates/core_arch/src/x86/tbm.rs b/crates/core_arch/src/x86/tbm.rs
index 5a01752d8a..0ba4572dcd 100644
--- a/crates/core_arch/src/x86/tbm.rs
+++ b/crates/core_arch/src/x86/tbm.rs
@@ -6,7 +6,7 @@
 //! [Wikipedia][wikipedia_bmi] provides a quick overview of the available
 //! instructions.
 //!
-//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
 //! [wikipedia_bmi]:
 //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
 
@@ -42,7 +42,8 @@ pub fn _bextri_u32<const CONTROL: u32>(a: u32) -> u32 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcfill))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blcfill_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blcfill_u32(x: u32) -> u32 {
     x & (x.wrapping_add(1))
 }
 
@@ -53,7 +54,8 @@ pub fn _blcfill_u32(x: u32) -> u32 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blci))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blci_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blci_u32(x: u32) -> u32 {
     x | !x.wrapping_add(1)
 }
 
@@ -64,7 +66,8 @@ pub fn _blci_u32(x: u32) -> u32 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcic))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blcic_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blcic_u32(x: u32) -> u32 {
     !x & x.wrapping_add(1)
 }
 
@@ -76,7 +79,8 @@ pub fn _blcic_u32(x: u32) -> u32 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcmsk))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blcmsk_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blcmsk_u32(x: u32) -> u32 {
     x ^ x.wrapping_add(1)
 }
 
@@ -87,7 +91,8 @@ pub fn _blcmsk_u32(x: u32) -> u32 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcs))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blcs_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blcs_u32(x: u32) -> u32 {
     x | x.wrapping_add(1)
 }
 
@@ -98,7 +103,8 @@ pub fn _blcs_u32(x: u32) -> u32 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blsfill))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsfill_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsfill_u32(x: u32) -> u32 {
     x | x.wrapping_sub(1)
 }
 
@@ -109,7 +115,8 @@ pub fn _blsfill_u32(x: u32) -> u32 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blsic))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsic_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsic_u32(x: u32) -> u32 {
     !x | x.wrapping_sub(1)
 }
 
@@ -121,7 +128,8 @@ pub fn _blsic_u32(x: u32) -> u32 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(t1mskc))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _t1mskc_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _t1mskc_u32(x: u32) -> u32 {
     !x | x.wrapping_add(1)
 }
 
@@ -133,29 +141,31 @@ pub fn _t1mskc_u32(x: u32) -> u32 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(tzmsk))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _tzmsk_u32(x: u32) -> u32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _tzmsk_u32(x: u32) -> u32 {
     !x & x.wrapping_sub(1)
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86::*;
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_bextri_u32() {
+    fn test_bextri_u32() {
         assert_eq!(_bextri_u32::<0x0404>(0b0101_0000u32), 0b0000_0101u32);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blcfill_u32() {
+    const fn test_blcfill_u32() {
         assert_eq!(_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
         assert_eq!(_blcfill_u32(0b1111_1111u32), 0u32);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blci_u32() {
+    const fn test_blci_u32() {
         assert_eq!(
             _blci_u32(0b0101_0000u32),
             0b1111_1111_1111_1111_1111_1111_1111_1110u32
@@ -167,25 +177,25 @@ mod tests {
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blcic_u32() {
+    const fn test_blcic_u32() {
         assert_eq!(_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
         assert_eq!(_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blcmsk_u32() {
+    const fn test_blcmsk_u32() {
         assert_eq!(_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
         assert_eq!(_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blcs_u32() {
+    const fn test_blcs_u32() {
         assert_eq!(_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
         assert_eq!(_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blsfill_u32() {
+    const fn test_blsfill_u32() {
         assert_eq!(_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
         assert_eq!(
             _blsfill_u32(0u32),
@@ -194,7 +204,7 @@ mod tests {
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blsic_u32() {
+    const fn test_blsic_u32() {
         assert_eq!(
             _blsic_u32(0b0101_0100u32),
             0b1111_1111_1111_1111_1111_1111_1111_1011u32
@@ -206,7 +216,7 @@ mod tests {
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_t1mskc_u32() {
+    const fn test_t1mskc_u32() {
         assert_eq!(
             _t1mskc_u32(0b0101_0111u32),
             0b1111_1111_1111_1111_1111_1111_1111_1000u32
@@ -218,7 +228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_tzmsk_u32() {
+    const fn test_tzmsk_u32() {
         assert_eq!(_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32);
         assert_eq!(_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32);
     }
diff --git a/crates/core_arch/src/x86/test.rs b/crates/core_arch/src/x86/test.rs
index fec25ce2bc..402c2a6a81 100644
--- a/crates/core_arch/src/x86/test.rs
+++ b/crates/core_arch/src/x86/test.rs
@@ -1,113 +1,130 @@
 //! Utilities used in testing the x86 intrinsics
 
+use crate::core_arch::assert_eq_const as assert_eq;
+use crate::core_arch::simd::*;
 use crate::core_arch::x86::*;
 use std::mem::transmute;
 
 #[track_caller]
-#[target_feature(enable = "sse2")]
-pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
-    assert_eq!(transmute::<_, [u64; 2]>(a), transmute::<_, [u64; 2]>(b))
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m128i(a: __m128i, b: __m128i) {
+    assert_eq!(a.as_u32x4(), b.as_u32x4());
 }
 
 #[track_caller]
-#[target_feature(enable = "sse2")]
-pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
-    if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
-        panic!("{:?} != {:?}", a, b);
-    }
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m128(a: __m128, b: __m128) {
+    assert_eq!(a.as_f32x4(), b.as_f32x4());
 }
 
-#[target_feature(enable = "sse2")]
-pub unsafe fn get_m128d(a: __m128d, idx: usize) -> f64 {
-    transmute::<_, [f64; 2]>(a)[idx]
+#[track_caller]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m128d(a: __m128d, b: __m128d) {
+    assert_eq!(a.as_f64x2(), b.as_f64x2());
 }
 
 #[track_caller]
-#[target_feature(enable = "sse")]
-pub unsafe fn assert_eq_m128(a: __m128, b: __m128) {
-    let r = _mm_cmpeq_ps(a, b);
-    if _mm_movemask_ps(r) != 0b1111 {
-        panic!("{:?} != {:?}", a, b);
-    }
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m128h(a: __m128h, b: __m128h) {
+    assert_eq!(a.as_f16x8(), b.as_f16x8());
 }
 
-#[target_feature(enable = "sse")]
-pub unsafe fn get_m128(a: __m128, idx: usize) -> f32 {
-    transmute::<_, [f32; 4]>(a)[idx]
+#[track_caller]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m256i(a: __m256i, b: __m256i) {
+    assert_eq!(a.as_u32x8(), b.as_u32x8());
 }
 
 #[track_caller]
-#[target_feature(enable = "avx512fp16,avx512vl")]
-pub unsafe fn assert_eq_m128h(a: __m128h, b: __m128h) {
-    let r = _mm_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
-    if r != 0b1111_1111 {
-        panic!("{:?} != {:?}", a, b);
-    }
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m256(a: __m256, b: __m256) {
+    assert_eq!(a.as_f32x8(), b.as_f32x8());
 }
 
-// not actually an intrinsic but useful in various tests as we proted from
-// `i64x2::new` which is backwards from `_mm_set_epi64x`
-#[target_feature(enable = "sse2")]
-pub unsafe fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i {
-    _mm_set_epi64x(b, a)
+#[track_caller]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m256d(a: __m256d, b: __m256d) {
+    assert_eq!(a.as_f64x4(), b.as_f64x4());
 }
 
 #[track_caller]
-#[target_feature(enable = "avx")]
-pub unsafe fn assert_eq_m256i(a: __m256i, b: __m256i) {
-    assert_eq!(transmute::<_, [u64; 4]>(a), transmute::<_, [u64; 4]>(b))
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m256h(a: __m256h, b: __m256h) {
+    assert_eq!(a.as_f16x16(), b.as_f16x16());
 }
 
 #[track_caller]
-#[target_feature(enable = "avx")]
-pub unsafe fn assert_eq_m256d(a: __m256d, b: __m256d) {
-    let cmp = _mm256_cmp_pd::<_CMP_EQ_OQ>(a, b);
-    if _mm256_movemask_pd(cmp) != 0b1111 {
-        panic!("{:?} != {:?}", a, b);
-    }
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m512i(a: __m512i, b: __m512i) {
+    assert_eq!(a.as_i64x8(), b.as_i64x8());
 }
 
-#[target_feature(enable = "avx")]
-pub unsafe fn get_m256d(a: __m256d, idx: usize) -> f64 {
-    transmute::<_, [f64; 4]>(a)[idx]
+#[track_caller]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m512(a: __m512, b: __m512) {
+    assert_eq!(a.as_f32x16(), b.as_f32x16());
 }
 
 #[track_caller]
-#[target_feature(enable = "avx")]
-pub unsafe fn assert_eq_m256(a: __m256, b: __m256) {
-    let cmp = _mm256_cmp_ps::<_CMP_EQ_OQ>(a, b);
-    if _mm256_movemask_ps(cmp) != 0b11111111 {
-        panic!("{:?} != {:?}", a, b);
-    }
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m512d(a: __m512d, b: __m512d) {
+    assert_eq!(a.as_f64x8(), b.as_f64x8());
+}
+
+#[track_caller]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn assert_eq_m512h(a: __m512h, b: __m512h) {
+    assert_eq!(a.as_f16x32(), b.as_f16x32());
+}
+
+#[target_feature(enable = "sse2")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn get_m128d(a: __m128d, idx: usize) -> f64 {
+    a.as_f64x2().extract_dyn(idx)
+}
+
+#[target_feature(enable = "sse")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn get_m128(a: __m128, idx: usize) -> f32 {
+    a.as_f32x4().extract_dyn(idx)
 }
 
 #[target_feature(enable = "avx")]
-pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
-    transmute::<_, [f32; 8]>(a)[idx]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn get_m256d(a: __m256d, idx: usize) -> f64 {
+    a.as_f64x4().extract_dyn(idx)
 }
 
-#[track_caller]
-#[target_feature(enable = "avx512fp16,avx512vl")]
-pub unsafe fn assert_eq_m256h(a: __m256h, b: __m256h) {
-    let r = _mm256_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
-    if r != 0b11111111_11111111 {
-        panic!("{:?} != {:?}", a, b);
-    }
+#[target_feature(enable = "avx")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn get_m256(a: __m256, idx: usize) -> f32 {
+    a.as_f32x8().extract_dyn(idx)
 }
 
 #[target_feature(enable = "avx512f")]
-pub unsafe fn get_m512(a: __m512, idx: usize) -> f32 {
-    transmute::<_, [f32; 16]>(a)[idx]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn get_m512(a: __m512, idx: usize) -> f32 {
+    a.as_f32x16().extract_dyn(idx)
 }
 
 #[target_feature(enable = "avx512f")]
-pub unsafe fn get_m512d(a: __m512d, idx: usize) -> f64 {
-    transmute::<_, [f64; 8]>(a)[idx]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn get_m512d(a: __m512d, idx: usize) -> f64 {
+    a.as_f64x8().extract_dyn(idx)
 }
 
 #[target_feature(enable = "avx512f")]
-pub unsafe fn get_m512i(a: __m512i, idx: usize) -> i64 {
-    transmute::<_, [i64; 8]>(a)[idx]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub(crate) const fn get_m512i(a: __m512i, idx: usize) -> i64 {
+    a.as_i64x8().extract_dyn(idx)
+}
+
+// not actually an intrinsic but useful in various tests as we ported from
+// `i64x2::new` which is backwards from `_mm_set_epi64x`
+#[target_feature(enable = "sse2")]
+#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+pub const fn _mm_setr_epi64x(a: i64, b: i64) -> __m128i {
+    _mm_set_epi64x(b, a)
 }
 
 // These intrinsics doesn't exist on x86 b/c it requires a 64-bit register,
@@ -118,16 +135,18 @@ mod x86_polyfill {
     use crate::intrinsics::simd::*;
 
     #[rustc_legacy_const_generics(2)]
-    pub unsafe fn _mm_insert_epi64<const INDEX: i32>(a: __m128i, val: i64) -> __m128i {
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    pub const fn _mm_insert_epi64<const INDEX: i32>(a: __m128i, val: i64) -> __m128i {
         static_assert_uimm_bits!(INDEX, 1);
-        transmute(simd_insert!(a.as_i64x2(), INDEX as u32, val))
+        unsafe { transmute(simd_insert!(a.as_i64x2(), INDEX as u32, val)) }
     }
 
     #[target_feature(enable = "avx2")]
     #[rustc_legacy_const_generics(2)]
-    pub unsafe fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, val: i64) -> __m256i {
+    #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+    pub const fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, val: i64) -> __m256i {
         static_assert_uimm_bits!(INDEX, 2);
-        transmute(simd_insert!(a.as_i64x4(), INDEX as u32, val))
+        unsafe { transmute(simd_insert!(a.as_i64x4(), INDEX as u32, val)) }
     }
 }
 
@@ -136,33 +155,3 @@ mod x86_polyfill {
     pub use crate::core_arch::x86_64::{_mm_insert_epi64, _mm256_insert_epi64};
 }
 pub use self::x86_polyfill::*;
-
-#[track_caller]
-pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
-    assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b))
-}
-
-#[track_caller]
-pub unsafe fn assert_eq_m512(a: __m512, b: __m512) {
-    let cmp = _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b);
-    if cmp != 0b11111111_11111111 {
-        panic!("{:?} != {:?}", a, b);
-    }
-}
-
-#[track_caller]
-pub unsafe fn assert_eq_m512d(a: __m512d, b: __m512d) {
-    let cmp = _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b);
-    if cmp != 0b11111111 {
-        panic!("{:?} != {:?}", a, b);
-    }
-}
-
-#[track_caller]
-#[target_feature(enable = "avx512fp16")]
-pub unsafe fn assert_eq_m512h(a: __m512h, b: __m512h) {
-    let r = _mm512_cmp_ph_mask::<_CMP_EQ_OQ>(a, b);
-    if r != 0b11111111_11111111_11111111_11111111 {
-        panic!("{:?} != {:?}", a, b);
-    }
-}
diff --git a/crates/core_arch/src/x86/vaes.rs b/crates/core_arch/src/x86/vaes.rs
index b1fe193e3f..864b1d56d1 100644
--- a/crates/core_arch/src/x86/vaes.rs
+++ b/crates/core_arch/src/x86/vaes.rs
@@ -5,7 +5,7 @@
 //! The reference is [Intel 64 and IA-32 Architectures Software Developer's
 //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 
 use crate::core_arch::x86::__m256i;
 use crate::core_arch::x86::__m512i;
@@ -146,9 +146,9 @@ mod tests {
     // ideally we'd be using quickcheck here instead
 
     #[target_feature(enable = "avx2")]
-    unsafe fn helper_for_256_vaes(
-        linear: unsafe fn(__m128i, __m128i) -> __m128i,
-        vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
+    fn helper_for_256_vaes(
+        linear: fn(__m128i, __m128i) -> __m128i,
+        vectorized: fn(__m256i, __m256i) -> __m256i,
     ) {
         let a = _mm256_set_epi64x(
             0xDCB4DB3657BF0B7D,
@@ -178,7 +178,7 @@ mod tests {
     }
 
     #[target_feature(enable = "sse2")]
-    unsafe fn setup_state_key<T>(broadcast: unsafe fn(__m128i) -> T) -> (T, T) {
+    fn setup_state_key<T>(broadcast: fn(__m128i) -> T) -> (T, T) {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
         let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
         let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
@@ -186,17 +186,17 @@ mod tests {
     }
 
     #[target_feature(enable = "avx2")]
-    unsafe fn setup_state_key_256() -> (__m256i, __m256i) {
+    fn setup_state_key_256() -> (__m256i, __m256i) {
         setup_state_key(_mm256_broadcastsi128_si256)
     }
 
     #[target_feature(enable = "avx512f")]
-    unsafe fn setup_state_key_512() -> (__m512i, __m512i) {
+    fn setup_state_key_512() -> (__m512i, __m512i) {
         setup_state_key(_mm512_broadcast_i32x4)
     }
 
     #[simd_test(enable = "vaes,avx512vl")]
-    unsafe fn test_mm256_aesdec_epi128() {
+    fn test_mm256_aesdec_epi128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
         let (a, k) = setup_state_key_256();
         let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
@@ -208,7 +208,7 @@ mod tests {
     }
 
     #[simd_test(enable = "vaes,avx512vl")]
-    unsafe fn test_mm256_aesdeclast_epi128() {
+    fn test_mm256_aesdeclast_epi128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
         let (a, k) = setup_state_key_256();
         let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
@@ -220,7 +220,7 @@ mod tests {
     }
 
     #[simd_test(enable = "vaes,avx512vl")]
-    unsafe fn test_mm256_aesenc_epi128() {
+    fn test_mm256_aesenc_epi128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
         // they are repeated appropriately
         let (a, k) = setup_state_key_256();
@@ -233,7 +233,7 @@ mod tests {
     }
 
     #[simd_test(enable = "vaes,avx512vl")]
-    unsafe fn test_mm256_aesenclast_epi128() {
+    fn test_mm256_aesenclast_epi128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
         let (a, k) = setup_state_key_256();
         let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
@@ -245,9 +245,9 @@ mod tests {
     }
 
     #[target_feature(enable = "avx512f")]
-    unsafe fn helper_for_512_vaes(
-        linear: unsafe fn(__m128i, __m128i) -> __m128i,
-        vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
+    fn helper_for_512_vaes(
+        linear: fn(__m128i, __m128i) -> __m128i,
+        vectorized: fn(__m512i, __m512i) -> __m512i,
     ) {
         let a = _mm512_set_epi64(
             0xDCB4DB3657BF0B7D,
@@ -291,7 +291,7 @@ mod tests {
     }
 
     #[simd_test(enable = "vaes,avx512f")]
-    unsafe fn test_mm512_aesdec_epi128() {
+    fn test_mm512_aesdec_epi128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
         let (a, k) = setup_state_key_512();
         let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
@@ -303,7 +303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "vaes,avx512f")]
-    unsafe fn test_mm512_aesdeclast_epi128() {
+    fn test_mm512_aesdeclast_epi128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
         let (a, k) = setup_state_key_512();
         let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
@@ -315,7 +315,7 @@ mod tests {
     }
 
     #[simd_test(enable = "vaes,avx512f")]
-    unsafe fn test_mm512_aesenc_epi128() {
+    fn test_mm512_aesenc_epi128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
         let (a, k) = setup_state_key_512();
         let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
@@ -327,7 +327,7 @@ mod tests {
     }
 
     #[simd_test(enable = "vaes,avx512f")]
-    unsafe fn test_mm512_aesenclast_epi128() {
+    fn test_mm512_aesenclast_epi128() {
         // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
         let (a, k) = setup_state_key_512();
         let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
diff --git a/crates/core_arch/src/x86/vpclmulqdq.rs b/crates/core_arch/src/x86/vpclmulqdq.rs
index b1f23bd2f4..ad44e59f3a 100644
--- a/crates/core_arch/src/x86/vpclmulqdq.rs
+++ b/crates/core_arch/src/x86/vpclmulqdq.rs
@@ -124,9 +124,9 @@ mod tests {
     // this function tests one of the possible 4 instances
     // with different inputs across lanes
     #[target_feature(enable = "vpclmulqdq,avx512f")]
-    unsafe fn verify_512_helper(
-        linear: unsafe fn(__m128i, __m128i) -> __m128i,
-        vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
+    fn verify_512_helper(
+        linear: fn(__m128i, __m128i) -> __m128i,
+        vectorized: fn(__m512i, __m512i) -> __m512i,
     ) {
         let a = _mm512_set_epi64(
             0xDCB4DB3657BF0B7D,
@@ -165,9 +165,9 @@ mod tests {
     // this function tests one of the possible 4 instances
     // with different inputs across lanes for the VL version
     #[target_feature(enable = "vpclmulqdq,avx512vl")]
-    unsafe fn verify_256_helper(
-        linear: unsafe fn(__m128i, __m128i) -> __m128i,
-        vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
+    fn verify_256_helper(
+        linear: fn(__m128i, __m128i) -> __m128i,
+        vectorized: fn(__m256i, __m256i) -> __m256i,
     ) {
         let a = _mm512_set_epi64(
             0xDCB4DB3657BF0B7D,
@@ -207,7 +207,7 @@ mod tests {
     }
 
     #[simd_test(enable = "vpclmulqdq,avx512f")]
-    unsafe fn test_mm512_clmulepi64_epi128() {
+    fn test_mm512_clmulepi64_epi128() {
         verify_kat_pclmul!(
             _mm512_broadcast_i32x4,
             _mm512_clmulepi64_epi128,
@@ -233,7 +233,7 @@ mod tests {
     }
 
     #[simd_test(enable = "vpclmulqdq,avx512vl")]
-    unsafe fn test_mm256_clmulepi64_epi128() {
+    fn test_mm256_clmulepi64_epi128() {
         verify_kat_pclmul!(
             _mm256_broadcastsi128_si256,
             _mm256_clmulepi64_epi128,
diff --git a/crates/core_arch/src/x86/xsave.rs b/crates/core_arch/src/x86/xsave.rs
index 190cef929e..e22d3580ff 100644
--- a/crates/core_arch/src/x86/xsave.rs
+++ b/crates/core_arch/src/x86/xsave.rs
@@ -184,7 +184,7 @@ mod tests {
             // `CPUID.(EAX=0DH,ECX=0):ECX` contains the size required to hold all supported xsave
             // components. `EBX` contains the size required to hold all xsave components currently
             // enabled in `XCR0`. We are using `ECX` to ensure enough space in all scenarios
-            let CpuidResult { ecx, .. } = unsafe { __cpuid(0x0d) };
+            let CpuidResult { ecx, .. } = __cpuid(0x0d);
 
             XsaveArea {
                 data: vec![AlignedArray([0; 64]); ecx.div_ceil(64) as usize].into_boxed_slice(),
@@ -197,47 +197,53 @@ mod tests {
 
     #[simd_test(enable = "xsave")]
     #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
-    unsafe fn test_xsave() {
+    fn test_xsave() {
         let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
         let mut a = XsaveArea::new();
         let mut b = XsaveArea::new();
 
-        _xsave(a.ptr(), m);
-        _xrstor(a.ptr(), m);
-        _xsave(b.ptr(), m);
+        unsafe {
+            _xsave(a.ptr(), m);
+            _xrstor(a.ptr(), m);
+            _xsave(b.ptr(), m);
+        }
     }
 
     #[simd_test(enable = "xsave")]
     #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
-    unsafe fn test_xgetbv() {
+    fn test_xgetbv() {
         let xcr_n: u32 = _XCR_XFEATURE_ENABLED_MASK;
 
-        let xcr: u64 = _xgetbv(xcr_n);
-        let xcr_cpy: u64 = _xgetbv(xcr_n);
+        let xcr: u64 = unsafe { _xgetbv(xcr_n) };
+        let xcr_cpy: u64 = unsafe { _xgetbv(xcr_n) };
         assert_eq!(xcr, xcr_cpy);
     }
 
     #[simd_test(enable = "xsave,xsaveopt")]
     #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
-    unsafe fn test_xsaveopt() {
+    fn test_xsaveopt() {
         let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
         let mut a = XsaveArea::new();
         let mut b = XsaveArea::new();
 
-        _xsaveopt(a.ptr(), m);
-        _xrstor(a.ptr(), m);
-        _xsaveopt(b.ptr(), m);
+        unsafe {
+            _xsaveopt(a.ptr(), m);
+            _xrstor(a.ptr(), m);
+            _xsaveopt(b.ptr(), m);
+        }
     }
 
     #[simd_test(enable = "xsave,xsavec")]
     #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
-    unsafe fn test_xsavec() {
+    fn test_xsavec() {
         let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
         let mut a = XsaveArea::new();
         let mut b = XsaveArea::new();
 
-        _xsavec(a.ptr(), m);
-        _xrstor(a.ptr(), m);
-        _xsavec(b.ptr(), m);
+        unsafe {
+            _xsavec(a.ptr(), m);
+            _xrstor(a.ptr(), m);
+            _xsavec(b.ptr(), m);
+        }
     }
 }
diff --git a/crates/core_arch/src/x86_64/abm.rs b/crates/core_arch/src/x86_64/abm.rs
index bf59cc4632..21b5f26a9b 100644
--- a/crates/core_arch/src/x86_64/abm.rs
+++ b/crates/core_arch/src/x86_64/abm.rs
@@ -12,8 +12,8 @@
 //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
 //! available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
 //! [wikipedia_bmi]:
 //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
 
@@ -29,7 +29,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "lzcnt")]
 #[cfg_attr(test, assert_instr(lzcnt))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _lzcnt_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _lzcnt_u64(x: u64) -> u64 {
     x.leading_zeros() as u64
 }
 
@@ -40,23 +41,25 @@ pub fn _lzcnt_u64(x: u64) -> u64 {
 #[target_feature(enable = "popcnt")]
 #[cfg_attr(test, assert_instr(popcnt))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _popcnt64(x: i64) -> i32 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _popcnt64(x: i64) -> i32 {
     x.count_ones() as i32
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::arch::x86_64::*;
 
     #[simd_test(enable = "lzcnt")]
-    unsafe fn test_lzcnt_u64() {
+    const fn test_lzcnt_u64() {
         assert_eq!(_lzcnt_u64(0b0101_1010), 57);
     }
 
     #[simd_test(enable = "popcnt")]
-    unsafe fn test_popcnt64() {
+    const fn test_popcnt64() {
         assert_eq!(_popcnt64(0b0101_1010), 4);
     }
 }
diff --git a/crates/core_arch/src/x86_64/amx.rs b/crates/core_arch/src/x86_64/amx.rs
index 4b33c0ab6c..b3b3e86750 100644
--- a/crates/core_arch/src/x86_64/amx.rs
+++ b/crates/core_arch/src/x86_64/amx.rs
@@ -1,3 +1,5 @@
+use crate::core_arch::{simd::*, x86::*};
+
 #[cfg(test)]
 use stdarch_test::assert_instr;
 
@@ -242,6 +244,337 @@ pub unsafe fn _tile_cmmrlfp16ps<const DST: i32, const A: i32, const B: i32>() {
     tcmmrlfp16ps(DST as i8, A as i8, B as i8);
 }
 
+/// Compute dot-product of BF8 (8-bit E5M2) floating-point elements in tile a and BF8 (8-bit E5M2)
+/// floating-point elements in tile b, accumulating the intermediate single-precision
+/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
+/// back to tile dst.
+#[inline]
+#[rustc_legacy_const_generics(0, 1, 2)]
+#[target_feature(enable = "amx-fp8")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tdpbf8ps, DST = 0, A = 1, B = 2)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_dpbf8ps<const DST: i32, const A: i32, const B: i32>() {
+    static_assert_uimm_bits!(DST, 3);
+    static_assert_uimm_bits!(A, 3);
+    static_assert_uimm_bits!(B, 3);
+    tdpbf8ps(DST as i8, A as i8, B as i8);
+}
+
+/// Compute dot-product of BF8 (8-bit E5M2) floating-point elements in tile a and HF8
+/// (8-bit E4M3) floating-point elements in tile b, accumulating the intermediate single-precision
+/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
+/// back to tile dst.
+#[inline]
+#[rustc_legacy_const_generics(0, 1, 2)]
+#[target_feature(enable = "amx-fp8")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tdpbhf8ps, DST = 0, A = 1, B = 2)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_dpbhf8ps<const DST: i32, const A: i32, const B: i32>() {
+    static_assert_uimm_bits!(DST, 3);
+    static_assert_uimm_bits!(A, 3);
+    static_assert_uimm_bits!(B, 3);
+    tdpbhf8ps(DST as i8, A as i8, B as i8);
+}
+
+/// Compute dot-product of HF8 (8-bit E4M3) floating-point elements in tile a and BF8
+/// (8-bit E5M2) floating-point elements in tile b, accumulating the intermediate single-precision
+/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
+/// back to tile dst.
+#[inline]
+#[rustc_legacy_const_generics(0, 1, 2)]
+#[target_feature(enable = "amx-fp8")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tdphbf8ps, DST = 0, A = 1, B = 2)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_dphbf8ps<const DST: i32, const A: i32, const B: i32>() {
+    static_assert_uimm_bits!(DST, 3);
+    static_assert_uimm_bits!(A, 3);
+    static_assert_uimm_bits!(B, 3);
+    tdphbf8ps(DST as i8, A as i8, B as i8);
+}
+
+/// Compute dot-product of HF8 (8-bit E4M3) floating-point elements in tile a and HF8 (8-bit E4M3)
+/// floating-point elements in tile b, accumulating the intermediate single-precision
+/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
+/// back to tile dst.
+#[inline]
+#[rustc_legacy_const_generics(0, 1, 2)]
+#[target_feature(enable = "amx-fp8")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tdphf8ps, DST = 0, A = 1, B = 2)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_dphf8ps<const DST: i32, const A: i32, const B: i32>() {
+    static_assert_uimm_bits!(DST, 3);
+    static_assert_uimm_bits!(A, 3);
+    static_assert_uimm_bits!(B, 3);
+    tdphf8ps(DST as i8, A as i8, B as i8);
+}
+
+/// Load tile rows from memory specified by base address and stride into destination tile dst
+/// using the tile configuration previously configured via _tile_loadconfig.
+/// Additionally, this intrinsic indicates the source memory location is likely to become
+/// read-shared by multiple processors, i.e., read in the future by at least one other processor
+/// before it is written, assuming it is ever written in the future.
+#[inline]
+#[rustc_legacy_const_generics(0)]
+#[target_feature(enable = "amx-movrs")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tileloaddrs, DST = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_loaddrs<const DST: i32>(base: *const u8, stride: usize) {
+    static_assert_uimm_bits!(DST, 3);
+    tileloaddrs64(DST as i8, base, stride);
+}
+
+/// Load tile rows from memory specified by base address and stride into destination tile dst
+/// using the tile configuration previously configured via _tile_loadconfig.
+/// Provides a hint to the implementation that the data would be reused but does not need
+/// to be resident in the nearest cache levels.
+/// Additionally, this intrinsic indicates the source memory location is likely to become
+/// read-shared by multiple processors, i.e., read in the future by at least one other processor
+/// before it is written, assuming it is ever written in the future.
+#[inline]
+#[rustc_legacy_const_generics(0)]
+#[target_feature(enable = "amx-movrs")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tileloaddrst1, DST = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_stream_loaddrs<const DST: i32>(base: *const u8, stride: usize) {
+    static_assert_uimm_bits!(DST, 3);
+    tileloaddrst164(DST as i8, base, stride);
+}
+
+/// Perform matrix multiplication of two tiles a and b, containing packed single precision (32-bit)
+/// floating-point elements, which are converted to TF32 (tensor-float32) format, and accumulate the
+///  results into a packed single precision tile.
+/// For each possible combination of (row of a, column of b), it performs
+///  - convert to TF32
+///  - multiply the corresponding elements of a and b
+///  - accumulate the results into the corresponding row and column of dst using round-to-nearest-even
+/// rounding mode.
+/// Output FP32 denormals are always flushed to zero, input single precision denormals are always
+/// handled and *not* treated as zero.
+#[inline]
+#[rustc_legacy_const_generics(0, 1, 2)]
+#[target_feature(enable = "amx-tf32")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tmmultf32ps, DST = 0, A = 1, B = 2)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_mmultf32ps<const DST: i32, const A: i32, const B: i32>() {
+    static_assert_uimm_bits!(DST, 3);
+    static_assert_uimm_bits!(A, 3);
+    static_assert_uimm_bits!(B, 3);
+    tmmultf32ps(DST as i8, A as i8, B as i8);
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed 32-bit signed integer
+/// elements to packed single-precision (32-bit) floating-point elements.
+#[inline]
+#[rustc_legacy_const_generics(0)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowd2ps, TILE = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowd2ps<const TILE: i32>(row: u32) -> __m512 {
+    static_assert_uimm_bits!(TILE, 3);
+    tcvtrowd2ps(TILE as i8, row).as_m512()
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed 32-bit signed integer
+/// elements to packed single-precision (32-bit) floating-point elements.
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowd2ps, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowd2psi<const TILE: i32, const ROW: i32>() -> __m512 {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tcvtrowd2psi(TILE as i8, ROW as u32).as_m512()
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the high 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowps2phh, TILE = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2phh<const TILE: i32>(row: u32) -> __m512h {
+    static_assert_uimm_bits!(TILE, 3);
+    tcvtrowps2phh(TILE as i8, row).as_m512h()
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the high 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowps2phh, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2phhi<const TILE: i32, const ROW: i32>() -> __m512h {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tcvtrowps2phhi(TILE as i8, ROW as u32).as_m512h()
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the low 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowps2phl, TILE = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2phl<const TILE: i32>(row: u32) -> __m512h {
+    static_assert_uimm_bits!(TILE, 3);
+    tcvtrowps2phl(TILE as i8, row).as_m512h()
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the low 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowps2phl, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2phli<const TILE: i32, const ROW: i32>() -> __m512h {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tcvtrowps2phli(TILE as i8, ROW as u32).as_m512h()
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed BF16 (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the high 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowps2bf16h, TILE = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2bf16h<const TILE: i32>(row: u32) -> __m512bh {
+    static_assert_uimm_bits!(TILE, 3);
+    tcvtrowps2bf16h(TILE as i8, row).as_m512bh()
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed BF16 (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the high 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowps2bf16h, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2bf16hi<const TILE: i32, const ROW: i32>() -> __m512bh {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tcvtrowps2bf16hi(TILE as i8, ROW as u32).as_m512bh()
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed BF16 (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the low 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowps2bf16l, TILE = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2bf16l<const TILE: i32>(row: u32) -> __m512bh {
+    static_assert_uimm_bits!(TILE, 3);
+    tcvtrowps2bf16l(TILE as i8, row).as_m512bh()
+}
+
+/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
+/// floating-point elements to packed BF16 (16-bit) floating-point elements. The resulting
+/// 16-bit elements are placed in the low 16-bits within each 32-bit element of the returned vector.
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tcvtrowps2bf16l, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_cvtrowps2bf16li<const TILE: i32, const ROW: i32>() -> __m512bh {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tcvtrowps2bf16li(TILE as i8, ROW as u32).as_m512bh()
+}
+
+/// Moves one row of tile data into a zmm vector register
+#[inline]
+#[rustc_legacy_const_generics(0)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tilemovrow, TILE = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_movrow<const TILE: i32>(row: u32) -> __m512i {
+    static_assert_uimm_bits!(TILE, 3);
+    tilemovrow(TILE as i8, row).as_m512i()
+}
+
+/// Moves one row of tile data into a zmm vector register
+#[inline]
+#[rustc_legacy_const_generics(0, 1)]
+#[target_feature(enable = "amx-avx512,avx10.2")]
+#[cfg_attr(
+    all(test, not(target_vendor = "apple")),
+    assert_instr(tilemovrow, TILE = 0, ROW = 0)
+)]
+#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
+pub unsafe fn _tile_movrowi<const TILE: i32, const ROW: i32>() -> __m512i {
+    static_assert_uimm_bits!(TILE, 3);
+    static_assert_uimm_bits!(ROW, 6);
+    tilemovrowi(TILE as i8, ROW as u32).as_m512i()
+}
+
 #[allow(improper_ctypes)]
 unsafe extern "C" {
     #[link_name = "llvm.x86.ldtilecfg"]
@@ -274,19 +607,57 @@ unsafe extern "C" {
     fn tcmmimfp16ps(dst: i8, a: i8, b: i8);
     #[link_name = "llvm.x86.tcmmrlfp16ps"]
     fn tcmmrlfp16ps(dst: i8, a: i8, b: i8);
+    #[link_name = "llvm.x86.tdpbf8ps"]
+    fn tdpbf8ps(dst: i8, a: i8, b: i8);
+    #[link_name = "llvm.x86.tdpbhf8ps"]
+    fn tdpbhf8ps(dst: i8, a: i8, b: i8);
+    #[link_name = "llvm.x86.tdphbf8ps"]
+    fn tdphbf8ps(dst: i8, a: i8, b: i8);
+    #[link_name = "llvm.x86.tdphf8ps"]
+    fn tdphf8ps(dst: i8, a: i8, b: i8);
+    #[link_name = "llvm.x86.tileloaddrs64"]
+    fn tileloaddrs64(dst: i8, base: *const u8, stride: usize);
+    #[link_name = "llvm.x86.tileloaddrst164"]
+    fn tileloaddrst164(dst: i8, base: *const u8, stride: usize);
+    #[link_name = "llvm.x86.tmmultf32ps"]
+    fn tmmultf32ps(dst: i8, a: i8, b: i8);
+    #[link_name = "llvm.x86.tcvtrowd2ps"]
+    fn tcvtrowd2ps(tile: i8, row: u32) -> f32x16;
+    #[link_name = "llvm.x86.tcvtrowd2psi"]
+    fn tcvtrowd2psi(tile: i8, row: u32) -> f32x16;
+    #[link_name = "llvm.x86.tcvtrowps2phh"]
+    fn tcvtrowps2phh(tile: i8, row: u32) -> f16x32;
+    #[link_name = "llvm.x86.tcvtrowps2phhi"]
+    fn tcvtrowps2phhi(tile: i8, row: u32) -> f16x32;
+    #[link_name = "llvm.x86.tcvtrowps2phl"]
+    fn tcvtrowps2phl(tile: i8, row: u32) -> f16x32;
+    #[link_name = "llvm.x86.tcvtrowps2phli"]
+    fn tcvtrowps2phli(tile: i8, row: u32) -> f16x32;
+    #[link_name = "llvm.x86.tcvtrowps2bf16h"]
+    fn tcvtrowps2bf16h(tile: i8, row: u32) -> u16x32;
+    #[link_name = "llvm.x86.tcvtrowps2bf16hi"]
+    fn tcvtrowps2bf16hi(tile: i8, row: u32) -> u16x32;
+    #[link_name = "llvm.x86.tcvtrowps2bf16l"]
+    fn tcvtrowps2bf16l(tile: i8, row: u32) -> u16x32;
+    #[link_name = "llvm.x86.tcvtrowps2bf16li"]
+    fn tcvtrowps2bf16li(tile: i8, row: u32) -> u16x32;
+    #[link_name = "llvm.x86.tilemovrow"]
+    fn tilemovrow(tile: i8, row: u32) -> i32x16;
+    #[link_name = "llvm.x86.tilemovrowi"]
+    fn tilemovrowi(tile: i8, row: u32) -> i32x16;
 }
 
 #[cfg(test)]
 mod tests {
     use crate::core_arch::x86::_mm_cvtness_sbh;
     use crate::core_arch::x86_64::*;
-    use core::mem::transmute;
+    use core::{array, mem::transmute};
     use stdarch_test::simd_test;
     #[cfg(target_os = "linux")]
     use syscalls::{Sysno, syscall};
 
     #[allow(non_camel_case_types)]
-    #[repr(packed)]
+    #[repr(C, packed)]
     #[derive(Copy, Clone, Default, Debug, PartialEq)]
     struct __tilecfg {
         /// 0 `or` 1
@@ -357,266 +728,756 @@ mod tests {
     }
 
     #[simd_test(enable = "amx-tile")]
-    unsafe fn test_tile_loadconfig() {
-        let config = __tilecfg::default();
-        _tile_loadconfig(config.as_ptr());
-        _tile_release();
+    fn test_tile_loadconfig() {
+        unsafe {
+            let config = __tilecfg::default();
+            _tile_loadconfig(config.as_ptr());
+            _tile_release();
+        }
     }
 
     #[simd_test(enable = "amx-tile")]
-    unsafe fn test_tile_storeconfig() {
-        let config = __tilecfg::new(1, 0, [32; 8], [8; 8]);
-        _tile_loadconfig(config.as_ptr());
-        let mut _config = __tilecfg::default();
-        _tile_storeconfig(_config.as_mut_ptr());
-        _tile_release();
-        assert_eq!(config, _config);
+    fn test_tile_storeconfig() {
+        unsafe {
+            let config = __tilecfg::new(1, 0, [32; 8], [8; 8]);
+            _tile_loadconfig(config.as_ptr());
+            let mut _config = __tilecfg::default();
+            _tile_storeconfig(_config.as_mut_ptr());
+            _tile_release();
+            assert_eq!(config, _config);
+        }
     }
 
     #[simd_test(enable = "amx-tile")]
-    unsafe fn test_tile_zero() {
-        _init_amx();
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        config.colsb[0] = 64;
-        config.rows[0] = 16;
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        let mut out = [[1_i8; 64]; 16];
-        _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(out, [[0; 64]; 16]);
+    fn test_tile_zero() {
+        unsafe {
+            _init_amx();
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            let mut out = [[1_i8; 64]; 16];
+            _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(out, [[0; 64]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-tile")]
-    unsafe fn test_tile_stored() {
-        _init_amx();
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        config.colsb[0] = 64;
-        config.rows[0] = 16;
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        let mut out = [[1_i8; 64]; 16];
-        _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(out, [[0; 64]; 16]);
+    fn test_tile_stored() {
+        unsafe {
+            _init_amx();
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            let mut out = [[1_i8; 64]; 16];
+            _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(out, [[0; 64]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-tile")]
-    unsafe fn test_tile_loadd() {
-        _init_amx();
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        config.colsb[0] = 64;
-        config.rows[0] = 16;
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        let mat = [1_i8; 1024];
-        _tile_loadd::<0>(&mat as *const i8 as *const u8, 64);
-        let mut out = [[0_i8; 64]; 16];
-        _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(out, [[1; 64]; 16]);
+    fn test_tile_loadd() {
+        unsafe {
+            _init_amx();
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            let mat = [1_i8; 1024];
+            _tile_loadd::<0>(&mat as *const i8 as *const u8, 64);
+            let mut out = [[0_i8; 64]; 16];
+            _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(out, [[1; 64]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-tile")]
-    unsafe fn test_tile_stream_loadd() {
-        _init_amx();
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        config.colsb[0] = 64;
-        config.rows[0] = 16;
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        let mat = [1_i8; 1024];
-        _tile_stream_loadd::<0>(&mat as *const i8 as *const u8, 64);
-        let mut out = [[0_i8; 64]; 16];
-        _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(out, [[1; 64]; 16]);
+    fn test_tile_stream_loadd() {
+        unsafe {
+            _init_amx();
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            let mat = [1_i8; 1024];
+            _tile_stream_loadd::<0>(&mat as *const i8 as *const u8, 64);
+            let mut out = [[0_i8; 64]; 16];
+            _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(out, [[1; 64]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-tile")]
-    unsafe fn test_tile_release() {
-        _tile_release();
+    fn test_tile_release() {
+        unsafe {
+            _tile_release();
+        }
     }
 
     #[simd_test(enable = "amx-bf16,avx512f")]
-    unsafe fn test_tile_dpbf16ps() {
-        _init_amx();
-        let bf16_1: u16 = _mm_cvtness_sbh(1.0).to_bits();
-        let bf16_2: u16 = _mm_cvtness_sbh(2.0).to_bits();
-        let ones: [u8; 1024] = transmute([bf16_1; 512]);
-        let twos: [u8; 1024] = transmute([bf16_2; 512]);
-        let mut res = [[0f32; 16]; 16];
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        (0..=2).for_each(|i| {
-            config.colsb[i] = 64;
-            config.rows[i] = 16;
-        });
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        _tile_loadd::<1>(&ones as *const u8, 64);
-        _tile_loadd::<2>(&twos as *const u8, 64);
-        _tile_dpbf16ps::<0, 1, 2>();
-        _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(res, [[64f32; 16]; 16]);
+    fn test_tile_dpbf16ps() {
+        unsafe {
+            _init_amx();
+            let bf16_1: u16 = _mm_cvtness_sbh(1.0).to_bits();
+            let bf16_2: u16 = _mm_cvtness_sbh(2.0).to_bits();
+            let ones: [u8; 1024] = transmute([bf16_1; 512]);
+            let twos: [u8; 1024] = transmute([bf16_2; 512]);
+            let mut res = [[0f32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const u8, 64);
+            _tile_dpbf16ps::<0, 1, 2>();
+            _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(res, [[64f32; 16]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-int8")]
-    unsafe fn test_tile_dpbssd() {
-        _init_amx();
-        let ones = [-1_i8; 1024];
-        let twos = [-2_i8; 1024];
-        let mut res = [[0_i32; 16]; 16];
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        (0..=2).for_each(|i| {
-            config.colsb[i] = 64;
-            config.rows[i] = 16;
-        });
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        _tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
-        _tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
-        _tile_dpbssd::<0, 1, 2>();
-        _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(res, [[128_i32; 16]; 16]);
+    fn test_tile_dpbssd() {
+        unsafe {
+            _init_amx();
+            let ones = [-1_i8; 1024];
+            let twos = [-2_i8; 1024];
+            let mut res = [[0_i32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
+            _tile_dpbssd::<0, 1, 2>();
+            _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(res, [[128_i32; 16]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-int8")]
-    unsafe fn test_tile_dpbsud() {
-        _init_amx();
-        let ones = [-1_i8; 1024];
-        let twos = [2_u8; 1024];
-        let mut res = [[0_i32; 16]; 16];
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        (0..=2).for_each(|i| {
-            config.colsb[i] = 64;
-            config.rows[i] = 16;
-        });
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        _tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
-        _tile_loadd::<2>(&twos as *const u8, 64);
-        _tile_dpbsud::<0, 1, 2>();
-        _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(res, [[-128_i32; 16]; 16]);
+    fn test_tile_dpbsud() {
+        unsafe {
+            _init_amx();
+            let ones = [-1_i8; 1024];
+            let twos = [2_u8; 1024];
+            let mut res = [[0_i32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const i8 as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const u8, 64);
+            _tile_dpbsud::<0, 1, 2>();
+            _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(res, [[-128_i32; 16]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-int8")]
-    unsafe fn test_tile_dpbusd() {
-        _init_amx();
-        let ones = [1_u8; 1024];
-        let twos = [-2_i8; 1024];
-        let mut res = [[0_i32; 16]; 16];
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        (0..=2).for_each(|i| {
-            config.colsb[i] = 64;
-            config.rows[i] = 16;
-        });
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        _tile_loadd::<1>(&ones as *const u8, 64);
-        _tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
-        _tile_dpbusd::<0, 1, 2>();
-        _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(res, [[-128_i32; 16]; 16]);
+    fn test_tile_dpbusd() {
+        unsafe {
+            _init_amx();
+            let ones = [1_u8; 1024];
+            let twos = [-2_i8; 1024];
+            let mut res = [[0_i32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const i8 as *const u8, 64);
+            _tile_dpbusd::<0, 1, 2>();
+            _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(res, [[-128_i32; 16]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-int8")]
-    unsafe fn test_tile_dpbuud() {
-        _init_amx();
-        let ones = [1_u8; 1024];
-        let twos = [2_u8; 1024];
-        let mut res = [[0_i32; 16]; 16];
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        (0..=2).for_each(|i| {
-            config.colsb[i] = 64;
-            config.rows[i] = 16;
-        });
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        _tile_loadd::<1>(&ones as *const u8, 64);
-        _tile_loadd::<2>(&twos as *const u8, 64);
-        _tile_dpbuud::<0, 1, 2>();
-        _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(res, [[128_i32; 16]; 16]);
+    fn test_tile_dpbuud() {
+        unsafe {
+            _init_amx();
+            let ones = [1_u8; 1024];
+            let twos = [2_u8; 1024];
+            let mut res = [[0_i32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const u8, 64);
+            _tile_dpbuud::<0, 1, 2>();
+            _tile_stored::<0>(&mut res as *mut [i32; 16] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(res, [[128_i32; 16]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-fp16")]
-    unsafe fn test_tile_dpfp16ps() {
-        _init_amx();
-        let ones = [1f16; 512];
-        let twos = [2f16; 512];
-        let mut res = [[0f32; 16]; 16];
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        (0..=2).for_each(|i| {
-            config.colsb[i] = 64;
-            config.rows[i] = 16;
-        });
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        _tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
-        _tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
-        _tile_dpfp16ps::<0, 1, 2>();
-        _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(res, [[64f32; 16]; 16]);
+    fn test_tile_dpfp16ps() {
+        unsafe {
+            _init_amx();
+            let ones = [1f16; 512];
+            let twos = [2f16; 512];
+            let mut res = [[0f32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
+            _tile_dpfp16ps::<0, 1, 2>();
+            _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(res, [[64f32; 16]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-complex")]
-    unsafe fn test_tile_cmmimfp16ps() {
-        _init_amx();
-        let ones = [1f16; 512];
-        let twos = [2f16; 512];
-        let mut res = [[0f32; 16]; 16];
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        (0..=2).for_each(|i| {
-            config.colsb[i] = 64;
-            config.rows[i] = 16;
-        });
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        _tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
-        _tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
-        _tile_cmmimfp16ps::<0, 1, 2>();
-        _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(res, [[64f32; 16]; 16]);
+    fn test_tile_cmmimfp16ps() {
+        unsafe {
+            _init_amx();
+            let ones = [1f16; 512];
+            let twos = [2f16; 512];
+            let mut res = [[0f32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
+            _tile_cmmimfp16ps::<0, 1, 2>();
+            _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(res, [[64f32; 16]; 16]);
+        }
     }
 
     #[simd_test(enable = "amx-complex")]
-    unsafe fn test_tile_cmmrlfp16ps() {
-        _init_amx();
-        let ones = [1f16; 512];
-        let twos = [2f16; 512];
-        let mut res = [[0f32; 16]; 16];
-        let mut config = __tilecfg::default();
-        config.palette = 1;
-        (0..=2).for_each(|i| {
-            config.colsb[i] = 64;
-            config.rows[i] = 16;
-        });
-        _tile_loadconfig(config.as_ptr());
-        _tile_zero::<0>();
-        _tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
-        _tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
-        _tile_cmmrlfp16ps::<0, 1, 2>();
-        _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
-        _tile_release();
-        assert_eq!(res, [[0f32; 16]; 16]);
+    fn test_tile_cmmrlfp16ps() {
+        unsafe {
+            _init_amx();
+            let ones = [1f16; 512];
+            let twos = [2f16; 512];
+            let mut res = [[0f32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const f16 as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const f16 as *const u8, 64);
+            _tile_cmmrlfp16ps::<0, 1, 2>();
+            _tile_stored::<0>(&mut res as *mut [f32; 16] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(res, [[0f32; 16]; 16]);
+        }
+    }
+
+    const BF8_ONE: u8 = 0x3c;
+    const BF8_TWO: u8 = 0x40;
+    const HF8_ONE: u8 = 0x38;
+    const HF8_TWO: u8 = 0x40;
+
+    #[simd_test(enable = "amx-fp8")]
+    fn test_tile_dpbf8ps() {
+        unsafe {
+            _init_amx();
+            let ones = [BF8_ONE; 1024];
+            let twos = [BF8_TWO; 1024];
+            let mut res = [[0.0_f32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const u8, 64);
+            _tile_dpbf8ps::<0, 1, 2>();
+            _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
+            _tile_release();
+            assert_eq!(res, [[128.0_f32; 16]; 16]);
+        }
+    }
+
+    #[simd_test(enable = "amx-fp8")]
+    fn test_tile_dpbhf8ps() {
+        unsafe {
+            _init_amx();
+            let ones = [BF8_ONE; 1024];
+            let twos = [HF8_TWO; 1024];
+            let mut res = [[0.0_f32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const u8, 64);
+            _tile_dpbhf8ps::<0, 1, 2>();
+            _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
+            _tile_release();
+            assert_eq!(res, [[128.0_f32; 16]; 16]);
+        }
+    }
+
+    #[simd_test(enable = "amx-fp8")]
+    fn test_tile_dphbf8ps() {
+        unsafe {
+            _init_amx();
+            let ones = [HF8_ONE; 1024];
+            let twos = [BF8_TWO; 1024];
+            let mut res = [[0.0_f32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const u8, 64);
+            _tile_dphbf8ps::<0, 1, 2>();
+            _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
+            _tile_release();
+            assert_eq!(res, [[128.0_f32; 16]; 16]);
+        }
+    }
+
+    #[simd_test(enable = "amx-fp8")]
+    fn test_tile_dphf8ps() {
+        unsafe {
+            _init_amx();
+            let ones = [HF8_ONE; 1024];
+            let twos = [HF8_TWO; 1024];
+            let mut res = [[0.0_f32; 16]; 16];
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(&ones as *const u8, 64);
+            _tile_loadd::<2>(&twos as *const u8, 64);
+            _tile_dphf8ps::<0, 1, 2>();
+            _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
+            _tile_release();
+            assert_eq!(res, [[128.0_f32; 16]; 16]);
+        }
+    }
+
+    #[simd_test(enable = "amx-movrs")]
+    fn test_tile_loaddrs() {
+        unsafe {
+            _init_amx();
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            let mat = [1_i8; 1024];
+            _tile_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
+            let mut out = [[0_i8; 64]; 16];
+            _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(out, [[1; 64]; 16]);
+        }
+    }
+
+    #[simd_test(enable = "amx-movrs")]
+    fn test_tile_stream_loaddrs() {
+        unsafe {
+            _init_amx();
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            let mat = [1_i8; 1024];
+            _tile_stream_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
+            let mut out = [[0_i8; 64]; 16];
+            _tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
+            _tile_release();
+            assert_eq!(out, [[1; 64]; 16]);
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_movrow() {
+        unsafe {
+            _init_amx();
+            let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = _tile_movrow::<0>(i);
+                assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
+            }
+        }
+    }
+
+    macro_rules! wrap_imm4 {
+        ($name:ident :: <$TILE:literal>, $row:expr) => {
+            match $row {
+                0 => $name::<$TILE, 0>(),
+                1 => $name::<$TILE, 1>(),
+                2 => $name::<$TILE, 2>(),
+                3 => $name::<$TILE, 3>(),
+                4 => $name::<$TILE, 4>(),
+                5 => $name::<$TILE, 5>(),
+                6 => $name::<$TILE, 6>(),
+                7 => $name::<$TILE, 7>(),
+                8 => $name::<$TILE, 8>(),
+                9 => $name::<$TILE, 9>(),
+                10 => $name::<$TILE, 10>(),
+                11 => $name::<$TILE, 11>(),
+                12 => $name::<$TILE, 12>(),
+                13 => $name::<$TILE, 13>(),
+                14 => $name::<$TILE, 14>(),
+                15 => $name::<$TILE, 15>(),
+                _ => panic!("row index out of range"),
+            }
+        };
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_movrowi() {
+        unsafe {
+            _init_amx();
+            let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_movrowi::<0>, i);
+                assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowd2ps() {
+        unsafe {
+            _init_amx();
+            let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = _tile_cvtrowd2ps::<0>(i);
+                assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowd2psi() {
+        unsafe {
+            _init_amx();
+            let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_cvtrowd2psi::<0>, i);
+                assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2phh() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = _tile_cvtrowps2phh::<0>(i);
+                assert_eq!(
+                    *row.as_f16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
+                );
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2phhi() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_cvtrowps2phhi::<0>, i);
+                assert_eq!(
+                    *row.as_f16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
+                );
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2phl() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = _tile_cvtrowps2phl::<0>(i);
+                assert_eq!(
+                    *row.as_f16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
+                );
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2phli() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_cvtrowps2phli::<0>, i);
+                assert_eq!(
+                    *row.as_f16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
+                );
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2bf16h() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = _tile_cvtrowps2bf16h::<0>(i);
+                assert_eq!(
+                    *row.as_u16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 {
+                        0
+                    } else {
+                        _mm_cvtness_sbh(i as _).to_bits()
+                    })
+                );
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2bf16hi() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_cvtrowps2bf16hi::<0>, i);
+                assert_eq!(
+                    *row.as_u16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 {
+                        0
+                    } else {
+                        _mm_cvtness_sbh(i as _).to_bits()
+                    })
+                );
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2bf16l() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = _tile_cvtrowps2bf16l::<0>(i);
+                assert_eq!(
+                    *row.as_u16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 {
+                        _mm_cvtness_sbh(i as _).to_bits()
+                    } else {
+                        0
+                    })
+                );
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-avx512,avx10.2")]
+    fn test_tile_cvtrowps2bf16li() {
+        unsafe {
+            _init_amx();
+            let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            config.colsb[0] = 64;
+            config.rows[0] = 16;
+            _tile_loadconfig(config.as_ptr());
+            _tile_loadd::<0>(array.as_ptr().cast(), 64);
+            for i in 0..16 {
+                let row = wrap_imm4!(_tile_cvtrowps2bf16li::<0>, i);
+                assert_eq!(
+                    *row.as_u16x32().as_array(),
+                    array::from_fn(|j| if j & 1 == 0 {
+                        _mm_cvtness_sbh(i as _).to_bits()
+                    } else {
+                        0
+                    })
+                );
+            }
+        }
+    }
+
+    #[simd_test(enable = "amx-tf32")]
+    fn test_tile_mmultf32ps() {
+        unsafe {
+            _init_amx();
+            let a: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
+            let b: [[f32; 16]; 16] = [array::from_fn(|j| j as _); _];
+            let mut res = [[0.0; 16]; 16];
+
+            let mut config = __tilecfg::default();
+            config.palette = 1;
+            (0..=2).for_each(|i| {
+                config.colsb[i] = 64;
+                config.rows[i] = 16;
+            });
+            _tile_loadconfig(config.as_ptr());
+            _tile_zero::<0>();
+            _tile_loadd::<1>(a.as_ptr().cast(), 64);
+            _tile_loadd::<2>(b.as_ptr().cast(), 64);
+            _tile_mmultf32ps::<0, 1, 2>();
+            _tile_stored::<0>(res.as_mut_ptr().cast(), 64);
+            _tile_release();
+
+            let expected = array::from_fn(|i| array::from_fn(|j| 16.0 * i as f32 * j as f32));
+            assert_eq!(res, expected);
+        }
     }
 }
diff --git a/crates/core_arch/src/x86_64/avx.rs b/crates/core_arch/src/x86_64/avx.rs
index b494385e4a..b626c1a592 100644
--- a/crates/core_arch/src/x86_64/avx.rs
+++ b/crates/core_arch/src/x86_64/avx.rs
@@ -9,8 +9,8 @@
 //!
 //! [Wikipedia][wiki] provides a quick overview of the instructions available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
-//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
 //! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
 
 use crate::{core_arch::x86::*, mem::transmute};
@@ -24,7 +24,8 @@ use crate::{core_arch::x86::*, mem::transmute};
 #[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m256i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m256i {
     static_assert_uimm_bits!(INDEX, 2);
     unsafe { transmute(simd_insert!(a.as_i64x4(), INDEX as u32, i)) }
 }
@@ -37,19 +38,21 @@ pub fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m256i {
 #[rustc_legacy_const_generics(1)]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 {
     static_assert_uimm_bits!(INDEX, 2);
     unsafe { simd_extract!(a.as_i64x4(), INDEX as u32) }
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::arch::x86_64::*;
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_insert_epi64() {
+    const fn test_mm256_insert_epi64() {
         let a = _mm256_setr_epi64x(1, 2, 3, 4);
         let r = _mm256_insert_epi64::<3>(a, 0);
         let e = _mm256_setr_epi64x(1, 2, 3, 0);
@@ -57,7 +60,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx")]
-    unsafe fn test_mm256_extract_epi64() {
+    const fn test_mm256_extract_epi64() {
         let a = _mm256_setr_epi64x(0, 1, 2, 3);
         let r = _mm256_extract_epi64::<3>(a);
         assert_eq!(r, 3);
diff --git a/crates/core_arch/src/x86_64/avx512bw.rs b/crates/core_arch/src/x86_64/avx512bw.rs
index 466c36ef31..3450f6e194 100644
--- a/crates/core_arch/src/x86_64/avx512bw.rs
+++ b/crates/core_arch/src/x86_64/avx512bw.rs
@@ -6,7 +6,8 @@ use crate::core_arch::x86::*;
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _cvtmask64_u64(a: __mmask64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _cvtmask64_u64(a: __mmask64) -> u64 {
     a
 }
 
@@ -16,19 +17,21 @@ pub fn _cvtmask64_u64(a: __mmask64) -> u64 {
 #[inline]
 #[target_feature(enable = "avx512bw")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
-pub fn _cvtu64_mask64(a: u64) -> __mmask64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _cvtu64_mask64(a: u64) -> __mmask64 {
     a
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
 
     use stdarch_test::simd_test;
 
     use crate::core_arch::{x86::*, x86_64::*};
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_cvtmask64_u64() {
+    const fn test_cvtmask64_u64() {
         let a: __mmask64 = 0b11001100_00110011_01100110_10011001;
         let r = _cvtmask64_u64(a);
         let e: u64 = 0b11001100_00110011_01100110_10011001;
@@ -36,7 +39,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512bw")]
-    unsafe fn test_cvtu64_mask64() {
+    const fn test_cvtu64_mask64() {
         let a: u64 = 0b11001100_00110011_01100110_10011001;
         let r = _cvtu64_mask64(a);
         let e: __mmask64 = 0b11001100_00110011_01100110_10011001;
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index 934c9e2812..0fd9b09363 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -57,7 +57,8 @@ pub fn _mm_cvtsd_u64(a: __m128d) -> u64 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtsi2ss))]
-pub fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
     unsafe {
         let b = b as f32;
         simd_insert!(a, 0, b)
@@ -71,7 +72,8 @@ pub fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtsi2sd))]
-pub fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
     unsafe {
         let b = b as f64;
         simd_insert!(a, 0, b)
@@ -85,7 +87,8 @@ pub fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtusi2ss))]
-pub fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
     unsafe {
         let b = b as f32;
         simd_insert!(a, 0, b)
@@ -99,7 +102,8 @@ pub fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
 #[target_feature(enable = "avx512f")]
 #[stable(feature = "stdarch_x86_avx512", since = "1.89")]
 #[cfg_attr(test, assert_instr(vcvtusi2sd))]
-pub fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
     unsafe {
         let b = b as f64;
         simd_insert!(a, 0, b)
@@ -554,6 +558,7 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
 
     use stdarch_test::simd_test;
 
@@ -562,7 +567,7 @@ mod tests {
     use crate::hint::black_box;
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_abs_epi64() {
+    const fn test_mm512_abs_epi64() {
         let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let r = _mm512_abs_epi64(a);
         let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32);
@@ -570,7 +575,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_abs_epi64() {
+    const fn test_mm512_mask_abs_epi64() {
         let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let r = _mm512_mask_abs_epi64(a, 0, a);
         assert_eq_m512i(r, a);
@@ -580,7 +585,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_abs_epi64() {
+    const fn test_mm512_maskz_abs_epi64() {
         let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let r = _mm512_maskz_abs_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -590,7 +595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_abs_epi64() {
+    const fn test_mm256_abs_epi64() {
         let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
         let r = _mm256_abs_epi64(a);
         let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
@@ -598,7 +603,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_abs_epi64() {
+    const fn test_mm256_mask_abs_epi64() {
         let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
         let r = _mm256_mask_abs_epi64(a, 0, a);
         assert_eq_m256i(r, a);
@@ -608,7 +613,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_abs_epi64() {
+    const fn test_mm256_maskz_abs_epi64() {
         let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
         let r = _mm256_maskz_abs_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -618,7 +623,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_abs_epi64() {
+    const fn test_mm_abs_epi64() {
         let a = _mm_set_epi64x(i64::MAX, i64::MIN);
         let r = _mm_abs_epi64(a);
         let e = _mm_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1));
@@ -630,7 +635,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_abs_epi64() {
+    const fn test_mm_mask_abs_epi64() {
         let a = _mm_set_epi64x(i64::MAX, i64::MIN);
         let r = _mm_mask_abs_epi64(a, 0, a);
         assert_eq_m128i(r, a);
@@ -644,7 +649,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_abs_epi64() {
+    const fn test_mm_maskz_abs_epi64() {
         let a = _mm_set_epi64x(i64::MAX, i64::MIN);
         let r = _mm_maskz_abs_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -658,7 +663,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_abs_pd() {
+    const fn test_mm512_abs_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
         let r = _mm512_abs_pd(a);
         let e = _mm512_setr_pd(0., 1., 1., f64::MAX, f64::MAX, 100., 100., 32.);
@@ -666,7 +671,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_abs_pd() {
+    const fn test_mm512_mask_abs_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
         let r = _mm512_mask_abs_pd(a, 0, a);
         assert_eq_m512d(r, a);
@@ -676,7 +681,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mov_epi64() {
+    const fn test_mm512_mask_mov_epi64() {
         let src = _mm512_set1_epi64(1);
         let a = _mm512_set1_epi64(2);
         let r = _mm512_mask_mov_epi64(src, 0, a);
@@ -686,7 +691,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mov_epi64() {
+    const fn test_mm512_maskz_mov_epi64() {
         let a = _mm512_set1_epi64(2);
         let r = _mm512_maskz_mov_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -695,7 +700,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_mov_epi64() {
+    const fn test_mm256_mask_mov_epi64() {
         let src = _mm256_set1_epi64x(1);
         let a = _mm256_set1_epi64x(2);
         let r = _mm256_mask_mov_epi64(src, 0, a);
@@ -705,7 +710,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_mov_epi64() {
+    const fn test_mm256_maskz_mov_epi64() {
         let a = _mm256_set1_epi64x(2);
         let r = _mm256_maskz_mov_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -714,7 +719,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_mov_epi64() {
+    const fn test_mm_mask_mov_epi64() {
         let src = _mm_set1_epi64x(1);
         let a = _mm_set1_epi64x(2);
         let r = _mm_mask_mov_epi64(src, 0, a);
@@ -724,7 +729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_mov_epi64() {
+    const fn test_mm_maskz_mov_epi64() {
         let a = _mm_set1_epi64x(2);
         let r = _mm_maskz_mov_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -733,7 +738,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mov_pd() {
+    const fn test_mm512_mask_mov_pd() {
         let src = _mm512_set1_pd(1.);
         let a = _mm512_set1_pd(2.);
         let r = _mm512_mask_mov_pd(src, 0, a);
@@ -743,7 +748,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mov_pd() {
+    const fn test_mm512_maskz_mov_pd() {
         let a = _mm512_set1_pd(2.);
         let r = _mm512_maskz_mov_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -752,7 +757,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_mov_pd() {
+    const fn test_mm256_mask_mov_pd() {
         let src = _mm256_set1_pd(1.);
         let a = _mm256_set1_pd(2.);
         let r = _mm256_mask_mov_pd(src, 0, a);
@@ -762,7 +767,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_mov_pd() {
+    const fn test_mm256_maskz_mov_pd() {
         let a = _mm256_set1_pd(2.);
         let r = _mm256_maskz_mov_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -771,7 +776,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_mov_pd() {
+    const fn test_mm_mask_mov_pd() {
         let src = _mm_set1_pd(1.);
         let a = _mm_set1_pd(2.);
         let r = _mm_mask_mov_pd(src, 0, a);
@@ -781,7 +786,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_mov_pd() {
+    const fn test_mm_maskz_mov_pd() {
         let a = _mm_set1_pd(2.);
         let r = _mm_maskz_mov_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -790,7 +795,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_add_epi64() {
+    const fn test_mm512_add_epi64() {
         let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let b = _mm512_set1_epi64(1);
         let r = _mm512_add_epi64(a, b);
@@ -799,7 +804,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_add_epi64() {
+    const fn test_mm512_mask_add_epi64() {
         let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let b = _mm512_set1_epi64(1);
         let r = _mm512_mask_add_epi64(a, 0, a, b);
@@ -810,7 +815,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_add_epi64() {
+    const fn test_mm512_maskz_add_epi64() {
         let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let b = _mm512_set1_epi64(1);
         let r = _mm512_maskz_add_epi64(0, a, b);
@@ -821,7 +826,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_add_epi64() {
+    const fn test_mm256_mask_add_epi64() {
         let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
         let b = _mm256_set1_epi64x(1);
         let r = _mm256_mask_add_epi64(a, 0, a, b);
@@ -832,7 +837,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_add_epi64() {
+    const fn test_mm256_maskz_add_epi64() {
         let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
         let b = _mm256_set1_epi64x(1);
         let r = _mm256_maskz_add_epi64(0, a, b);
@@ -843,7 +848,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_add_epi64() {
+    const fn test_mm_mask_add_epi64() {
         let a = _mm_set_epi64x(i64::MAX, i64::MIN);
         let b = _mm_set1_epi64x(1);
         let r = _mm_mask_add_epi64(a, 0, a, b);
@@ -854,7 +859,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_add_epi64() {
+    const fn test_mm_maskz_add_epi64() {
         let a = _mm_set_epi64x(i64::MAX, i64::MIN);
         let b = _mm_set1_epi64x(1);
         let r = _mm_maskz_add_epi64(0, a, b);
@@ -865,7 +870,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_add_pd() {
+    const fn test_mm512_add_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
         let b = _mm512_set1_pd(1.);
         let r = _mm512_add_pd(a, b);
@@ -874,7 +879,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_add_pd() {
+    const fn test_mm512_mask_add_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
         let b = _mm512_set1_pd(1.);
         let r = _mm512_mask_add_pd(a, 0, a, b);
@@ -885,7 +890,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_add_pd() {
+    const fn test_mm512_maskz_add_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
         let b = _mm512_set1_pd(1.);
         let r = _mm512_maskz_add_pd(0, a, b);
@@ -896,7 +901,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_add_pd() {
+    const fn test_mm256_mask_add_pd() {
         let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
         let b = _mm256_set1_pd(1.);
         let r = _mm256_mask_add_pd(a, 0, a, b);
@@ -907,7 +912,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_add_pd() {
+    const fn test_mm256_maskz_add_pd() {
         let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
         let b = _mm256_set1_pd(1.);
         let r = _mm256_maskz_add_pd(0, a, b);
@@ -918,7 +923,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_add_pd() {
+    const fn test_mm_mask_add_pd() {
         let a = _mm_set_pd(f64::MAX, f64::MIN);
         let b = _mm_set1_pd(1.);
         let r = _mm_mask_add_pd(a, 0, a, b);
@@ -929,7 +934,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_add_pd() {
+    const fn test_mm_maskz_add_pd() {
         let a = _mm_set_pd(f64::MAX, f64::MIN);
         let b = _mm_set1_pd(1.);
         let r = _mm_maskz_add_pd(0, a, b);
@@ -940,7 +945,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sub_epi64() {
+    const fn test_mm512_sub_epi64() {
         let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let b = _mm512_set1_epi64(1);
         let r = _mm512_sub_epi64(a, b);
@@ -949,7 +954,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sub_epi64() {
+    const fn test_mm512_mask_sub_epi64() {
         let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let b = _mm512_set1_epi64(1);
         let r = _mm512_mask_sub_epi64(a, 0, a, b);
@@ -960,7 +965,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sub_epi64() {
+    const fn test_mm512_maskz_sub_epi64() {
         let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
         let b = _mm512_set1_epi64(1);
         let r = _mm512_maskz_sub_epi64(0, a, b);
@@ -971,7 +976,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sub_epi64() {
+    const fn test_mm256_mask_sub_epi64() {
         let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
         let b = _mm256_set1_epi64x(1);
         let r = _mm256_mask_sub_epi64(a, 0, a, b);
@@ -982,7 +987,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sub_epi64() {
+    const fn test_mm256_maskz_sub_epi64() {
         let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
         let b = _mm256_set1_epi64x(1);
         let r = _mm256_maskz_sub_epi64(0, a, b);
@@ -993,7 +998,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sub_epi64() {
+    const fn test_mm_mask_sub_epi64() {
         let a = _mm_set_epi64x(i64::MAX, i64::MIN);
         let b = _mm_set1_epi64x(1);
         let r = _mm_mask_sub_epi64(a, 0, a, b);
@@ -1004,7 +1009,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sub_epi64() {
+    const fn test_mm_maskz_sub_epi64() {
         let a = _mm_set_epi64x(i64::MAX, i64::MIN);
         let b = _mm_set1_epi64x(1);
         let r = _mm_maskz_sub_epi64(0, a, b);
@@ -1015,7 +1020,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sub_pd() {
+    const fn test_mm512_sub_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
         let b = _mm512_set1_pd(1.);
         let r = _mm512_sub_pd(a, b);
@@ -1024,7 +1029,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sub_pd() {
+    const fn test_mm512_mask_sub_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
         let b = _mm512_set1_pd(1.);
         let r = _mm512_mask_sub_pd(a, 0, a, b);
@@ -1035,7 +1040,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sub_pd() {
+    const fn test_mm512_maskz_sub_pd() {
         let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
         let b = _mm512_set1_pd(1.);
         let r = _mm512_maskz_sub_pd(0, a, b);
@@ -1046,7 +1051,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sub_pd() {
+    const fn test_mm256_mask_sub_pd() {
         let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
         let b = _mm256_set1_pd(1.);
         let r = _mm256_mask_sub_pd(a, 0, a, b);
@@ -1057,7 +1062,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sub_pd() {
+    const fn test_mm256_maskz_sub_pd() {
         let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
         let b = _mm256_set1_pd(1.);
         let r = _mm256_maskz_sub_pd(0, a, b);
@@ -1068,7 +1073,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sub_pd() {
+    const fn test_mm_mask_sub_pd() {
         let a = _mm_set_pd(f64::MAX, f64::MIN);
         let b = _mm_set1_pd(1.);
         let r = _mm_mask_sub_pd(a, 0, a, b);
@@ -1079,7 +1084,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sub_pd() {
+    const fn test_mm_maskz_sub_pd() {
         let a = _mm_set_pd(f64::MAX, f64::MIN);
         let b = _mm_set1_pd(1.);
         let r = _mm_maskz_sub_pd(0, a, b);
@@ -1090,7 +1095,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mul_epi32() {
+    const fn test_mm512_mul_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mul_epi32(a, b);
@@ -1099,7 +1104,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mul_epi32() {
+    const fn test_mm512_mask_mul_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_mul_epi32(a, 0, a, b);
@@ -1114,7 +1119,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mul_epi32() {
+    const fn test_mm512_maskz_mul_epi32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_mul_epi32(0, a, b);
@@ -1125,7 +1130,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_mul_epi32() {
+    const fn test_mm256_mask_mul_epi32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_mask_mul_epi32(a, 0, a, b);
@@ -1136,7 +1141,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_mul_epi32() {
+    const fn test_mm256_maskz_mul_epi32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_maskz_mul_epi32(0, a, b);
@@ -1147,7 +1152,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_mul_epi32() {
+    const fn test_mm_mask_mul_epi32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_mask_mul_epi32(a, 0, a, b);
@@ -1158,7 +1163,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_mul_epi32() {
+    const fn test_mm_maskz_mul_epi32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_maskz_mul_epi32(0, a, b);
@@ -1169,7 +1174,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mul_epu32() {
+    const fn test_mm512_mul_epu32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mul_epu32(a, b);
@@ -1178,7 +1183,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mul_epu32() {
+    const fn test_mm512_mask_mul_epu32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_mask_mul_epu32(a, 0, a, b);
@@ -1193,7 +1198,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mul_epu32() {
+    const fn test_mm512_maskz_mul_epu32() {
         let a = _mm512_set1_epi32(1);
         let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
         let r = _mm512_maskz_mul_epu32(0, a, b);
@@ -1204,7 +1209,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_mul_epu32() {
+    const fn test_mm256_mask_mul_epu32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_mask_mul_epu32(a, 0, a, b);
@@ -1215,7 +1220,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_mul_epu32() {
+    const fn test_mm256_maskz_mul_epu32() {
         let a = _mm256_set1_epi32(1);
         let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm256_maskz_mul_epu32(0, a, b);
@@ -1226,7 +1231,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_mul_epu32() {
+    const fn test_mm_mask_mul_epu32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_mask_mul_epu32(a, 0, a, b);
@@ -1237,7 +1242,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_mul_epu32() {
+    const fn test_mm_maskz_mul_epu32() {
         let a = _mm_set1_epi32(1);
         let b = _mm_set_epi32(1, 2, 3, 4);
         let r = _mm_maskz_mul_epu32(0, a, b);
@@ -1248,7 +1253,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mullox_epi64() {
+    const fn test_mm512_mullox_epi64() {
         let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32);
         let b = _mm512_set1_epi64(2);
         let r = _mm512_mullox_epi64(a, b);
@@ -1257,7 +1262,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mullox_epi64() {
+    const fn test_mm512_mask_mullox_epi64() {
         let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32);
         let b = _mm512_set1_epi64(2);
         let r = _mm512_mask_mullox_epi64(a, 0, a, b);
@@ -1268,7 +1273,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mul_pd() {
+    const fn test_mm512_mul_pd() {
         let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
         let b = _mm512_set1_pd(2.);
         let r = _mm512_mul_pd(a, b);
@@ -1281,7 +1286,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mul_pd() {
+    const fn test_mm512_mask_mul_pd() {
         let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
         let b = _mm512_set1_pd(2.);
         let r = _mm512_mask_mul_pd(a, 0, a, b);
@@ -1296,7 +1301,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mul_pd() {
+    const fn test_mm512_maskz_mul_pd() {
         let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
         let b = _mm512_set1_pd(2.);
         let r = _mm512_maskz_mul_pd(0, a, b);
@@ -1307,7 +1312,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_mul_pd() {
+    const fn test_mm256_mask_mul_pd() {
         let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
         let b = _mm256_set1_pd(2.);
         let r = _mm256_mask_mul_pd(a, 0, a, b);
@@ -1318,7 +1323,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_mul_pd() {
+    const fn test_mm256_maskz_mul_pd() {
         let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
         let b = _mm256_set1_pd(2.);
         let r = _mm256_maskz_mul_pd(0, a, b);
@@ -1329,7 +1334,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_mul_pd() {
+    const fn test_mm_mask_mul_pd() {
         let a = _mm_set_pd(f64::MAX, f64::MIN);
         let b = _mm_set1_pd(2.);
         let r = _mm_mask_mul_pd(a, 0, a, b);
@@ -1340,7 +1345,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_mul_pd() {
+    const fn test_mm_maskz_mul_pd() {
         let a = _mm_set_pd(f64::MAX, f64::MIN);
         let b = _mm_set1_pd(2.);
         let r = _mm_maskz_mul_pd(0, a, b);
@@ -1351,7 +1356,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_div_pd() {
+    const fn test_mm512_div_pd() {
         let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
         let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
         let r = _mm512_div_pd(a, b);
@@ -1364,7 +1369,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_div_pd() {
+    const fn test_mm512_mask_div_pd() {
         let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
         let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
         let r = _mm512_mask_div_pd(a, 0, a, b);
@@ -1379,7 +1384,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_div_pd() {
+    const fn test_mm512_maskz_div_pd() {
         let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
         let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
         let r = _mm512_maskz_div_pd(0, a, b);
@@ -1390,7 +1395,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_div_pd() {
+    const fn test_mm256_mask_div_pd() {
         let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
         let b = _mm256_set_pd(2., 2., 0., 0.);
         let r = _mm256_mask_div_pd(a, 0, a, b);
@@ -1401,7 +1406,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_div_pd() {
+    const fn test_mm256_maskz_div_pd() {
         let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
         let b = _mm256_set_pd(2., 2., 0., 0.);
         let r = _mm256_maskz_div_pd(0, a, b);
@@ -1412,7 +1417,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_div_pd() {
+    const fn test_mm_mask_div_pd() {
         let a = _mm_set_pd(f64::MAX, f64::MIN);
         let b = _mm_set_pd(0., 0.);
         let r = _mm_mask_div_pd(a, 0, a, b);
@@ -1423,7 +1428,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_div_pd() {
+    const fn test_mm_maskz_div_pd() {
         let a = _mm_set_pd(f64::MAX, f64::MIN);
         let b = _mm_set_pd(0., 0.);
         let r = _mm_maskz_div_pd(0, a, b);
@@ -1434,7 +1439,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_max_epi64() {
+    const fn test_mm512_max_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_max_epi64(a, b);
@@ -1443,7 +1448,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_epi64() {
+    const fn test_mm512_mask_max_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_mask_max_epi64(a, 0, a, b);
@@ -1454,7 +1459,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_epi64() {
+    const fn test_mm512_maskz_max_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_maskz_max_epi64(0, a, b);
@@ -1465,7 +1470,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_max_epi64() {
+    const fn test_mm256_max_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_max_epi64(a, b);
@@ -1474,7 +1479,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_epi64() {
+    const fn test_mm256_mask_max_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_mask_max_epi64(a, 0, a, b);
@@ -1485,7 +1490,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_epi64() {
+    const fn test_mm256_maskz_max_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_maskz_max_epi64(0, a, b);
@@ -1496,7 +1501,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_max_epi64() {
+    const fn test_mm_max_epi64() {
         let a = _mm_set_epi64x(2, 3);
         let b = _mm_set_epi64x(3, 2);
         let r = _mm_max_epi64(a, b);
@@ -1505,7 +1510,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_epi64() {
+    const fn test_mm_mask_max_epi64() {
         let a = _mm_set_epi64x(2, 3);
         let b = _mm_set_epi64x(3, 2);
         let r = _mm_mask_max_epi64(a, 0, a, b);
@@ -1516,7 +1521,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_epi64() {
+    const fn test_mm_maskz_max_epi64() {
         let a = _mm_set_epi64x(2, 3);
         let b = _mm_set_epi64x(3, 2);
         let r = _mm_maskz_max_epi64(0, a, b);
@@ -1527,7 +1532,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_max_pd() {
+    fn test_mm512_max_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_max_pd(a, b);
@@ -1536,7 +1541,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_pd() {
+    fn test_mm512_mask_max_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_mask_max_pd(a, 0, a, b);
@@ -1547,7 +1552,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_pd() {
+    fn test_mm512_maskz_max_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_maskz_max_pd(0, a, b);
@@ -1558,7 +1563,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_pd() {
+    fn test_mm256_mask_max_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let b = _mm256_set_pd(3., 2., 1., 0.);
         let r = _mm256_mask_max_pd(a, 0, a, b);
@@ -1569,7 +1574,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_pd() {
+    fn test_mm256_maskz_max_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let b = _mm256_set_pd(3., 2., 1., 0.);
         let r = _mm256_maskz_max_pd(0, a, b);
@@ -1580,7 +1585,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_pd() {
+    fn test_mm_mask_max_pd() {
         let a = _mm_set_pd(2., 3.);
         let b = _mm_set_pd(3., 2.);
         let r = _mm_mask_max_pd(a, 0, a, b);
@@ -1591,7 +1596,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_pd() {
+    fn test_mm_maskz_max_pd() {
         let a = _mm_set_pd(2., 3.);
         let b = _mm_set_pd(3., 2.);
         let r = _mm_maskz_max_pd(0, a, b);
@@ -1602,7 +1607,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_max_epu64() {
+    const fn test_mm512_max_epu64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_max_epu64(a, b);
@@ -1611,7 +1616,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_epu64() {
+    const fn test_mm512_mask_max_epu64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_mask_max_epu64(a, 0, a, b);
@@ -1622,7 +1627,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_epu64() {
+    const fn test_mm512_maskz_max_epu64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_maskz_max_epu64(0, a, b);
@@ -1633,7 +1638,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_max_epu64() {
+    const fn test_mm256_max_epu64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_max_epu64(a, b);
@@ -1642,7 +1647,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_max_epu64() {
+    const fn test_mm256_mask_max_epu64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_mask_max_epu64(a, 0, a, b);
@@ -1653,7 +1658,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_max_epu64() {
+    const fn test_mm256_maskz_max_epu64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_maskz_max_epu64(0, a, b);
@@ -1664,7 +1669,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_max_epu64() {
+    const fn test_mm_max_epu64() {
         let a = _mm_set_epi64x(2, 3);
         let b = _mm_set_epi64x(3, 2);
         let r = _mm_max_epu64(a, b);
@@ -1673,7 +1678,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_max_epu64() {
+    const fn test_mm_mask_max_epu64() {
         let a = _mm_set_epi64x(2, 3);
         let b = _mm_set_epi64x(3, 2);
         let r = _mm_mask_max_epu64(a, 0, a, b);
@@ -1684,7 +1689,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_max_epu64() {
+    const fn test_mm_maskz_max_epu64() {
         let a = _mm_set_epi64x(2, 3);
         let b = _mm_set_epi64x(3, 2);
         let r = _mm_maskz_max_epu64(0, a, b);
@@ -1695,7 +1700,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_min_epi64() {
+    const fn test_mm512_min_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_min_epi64(a, b);
@@ -1704,7 +1709,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_epi64() {
+    const fn test_mm512_mask_min_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_mask_min_epi64(a, 0, a, b);
@@ -1715,7 +1720,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_epi64() {
+    const fn test_mm512_maskz_min_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_maskz_min_epi64(0, a, b);
@@ -1726,7 +1731,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_min_epi64() {
+    const fn test_mm256_min_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_min_epi64(a, b);
@@ -1735,7 +1740,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_epi64() {
+    const fn test_mm256_mask_min_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_mask_min_epi64(a, 0, a, b);
@@ -1746,7 +1751,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_epi64() {
+    const fn test_mm256_maskz_min_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_maskz_min_epi64(0, a, b);
@@ -1757,7 +1762,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_min_epi64() {
+    const fn test_mm_min_epi64() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(3, 2);
         let r = _mm_min_epi64(a, b);
@@ -1771,7 +1776,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_epi64() {
+    const fn test_mm_mask_min_epi64() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(3, 2);
         let r = _mm_mask_min_epi64(a, 0, a, b);
@@ -1782,7 +1787,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_epi64() {
+    const fn test_mm_maskz_min_epi64() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(3, 2);
         let r = _mm_maskz_min_epi64(0, a, b);
@@ -1793,7 +1798,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_min_pd() {
+    fn test_mm512_min_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_min_pd(a, b);
@@ -1802,7 +1807,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_pd() {
+    fn test_mm512_mask_min_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_mask_min_pd(a, 0, a, b);
@@ -1813,7 +1818,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_pd() {
+    fn test_mm512_maskz_min_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_maskz_min_pd(0, a, b);
@@ -1824,7 +1829,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_pd() {
+    fn test_mm256_mask_min_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let b = _mm256_set_pd(3., 2., 1., 0.);
         let r = _mm256_mask_min_pd(a, 0, a, b);
@@ -1835,7 +1840,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_pd() {
+    fn test_mm256_maskz_min_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let b = _mm256_set_pd(3., 2., 1., 0.);
         let r = _mm256_maskz_min_pd(0, a, b);
@@ -1846,7 +1851,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_pd() {
+    fn test_mm_mask_min_pd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(1., 0.);
         let r = _mm_mask_min_pd(a, 0, a, b);
@@ -1857,7 +1862,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_pd() {
+    fn test_mm_maskz_min_pd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set_pd(1., 0.);
         let r = _mm_maskz_min_pd(0, a, b);
@@ -1868,7 +1873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_min_epu64() {
+    const fn test_mm512_min_epu64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_min_epu64(a, b);
@@ -1877,7 +1882,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_epu64() {
+    const fn test_mm512_mask_min_epu64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_mask_min_epu64(a, 0, a, b);
@@ -1888,7 +1893,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_epu64() {
+    const fn test_mm512_maskz_min_epu64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
         let r = _mm512_maskz_min_epu64(0, a, b);
@@ -1899,7 +1904,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_min_epu64() {
+    const fn test_mm256_min_epu64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_min_epu64(a, b);
@@ -1908,7 +1913,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_min_epu64() {
+    const fn test_mm256_mask_min_epu64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_mask_min_epu64(a, 0, a, b);
@@ -1919,7 +1924,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_min_epu64() {
+    const fn test_mm256_maskz_min_epu64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_maskz_min_epu64(0, a, b);
@@ -1930,7 +1935,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_min_epu64() {
+    const fn test_mm_min_epu64() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(1, 0);
         let r = _mm_min_epu64(a, b);
@@ -1939,7 +1944,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_min_epu64() {
+    const fn test_mm_mask_min_epu64() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(1, 0);
         let r = _mm_mask_min_epu64(a, 0, a, b);
@@ -1950,7 +1955,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_min_epu64() {
+    const fn test_mm_maskz_min_epu64() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(1, 0);
         let r = _mm_maskz_min_epu64(0, a, b);
@@ -1961,7 +1966,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sqrt_pd() {
+    fn test_mm512_sqrt_pd() {
         let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.);
         let r = _mm512_sqrt_pd(a);
         let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
@@ -1969,7 +1974,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sqrt_pd() {
+    fn test_mm512_mask_sqrt_pd() {
         let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.);
         let r = _mm512_mask_sqrt_pd(a, 0, a);
         assert_eq_m512d(r, a);
@@ -1979,7 +1984,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sqrt_pd() {
+    fn test_mm512_maskz_sqrt_pd() {
         let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.);
         let r = _mm512_maskz_sqrt_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -1989,7 +1994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sqrt_pd() {
+    fn test_mm256_mask_sqrt_pd() {
         let a = _mm256_set_pd(0., 1., 4., 9.);
         let r = _mm256_mask_sqrt_pd(a, 0, a);
         assert_eq_m256d(r, a);
@@ -1999,7 +2004,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sqrt_pd() {
+    fn test_mm256_maskz_sqrt_pd() {
         let a = _mm256_set_pd(0., 1., 4., 9.);
         let r = _mm256_maskz_sqrt_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -2009,7 +2014,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sqrt_pd() {
+    fn test_mm_mask_sqrt_pd() {
         let a = _mm_set_pd(0., 1.);
         let r = _mm_mask_sqrt_pd(a, 0, a);
         assert_eq_m128d(r, a);
@@ -2019,7 +2024,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sqrt_pd() {
+    fn test_mm_maskz_sqrt_pd() {
         let a = _mm_set_pd(0., 1.);
         let r = _mm_maskz_sqrt_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -2029,7 +2034,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmadd_pd() {
+    const fn test_mm512_fmadd_pd() {
         let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
@@ -2039,7 +2044,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmadd_pd() {
+    const fn test_mm512_mask_fmadd_pd() {
         let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
@@ -2051,7 +2056,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmadd_pd() {
+    const fn test_mm512_maskz_fmadd_pd() {
         let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
@@ -2063,7 +2068,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmadd_pd() {
+    const fn test_mm512_mask3_fmadd_pd() {
         let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
@@ -2075,7 +2080,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fmadd_pd() {
+    const fn test_mm256_mask_fmadd_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2087,7 +2092,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fmadd_pd() {
+    const fn test_mm256_maskz_fmadd_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2099,7 +2104,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fmadd_pd() {
+    const fn test_mm256_mask3_fmadd_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2111,7 +2116,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fmadd_pd() {
+    const fn test_mm_mask_fmadd_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2123,7 +2128,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fmadd_pd() {
+    const fn test_mm_maskz_fmadd_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2135,7 +2140,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fmadd_pd() {
+    const fn test_mm_mask3_fmadd_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2147,7 +2152,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmsub_pd() {
+    const fn test_mm512_fmsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2157,7 +2162,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmsub_pd() {
+    const fn test_mm512_mask_fmsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2169,7 +2174,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmsub_pd() {
+    const fn test_mm512_maskz_fmsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2181,7 +2186,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmsub_pd() {
+    const fn test_mm512_mask3_fmsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
@@ -2193,7 +2198,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fmsub_pd() {
+    const fn test_mm256_mask_fmsub_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2205,7 +2210,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fmsub_pd() {
+    const fn test_mm256_maskz_fmsub_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2217,7 +2222,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fmsub_pd() {
+    const fn test_mm256_mask3_fmsub_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2229,7 +2234,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fmsub_pd() {
+    const fn test_mm_mask_fmsub_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2241,7 +2246,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fmsub_pd() {
+    const fn test_mm_maskz_fmsub_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2253,7 +2258,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fmsub_pd() {
+    const fn test_mm_mask3_fmsub_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2265,7 +2270,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmaddsub_pd() {
+    const fn test_mm512_fmaddsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2275,7 +2280,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmaddsub_pd() {
+    const fn test_mm512_mask_fmaddsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2287,7 +2292,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmaddsub_pd() {
+    const fn test_mm512_maskz_fmaddsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2299,7 +2304,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmaddsub_pd() {
+    const fn test_mm512_mask3_fmaddsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
@@ -2311,7 +2316,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fmaddsub_pd() {
+    const fn test_mm256_mask_fmaddsub_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2323,7 +2328,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fmaddsub_pd() {
+    const fn test_mm256_maskz_fmaddsub_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2335,7 +2340,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fmaddsub_pd() {
+    const fn test_mm256_mask3_fmaddsub_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2347,7 +2352,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fmaddsub_pd() {
+    const fn test_mm_mask_fmaddsub_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2359,7 +2364,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fmaddsub_pd() {
+    const fn test_mm_maskz_fmaddsub_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2371,7 +2376,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fmaddsub_pd() {
+    const fn test_mm_mask3_fmaddsub_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2383,7 +2388,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmsubadd_pd() {
+    const fn test_mm512_fmsubadd_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2393,7 +2398,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmsubadd_pd() {
+    const fn test_mm512_mask_fmsubadd_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2405,7 +2410,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmsubadd_pd() {
+    const fn test_mm512_maskz_fmsubadd_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2417,7 +2422,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmsubadd_pd() {
+    const fn test_mm512_mask3_fmsubadd_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
@@ -2429,7 +2434,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fmsubadd_pd() {
+    const fn test_mm256_mask_fmsubadd_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2441,7 +2446,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fmsubadd_pd() {
+    const fn test_mm256_maskz_fmsubadd_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2453,7 +2458,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fmsubadd_pd() {
+    const fn test_mm256_mask3_fmsubadd_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2465,7 +2470,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fmsubadd_pd() {
+    const fn test_mm_mask_fmsubadd_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2477,7 +2482,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fmsubadd_pd() {
+    const fn test_mm_maskz_fmsubadd_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2489,7 +2494,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fmsubadd_pd() {
+    const fn test_mm_mask3_fmsubadd_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2501,7 +2506,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fnmadd_pd() {
+    const fn test_mm512_fnmadd_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2511,7 +2516,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fnmadd_pd() {
+    const fn test_mm512_mask_fnmadd_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2523,7 +2528,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fnmadd_pd() {
+    const fn test_mm512_maskz_fnmadd_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2535,7 +2540,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fnmadd_pd() {
+    const fn test_mm512_mask3_fnmadd_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
@@ -2547,7 +2552,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fnmadd_pd() {
+    const fn test_mm256_mask_fnmadd_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2559,7 +2564,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fnmadd_pd() {
+    const fn test_mm256_maskz_fnmadd_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2571,7 +2576,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fnmadd_pd() {
+    const fn test_mm256_mask3_fnmadd_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2583,7 +2588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fnmadd_pd() {
+    const fn test_mm_mask_fnmadd_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2595,7 +2600,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fnmadd_pd() {
+    const fn test_mm_maskz_fnmadd_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2607,7 +2612,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fnmadd_pd() {
+    const fn test_mm_mask3_fnmadd_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2619,7 +2624,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fnmsub_pd() {
+    const fn test_mm512_fnmsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2629,7 +2634,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fnmsub_pd() {
+    const fn test_mm512_mask_fnmsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2641,7 +2646,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fnmsub_pd() {
+    const fn test_mm512_maskz_fnmsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_set1_pd(1.);
@@ -2653,7 +2658,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fnmsub_pd() {
+    const fn test_mm512_mask3_fnmsub_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
@@ -2665,7 +2670,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fnmsub_pd() {
+    const fn test_mm256_mask_fnmsub_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2677,7 +2682,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fnmsub_pd() {
+    const fn test_mm256_maskz_fnmsub_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2689,7 +2694,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask3_fnmsub_pd() {
+    const fn test_mm256_mask3_fnmsub_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set_pd(0., 1., 2., 3.);
         let c = _mm256_set1_pd(1.);
@@ -2701,7 +2706,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fnmsub_pd() {
+    const fn test_mm_mask_fnmsub_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2713,7 +2718,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fnmsub_pd() {
+    const fn test_mm_maskz_fnmsub_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2725,7 +2730,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask3_fnmsub_pd() {
+    const fn test_mm_mask3_fnmsub_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set_pd(0., 1.);
         let c = _mm_set1_pd(1.);
@@ -2737,7 +2742,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rcp14_pd() {
+    fn test_mm512_rcp14_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_rcp14_pd(a);
         let e = _mm512_set1_pd(0.3333320617675781);
@@ -2745,7 +2750,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rcp14_pd() {
+    fn test_mm512_mask_rcp14_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_mask_rcp14_pd(a, 0, a);
         assert_eq_m512d(r, a);
@@ -2759,7 +2764,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rcp14_pd() {
+    fn test_mm512_maskz_rcp14_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_maskz_rcp14_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -2773,7 +2778,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rcp14_pd() {
+    fn test_mm256_rcp14_pd() {
         let a = _mm256_set1_pd(3.);
         let r = _mm256_rcp14_pd(a);
         let e = _mm256_set1_pd(0.3333320617675781);
@@ -2781,7 +2786,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rcp14_pd() {
+    fn test_mm256_mask_rcp14_pd() {
         let a = _mm256_set1_pd(3.);
         let r = _mm256_mask_rcp14_pd(a, 0, a);
         assert_eq_m256d(r, a);
@@ -2791,7 +2796,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rcp14_pd() {
+    fn test_mm256_maskz_rcp14_pd() {
         let a = _mm256_set1_pd(3.);
         let r = _mm256_maskz_rcp14_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -2801,7 +2806,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rcp14_pd() {
+    fn test_mm_rcp14_pd() {
         let a = _mm_set1_pd(3.);
         let r = _mm_rcp14_pd(a);
         let e = _mm_set1_pd(0.3333320617675781);
@@ -2809,7 +2814,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rcp14_pd() {
+    fn test_mm_mask_rcp14_pd() {
         let a = _mm_set1_pd(3.);
         let r = _mm_mask_rcp14_pd(a, 0, a);
         assert_eq_m128d(r, a);
@@ -2819,7 +2824,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rcp14_pd() {
+    fn test_mm_maskz_rcp14_pd() {
         let a = _mm_set1_pd(3.);
         let r = _mm_maskz_rcp14_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -2829,7 +2834,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rsqrt14_pd() {
+    fn test_mm512_rsqrt14_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_rsqrt14_pd(a);
         let e = _mm512_set1_pd(0.5773391723632813);
@@ -2837,7 +2842,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rsqrt14_pd() {
+    fn test_mm512_mask_rsqrt14_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_mask_rsqrt14_pd(a, 0, a);
         assert_eq_m512d(r, a);
@@ -2851,7 +2856,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rsqrt14_pd() {
+    fn test_mm512_maskz_rsqrt14_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_maskz_rsqrt14_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -2865,7 +2870,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rsqrt14_pd() {
+    fn test_mm256_rsqrt14_pd() {
         let a = _mm256_set1_pd(3.);
         let r = _mm256_rsqrt14_pd(a);
         let e = _mm256_set1_pd(0.5773391723632813);
@@ -2873,7 +2878,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rsqrt14_pd() {
+    fn test_mm256_mask_rsqrt14_pd() {
         let a = _mm256_set1_pd(3.);
         let r = _mm256_mask_rsqrt14_pd(a, 0, a);
         assert_eq_m256d(r, a);
@@ -2883,7 +2888,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rsqrt14_pd() {
+    fn test_mm256_maskz_rsqrt14_pd() {
         let a = _mm256_set1_pd(3.);
         let r = _mm256_maskz_rsqrt14_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -2893,7 +2898,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rsqrt14_pd() {
+    fn test_mm_rsqrt14_pd() {
         let a = _mm_set1_pd(3.);
         let r = _mm_rsqrt14_pd(a);
         let e = _mm_set1_pd(0.5773391723632813);
@@ -2901,7 +2906,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rsqrt14_pd() {
+    fn test_mm_mask_rsqrt14_pd() {
         let a = _mm_set1_pd(3.);
         let r = _mm_mask_rsqrt14_pd(a, 0, a);
         assert_eq_m128d(r, a);
@@ -2911,7 +2916,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rsqrt14_pd() {
+    fn test_mm_maskz_rsqrt14_pd() {
         let a = _mm_set1_pd(3.);
         let r = _mm_maskz_rsqrt14_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -2921,7 +2926,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_getexp_pd() {
+    fn test_mm512_getexp_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_getexp_pd(a);
         let e = _mm512_set1_pd(1.);
@@ -2929,7 +2934,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_getexp_pd() {
+    fn test_mm512_mask_getexp_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_mask_getexp_pd(a, 0, a);
         assert_eq_m512d(r, a);
@@ -2939,7 +2944,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_getexp_pd() {
+    fn test_mm512_maskz_getexp_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_maskz_getexp_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -2949,7 +2954,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_getexp_pd() {
+    fn test_mm256_getexp_pd() {
         let a = _mm256_set1_pd(3.);
         let r = _mm256_getexp_pd(a);
         let e = _mm256_set1_pd(1.);
@@ -2957,7 +2962,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_getexp_pd() {
+    fn test_mm256_mask_getexp_pd() {
         let a = _mm256_set1_pd(3.);
         let r = _mm256_mask_getexp_pd(a, 0, a);
         assert_eq_m256d(r, a);
@@ -2967,7 +2972,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_getexp_pd() {
+    fn test_mm256_maskz_getexp_pd() {
         let a = _mm256_set1_pd(3.);
         let r = _mm256_maskz_getexp_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -2977,7 +2982,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_getexp_pd() {
+    fn test_mm_getexp_pd() {
         let a = _mm_set1_pd(3.);
         let r = _mm_getexp_pd(a);
         let e = _mm_set1_pd(1.);
@@ -2985,7 +2990,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_getexp_pd() {
+    fn test_mm_mask_getexp_pd() {
         let a = _mm_set1_pd(3.);
         let r = _mm_mask_getexp_pd(a, 0, a);
         assert_eq_m128d(r, a);
@@ -2995,7 +3000,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_getexp_pd() {
+    fn test_mm_maskz_getexp_pd() {
         let a = _mm_set1_pd(3.);
         let r = _mm_maskz_getexp_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -3005,7 +3010,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_roundscale_pd() {
+    fn test_mm512_roundscale_pd() {
         let a = _mm512_set1_pd(1.1);
         let r = _mm512_roundscale_pd::<0b00_00_00_00>(a);
         let e = _mm512_set1_pd(1.0);
@@ -3013,7 +3018,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_roundscale_pd() {
+    fn test_mm512_mask_roundscale_pd() {
         let a = _mm512_set1_pd(1.1);
         let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
         let e = _mm512_set1_pd(1.1);
@@ -3024,7 +3029,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_roundscale_pd() {
+    fn test_mm512_maskz_roundscale_pd() {
         let a = _mm512_set1_pd(1.1);
         let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -3034,7 +3039,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_roundscale_pd() {
+    fn test_mm256_roundscale_pd() {
         let a = _mm256_set1_pd(1.1);
         let r = _mm256_roundscale_pd::<0b00_00_00_00>(a);
         let e = _mm256_set1_pd(1.0);
@@ -3042,7 +3047,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_roundscale_pd() {
+    fn test_mm256_mask_roundscale_pd() {
         let a = _mm256_set1_pd(1.1);
         let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
         assert_eq_m256d(r, a);
@@ -3052,7 +3057,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_roundscale_pd() {
+    fn test_mm256_maskz_roundscale_pd() {
         let a = _mm256_set1_pd(1.1);
         let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -3062,7 +3067,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_roundscale_pd() {
+    fn test_mm_roundscale_pd() {
         let a = _mm_set1_pd(1.1);
         let r = _mm_roundscale_pd::<0b00_00_00_00>(a);
         let e = _mm_set1_pd(1.0);
@@ -3070,7 +3075,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_roundscale_pd() {
+    fn test_mm_mask_roundscale_pd() {
         let a = _mm_set1_pd(1.1);
         let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
         let e = _mm_set1_pd(1.1);
@@ -3081,7 +3086,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_roundscale_pd() {
+    fn test_mm_maskz_roundscale_pd() {
         let a = _mm_set1_pd(1.1);
         let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -3091,7 +3096,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_scalef_pd() {
+    fn test_mm512_scalef_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(3.);
         let r = _mm512_scalef_pd(a, b);
@@ -3100,7 +3105,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_scalef_pd() {
+    fn test_mm512_mask_scalef_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(3.);
         let r = _mm512_mask_scalef_pd(a, 0, a, b);
@@ -3111,7 +3116,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_scalef_pd() {
+    fn test_mm512_maskz_scalef_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(3.);
         let r = _mm512_maskz_scalef_pd(0, a, b);
@@ -3122,7 +3127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_scalef_pd() {
+    fn test_mm256_scalef_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set1_pd(3.);
         let r = _mm256_scalef_pd(a, b);
@@ -3131,7 +3136,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_scalef_pd() {
+    fn test_mm256_mask_scalef_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set1_pd(3.);
         let r = _mm256_mask_scalef_pd(a, 0, a, b);
@@ -3142,7 +3147,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_scalef_pd() {
+    fn test_mm256_maskz_scalef_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set1_pd(3.);
         let r = _mm256_maskz_scalef_pd(0, a, b);
@@ -3153,7 +3158,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_scalef_pd() {
+    fn test_mm_scalef_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(3.);
         let r = _mm_scalef_pd(a, b);
@@ -3162,7 +3167,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_scalef_pd() {
+    fn test_mm_mask_scalef_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(3.);
         let r = _mm_mask_scalef_pd(a, 0, a, b);
@@ -3173,7 +3178,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_scalef_pd() {
+    fn test_mm_maskz_scalef_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(3.);
         let r = _mm_maskz_scalef_pd(0, a, b);
@@ -3184,7 +3189,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fixupimm_pd() {
+    fn test_mm512_fixupimm_pd() {
         let a = _mm512_set1_pd(f64::NAN);
         let b = _mm512_set1_pd(f64::MAX);
         let c = _mm512_set1_epi64(i32::MAX as i64);
@@ -3194,7 +3199,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fixupimm_pd() {
+    fn test_mm512_mask_fixupimm_pd() {
         let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
         let b = _mm512_set1_pd(f64::MAX);
         let c = _mm512_set1_epi64(i32::MAX as i64);
@@ -3204,7 +3209,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fixupimm_pd() {
+    fn test_mm512_maskz_fixupimm_pd() {
         let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
         let b = _mm512_set1_pd(f64::MAX);
         let c = _mm512_set1_epi64(i32::MAX as i64);
@@ -3214,7 +3219,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_fixupimm_pd() {
+    fn test_mm256_fixupimm_pd() {
         let a = _mm256_set1_pd(f64::NAN);
         let b = _mm256_set1_pd(f64::MAX);
         let c = _mm256_set1_epi64x(i32::MAX as i64);
@@ -3224,7 +3229,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_fixupimm_pd() {
+    fn test_mm256_mask_fixupimm_pd() {
         let a = _mm256_set1_pd(f64::NAN);
         let b = _mm256_set1_pd(f64::MAX);
         let c = _mm256_set1_epi64x(i32::MAX as i64);
@@ -3234,7 +3239,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_fixupimm_pd() {
+    fn test_mm256_maskz_fixupimm_pd() {
         let a = _mm256_set1_pd(f64::NAN);
         let b = _mm256_set1_pd(f64::MAX);
         let c = _mm256_set1_epi64x(i32::MAX as i64);
@@ -3244,7 +3249,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_fixupimm_pd() {
+    fn test_mm_fixupimm_pd() {
         let a = _mm_set1_pd(f64::NAN);
         let b = _mm_set1_pd(f64::MAX);
         let c = _mm_set1_epi64x(i32::MAX as i64);
@@ -3254,7 +3259,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_fixupimm_pd() {
+    fn test_mm_mask_fixupimm_pd() {
         let a = _mm_set1_pd(f64::NAN);
         let b = _mm_set1_pd(f64::MAX);
         let c = _mm_set1_epi64x(i32::MAX as i64);
@@ -3264,7 +3269,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_fixupimm_pd() {
+    fn test_mm_maskz_fixupimm_pd() {
         let a = _mm_set1_pd(f64::NAN);
         let b = _mm_set1_pd(f64::MAX);
         let c = _mm_set1_epi64x(i32::MAX as i64);
@@ -3274,7 +3279,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_ternarylogic_epi64() {
+    fn test_mm512_ternarylogic_epi64() {
         let a = _mm512_set1_epi64(1 << 2);
         let b = _mm512_set1_epi64(1 << 1);
         let c = _mm512_set1_epi64(1 << 0);
@@ -3284,7 +3289,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_ternarylogic_epi64() {
+    fn test_mm512_mask_ternarylogic_epi64() {
         let src = _mm512_set1_epi64(1 << 2);
         let a = _mm512_set1_epi64(1 << 1);
         let b = _mm512_set1_epi64(1 << 0);
@@ -3296,7 +3301,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_ternarylogic_epi64() {
+    fn test_mm512_maskz_ternarylogic_epi64() {
         let a = _mm512_set1_epi64(1 << 2);
         let b = _mm512_set1_epi64(1 << 1);
         let c = _mm512_set1_epi64(1 << 0);
@@ -3308,7 +3313,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_ternarylogic_epi64() {
+    fn test_mm256_ternarylogic_epi64() {
         let a = _mm256_set1_epi64x(1 << 2);
         let b = _mm256_set1_epi64x(1 << 1);
         let c = _mm256_set1_epi64x(1 << 0);
@@ -3318,7 +3323,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_ternarylogic_epi64() {
+    fn test_mm256_mask_ternarylogic_epi64() {
         let src = _mm256_set1_epi64x(1 << 2);
         let a = _mm256_set1_epi64x(1 << 1);
         let b = _mm256_set1_epi64x(1 << 0);
@@ -3330,7 +3335,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_ternarylogic_epi64() {
+    fn test_mm256_maskz_ternarylogic_epi64() {
         let a = _mm256_set1_epi64x(1 << 2);
         let b = _mm256_set1_epi64x(1 << 1);
         let c = _mm256_set1_epi64x(1 << 0);
@@ -3342,7 +3347,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_ternarylogic_epi64() {
+    fn test_mm_ternarylogic_epi64() {
         let a = _mm_set1_epi64x(1 << 2);
         let b = _mm_set1_epi64x(1 << 1);
         let c = _mm_set1_epi64x(1 << 0);
@@ -3352,7 +3357,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_ternarylogic_epi64() {
+    fn test_mm_mask_ternarylogic_epi64() {
         let src = _mm_set1_epi64x(1 << 2);
         let a = _mm_set1_epi64x(1 << 1);
         let b = _mm_set1_epi64x(1 << 0);
@@ -3364,7 +3369,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_ternarylogic_epi64() {
+    fn test_mm_maskz_ternarylogic_epi64() {
         let a = _mm_set1_epi64x(1 << 2);
         let b = _mm_set1_epi64x(1 << 1);
         let c = _mm_set1_epi64x(1 << 0);
@@ -3376,7 +3381,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_getmant_pd() {
+    fn test_mm512_getmant_pd() {
         let a = _mm512_set1_pd(10.);
         let r = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
         let e = _mm512_set1_pd(1.25);
@@ -3384,7 +3389,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_getmant_pd() {
+    fn test_mm512_mask_getmant_pd() {
         let a = _mm512_set1_pd(10.);
         let r = _mm512_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
         assert_eq_m512d(r, a);
@@ -3394,7 +3399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_getmant_pd() {
+    fn test_mm512_maskz_getmant_pd() {
         let a = _mm512_set1_pd(10.);
         let r = _mm512_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -3404,7 +3409,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_getmant_pd() {
+    fn test_mm256_getmant_pd() {
         let a = _mm256_set1_pd(10.);
         let r = _mm256_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
         let e = _mm256_set1_pd(1.25);
@@ -3412,7 +3417,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_getmant_pd() {
+    fn test_mm256_mask_getmant_pd() {
         let a = _mm256_set1_pd(10.);
         let r = _mm256_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
         assert_eq_m256d(r, a);
@@ -3422,7 +3427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_getmant_pd() {
+    fn test_mm256_maskz_getmant_pd() {
         let a = _mm256_set1_pd(10.);
         let r = _mm256_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -3432,7 +3437,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_getmant_pd() {
+    fn test_mm_getmant_pd() {
         let a = _mm_set1_pd(10.);
         let r = _mm_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
         let e = _mm_set1_pd(1.25);
@@ -3440,7 +3445,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_getmant_pd() {
+    fn test_mm_mask_getmant_pd() {
         let a = _mm_set1_pd(10.);
         let r = _mm_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
         assert_eq_m128d(r, a);
@@ -3450,7 +3455,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_getmant_pd() {
+    fn test_mm_maskz_getmant_pd() {
         let a = _mm_set1_pd(10.);
         let r = _mm_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -3460,7 +3465,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtps_pd() {
+    fn test_mm512_cvtps_pd() {
         let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvtps_pd(a);
         let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
@@ -3468,7 +3473,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtps_pd() {
+    fn test_mm512_mask_cvtps_pd() {
         let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm512_set1_pd(0.);
         let r = _mm512_mask_cvtps_pd(src, 0, a);
@@ -3479,7 +3484,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtps_pd() {
+    fn test_mm512_maskz_cvtps_pd() {
         let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvtps_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -3489,7 +3494,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtpslo_pd() {
+    fn test_mm512_cvtpslo_pd() {
         let v2 = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 100., 100., 100., 100., 100., 100., 100., 100.,
         );
@@ -3499,7 +3504,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtpslo_pd() {
+    fn test_mm512_mask_cvtpslo_pd() {
         let v2 = _mm512_setr_ps(
             0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 100., 100., 100., 100., 100., 100., 100., 100.,
         );
@@ -3512,7 +3517,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtpd_ps() {
+    fn test_mm512_cvtpd_ps() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvtpd_ps(a);
         let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
@@ -3520,7 +3525,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtpd_ps() {
+    fn test_mm512_mask_cvtpd_ps() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm256_set1_ps(0.);
         let r = _mm512_mask_cvtpd_ps(src, 0, a);
@@ -3531,7 +3536,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtpd_ps() {
+    fn test_mm512_maskz_cvtpd_ps() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvtpd_ps(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -3541,7 +3546,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtpd_ps() {
+    fn test_mm256_mask_cvtpd_ps() {
         let a = _mm256_set_pd(4., -5.5, 6., -7.5);
         let src = _mm_set1_ps(0.);
         let r = _mm256_mask_cvtpd_ps(src, 0, a);
@@ -3552,7 +3557,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtpd_ps() {
+    fn test_mm256_maskz_cvtpd_ps() {
         let a = _mm256_set_pd(4., -5.5, 6., -7.5);
         let r = _mm256_maskz_cvtpd_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -3562,7 +3567,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtpd_ps() {
+    fn test_mm_mask_cvtpd_ps() {
         let a = _mm_set_pd(6., -7.5);
         let src = _mm_set1_ps(0.);
         let r = _mm_mask_cvtpd_ps(src, 0, a);
@@ -3573,7 +3578,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtpd_ps() {
+    fn test_mm_maskz_cvtpd_ps() {
         let a = _mm_set_pd(6., -7.5);
         let r = _mm_maskz_cvtpd_ps(0, a);
         assert_eq_m128(r, _mm_setzero_ps());
@@ -3583,7 +3588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtpd_epi32() {
+    fn test_mm512_cvtpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvtpd_epi32(a);
         let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8);
@@ -3591,7 +3596,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtpd_epi32() {
+    fn test_mm512_mask_cvtpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm512_mask_cvtpd_epi32(src, 0, a);
@@ -3602,7 +3607,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtpd_epi32() {
+    fn test_mm512_maskz_cvtpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvtpd_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -3612,7 +3617,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtpd_epi32() {
+    fn test_mm256_mask_cvtpd_epi32() {
         let a = _mm256_set_pd(4., -5.5, 6., -7.5);
         let src = _mm_set1_epi32(0);
         let r = _mm256_mask_cvtpd_epi32(src, 0, a);
@@ -3623,7 +3628,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtpd_epi32() {
+    fn test_mm256_maskz_cvtpd_epi32() {
         let a = _mm256_set_pd(4., -5.5, 6., -7.5);
         let r = _mm256_maskz_cvtpd_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -3633,7 +3638,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtpd_epi32() {
+    fn test_mm_mask_cvtpd_epi32() {
         let a = _mm_set_pd(6., -7.5);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvtpd_epi32(src, 0, a);
@@ -3644,7 +3649,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtpd_epi32() {
+    fn test_mm_maskz_cvtpd_epi32() {
         let a = _mm_set_pd(6., -7.5);
         let r = _mm_maskz_cvtpd_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -3654,7 +3659,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtpd_epu32() {
+    fn test_mm512_cvtpd_epu32() {
         let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5);
         let r = _mm512_cvtpd_epu32(a);
         let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8);
@@ -3662,7 +3667,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtpd_epu32() {
+    fn test_mm512_mask_cvtpd_epu32() {
         let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm512_mask_cvtpd_epu32(src, 0, a);
@@ -3673,7 +3678,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtpd_epu32() {
+    fn test_mm512_maskz_cvtpd_epu32() {
         let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5);
         let r = _mm512_maskz_cvtpd_epu32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -3683,7 +3688,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtpd_epu32() {
+    fn test_mm256_cvtpd_epu32() {
         let a = _mm256_set_pd(4., 5.5, 6., 7.5);
         let r = _mm256_cvtpd_epu32(a);
         let e = _mm_set_epi32(4, 6, 6, 8);
@@ -3691,7 +3696,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtpd_epu32() {
+    fn test_mm256_mask_cvtpd_epu32() {
         let a = _mm256_set_pd(4., 5.5, 6., 7.5);
         let src = _mm_set1_epi32(0);
         let r = _mm256_mask_cvtpd_epu32(src, 0, a);
@@ -3702,7 +3707,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtpd_epu32() {
+    fn test_mm256_maskz_cvtpd_epu32() {
         let a = _mm256_set_pd(4., 5.5, 6., 7.5);
         let r = _mm256_maskz_cvtpd_epu32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -3712,7 +3717,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtpd_epu32() {
+    fn test_mm_cvtpd_epu32() {
         let a = _mm_set_pd(6., 7.5);
         let r = _mm_cvtpd_epu32(a);
         let e = _mm_set_epi32(0, 0, 6, 8);
@@ -3720,7 +3725,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtpd_epu32() {
+    fn test_mm_mask_cvtpd_epu32() {
         let a = _mm_set_pd(6., 7.5);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvtpd_epu32(src, 0, a);
@@ -3731,7 +3736,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtpd_epu32() {
+    fn test_mm_maskz_cvtpd_epu32() {
         let a = _mm_set_pd(6., 7.5);
         let r = _mm_maskz_cvtpd_epu32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -3741,7 +3746,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtpd_pslo() {
+    fn test_mm512_cvtpd_pslo() {
         let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvtpd_pslo(v2);
         let e = _mm512_setr_ps(
@@ -3751,7 +3756,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtpd_pslo() {
+    fn test_mm512_mask_cvtpd_pslo() {
         let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm512_set1_ps(0.);
         let r = _mm512_mask_cvtpd_pslo(src, 0, v2);
@@ -3764,7 +3769,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi8_epi64() {
+    const fn test_mm512_cvtepi8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi8_epi64(a);
         let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
@@ -3772,7 +3777,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi8_epi64() {
+    const fn test_mm512_mask_cvtepi8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi64(-1);
         let r = _mm512_mask_cvtepi8_epi64(src, 0, a);
@@ -3783,7 +3788,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi8_epi64() {
+    const fn test_mm512_maskz_cvtepi8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi8_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -3793,7 +3798,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi8_epi64() {
+    const fn test_mm256_mask_cvtepi8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm256_set1_epi64x(-1);
         let r = _mm256_mask_cvtepi8_epi64(src, 0, a);
@@ -3804,7 +3809,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi8_epi64() {
+    const fn test_mm256_maskz_cvtepi8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_cvtepi8_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -3814,7 +3819,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi8_epi64() {
+    const fn test_mm_mask_cvtepi8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set1_epi64x(-1);
         let r = _mm_mask_cvtepi8_epi64(src, 0, a);
@@ -3825,7 +3830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi8_epi64() {
+    const fn test_mm_maskz_cvtepi8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_cvtepi8_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -3835,7 +3840,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepu8_epi64() {
+    const fn test_mm512_cvtepu8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepu8_epi64(a);
         let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
@@ -3843,7 +3848,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepu8_epi64() {
+    const fn test_mm512_mask_cvtepu8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi64(-1);
         let r = _mm512_mask_cvtepu8_epi64(src, 0, a);
@@ -3854,7 +3859,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepu8_epi64() {
+    const fn test_mm512_maskz_cvtepu8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepu8_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -3864,7 +3869,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu8_epi64() {
+    const fn test_mm256_mask_cvtepu8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm256_set1_epi64x(-1);
         let r = _mm256_mask_cvtepu8_epi64(src, 0, a);
@@ -3875,7 +3880,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu8_epi64() {
+    const fn test_mm256_maskz_cvtepu8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_cvtepu8_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -3885,7 +3890,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu8_epi64() {
+    const fn test_mm_mask_cvtepu8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set1_epi64x(-1);
         let r = _mm_mask_cvtepu8_epi64(src, 0, a);
@@ -3896,7 +3901,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu8_epi64() {
+    const fn test_mm_maskz_cvtepu8_epi64() {
         let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_cvtepu8_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -3906,7 +3911,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi16_epi64() {
+    const fn test_mm512_cvtepi16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi16_epi64(a);
         let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
@@ -3914,7 +3919,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi16_epi64() {
+    const fn test_mm512_mask_cvtepi16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi64(-1);
         let r = _mm512_mask_cvtepi16_epi64(src, 0, a);
@@ -3925,7 +3930,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi16_epi64() {
+    const fn test_mm512_maskz_cvtepi16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi16_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -3935,7 +3940,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi16_epi64() {
+    const fn test_mm256_mask_cvtepi16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm256_set1_epi64x(-1);
         let r = _mm256_mask_cvtepi16_epi64(src, 0, a);
@@ -3946,7 +3951,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi16_epi64() {
+    const fn test_mm256_maskz_cvtepi16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_cvtepi16_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -3956,7 +3961,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi16_epi64() {
+    const fn test_mm_mask_cvtepi16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set1_epi64x(-1);
         let r = _mm_mask_cvtepi16_epi64(src, 0, a);
@@ -3967,7 +3972,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi16_epi64() {
+    const fn test_mm_maskz_cvtepi16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_cvtepi16_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -3977,7 +3982,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepu16_epi64() {
+    const fn test_mm512_cvtepu16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepu16_epi64(a);
         let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
@@ -3985,7 +3990,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepu16_epi64() {
+    const fn test_mm512_mask_cvtepu16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi64(-1);
         let r = _mm512_mask_cvtepu16_epi64(src, 0, a);
@@ -3996,7 +4001,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepu16_epi64() {
+    const fn test_mm512_maskz_cvtepu16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepu16_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -4006,7 +4011,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu16_epi64() {
+    const fn test_mm256_mask_cvtepu16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm256_set1_epi64x(-1);
         let r = _mm256_mask_cvtepu16_epi64(src, 0, a);
@@ -4017,7 +4022,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu16_epi64() {
+    const fn test_mm256_maskz_cvtepu16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm256_maskz_cvtepu16_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -4027,7 +4032,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu16_epi64() {
+    const fn test_mm_mask_cvtepu16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set1_epi64x(-1);
         let r = _mm_mask_cvtepu16_epi64(src, 0, a);
@@ -4038,7 +4043,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu16_epi64() {
+    const fn test_mm_maskz_cvtepu16_epi64() {
         let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm_maskz_cvtepu16_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4048,7 +4053,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi32_epi64() {
+    const fn test_mm512_cvtepi32_epi64() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi32_epi64(a);
         let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
@@ -4056,7 +4061,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi32_epi64() {
+    const fn test_mm512_mask_cvtepi32_epi64() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi64(-1);
         let r = _mm512_mask_cvtepi32_epi64(src, 0, a);
@@ -4067,7 +4072,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi32_epi64() {
+    const fn test_mm512_maskz_cvtepi32_epi64() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi32_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -4077,7 +4082,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi32_epi64() {
+    const fn test_mm256_mask_cvtepi32_epi64() {
         let a = _mm_set_epi32(8, 9, 10, 11);
         let src = _mm256_set1_epi64x(-1);
         let r = _mm256_mask_cvtepi32_epi64(src, 0, a);
@@ -4088,7 +4093,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi32_epi64() {
+    const fn test_mm256_maskz_cvtepi32_epi64() {
         let a = _mm_set_epi32(8, 9, 10, 11);
         let r = _mm256_maskz_cvtepi32_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -4098,7 +4103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi32_epi64() {
+    const fn test_mm_mask_cvtepi32_epi64() {
         let a = _mm_set_epi32(8, 9, 10, 11);
         let src = _mm_set1_epi64x(0);
         let r = _mm_mask_cvtepi32_epi64(src, 0, a);
@@ -4109,7 +4114,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi32_epi64() {
+    const fn test_mm_maskz_cvtepi32_epi64() {
         let a = _mm_set_epi32(8, 9, 10, 11);
         let r = _mm_maskz_cvtepi32_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4119,7 +4124,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepu32_epi64() {
+    const fn test_mm512_cvtepu32_epi64() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepu32_epi64(a);
         let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
@@ -4127,7 +4132,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepu32_epi64() {
+    const fn test_mm512_mask_cvtepu32_epi64() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_epi64(-1);
         let r = _mm512_mask_cvtepu32_epi64(src, 0, a);
@@ -4138,7 +4143,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepu32_epi64() {
+    const fn test_mm512_maskz_cvtepu32_epi64() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepu32_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -4148,7 +4153,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu32_epi64() {
+    const fn test_mm256_mask_cvtepu32_epi64() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let src = _mm256_set1_epi64x(-1);
         let r = _mm256_mask_cvtepu32_epi64(src, 0, a);
@@ -4159,7 +4164,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu32_epi64() {
+    const fn test_mm256_maskz_cvtepu32_epi64() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let r = _mm256_maskz_cvtepu32_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -4169,7 +4174,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu32_epi64() {
+    const fn test_mm_mask_cvtepu32_epi64() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let src = _mm_set1_epi64x(-1);
         let r = _mm_mask_cvtepu32_epi64(src, 0, a);
@@ -4180,7 +4185,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu32_epi64() {
+    const fn test_mm_maskz_cvtepu32_epi64() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let r = _mm_maskz_cvtepu32_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4190,7 +4195,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi32_pd() {
+    const fn test_mm512_cvtepi32_pd() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi32_pd(a);
         let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.);
@@ -4198,7 +4203,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi32_pd() {
+    const fn test_mm512_mask_cvtepi32_pd() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_pd(-1.);
         let r = _mm512_mask_cvtepi32_pd(src, 0, a);
@@ -4209,7 +4214,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi32_pd() {
+    const fn test_mm512_maskz_cvtepi32_pd() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi32_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -4219,7 +4224,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi32_pd() {
+    const fn test_mm256_mask_cvtepi32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let src = _mm256_set1_pd(-1.);
         let r = _mm256_mask_cvtepi32_pd(src, 0, a);
@@ -4230,7 +4235,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi32_pd() {
+    const fn test_mm256_maskz_cvtepi32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let r = _mm256_maskz_cvtepi32_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -4240,7 +4245,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi32_pd() {
+    const fn test_mm_mask_cvtepi32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let src = _mm_set1_pd(-1.);
         let r = _mm_mask_cvtepi32_pd(src, 0, a);
@@ -4251,7 +4256,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi32_pd() {
+    const fn test_mm_maskz_cvtepi32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let r = _mm_maskz_cvtepi32_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -4261,7 +4266,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepu32_pd() {
+    const fn test_mm512_cvtepu32_pd() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepu32_pd(a);
         let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.);
@@ -4269,7 +4274,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepu32_pd() {
+    const fn test_mm512_mask_cvtepu32_pd() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_pd(-1.);
         let r = _mm512_mask_cvtepu32_pd(src, 0, a);
@@ -4280,7 +4285,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepu32_pd() {
+    const fn test_mm512_maskz_cvtepu32_pd() {
         let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepu32_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -4290,7 +4295,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtepu32_pd() {
+    const fn test_mm256_cvtepu32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let r = _mm256_cvtepu32_pd(a);
         let e = _mm256_set_pd(12., 13., 14., 15.);
@@ -4298,7 +4303,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepu32_pd() {
+    const fn test_mm256_mask_cvtepu32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let src = _mm256_set1_pd(-1.);
         let r = _mm256_mask_cvtepu32_pd(src, 0, a);
@@ -4309,7 +4314,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepu32_pd() {
+    const fn test_mm256_maskz_cvtepu32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let r = _mm256_maskz_cvtepu32_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -4319,7 +4324,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtepu32_pd() {
+    const fn test_mm_cvtepu32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let r = _mm_cvtepu32_pd(a);
         let e = _mm_set_pd(14., 15.);
@@ -4327,7 +4332,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepu32_pd() {
+    const fn test_mm_mask_cvtepu32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let src = _mm_set1_pd(-1.);
         let r = _mm_mask_cvtepu32_pd(src, 0, a);
@@ -4338,7 +4343,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepu32_pd() {
+    const fn test_mm_maskz_cvtepu32_pd() {
         let a = _mm_set_epi32(12, 13, 14, 15);
         let r = _mm_maskz_cvtepu32_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -4348,7 +4353,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi32lo_pd() {
+    const fn test_mm512_cvtepi32lo_pd() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi32lo_pd(a);
         let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.);
@@ -4356,7 +4361,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi32lo_pd() {
+    const fn test_mm512_mask_cvtepi32lo_pd() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_pd(-1.);
         let r = _mm512_mask_cvtepi32lo_pd(src, 0, a);
@@ -4367,7 +4372,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepu32lo_pd() {
+    const fn test_mm512_cvtepu32lo_pd() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepu32lo_pd(a);
         let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.);
@@ -4375,7 +4380,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepu32lo_pd() {
+    const fn test_mm512_mask_cvtepu32lo_pd() {
         let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm512_set1_pd(-1.);
         let r = _mm512_mask_cvtepu32lo_pd(src, 0, a);
@@ -4386,7 +4391,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi64_epi32() {
+    const fn test_mm512_cvtepi64_epi32() {
         let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi64_epi32(a);
         let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
@@ -4394,7 +4399,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi64_epi32() {
+    const fn test_mm512_mask_cvtepi64_epi32() {
         let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm256_set1_epi32(-1);
         let r = _mm512_mask_cvtepi64_epi32(src, 0, a);
@@ -4405,7 +4410,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi64_epi32() {
+    const fn test_mm512_maskz_cvtepi64_epi32() {
         let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi64_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -4415,7 +4420,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtepi64_epi32() {
+    const fn test_mm256_cvtepi64_epi32() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_cvtepi64_epi32(a);
         let e = _mm_set_epi32(1, 2, 3, 4);
@@ -4423,7 +4428,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi64_epi32() {
+    const fn test_mm256_mask_cvtepi64_epi32() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let src = _mm_set1_epi32(0);
         let r = _mm256_mask_cvtepi64_epi32(src, 0, a);
@@ -4434,7 +4439,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi64_epi32() {
+    const fn test_mm256_maskz_cvtepi64_epi32() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let r = _mm256_maskz_cvtepi64_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4444,7 +4449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtepi64_epi32() {
+    fn test_mm_cvtepi64_epi32() {
         let a = _mm_set_epi64x(3, 4);
         let r = _mm_cvtepi64_epi32(a);
         let e = _mm_set_epi32(0, 0, 3, 4);
@@ -4452,7 +4457,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi64_epi32() {
+    fn test_mm_mask_cvtepi64_epi32() {
         let a = _mm_set_epi64x(3, 4);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvtepi64_epi32(src, 0, a);
@@ -4463,7 +4468,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi64_epi32() {
+    fn test_mm_maskz_cvtepi64_epi32() {
         let a = _mm_set_epi64x(3, 4);
         let r = _mm_maskz_cvtepi64_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4473,7 +4478,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi64_epi16() {
+    const fn test_mm512_cvtepi64_epi16() {
         let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi64_epi16(a);
         let e = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
@@ -4481,7 +4486,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi64_epi16() {
+    const fn test_mm512_mask_cvtepi64_epi16() {
         let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set1_epi16(-1);
         let r = _mm512_mask_cvtepi64_epi16(src, 0, a);
@@ -4492,7 +4497,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi64_epi16() {
+    const fn test_mm512_maskz_cvtepi64_epi16() {
         let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi64_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4502,7 +4507,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtepi64_epi16() {
+    fn test_mm256_cvtepi64_epi16() {
         let a = _mm256_set_epi64x(12, 13, 14, 15);
         let r = _mm256_cvtepi64_epi16(a);
         let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15);
@@ -4510,7 +4515,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi64_epi16() {
+    fn test_mm256_mask_cvtepi64_epi16() {
         let a = _mm256_set_epi64x(12, 13, 14, 15);
         let src = _mm_set1_epi16(0);
         let r = _mm256_mask_cvtepi64_epi16(src, 0, a);
@@ -4521,7 +4526,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi64_epi16() {
+    fn test_mm256_maskz_cvtepi64_epi16() {
         let a = _mm256_set_epi64x(12, 13, 14, 15);
         let r = _mm256_maskz_cvtepi64_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4531,7 +4536,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtepi64_epi16() {
+    fn test_mm_cvtepi64_epi16() {
         let a = _mm_set_epi64x(14, 15);
         let r = _mm_cvtepi64_epi16(a);
         let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15);
@@ -4539,7 +4544,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi64_epi16() {
+    fn test_mm_mask_cvtepi64_epi16() {
         let a = _mm_set_epi64x(14, 15);
         let src = _mm_set1_epi16(0);
         let r = _mm_mask_cvtepi64_epi16(src, 0, a);
@@ -4550,7 +4555,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi64_epi16() {
+    fn test_mm_maskz_cvtepi64_epi16() {
         let a = _mm_set_epi64x(14, 15);
         let r = _mm_maskz_cvtepi64_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4560,7 +4565,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtepi64_epi8() {
+    fn test_mm512_cvtepi64_epi8() {
         let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_cvtepi64_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -4568,7 +4573,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi64_epi8() {
+    fn test_mm512_mask_cvtepi64_epi8() {
         let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
         let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let r = _mm512_mask_cvtepi64_epi8(src, 0, a);
@@ -4579,7 +4584,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtepi64_epi8() {
+    fn test_mm512_maskz_cvtepi64_epi8() {
         let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
         let r = _mm512_maskz_cvtepi64_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4589,7 +4594,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtepi64_epi8() {
+    fn test_mm256_cvtepi64_epi8() {
         let a = _mm256_set_epi64x(12, 13, 14, 15);
         let r = _mm256_cvtepi64_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15);
@@ -4597,7 +4602,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi64_epi8() {
+    fn test_mm256_mask_cvtepi64_epi8() {
         let a = _mm256_set_epi64x(12, 13, 14, 15);
         let src = _mm_set1_epi8(0);
         let r = _mm256_mask_cvtepi64_epi8(src, 0, a);
@@ -4608,7 +4613,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtepi64_epi8() {
+    fn test_mm256_maskz_cvtepi64_epi8() {
         let a = _mm256_set_epi64x(12, 13, 14, 15);
         let r = _mm256_maskz_cvtepi64_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4618,7 +4623,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtepi64_epi8() {
+    fn test_mm_cvtepi64_epi8() {
         let a = _mm_set_epi64x(14, 15);
         let r = _mm_cvtepi64_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15);
@@ -4626,7 +4631,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi64_epi8() {
+    fn test_mm_mask_cvtepi64_epi8() {
         let a = _mm_set_epi64x(14, 15);
         let src = _mm_set1_epi8(0);
         let r = _mm_mask_cvtepi64_epi8(src, 0, a);
@@ -4637,7 +4642,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtepi64_epi8() {
+    fn test_mm_maskz_cvtepi64_epi8() {
         let a = _mm_set_epi64x(14, 15);
         let r = _mm_maskz_cvtepi64_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4647,7 +4652,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtsepi64_epi32() {
+    fn test_mm512_cvtsepi64_epi32() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
         let r = _mm512_cvtsepi64_epi32(a);
         let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, i32::MIN, i32::MAX);
@@ -4655,7 +4660,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi64_epi32() {
+    fn test_mm512_mask_cvtsepi64_epi32() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
         let src = _mm256_set1_epi32(-1);
         let r = _mm512_mask_cvtsepi64_epi32(src, 0, a);
@@ -4666,7 +4671,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtsepi64_epi32() {
+    fn test_mm512_maskz_cvtsepi64_epi32() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
         let r = _mm512_maskz_cvtsepi64_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -4676,7 +4681,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtsepi64_epi32() {
+    fn test_mm256_cvtsepi64_epi32() {
         let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
         let r = _mm256_cvtsepi64_epi32(a);
         let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX);
@@ -4684,7 +4689,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi64_epi32() {
+    fn test_mm256_mask_cvtsepi64_epi32() {
         let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
         let src = _mm_set1_epi32(-1);
         let r = _mm256_mask_cvtsepi64_epi32(src, 0, a);
@@ -4695,7 +4700,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtsepi64_epi32() {
+    fn test_mm256_maskz_cvtsepi64_epi32() {
         let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
         let r = _mm256_maskz_cvtsepi64_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4705,7 +4710,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtsepi64_epi32() {
+    fn test_mm_cvtsepi64_epi32() {
         let a = _mm_set_epi64x(i64::MIN, i64::MAX);
         let r = _mm_cvtsepi64_epi32(a);
         let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX);
@@ -4713,7 +4718,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi64_epi32() {
+    fn test_mm_mask_cvtsepi64_epi32() {
         let a = _mm_set_epi64x(i64::MIN, i64::MAX);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvtsepi64_epi32(src, 0, a);
@@ -4724,7 +4729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtsepi64_epi32() {
+    fn test_mm_maskz_cvtsepi64_epi32() {
         let a = _mm_set_epi64x(i64::MIN, i64::MAX);
         let r = _mm_maskz_cvtsepi64_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4734,7 +4739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtsepi64_epi16() {
+    fn test_mm512_cvtsepi64_epi16() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
         let r = _mm512_cvtsepi64_epi16(a);
         let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, i16::MIN, i16::MAX);
@@ -4742,7 +4747,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi64_epi16() {
+    fn test_mm512_mask_cvtsepi64_epi16() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
         let src = _mm_set1_epi16(-1);
         let r = _mm512_mask_cvtsepi64_epi16(src, 0, a);
@@ -4753,7 +4758,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtsepi64_epi16() {
+    fn test_mm512_maskz_cvtsepi64_epi16() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
         let r = _mm512_maskz_cvtsepi64_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4763,7 +4768,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtsepi64_epi16() {
+    fn test_mm256_cvtsepi64_epi16() {
         let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
         let r = _mm256_cvtsepi64_epi16(a);
         let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX);
@@ -4771,7 +4776,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi64_epi16() {
+    fn test_mm256_mask_cvtsepi64_epi16() {
         let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
         let src = _mm_set1_epi16(0);
         let r = _mm256_mask_cvtsepi64_epi16(src, 0, a);
@@ -4782,7 +4787,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtsepi64_epi16() {
+    fn test_mm256_maskz_cvtsepi64_epi16() {
         let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
         let r = _mm256_maskz_cvtsepi64_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4792,7 +4797,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtsepi64_epi16() {
+    fn test_mm_cvtsepi64_epi16() {
         let a = _mm_set_epi64x(i64::MIN, i64::MAX);
         let r = _mm_cvtsepi64_epi16(a);
         let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX);
@@ -4800,7 +4805,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi64_epi16() {
+    fn test_mm_mask_cvtsepi64_epi16() {
         let a = _mm_set_epi64x(i64::MIN, i64::MAX);
         let src = _mm_set1_epi16(0);
         let r = _mm_mask_cvtsepi64_epi16(src, 0, a);
@@ -4811,7 +4816,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtsepi64_epi16() {
+    fn test_mm_maskz_cvtsepi64_epi16() {
         let a = _mm_set_epi64x(i64::MIN, i64::MAX);
         let r = _mm_maskz_cvtsepi64_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4821,7 +4826,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtsepi64_epi8() {
+    fn test_mm512_cvtsepi64_epi8() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
         let r = _mm512_cvtsepi64_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, i8::MIN, i8::MAX);
@@ -4829,7 +4834,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi64_epi8() {
+    fn test_mm512_mask_cvtsepi64_epi8() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
         let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let r = _mm512_mask_cvtsepi64_epi8(src, 0, a);
@@ -4846,7 +4851,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtsepi64_epi8() {
+    fn test_mm512_maskz_cvtsepi64_epi8() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
         let r = _mm512_maskz_cvtsepi64_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4856,7 +4861,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtsepi64_epi8() {
+    fn test_mm256_cvtsepi64_epi8() {
         let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
         let r = _mm256_cvtsepi64_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX);
@@ -4864,7 +4869,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi64_epi8() {
+    fn test_mm256_mask_cvtsepi64_epi8() {
         let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
         let src = _mm_set1_epi8(0);
         let r = _mm256_mask_cvtsepi64_epi8(src, 0, a);
@@ -4875,7 +4880,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtsepi64_epi8() {
+    fn test_mm256_maskz_cvtsepi64_epi8() {
         let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
         let r = _mm256_maskz_cvtsepi64_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4885,7 +4890,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtsepi64_epi8() {
+    fn test_mm_cvtsepi64_epi8() {
         let a = _mm_set_epi64x(i64::MIN, i64::MAX);
         let r = _mm_cvtsepi64_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX);
@@ -4893,7 +4898,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi64_epi8() {
+    fn test_mm_mask_cvtsepi64_epi8() {
         let a = _mm_set_epi64x(i64::MIN, i64::MAX);
         let src = _mm_set1_epi8(0);
         let r = _mm_mask_cvtsepi64_epi8(src, 0, a);
@@ -4904,7 +4909,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtsepi64_epi8() {
+    fn test_mm_maskz_cvtsepi64_epi8() {
         let a = _mm_set_epi64x(i64::MIN, i64::MAX);
         let r = _mm_maskz_cvtsepi64_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4914,7 +4919,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtusepi64_epi32() {
+    fn test_mm512_cvtusepi64_epi32() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
         let r = _mm512_cvtusepi64_epi32(a);
         let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, -1, -1);
@@ -4922,7 +4927,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi64_epi32() {
+    fn test_mm512_mask_cvtusepi64_epi32() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
         let src = _mm256_set1_epi32(-1);
         let r = _mm512_mask_cvtusepi64_epi32(src, 0, a);
@@ -4933,7 +4938,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtusepi64_epi32() {
+    fn test_mm512_maskz_cvtusepi64_epi32() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
         let r = _mm512_maskz_cvtusepi64_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -4943,7 +4948,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtusepi64_epi32() {
+    fn test_mm256_cvtusepi64_epi32() {
         let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
         let r = _mm256_cvtusepi64_epi32(a);
         let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32);
@@ -4951,7 +4956,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi64_epi32() {
+    fn test_mm256_mask_cvtusepi64_epi32() {
         let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
         let src = _mm_set1_epi32(0);
         let r = _mm256_mask_cvtusepi64_epi32(src, 0, a);
@@ -4962,7 +4967,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtusepi64_epi32() {
+    fn test_mm256_maskz_cvtusepi64_epi32() {
         let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
         let r = _mm256_maskz_cvtusepi64_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -4972,7 +4977,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtusepi64_epi32() {
+    fn test_mm_cvtusepi64_epi32() {
         let a = _mm_set_epi64x(6, i64::MAX);
         let r = _mm_cvtusepi64_epi32(a);
         let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32);
@@ -4980,7 +4985,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi64_epi32() {
+    fn test_mm_mask_cvtusepi64_epi32() {
         let a = _mm_set_epi64x(6, i64::MAX);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvtusepi64_epi32(src, 0, a);
@@ -4991,7 +4996,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtusepi64_epi32() {
+    fn test_mm_maskz_cvtusepi64_epi32() {
         let a = _mm_set_epi64x(6, i64::MAX);
         let r = _mm_maskz_cvtusepi64_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5001,7 +5006,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtusepi64_epi16() {
+    fn test_mm512_cvtusepi64_epi16() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
         let r = _mm512_cvtusepi64_epi16(a);
         let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, -1, -1);
@@ -5009,7 +5014,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi64_epi16() {
+    fn test_mm512_mask_cvtusepi64_epi16() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
         let src = _mm_set1_epi16(-1);
         let r = _mm512_mask_cvtusepi64_epi16(src, 0, a);
@@ -5020,7 +5025,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtusepi64_epi16() {
+    fn test_mm512_maskz_cvtusepi64_epi16() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
         let r = _mm512_maskz_cvtusepi64_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5030,7 +5035,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtusepi64_epi16() {
+    fn test_mm256_cvtusepi64_epi16() {
         let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
         let r = _mm256_cvtusepi64_epi16(a);
         let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16);
@@ -5038,7 +5043,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi64_epi16() {
+    fn test_mm256_mask_cvtusepi64_epi16() {
         let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
         let src = _mm_set1_epi16(0);
         let r = _mm256_mask_cvtusepi64_epi16(src, 0, a);
@@ -5049,7 +5054,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtusepi64_epi16() {
+    fn test_mm256_maskz_cvtusepi64_epi16() {
         let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
         let r = _mm256_maskz_cvtusepi64_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5059,7 +5064,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtusepi64_epi16() {
+    fn test_mm_cvtusepi64_epi16() {
         let a = _mm_set_epi64x(6, i64::MAX);
         let r = _mm_cvtusepi64_epi16(a);
         let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16);
@@ -5067,7 +5072,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi64_epi16() {
+    fn test_mm_mask_cvtusepi64_epi16() {
         let a = _mm_set_epi64x(6, i64::MAX);
         let src = _mm_set1_epi16(0);
         let r = _mm_mask_cvtusepi64_epi16(src, 0, a);
@@ -5078,7 +5083,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtusepi64_epi16() {
+    fn test_mm_maskz_cvtusepi64_epi16() {
         let a = _mm_set_epi64x(6, i64::MAX);
         let r = _mm_maskz_cvtusepi64_epi16(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5088,7 +5093,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtusepi64_epi8() {
+    fn test_mm512_cvtusepi64_epi8() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
         let r = _mm512_cvtusepi64_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, -1, -1);
@@ -5096,7 +5101,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi64_epi8() {
+    fn test_mm512_mask_cvtusepi64_epi8() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
         let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let r = _mm512_mask_cvtusepi64_epi8(src, 0, a);
@@ -5107,7 +5112,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtusepi64_epi8() {
+    fn test_mm512_maskz_cvtusepi64_epi8() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
         let r = _mm512_maskz_cvtusepi64_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5117,7 +5122,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvtusepi64_epi8() {
+    fn test_mm256_cvtusepi64_epi8() {
         let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
         let r = _mm256_cvtusepi64_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8);
@@ -5125,7 +5130,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi64_epi8() {
+    fn test_mm256_mask_cvtusepi64_epi8() {
         let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
         let src = _mm_set1_epi8(0);
         let r = _mm256_mask_cvtusepi64_epi8(src, 0, a);
@@ -5136,7 +5141,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvtusepi64_epi8() {
+    fn test_mm256_maskz_cvtusepi64_epi8() {
         let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
         let r = _mm256_maskz_cvtusepi64_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5146,7 +5151,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvtusepi64_epi8() {
+    fn test_mm_cvtusepi64_epi8() {
         let a = _mm_set_epi64x(6, i64::MAX);
         let r = _mm_cvtusepi64_epi8(a);
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8);
@@ -5154,7 +5159,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi64_epi8() {
+    fn test_mm_mask_cvtusepi64_epi8() {
         let a = _mm_set_epi64x(6, i64::MAX);
         let src = _mm_set1_epi8(0);
         let r = _mm_mask_cvtusepi64_epi8(src, 0, a);
@@ -5165,7 +5170,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvtusepi64_epi8() {
+    fn test_mm_maskz_cvtusepi64_epi8() {
         let a = _mm_set_epi64x(6, i64::MAX);
         let r = _mm_maskz_cvtusepi64_epi8(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5175,7 +5180,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtt_roundpd_epi32() {
+    fn test_mm512_cvtt_roundpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a);
         let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7);
@@ -5183,7 +5188,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtt_roundpd_epi32() {
+    fn test_mm512_mask_cvtt_roundpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
@@ -5194,7 +5199,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtt_roundpd_epi32() {
+    fn test_mm512_maskz_cvtt_roundpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -5204,7 +5209,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvtt_roundpd_epu32() {
+    fn test_mm512_cvtt_roundpd_epu32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(a);
         let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1);
@@ -5212,7 +5217,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtt_roundpd_epu32() {
+    fn test_mm512_mask_cvtt_roundpd_epu32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
@@ -5223,7 +5228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvtt_roundpd_epu32() {
+    fn test_mm512_maskz_cvtt_roundpd_epu32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -5233,7 +5238,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvttpd_epi32() {
+    fn test_mm512_cvttpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvttpd_epi32(a);
         let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7);
@@ -5241,7 +5246,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvttpd_epi32() {
+    fn test_mm512_mask_cvttpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm512_mask_cvttpd_epi32(src, 0, a);
@@ -5252,7 +5257,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvttpd_epi32() {
+    fn test_mm512_maskz_cvttpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvttpd_epi32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -5262,7 +5267,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvttpd_epi32() {
+    fn test_mm256_mask_cvttpd_epi32() {
         let a = _mm256_setr_pd(4., -5.5, 6., -7.5);
         let src = _mm_set1_epi32(0);
         let r = _mm256_mask_cvttpd_epi32(src, 0, a);
@@ -5273,7 +5278,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttpd_epi32() {
+    fn test_mm256_maskz_cvttpd_epi32() {
         let a = _mm256_setr_pd(4., -5.5, 6., -7.5);
         let r = _mm256_maskz_cvttpd_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5283,7 +5288,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvttpd_epi32() {
+    fn test_mm_mask_cvttpd_epi32() {
         let a = _mm_set_pd(6., -7.5);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvttpd_epi32(src, 0, a);
@@ -5294,7 +5299,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvttpd_epi32() {
+    fn test_mm_maskz_cvttpd_epi32() {
         let a = _mm_set_pd(6., -7.5);
         let r = _mm_maskz_cvttpd_epi32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5304,7 +5309,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvttpd_epu32() {
+    fn test_mm512_cvttpd_epu32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvttpd_epu32(a);
         let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1);
@@ -5312,7 +5317,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvttpd_epu32() {
+    fn test_mm512_mask_cvttpd_epu32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm512_mask_cvttpd_epu32(src, 0, a);
@@ -5323,7 +5328,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvttpd_epu32() {
+    fn test_mm512_maskz_cvttpd_epu32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvttpd_epu32(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -5333,7 +5338,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cvttpd_epu32() {
+    fn test_mm256_cvttpd_epu32() {
         let a = _mm256_set_pd(4., 5.5, 6., 7.5);
         let r = _mm256_cvttpd_epu32(a);
         let e = _mm_set_epi32(4, 5, 6, 7);
@@ -5341,7 +5346,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvttpd_epu32() {
+    fn test_mm256_mask_cvttpd_epu32() {
         let a = _mm256_set_pd(4., 5.5, 6., 7.5);
         let src = _mm_set1_epi32(0);
         let r = _mm256_mask_cvttpd_epu32(src, 0, a);
@@ -5352,7 +5357,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_cvttpd_epu32() {
+    fn test_mm256_maskz_cvttpd_epu32() {
         let a = _mm256_set_pd(4., 5.5, 6., 7.5);
         let r = _mm256_maskz_cvttpd_epu32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5362,7 +5367,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cvttpd_epu32() {
+    fn test_mm_cvttpd_epu32() {
         let a = _mm_set_pd(6., 7.5);
         let r = _mm_cvttpd_epu32(a);
         let e = _mm_set_epi32(0, 0, 6, 7);
@@ -5370,7 +5375,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvttpd_epu32() {
+    fn test_mm_mask_cvttpd_epu32() {
         let a = _mm_set_pd(6., 7.5);
         let src = _mm_set1_epi32(0);
         let r = _mm_mask_cvttpd_epu32(src, 0, a);
@@ -5381,7 +5386,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_cvttpd_epu32() {
+    fn test_mm_maskz_cvttpd_epu32() {
         let a = _mm_set_pd(6., 7.5);
         let r = _mm_maskz_cvttpd_epu32(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -5391,7 +5396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_add_round_pd() {
+    fn test_mm512_add_round_pd() {
         let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
         let b = _mm512_set1_pd(-1.);
         let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -5403,7 +5408,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_add_round_pd() {
+    fn test_mm512_mask_add_round_pd() {
         let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
         let b = _mm512_set1_pd(-1.);
         let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -5418,7 +5423,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_add_round_pd() {
+    fn test_mm512_maskz_add_round_pd() {
         let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
         let b = _mm512_set1_pd(-1.);
         let r =
@@ -5432,7 +5437,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sub_round_pd() {
+    fn test_mm512_sub_round_pd() {
         let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let r = _mm512_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -5444,7 +5449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sub_round_pd() {
+    fn test_mm512_mask_sub_round_pd() {
         let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let r = _mm512_mask_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -5459,7 +5464,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sub_round_pd() {
+    fn test_mm512_maskz_sub_round_pd() {
         let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let r =
@@ -5473,7 +5478,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mul_round_pd() {
+    fn test_mm512_mul_round_pd() {
         let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.);
         let b = _mm512_set1_pd(0.1);
         let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -5494,7 +5499,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_mul_round_pd() {
+    fn test_mm512_mask_mul_round_pd() {
         let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.);
         let b = _mm512_set1_pd(0.1);
         let r = _mm512_mask_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -5518,7 +5523,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_mul_round_pd() {
+    fn test_mm512_maskz_mul_round_pd() {
         let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.);
         let b = _mm512_set1_pd(0.1);
         let r =
@@ -5541,7 +5546,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_div_round_pd() {
+    fn test_mm512_div_round_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(3.);
         let r = _mm512_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -5553,7 +5558,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_div_round_pd() {
+    fn test_mm512_mask_div_round_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(3.);
         let r = _mm512_mask_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -5577,7 +5582,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_div_round_pd() {
+    fn test_mm512_maskz_div_round_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(3.);
         let r =
@@ -5600,7 +5605,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sqrt_round_pd() {
+    fn test_mm512_sqrt_round_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         let e = _mm512_set1_pd(1.7320508075688772);
@@ -5611,7 +5616,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sqrt_round_pd() {
+    fn test_mm512_mask_sqrt_round_pd() {
         let a = _mm512_set1_pd(3.);
         let r =
             _mm512_mask_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
@@ -5633,7 +5638,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sqrt_round_pd() {
+    fn test_mm512_maskz_sqrt_round_pd() {
         let a = _mm512_set1_pd(3.);
         let r =
             _mm512_maskz_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
@@ -5655,7 +5660,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmadd_round_pd() {
+    fn test_mm512_fmadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5668,7 +5673,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmadd_round_pd() {
+    fn test_mm512_mask_fmadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5693,7 +5698,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmadd_round_pd() {
+    fn test_mm512_maskz_fmadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5709,7 +5714,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmadd_round_pd() {
+    fn test_mm512_mask3_fmadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5725,7 +5730,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmsub_round_pd() {
+    fn test_mm512_fmsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(1.);
@@ -5738,7 +5743,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmsub_round_pd() {
+    fn test_mm512_mask_fmsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(1.);
@@ -5763,7 +5768,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmsub_round_pd() {
+    fn test_mm512_maskz_fmsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(1.);
@@ -5779,7 +5784,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmsub_round_pd() {
+    fn test_mm512_mask3_fmsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(1.);
@@ -5795,7 +5800,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmaddsub_round_pd() {
+    fn test_mm512_fmaddsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5818,7 +5823,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmaddsub_round_pd() {
+    fn test_mm512_mask_fmaddsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5843,7 +5848,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmaddsub_round_pd() {
+    fn test_mm512_maskz_fmaddsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5859,7 +5864,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmaddsub_round_pd() {
+    fn test_mm512_mask3_fmaddsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5875,7 +5880,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fmsubadd_round_pd() {
+    fn test_mm512_fmsubadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5898,7 +5903,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fmsubadd_round_pd() {
+    fn test_mm512_mask_fmsubadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5923,7 +5928,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fmsubadd_round_pd() {
+    fn test_mm512_maskz_fmsubadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5939,7 +5944,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fmsubadd_round_pd() {
+    fn test_mm512_mask3_fmsubadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -5955,7 +5960,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fnmadd_round_pd() {
+    fn test_mm512_fnmadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(1.);
@@ -5969,7 +5974,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fnmadd_round_pd() {
+    fn test_mm512_mask_fnmadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(1.);
@@ -5994,7 +5999,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fnmadd_round_pd() {
+    fn test_mm512_maskz_fnmadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(1.);
@@ -6010,7 +6015,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fnmadd_round_pd() {
+    fn test_mm512_mask3_fnmadd_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(1.);
@@ -6026,7 +6031,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fnmsub_round_pd() {
+    fn test_mm512_fnmsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -6040,7 +6045,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fnmsub_round_pd() {
+    fn test_mm512_mask_fnmsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -6065,7 +6070,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fnmsub_round_pd() {
+    fn test_mm512_maskz_fnmsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -6081,7 +6086,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask3_fnmsub_round_pd() {
+    fn test_mm512_mask3_fnmsub_round_pd() {
         let a = _mm512_set1_pd(0.000000000000000007);
         let b = _mm512_set1_pd(1.);
         let c = _mm512_set1_pd(-1.);
@@ -6097,7 +6102,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_max_round_pd() {
+    fn test_mm512_max_round_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b);
@@ -6106,7 +6111,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_max_round_pd() {
+    fn test_mm512_mask_max_round_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_mask_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -6117,7 +6122,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_max_round_pd() {
+    fn test_mm512_maskz_max_round_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_maskz_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -6128,7 +6133,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_min_round_pd() {
+    fn test_mm512_min_round_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b);
@@ -6137,7 +6142,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_min_round_pd() {
+    fn test_mm512_mask_min_round_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_mask_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
@@ -6148,7 +6153,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_min_round_pd() {
+    fn test_mm512_maskz_min_round_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
         let r = _mm512_maskz_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
@@ -6159,7 +6164,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_getexp_round_pd() {
+    fn test_mm512_getexp_round_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm512_set1_pd(1.);
@@ -6167,7 +6172,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_getexp_round_pd() {
+    fn test_mm512_mask_getexp_round_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_mask_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
         assert_eq_m512d(r, a);
@@ -6177,7 +6182,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_getexp_round_pd() {
+    fn test_mm512_maskz_getexp_round_pd() {
         let a = _mm512_set1_pd(3.);
         let r = _mm512_maskz_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -6187,7 +6192,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_roundscale_round_pd() {
+    fn test_mm512_roundscale_round_pd() {
         let a = _mm512_set1_pd(1.1);
         let r = _mm512_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm512_set1_pd(1.0);
@@ -6195,7 +6200,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_roundscale_round_pd() {
+    fn test_mm512_mask_roundscale_round_pd() {
         let a = _mm512_set1_pd(1.1);
         let r = _mm512_mask_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
         let e = _mm512_set1_pd(1.1);
@@ -6206,7 +6211,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_roundscale_round_pd() {
+    fn test_mm512_maskz_roundscale_round_pd() {
         let a = _mm512_set1_pd(1.1);
         let r = _mm512_maskz_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -6216,7 +6221,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_scalef_round_pd() {
+    fn test_mm512_scalef_round_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(3.);
         let r = _mm512_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
@@ -6225,7 +6230,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_scalef_round_pd() {
+    fn test_mm512_mask_scalef_round_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(3.);
         let r = _mm512_mask_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -6240,7 +6245,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_scalef_round_pd() {
+    fn test_mm512_maskz_scalef_round_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(3.);
         let r = _mm512_maskz_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@@ -6255,7 +6260,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_fixupimm_round_pd() {
+    fn test_mm512_fixupimm_round_pd() {
         let a = _mm512_set1_pd(f64::NAN);
         let b = _mm512_set1_pd(f64::MAX);
         let c = _mm512_set1_epi64(i32::MAX as i64);
@@ -6265,7 +6270,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_fixupimm_round_pd() {
+    fn test_mm512_mask_fixupimm_round_pd() {
         let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
         let b = _mm512_set1_pd(f64::MAX);
         let c = _mm512_set1_epi64(i32::MAX as i64);
@@ -6275,7 +6280,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_fixupimm_round_pd() {
+    fn test_mm512_maskz_fixupimm_round_pd() {
         let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
         let b = _mm512_set1_pd(f64::MAX);
         let c = _mm512_set1_epi64(i32::MAX as i64);
@@ -6285,7 +6290,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_getmant_round_pd() {
+    fn test_mm512_getmant_round_pd() {
         let a = _mm512_set1_pd(10.);
         let r = _mm512_getmant_round_pd::<
             _MM_MANT_NORM_1_2,
@@ -6297,7 +6302,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_getmant_round_pd() {
+    fn test_mm512_mask_getmant_round_pd() {
         let a = _mm512_set1_pd(10.);
         let r = _mm512_mask_getmant_round_pd::<
             _MM_MANT_NORM_1_2,
@@ -6315,7 +6320,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_getmant_round_pd() {
+    fn test_mm512_maskz_getmant_round_pd() {
         let a = _mm512_set1_pd(10.);
         let r = _mm512_maskz_getmant_round_pd::<
             _MM_MANT_NORM_1_2,
@@ -6333,7 +6338,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundps_pd() {
+    fn test_mm512_cvt_roundps_pd() {
         let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
@@ -6341,7 +6346,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundps_pd() {
+    fn test_mm512_mask_cvt_roundps_pd() {
         let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm512_set1_pd(0.);
         let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0, a);
@@ -6352,7 +6357,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundps_pd() {
+    fn test_mm512_maskz_cvt_roundps_pd() {
         let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -6362,7 +6367,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundpd_ps() {
+    fn test_mm512_cvt_roundpd_ps() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
@@ -6370,7 +6375,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundpd_ps() {
+    fn test_mm512_mask_cvt_roundpd_ps() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm256_set1_ps(0.);
         let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0, a);
@@ -6381,7 +6386,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundpd_ps() {
+    fn test_mm512_maskz_cvt_roundpd_ps() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
         assert_eq_m256(r, _mm256_setzero_ps());
@@ -6391,7 +6396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundpd_epi32() {
+    fn test_mm512_cvt_roundpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8);
@@ -6399,7 +6404,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundpd_epi32() {
+    fn test_mm512_mask_cvt_roundpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a);
@@ -6410,7 +6415,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundpd_epi32() {
+    fn test_mm512_maskz_cvt_roundpd_epi32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -6420,7 +6425,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cvt_roundpd_epu32() {
+    fn test_mm512_cvt_roundpd_epu32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(a);
         let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1);
@@ -6428,7 +6433,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvt_roundpd_epu32() {
+    fn test_mm512_mask_cvt_roundpd_epu32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let src = _mm256_set1_epi32(0);
         let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a);
@@ -6439,7 +6444,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_cvt_roundpd_epu32() {
+    fn test_mm512_maskz_cvt_roundpd_epu32() {
         let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
         let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -6449,45 +6454,48 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setzero_pd() {
+    const fn test_mm512_setzero_pd() {
         assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.));
     }
 
-    unsafe fn test_mm512_set1_epi64() {
+    #[simd_test(enable = "avx512f")]
+    const fn test_mm512_set1_epi64() {
         let r = _mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2);
         assert_eq_m512i(r, _mm512_set1_epi64(2));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set1_pd() {
+    const fn test_mm512_set1_pd() {
         let expected = _mm512_set_pd(2., 2., 2., 2., 2., 2., 2., 2.);
         assert_eq_m512d(expected, _mm512_set1_pd(2.));
     }
 
-    unsafe fn test_mm512_set4_epi64() {
+    #[simd_test(enable = "avx512f")]
+    const fn test_mm512_set4_epi64() {
         let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
         assert_eq_m512i(r, _mm512_set4_epi64(4, 3, 2, 1));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set4_pd() {
+    const fn test_mm512_set4_pd() {
         let r = _mm512_set_pd(4., 3., 2., 1., 4., 3., 2., 1.);
         assert_eq_m512d(r, _mm512_set4_pd(4., 3., 2., 1.));
     }
 
-    unsafe fn test_mm512_setr4_epi64() {
+    #[simd_test(enable = "avx512f")]
+    const fn test_mm512_setr4_epi64() {
         let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
         assert_eq_m512i(r, _mm512_setr4_epi64(1, 2, 3, 4));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setr4_pd() {
+    const fn test_mm512_setr4_pd() {
         let r = _mm512_set_pd(4., 3., 2., 1., 4., 3., 2., 1.);
         assert_eq_m512d(r, _mm512_setr4_pd(1., 2., 3., 4.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmplt_pd_mask() {
+    fn test_mm512_cmplt_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6496,7 +6504,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmplt_pd_mask() {
+    fn test_mm512_mask_cmplt_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6506,7 +6514,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpnlt_pd_mask() {
+    fn test_mm512_cmpnlt_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6514,7 +6522,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpnlt_pd_mask() {
+    fn test_mm512_mask_cmpnlt_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6523,7 +6531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmple_pd_mask() {
+    fn test_mm512_cmple_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6531,7 +6539,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmple_pd_mask() {
+    fn test_mm512_mask_cmple_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6540,7 +6548,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpnle_pd_mask() {
+    fn test_mm512_cmpnle_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6549,7 +6557,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpnle_pd_mask() {
+    fn test_mm512_mask_cmpnle_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6559,7 +6567,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpeq_pd_mask() {
+    fn test_mm512_cmpeq_pd_mask() {
         let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
         let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
         let m = _mm512_cmpeq_pd_mask(b, a);
@@ -6567,7 +6575,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpeq_pd_mask() {
+    fn test_mm512_mask_cmpeq_pd_mask() {
         let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
         let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
         let mask = 0b01111010;
@@ -6576,7 +6584,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpneq_pd_mask() {
+    fn test_mm512_cmpneq_pd_mask() {
         let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
         let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
         let m = _mm512_cmpneq_pd_mask(b, a);
@@ -6584,7 +6592,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpneq_pd_mask() {
+    fn test_mm512_mask_cmpneq_pd_mask() {
         let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
         let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
         let mask = 0b01111010;
@@ -6593,7 +6601,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmp_pd_mask() {
+    fn test_mm512_cmp_pd_mask() {
         let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
         let m = _mm512_cmp_pd_mask::<_CMP_LT_OQ>(a, b);
@@ -6601,7 +6609,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmp_pd_mask() {
+    fn test_mm512_mask_cmp_pd_mask() {
         let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
         let mask = 0b01100110;
@@ -6610,7 +6618,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmp_pd_mask() {
+    fn test_mm256_cmp_pd_mask() {
         let a = _mm256_set_pd(0., 1., -1., 13.);
         let b = _mm256_set1_pd(1.);
         let m = _mm256_cmp_pd_mask::<_CMP_LT_OQ>(a, b);
@@ -6618,7 +6626,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_pd_mask() {
+    fn test_mm256_mask_cmp_pd_mask() {
         let a = _mm256_set_pd(0., 1., -1., 13.);
         let b = _mm256_set1_pd(1.);
         let mask = 0b11111111;
@@ -6627,7 +6635,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmp_pd_mask() {
+    fn test_mm_cmp_pd_mask() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set1_pd(1.);
         let m = _mm_cmp_pd_mask::<_CMP_LT_OQ>(a, b);
@@ -6635,7 +6643,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmp_pd_mask() {
+    fn test_mm_mask_cmp_pd_mask() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set1_pd(1.);
         let mask = 0b11111111;
@@ -6644,7 +6652,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmp_round_pd_mask() {
+    fn test_mm512_cmp_round_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6653,7 +6661,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmp_round_pd_mask() {
+    fn test_mm512_mask_cmp_round_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
         let b = _mm512_set1_pd(-1.);
@@ -6663,7 +6671,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpord_pd_mask() {
+    fn test_mm512_cmpord_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
         #[rustfmt::skip]
@@ -6673,7 +6681,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpord_pd_mask() {
+    fn test_mm512_mask_cmpord_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
         #[rustfmt::skip]
@@ -6684,7 +6692,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpunord_pd_mask() {
+    fn test_mm512_cmpunord_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
         #[rustfmt::skip]
@@ -6695,7 +6703,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpunord_pd_mask() {
+    fn test_mm512_mask_cmpunord_pd_mask() {
         #[rustfmt::skip]
         let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
         #[rustfmt::skip]
@@ -6706,7 +6714,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmplt_epu64_mask() {
+    const fn test_mm512_cmplt_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let m = _mm512_cmplt_epu64_mask(a, b);
@@ -6714,7 +6722,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmplt_epu64_mask() {
+    const fn test_mm512_mask_cmplt_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b01111010;
@@ -6723,7 +6731,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmplt_epu64_mask() {
+    const fn test_mm256_cmplt_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, 2, 100);
         let b = _mm256_set1_epi64x(2);
         let r = _mm256_cmplt_epu64_mask(a, b);
@@ -6731,7 +6739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmplt_epu64_mask() {
+    const fn test_mm256_mask_cmplt_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, 2, 100);
         let b = _mm256_set1_epi64x(2);
         let mask = 0b11111111;
@@ -6740,7 +6748,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmplt_epu64_mask() {
+    const fn test_mm_cmplt_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(2);
         let r = _mm_cmplt_epu64_mask(a, b);
@@ -6748,7 +6756,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmplt_epu64_mask() {
+    const fn test_mm_mask_cmplt_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(2);
         let mask = 0b11111111;
@@ -6757,7 +6765,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpgt_epu64_mask() {
+    const fn test_mm512_cmpgt_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let m = _mm512_cmpgt_epu64_mask(b, a);
@@ -6765,7 +6773,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpgt_epu64_mask() {
+    const fn test_mm512_mask_cmpgt_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b01111010;
@@ -6774,7 +6782,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpgt_epu64_mask() {
+    const fn test_mm256_cmpgt_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set1_epi64x(1);
         let r = _mm256_cmpgt_epu64_mask(a, b);
@@ -6782,7 +6790,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpgt_epu64_mask() {
+    const fn test_mm256_mask_cmpgt_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let b = _mm256_set1_epi64x(1);
         let mask = 0b11111111;
@@ -6791,7 +6799,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpgt_epu64_mask() {
+    const fn test_mm_cmpgt_epu64_mask() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set1_epi64x(1);
         let r = _mm_cmpgt_epu64_mask(a, b);
@@ -6799,7 +6807,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpgt_epu64_mask() {
+    const fn test_mm_mask_cmpgt_epu64_mask() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set1_epi64x(1);
         let mask = 0b11111111;
@@ -6808,7 +6816,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmple_epu64_mask() {
+    const fn test_mm512_cmple_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         assert_eq!(
@@ -6818,7 +6826,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmple_epu64_mask() {
+    const fn test_mm512_mask_cmple_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b01111010;
@@ -6826,7 +6834,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmple_epu64_mask() {
+    const fn test_mm256_cmple_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, 2, 1);
         let b = _mm256_set1_epi64x(1);
         let r = _mm256_cmple_epu64_mask(a, b);
@@ -6834,7 +6842,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmple_epu64_mask() {
+    const fn test_mm256_mask_cmple_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, 2, 1);
         let b = _mm256_set1_epi64x(1);
         let mask = 0b11111111;
@@ -6843,7 +6851,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmple_epu64_mask() {
+    const fn test_mm_cmple_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let r = _mm_cmple_epu64_mask(a, b);
@@ -6851,7 +6859,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmple_epu64_mask() {
+    const fn test_mm_mask_cmple_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let mask = 0b11111111;
@@ -6860,7 +6868,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpge_epu64_mask() {
+    const fn test_mm512_cmpge_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         assert_eq!(
@@ -6870,7 +6878,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpge_epu64_mask() {
+    const fn test_mm512_mask_cmpge_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b11111111;
@@ -6879,7 +6887,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpge_epu64_mask() {
+    const fn test_mm256_cmpge_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, 2, u64::MAX as i64);
         let b = _mm256_set1_epi64x(1);
         let r = _mm256_cmpge_epu64_mask(a, b);
@@ -6887,7 +6895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpge_epu64_mask() {
+    const fn test_mm256_mask_cmpge_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, 2, u64::MAX as i64);
         let b = _mm256_set1_epi64x(1);
         let mask = 0b11111111;
@@ -6896,7 +6904,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpge_epu64_mask() {
+    const fn test_mm_cmpge_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let r = _mm_cmpge_epu64_mask(a, b);
@@ -6904,7 +6912,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpge_epu64_mask() {
+    const fn test_mm_mask_cmpge_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let mask = 0b11111111;
@@ -6913,7 +6921,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpeq_epu64_mask() {
+    const fn test_mm512_cmpeq_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
         let m = _mm512_cmpeq_epu64_mask(b, a);
@@ -6921,7 +6929,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpeq_epu64_mask() {
+    const fn test_mm512_mask_cmpeq_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
         let mask = 0b01111010;
@@ -6930,7 +6938,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpeq_epu64_mask() {
+    const fn test_mm256_cmpeq_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64);
         let b = _mm256_set_epi64x(0, 1, 13, 42);
         let m = _mm256_cmpeq_epu64_mask(b, a);
@@ -6938,7 +6946,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpeq_epu64_mask() {
+    const fn test_mm256_mask_cmpeq_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64);
         let b = _mm256_set_epi64x(0, 1, 13, 42);
         let mask = 0b11111111;
@@ -6947,7 +6955,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpeq_epu64_mask() {
+    const fn test_mm_cmpeq_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(0, 1);
         let m = _mm_cmpeq_epu64_mask(b, a);
@@ -6955,7 +6963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpeq_epu64_mask() {
+    const fn test_mm_mask_cmpeq_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(0, 1);
         let mask = 0b11111111;
@@ -6964,7 +6972,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpneq_epu64_mask() {
+    const fn test_mm512_cmpneq_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
         let m = _mm512_cmpneq_epu64_mask(b, a);
@@ -6972,7 +6980,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpneq_epu64_mask() {
+    const fn test_mm512_mask_cmpneq_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, -100, 100);
         let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
         let mask = 0b01111010;
@@ -6981,7 +6989,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpneq_epu64_mask() {
+    const fn test_mm256_cmpneq_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64);
         let b = _mm256_set_epi64x(0, 1, 13, 42);
         let r = _mm256_cmpneq_epu64_mask(b, a);
@@ -6989,7 +6997,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpneq_epu64_mask() {
+    const fn test_mm256_mask_cmpneq_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64);
         let b = _mm256_set_epi64x(0, 1, 13, 42);
         let mask = 0b11111111;
@@ -6998,7 +7006,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpneq_epu64_mask() {
+    const fn test_mm_cmpneq_epu64_mask() {
         let a = _mm_set_epi64x(-1, u64::MAX as i64);
         let b = _mm_set_epi64x(13, 42);
         let r = _mm_cmpneq_epu64_mask(b, a);
@@ -7006,7 +7014,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpneq_epu64_mask() {
+    const fn test_mm_mask_cmpneq_epu64_mask() {
         let a = _mm_set_epi64x(-1, u64::MAX as i64);
         let b = _mm_set_epi64x(13, 42);
         let mask = 0b11111111;
@@ -7015,7 +7023,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmp_epu64_mask() {
+    const fn test_mm512_cmp_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let m = _mm512_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b);
@@ -7023,7 +7031,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmp_epu64_mask() {
+    const fn test_mm512_mask_cmp_epu64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b01111010;
@@ -7032,7 +7040,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmp_epu64_mask() {
+    const fn test_mm256_cmp_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 100);
         let b = _mm256_set1_epi64x(1);
         let m = _mm256_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b);
@@ -7040,7 +7048,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_epu64_mask() {
+    const fn test_mm256_mask_cmp_epu64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 100);
         let b = _mm256_set1_epi64x(1);
         let mask = 0b11111111;
@@ -7049,7 +7057,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmp_epu64_mask() {
+    const fn test_mm_cmp_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let m = _mm_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b);
@@ -7057,7 +7065,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmp_epu64_mask() {
+    const fn test_mm_mask_cmp_epu64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let mask = 0b11111111;
@@ -7066,7 +7074,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmplt_epi64_mask() {
+    const fn test_mm512_cmplt_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let m = _mm512_cmplt_epi64_mask(a, b);
@@ -7074,7 +7082,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmplt_epi64_mask() {
+    const fn test_mm512_mask_cmplt_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b01100110;
@@ -7083,7 +7091,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmplt_epi64_mask() {
+    const fn test_mm256_cmplt_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, -13);
         let b = _mm256_set1_epi64x(-1);
         let r = _mm256_cmplt_epi64_mask(a, b);
@@ -7091,7 +7099,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmplt_epi64_mask() {
+    const fn test_mm256_mask_cmplt_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, -13);
         let b = _mm256_set1_epi64x(-1);
         let mask = 0b11111111;
@@ -7100,7 +7108,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmplt_epi64_mask() {
+    const fn test_mm_cmplt_epi64_mask() {
         let a = _mm_set_epi64x(-1, -13);
         let b = _mm_set1_epi64x(-1);
         let r = _mm_cmplt_epi64_mask(a, b);
@@ -7108,7 +7116,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmplt_epi64_mask() {
+    const fn test_mm_mask_cmplt_epi64_mask() {
         let a = _mm_set_epi64x(-1, -13);
         let b = _mm_set1_epi64x(-1);
         let mask = 0b11111111;
@@ -7117,7 +7125,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpgt_epi64_mask() {
+    const fn test_mm512_cmpgt_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let m = _mm512_cmpgt_epi64_mask(b, a);
@@ -7125,7 +7133,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpgt_epi64_mask() {
+    const fn test_mm512_mask_cmpgt_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b01100110;
@@ -7134,7 +7142,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpgt_epi64_mask() {
+    const fn test_mm256_cmpgt_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 13);
         let b = _mm256_set1_epi64x(-1);
         let r = _mm256_cmpgt_epi64_mask(a, b);
@@ -7142,7 +7150,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpgt_epi64_mask() {
+    const fn test_mm256_mask_cmpgt_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 13);
         let b = _mm256_set1_epi64x(-1);
         let mask = 0b11111111;
@@ -7151,7 +7159,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpgt_epi64_mask() {
+    const fn test_mm_cmpgt_epi64_mask() {
         let a = _mm_set_epi64x(0, -1);
         let b = _mm_set1_epi64x(-1);
         let r = _mm_cmpgt_epi64_mask(a, b);
@@ -7159,7 +7167,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpgt_epi64_mask() {
+    const fn test_mm_mask_cmpgt_epi64_mask() {
         let a = _mm_set_epi64x(0, -1);
         let b = _mm_set1_epi64x(-1);
         let mask = 0b11111111;
@@ -7168,7 +7176,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmple_epi64_mask() {
+    const fn test_mm512_cmple_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         assert_eq!(
@@ -7178,7 +7186,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmple_epi64_mask() {
+    const fn test_mm512_mask_cmple_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b01111010;
@@ -7186,7 +7194,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmple_epi64_mask() {
+    const fn test_mm256_cmple_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, i64::MAX);
         let b = _mm256_set1_epi64x(-1);
         let r = _mm256_cmple_epi64_mask(a, b);
@@ -7194,7 +7202,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmple_epi64_mask() {
+    const fn test_mm256_mask_cmple_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, i64::MAX);
         let b = _mm256_set1_epi64x(-1);
         let mask = 0b11111111;
@@ -7203,7 +7211,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmple_epi64_mask() {
+    const fn test_mm_cmple_epi64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let r = _mm_cmple_epi64_mask(a, b);
@@ -7211,7 +7219,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmple_epi64_mask() {
+    const fn test_mm_mask_cmple_epi64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let mask = 0b11111111;
@@ -7220,7 +7228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpge_epi64_mask() {
+    const fn test_mm512_cmpge_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         assert_eq!(
@@ -7230,7 +7238,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpge_epi64_mask() {
+    const fn test_mm512_mask_cmpge_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b11111111;
@@ -7239,7 +7247,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpge_epi64_mask() {
+    const fn test_mm256_cmpge_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, i64::MAX);
         let b = _mm256_set1_epi64x(-1);
         let r = _mm256_cmpge_epi64_mask(a, b);
@@ -7247,7 +7255,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpge_epi64_mask() {
+    const fn test_mm256_mask_cmpge_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, i64::MAX);
         let b = _mm256_set1_epi64x(-1);
         let mask = 0b11111111;
@@ -7256,7 +7264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpge_epi64_mask() {
+    const fn test_mm_cmpge_epi64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(-1);
         let r = _mm_cmpge_epi64_mask(a, b);
@@ -7264,7 +7272,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpge_epi64_mask() {
+    const fn test_mm_mask_cmpge_epi64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(-1);
         let mask = 0b11111111;
@@ -7273,7 +7281,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmpeq_epi64_mask() {
+    const fn test_mm512_cmpeq_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
         let m = _mm512_cmpeq_epi64_mask(b, a);
@@ -7281,7 +7289,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpeq_epi64_mask() {
+    const fn test_mm512_mask_cmpeq_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
         let mask = 0b01111010;
@@ -7290,7 +7298,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpeq_epi64_mask() {
+    const fn test_mm256_cmpeq_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 13);
         let b = _mm256_set_epi64x(0, 1, 13, 42);
         let m = _mm256_cmpeq_epi64_mask(b, a);
@@ -7298,7 +7306,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpeq_epi64_mask() {
+    const fn test_mm256_mask_cmpeq_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 13);
         let b = _mm256_set_epi64x(0, 1, 13, 42);
         let mask = 0b11111111;
@@ -7307,7 +7315,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpeq_epi64_mask() {
+    const fn test_mm_cmpeq_epi64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(0, 1);
         let m = _mm_cmpeq_epi64_mask(b, a);
@@ -7315,7 +7323,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpeq_epi64_mask() {
+    const fn test_mm_mask_cmpeq_epi64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set_epi64x(0, 1);
         let mask = 0b11111111;
@@ -7324,18 +7332,19 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_set_epi64() {
+    const fn test_mm512_set_epi64() {
         let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0))
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_setr_epi64() {
+    const fn test_mm512_setr_epi64() {
         let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
     }
 
-    unsafe fn test_mm512_cmpneq_epi64_mask() {
+    #[simd_test(enable = "avx512f")]
+    const fn test_mm512_cmpneq_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
         let m = _mm512_cmpneq_epi64_mask(b, a);
@@ -7343,7 +7352,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmpneq_epi64_mask() {
+    const fn test_mm512_mask_cmpneq_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, -100, 100);
         let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
         let mask = 0b01111010;
@@ -7352,7 +7361,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmpneq_epi64_mask() {
+    const fn test_mm256_cmpneq_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 13);
         let b = _mm256_set_epi64x(0, 1, 13, 42);
         let r = _mm256_cmpneq_epi64_mask(b, a);
@@ -7360,7 +7369,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmpneq_epi64_mask() {
+    const fn test_mm256_mask_cmpneq_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 13);
         let b = _mm256_set_epi64x(0, 1, 13, 42);
         let mask = 0b11111111;
@@ -7369,7 +7378,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmpneq_epi64_mask() {
+    const fn test_mm_cmpneq_epi64_mask() {
         let a = _mm_set_epi64x(-1, 13);
         let b = _mm_set_epi64x(13, 42);
         let r = _mm_cmpneq_epi64_mask(b, a);
@@ -7377,7 +7386,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmpneq_epi64_mask() {
+    const fn test_mm_mask_cmpneq_epi64_mask() {
         let a = _mm_set_epi64x(-1, 13);
         let b = _mm_set_epi64x(13, 42);
         let mask = 0b11111111;
@@ -7386,7 +7395,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_cmp_epi64_mask() {
+    const fn test_mm512_cmp_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let m = _mm512_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b);
@@ -7394,7 +7403,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cmp_epi64_mask() {
+    const fn test_mm512_mask_cmp_epi64_mask() {
         let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
         let b = _mm512_set1_epi64(-1);
         let mask = 0b01100110;
@@ -7403,7 +7412,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_cmp_epi64_mask() {
+    const fn test_mm256_cmp_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 13);
         let b = _mm256_set1_epi64x(1);
         let m = _mm256_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b);
@@ -7411,7 +7420,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cmp_epi64_mask() {
+    const fn test_mm256_mask_cmp_epi64_mask() {
         let a = _mm256_set_epi64x(0, 1, -1, 13);
         let b = _mm256_set1_epi64x(1);
         let mask = 0b11111111;
@@ -7420,7 +7429,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_cmp_epi64_mask() {
+    const fn test_mm_cmp_epi64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let m = _mm_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b);
@@ -7428,7 +7437,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cmp_epi64_mask() {
+    const fn test_mm_mask_cmp_epi64_mask() {
         let a = _mm_set_epi64x(0, 1);
         let b = _mm_set1_epi64x(1);
         let mask = 0b11111111;
@@ -7437,81 +7446,81 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32gather_pd() {
+    fn test_mm512_i32gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i32gather_pd::<8>(index, arr.as_ptr());
+        let r = unsafe { _mm512_i32gather_pd::<8>(index, arr.as_ptr()) };
         assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32gather_pd() {
+    fn test_mm512_mask_i32gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         let src = _mm512_set1_pd(2.);
         let mask = 0b10101010;
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i32gather_pd::<8>(src, mask, index, arr.as_ptr());
+        let r = unsafe { _mm512_mask_i32gather_pd::<8>(src, mask, index, arr.as_ptr()) };
         assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i64gather_pd() {
+    fn test_mm512_i64gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         // A multiplier of 8 is word-addressing
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i64gather_pd::<8>(index, arr.as_ptr());
+        let r = unsafe { _mm512_i64gather_pd::<8>(index, arr.as_ptr()) };
         assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i64gather_pd() {
+    fn test_mm512_mask_i64gather_pd() {
         let arr: [f64; 128] = core::array::from_fn(|i| i as f64);
         let src = _mm512_set1_pd(2.);
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i64gather_pd::<8>(src, mask, index, arr.as_ptr());
+        let r = unsafe { _mm512_mask_i64gather_pd::<8>(src, mask, index, arr.as_ptr()) };
         assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i64gather_ps() {
+    fn test_mm512_i64gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         // A multiplier of 4 is word-addressing
         #[rustfmt::skip]
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i64gather_ps::<4>(index, arr.as_ptr());
+        let r = unsafe { _mm512_i64gather_ps::<4>(index, arr.as_ptr()) };
         assert_eq_m256(r, _mm256_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i64gather_ps() {
+    fn test_mm512_mask_i64gather_ps() {
         let arr: [f32; 128] = core::array::from_fn(|i| i as f32);
         let src = _mm256_set1_ps(2.);
         let mask = 0b10101010;
         #[rustfmt::skip]
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 4 is word-addressing
-        let r = _mm512_mask_i64gather_ps::<4>(src, mask, index, arr.as_ptr());
+        let r = unsafe { _mm512_mask_i64gather_ps::<4>(src, mask, index, arr.as_ptr()) };
         assert_eq_m256(r, _mm256_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32gather_epi64() {
+    fn test_mm512_i32gather_epi64() {
         let mut arr = [0i64; 128];
         for i in 0..128i64 {
             arr[i as usize] = i;
         }
         // A multiplier of 8 is word-addressing
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i32gather_epi64::<8>(index, arr.as_ptr());
+        let r = unsafe { _mm512_i32gather_epi64::<8>(index, arr.as_ptr()) };
         assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32gather_epi64() {
+    fn test_mm512_mask_i32gather_epi64() {
         let mut arr = [0i64; 128];
         for i in 0..128i64 {
             arr[i as usize] = i;
@@ -7520,24 +7529,24 @@ mod tests {
         let mask = 0b10101010;
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i32gather_epi64::<8>(src, mask, index, arr.as_ptr());
+        let r = unsafe { _mm512_mask_i32gather_epi64::<8>(src, mask, index, arr.as_ptr()) };
         assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i64gather_epi64() {
+    fn test_mm512_i64gather_epi64() {
         let mut arr = [0i64; 128];
         for i in 0..128i64 {
             arr[i as usize] = i;
         }
         // A multiplier of 8 is word-addressing
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i64gather_epi64::<8>(index, arr.as_ptr());
+        let r = unsafe { _mm512_i64gather_epi64::<8>(index, arr.as_ptr()) };
         assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i64gather_epi64() {
+    fn test_mm512_mask_i64gather_epi64() {
         let mut arr = [0i64; 128];
         for i in 0..128i64 {
             arr[i as usize] = i;
@@ -7546,24 +7555,24 @@ mod tests {
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i64gather_epi64::<8>(src, mask, index, arr.as_ptr());
+        let r = unsafe { _mm512_mask_i64gather_epi64::<8>(src, mask, index, arr.as_ptr()) };
         assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i64gather_epi32() {
+    fn test_mm512_i64gather_epi32() {
         let mut arr = [0i64; 128];
         for i in 0..128i64 {
             arr[i as usize] = i;
         }
         // A multiplier of 8 is word-addressing
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
-        let r = _mm512_i64gather_epi32::<8>(index, arr.as_ptr() as *const i32);
+        let r = unsafe { _mm512_i64gather_epi32::<8>(index, arr.as_ptr() as *const i32) };
         assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i64gather_epi32() {
+    fn test_mm512_mask_i64gather_epi32() {
         let mut arr = [0i64; 128];
         for i in 0..128i64 {
             arr[i as usize] = i;
@@ -7572,17 +7581,21 @@ mod tests {
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         // A multiplier of 8 is word-addressing
-        let r = _mm512_mask_i64gather_epi32::<8>(src, mask, index, arr.as_ptr() as *const i32);
+        let r = unsafe {
+            _mm512_mask_i64gather_epi32::<8>(src, mask, index, arr.as_ptr() as *const i32)
+        };
         assert_eq_m256i(r, _mm256_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112));
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32scatter_pd() {
+    fn test_mm512_i32scatter_pd() {
         let mut arr = [0f64; 128];
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 8 is word-addressing
-        _mm512_i32scatter_pd::<8>(arr.as_mut_ptr(), index, src);
+        unsafe {
+            _mm512_i32scatter_pd::<8>(arr.as_mut_ptr(), index, src);
+        }
         let mut expected = [0f64; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as f64;
@@ -7591,13 +7604,15 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32scatter_pd() {
+    fn test_mm512_mask_i32scatter_pd() {
         let mut arr = [0f64; 128];
         let mask = 0b10101010;
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 8 is word-addressing
-        _mm512_mask_i32scatter_pd::<8>(arr.as_mut_ptr(), mask, index, src);
+        unsafe {
+            _mm512_mask_i32scatter_pd::<8>(arr.as_mut_ptr(), mask, index, src);
+        }
         let mut expected = [0f64; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2. * (i + 1) as f64;
@@ -7606,12 +7621,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i64scatter_pd() {
+    fn test_mm512_i64scatter_pd() {
         let mut arr = [0f64; 128];
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 8 is word-addressing
-        _mm512_i64scatter_pd::<8>(arr.as_mut_ptr(), index, src);
+        unsafe {
+            _mm512_i64scatter_pd::<8>(arr.as_mut_ptr(), index, src);
+        }
         let mut expected = [0f64; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as f64;
@@ -7620,13 +7637,15 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i64scatter_pd() {
+    fn test_mm512_mask_i64scatter_pd() {
         let mut arr = [0f64; 128];
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 8 is word-addressing
-        _mm512_mask_i64scatter_pd::<8>(arr.as_mut_ptr(), mask, index, src);
+        unsafe {
+            _mm512_mask_i64scatter_pd::<8>(arr.as_mut_ptr(), mask, index, src);
+        }
         let mut expected = [0f64; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2. * (i + 1) as f64;
@@ -7635,12 +7654,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i64scatter_ps() {
+    fn test_mm512_i64scatter_ps() {
         let mut arr = [0f32; 128];
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 4 is word-addressing
-        _mm512_i64scatter_ps::<4>(arr.as_mut_ptr(), index, src);
+        unsafe {
+            _mm512_i64scatter_ps::<4>(arr.as_mut_ptr(), index, src);
+        }
         let mut expected = [0f32; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as f32;
@@ -7649,13 +7670,15 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i64scatter_ps() {
+    fn test_mm512_mask_i64scatter_ps() {
         let mut arr = [0f32; 128];
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
         // A multiplier of 4 is word-addressing
-        _mm512_mask_i64scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
+        unsafe {
+            _mm512_mask_i64scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
+        }
         let mut expected = [0f32; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2. * (i + 1) as f32;
@@ -7664,12 +7687,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32scatter_epi64() {
+    fn test_mm512_i32scatter_epi64() {
         let mut arr = [0i64; 128];
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 8 is word-addressing
-        _mm512_i32scatter_epi64::<8>(arr.as_mut_ptr(), index, src);
+        unsafe {
+            _mm512_i32scatter_epi64::<8>(arr.as_mut_ptr(), index, src);
+        }
         let mut expected = [0i64; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as i64;
@@ -7678,13 +7703,15 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32scatter_epi64() {
+    fn test_mm512_mask_i32scatter_epi64() {
         let mut arr = [0i64; 128];
         let mask = 0b10101010;
         let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 8 is word-addressing
-        _mm512_mask_i32scatter_epi64::<8>(arr.as_mut_ptr(), mask, index, src);
+        unsafe {
+            _mm512_mask_i32scatter_epi64::<8>(arr.as_mut_ptr(), mask, index, src);
+        }
         let mut expected = [0i64; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2 * (i + 1) as i64;
@@ -7693,12 +7720,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i64scatter_epi64() {
+    fn test_mm512_i64scatter_epi64() {
         let mut arr = [0i64; 128];
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 8 is word-addressing
-        _mm512_i64scatter_epi64::<8>(arr.as_mut_ptr(), index, src);
+        unsafe {
+            _mm512_i64scatter_epi64::<8>(arr.as_mut_ptr(), index, src);
+        }
         let mut expected = [0i64; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as i64;
@@ -7707,13 +7736,15 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i64scatter_epi64() {
+    fn test_mm512_mask_i64scatter_epi64() {
         let mut arr = [0i64; 128];
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 8 is word-addressing
-        _mm512_mask_i64scatter_epi64::<8>(arr.as_mut_ptr(), mask, index, src);
+        unsafe {
+            _mm512_mask_i64scatter_epi64::<8>(arr.as_mut_ptr(), mask, index, src);
+        }
         let mut expected = [0i64; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2 * (i + 1) as i64;
@@ -7722,12 +7753,14 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i64scatter_epi32() {
+    fn test_mm512_i64scatter_epi32() {
         let mut arr = [0i32; 128];
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 4 is word-addressing
-        _mm512_i64scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
+        unsafe {
+            _mm512_i64scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
+        }
         let mut expected = [0i32; 128];
         for i in 0..8 {
             expected[i * 16] = (i + 1) as i32;
@@ -7736,13 +7769,15 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i64scatter_epi32() {
+    fn test_mm512_mask_i64scatter_epi32() {
         let mut arr = [0i32; 128];
         let mask = 0b10101010;
         let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
         let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
         // A multiplier of 4 is word-addressing
-        _mm512_mask_i64scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
+        unsafe {
+            _mm512_mask_i64scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
+        }
         let mut expected = [0i32; 128];
         for i in 0..4 {
             expected[i * 32 + 16] = 2 * (i + 1) as i32;
@@ -7751,565 +7786,646 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32logather_epi64() {
+    fn test_mm512_i32logather_epi64() {
         let base_addr: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
-        let r = _mm512_i32logather_epi64::<8>(vindex, base_addr.as_ptr());
+        let r = unsafe { _mm512_i32logather_epi64::<8>(vindex, base_addr.as_ptr()) };
         let expected = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
         assert_eq_m512i(expected, r);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32logather_epi64() {
+    fn test_mm512_mask_i32logather_epi64() {
         let base_addr: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
         let src = _mm512_setr_epi64(9, 10, 11, 12, 13, 14, 15, 16);
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
-        let r = _mm512_mask_i32logather_epi64::<8>(src, 0b01010101, vindex, base_addr.as_ptr());
+        let r = unsafe {
+            _mm512_mask_i32logather_epi64::<8>(src, 0b01010101, vindex, base_addr.as_ptr())
+        };
         let expected = _mm512_setr_epi64(2, 10, 4, 12, 6, 14, 8, 16);
         assert_eq_m512i(expected, r);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32logather_pd() {
+    fn test_mm512_i32logather_pd() {
         let base_addr: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
-        let r = _mm512_i32logather_pd::<8>(vindex, base_addr.as_ptr());
+        let r = unsafe { _mm512_i32logather_pd::<8>(vindex, base_addr.as_ptr()) };
         let expected = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
         assert_eq_m512d(expected, r);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32logather_pd() {
+    fn test_mm512_mask_i32logather_pd() {
         let base_addr: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
         let src = _mm512_setr_pd(9., 10., 11., 12., 13., 14., 15., 16.);
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
-        let r = _mm512_mask_i32logather_pd::<8>(src, 0b01010101, vindex, base_addr.as_ptr());
+        let r =
+            unsafe { _mm512_mask_i32logather_pd::<8>(src, 0b01010101, vindex, base_addr.as_ptr()) };
         let expected = _mm512_setr_pd(2., 10., 4., 12., 6., 14., 8., 16.);
         assert_eq_m512d(expected, r);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32loscatter_epi64() {
+    fn test_mm512_i32loscatter_epi64() {
         let mut base_addr: [i64; 8] = [0; 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let src = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
-        _mm512_i32loscatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm512_i32loscatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1, 2, 3, 4, 5, 6, 7, 8];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32loscatter_epi64() {
+    fn test_mm512_mask_i32loscatter_epi64() {
         let mut base_addr: [i64; 8] = [0; 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let src = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
-        _mm512_mask_i32loscatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
+        unsafe {
+            _mm512_mask_i32loscatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
+        }
         let expected = [0, 2, 0, 4, 0, 6, 0, 8];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_i32loscatter_pd() {
+    fn test_mm512_i32loscatter_pd() {
         let mut base_addr: [f64; 8] = [0.; 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let src = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
-        _mm512_i32loscatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm512_i32loscatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1., 2., 3., 4., 5., 6., 7., 8.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_i32loscatter_pd() {
+    fn test_mm512_mask_i32loscatter_pd() {
         let mut base_addr: [f64; 8] = [0.; 8];
         let vindex = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
         let src = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
-        _mm512_mask_i32loscatter_pd::<8>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
+        unsafe {
+            _mm512_mask_i32loscatter_pd::<8>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
+        }
         let expected = [0., 2., 0., 4., 0., 6., 0., 8.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mmask_i32gather_epi32() {
+    fn test_mm_mmask_i32gather_epi32() {
         let base_addr: [i32; 4] = [1, 2, 3, 4];
         let src = _mm_setr_epi32(5, 6, 7, 8);
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
-        let r = _mm_mmask_i32gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm_mmask_i32gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_epi32(2, 6, 4, 8);
         assert_eq_m128i(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mmask_i32gather_epi64() {
+    fn test_mm_mmask_i32gather_epi64() {
         let base_addr: [i64; 2] = [1, 2];
         let src = _mm_setr_epi64x(5, 6);
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
-        let r = _mm_mmask_i32gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm_mmask_i32gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_epi64x(2, 6);
         assert_eq_m128i(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mmask_i32gather_pd() {
+    fn test_mm_mmask_i32gather_pd() {
         let base_addr: [f64; 2] = [1., 2.];
         let src = _mm_setr_pd(5., 6.);
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
-        let r = _mm_mmask_i32gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm_mmask_i32gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_pd(2., 6.);
         assert_eq_m128d(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mmask_i32gather_ps() {
+    fn test_mm_mmask_i32gather_ps() {
         let base_addr: [f32; 4] = [1., 2., 3., 4.];
         let src = _mm_setr_ps(5., 6., 7., 8.);
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
-        let r = _mm_mmask_i32gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm_mmask_i32gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_ps(2., 6., 4., 8.);
         assert_eq_m128(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mmask_i64gather_epi32() {
+    fn test_mm_mmask_i64gather_epi32() {
         let base_addr: [i32; 2] = [1, 2];
         let src = _mm_setr_epi32(5, 6, 7, 8);
         let vindex = _mm_setr_epi64x(1, 0);
-        let r = _mm_mmask_i64gather_epi32::<4>(src, 0b01, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm_mmask_i64gather_epi32::<4>(src, 0b01, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_epi32(2, 6, 0, 0);
         assert_eq_m128i(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mmask_i64gather_epi64() {
+    fn test_mm_mmask_i64gather_epi64() {
         let base_addr: [i64; 2] = [1, 2];
         let src = _mm_setr_epi64x(5, 6);
         let vindex = _mm_setr_epi64x(1, 0);
-        let r = _mm_mmask_i64gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm_mmask_i64gather_epi64::<8>(src, 0b01, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_epi64x(2, 6);
         assert_eq_m128i(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mmask_i64gather_pd() {
+    fn test_mm_mmask_i64gather_pd() {
         let base_addr: [f64; 2] = [1., 2.];
         let src = _mm_setr_pd(5., 6.);
         let vindex = _mm_setr_epi64x(1, 0);
-        let r = _mm_mmask_i64gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm_mmask_i64gather_pd::<8>(src, 0b01, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_pd(2., 6.);
         assert_eq_m128d(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mmask_i64gather_ps() {
+    fn test_mm_mmask_i64gather_ps() {
         let base_addr: [f32; 2] = [1., 2.];
         let src = _mm_setr_ps(5., 6., 7., 8.);
         let vindex = _mm_setr_epi64x(1, 0);
-        let r = _mm_mmask_i64gather_ps::<4>(src, 0b01, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm_mmask_i64gather_ps::<4>(src, 0b01, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_ps(2., 6., 0., 0.);
         assert_eq_m128(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mmask_i32gather_epi32() {
+    fn test_mm256_mmask_i32gather_epi32() {
         let base_addr: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
         let src = _mm256_setr_epi32(9, 10, 11, 12, 13, 14, 15, 16);
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
-        let r = _mm256_mmask_i32gather_epi32::<4>(src, 0b01010101, vindex, base_addr.as_ptr());
+        let r = unsafe {
+            _mm256_mmask_i32gather_epi32::<4>(src, 0b01010101, vindex, base_addr.as_ptr())
+        };
         let expected = _mm256_setr_epi32(2, 10, 4, 12, 6, 14, 8, 16);
         assert_eq_m256i(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mmask_i32gather_epi64() {
+    fn test_mm256_mmask_i32gather_epi64() {
         let base_addr: [i64; 4] = [1, 2, 3, 4];
         let src = _mm256_setr_epi64x(9, 10, 11, 12);
         let vindex = _mm_setr_epi32(1, 2, 3, 4);
-        let r = _mm256_mmask_i32gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr());
+        let r =
+            unsafe { _mm256_mmask_i32gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr()) };
         let expected = _mm256_setr_epi64x(2, 10, 4, 12);
         assert_eq_m256i(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mmask_i32gather_pd() {
+    fn test_mm256_mmask_i32gather_pd() {
         let base_addr: [f64; 4] = [1., 2., 3., 4.];
         let src = _mm256_setr_pd(9., 10., 11., 12.);
         let vindex = _mm_setr_epi32(1, 2, 3, 4);
-        let r = _mm256_mmask_i32gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm256_mmask_i32gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr()) };
         let expected = _mm256_setr_pd(2., 10., 4., 12.);
         assert_eq_m256d(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mmask_i32gather_ps() {
+    fn test_mm256_mmask_i32gather_ps() {
         let base_addr: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
         let src = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
-        let r = _mm256_mmask_i32gather_ps::<4>(src, 0b01010101, vindex, base_addr.as_ptr());
+        let r =
+            unsafe { _mm256_mmask_i32gather_ps::<4>(src, 0b01010101, vindex, base_addr.as_ptr()) };
         let expected = _mm256_setr_ps(2., 10., 4., 12., 6., 14., 8., 16.);
         assert_eq_m256(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mmask_i64gather_epi32() {
+    fn test_mm256_mmask_i64gather_epi32() {
         let base_addr: [i32; 4] = [1, 2, 3, 4];
         let src = _mm_setr_epi32(9, 10, 11, 12);
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
-        let r = _mm256_mmask_i64gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr());
+        let r =
+            unsafe { _mm256_mmask_i64gather_epi32::<4>(src, 0b0101, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_epi32(2, 10, 4, 12);
         assert_eq_m128i(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mmask_i64gather_epi64() {
+    fn test_mm256_mmask_i64gather_epi64() {
         let base_addr: [i64; 4] = [1, 2, 3, 4];
         let src = _mm256_setr_epi64x(9, 10, 11, 12);
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
-        let r = _mm256_mmask_i64gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr());
+        let r =
+            unsafe { _mm256_mmask_i64gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr()) };
         let expected = _mm256_setr_epi64x(2, 10, 4, 12);
         assert_eq_m256i(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mmask_i64gather_pd() {
+    fn test_mm256_mmask_i64gather_pd() {
         let base_addr: [f64; 4] = [1., 2., 3., 4.];
         let src = _mm256_setr_pd(9., 10., 11., 12.);
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
-        let r = _mm256_mmask_i64gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm256_mmask_i64gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr()) };
         let expected = _mm256_setr_pd(2., 10., 4., 12.);
         assert_eq_m256d(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mmask_i64gather_ps() {
+    fn test_mm256_mmask_i64gather_ps() {
         let base_addr: [f32; 4] = [1., 2., 3., 4.];
         let src = _mm_setr_ps(9., 10., 11., 12.);
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
-        let r = _mm256_mmask_i64gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr());
+        let r = unsafe { _mm256_mmask_i64gather_ps::<4>(src, 0b0101, vindex, base_addr.as_ptr()) };
         let expected = _mm_setr_ps(2., 10., 4., 12.);
         assert_eq_m128(expected, r);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_i32scatter_epi32() {
+    fn test_mm_i32scatter_epi32() {
         let mut base_addr: [i32; 4] = [0; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm_setr_epi32(2, 3, 4, 1);
-        _mm_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1, 2, 3, 4];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_i32scatter_epi32() {
+    fn test_mm_mask_i32scatter_epi32() {
         let mut base_addr: [i32; 4] = [0; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm_setr_epi32(2, 3, 4, 1);
-        _mm_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        unsafe {
+            _mm_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        }
         let expected = [0, 2, 0, 4];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_i32scatter_epi64() {
+    fn test_mm_i32scatter_epi64() {
         let mut base_addr: [i64; 2] = [0; 2];
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
         let src = _mm_setr_epi64x(2, 1);
-        _mm_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1, 2];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_i32scatter_epi64() {
+    fn test_mm_mask_i32scatter_epi64() {
         let mut base_addr: [i64; 2] = [0; 2];
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
         let src = _mm_setr_epi64x(2, 1);
-        _mm_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        unsafe {
+            _mm_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        }
         let expected = [0, 2];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_i32scatter_pd() {
+    fn test_mm_i32scatter_pd() {
         let mut base_addr: [f64; 2] = [0.; 2];
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
         let src = _mm_setr_pd(2., 1.);
-        _mm_i32scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm_i32scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1., 2.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_i32scatter_pd() {
+    fn test_mm_mask_i32scatter_pd() {
         let mut base_addr: [f64; 2] = [0.; 2];
         let vindex = _mm_setr_epi32(1, 0, -1, -1);
         let src = _mm_setr_pd(2., 1.);
-        _mm_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        unsafe {
+            _mm_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        }
         let expected = [0., 2.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_i32scatter_ps() {
+    fn test_mm_i32scatter_ps() {
         let mut base_addr: [f32; 4] = [0.; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm_setr_ps(2., 3., 4., 1.);
-        _mm_i32scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm_i32scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1., 2., 3., 4.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_i32scatter_ps() {
+    fn test_mm_mask_i32scatter_ps() {
         let mut base_addr: [f32; 4] = [0.; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm_setr_ps(2., 3., 4., 1.);
-        _mm_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        unsafe {
+            _mm_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        }
         let expected = [0., 2., 0., 4.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_i64scatter_epi32() {
+    fn test_mm_i64scatter_epi32() {
         let mut base_addr: [i32; 2] = [0; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_epi32(2, 1, -1, -1);
-        _mm_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1, 2];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_i64scatter_epi32() {
+    fn test_mm_mask_i64scatter_epi32() {
         let mut base_addr: [i32; 2] = [0; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_epi32(2, 1, -1, -1);
-        _mm_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        unsafe {
+            _mm_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        }
         let expected = [0, 2];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_i64scatter_epi64() {
+    fn test_mm_i64scatter_epi64() {
         let mut base_addr: [i64; 2] = [0; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_epi64x(2, 1);
-        _mm_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1, 2];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_i64scatter_epi64() {
+    fn test_mm_mask_i64scatter_epi64() {
         let mut base_addr: [i64; 2] = [0; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_epi64x(2, 1);
-        _mm_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        unsafe {
+            _mm_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        }
         let expected = [0, 2];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_i64scatter_pd() {
+    fn test_mm_i64scatter_pd() {
         let mut base_addr: [f64; 2] = [0.; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_pd(2., 1.);
-        _mm_i64scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm_i64scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1., 2.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_i64scatter_pd() {
+    fn test_mm_mask_i64scatter_pd() {
         let mut base_addr: [f64; 2] = [0.; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_pd(2., 1.);
-        _mm_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        unsafe {
+            _mm_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        }
         let expected = [0., 2.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_i64scatter_ps() {
+    fn test_mm_i64scatter_ps() {
         let mut base_addr: [f32; 2] = [0.; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_ps(2., 1., -1., -1.);
-        _mm_i64scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm_i64scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1., 2.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_i64scatter_ps() {
+    fn test_mm_mask_i64scatter_ps() {
         let mut base_addr: [f32; 2] = [0.; 2];
         let vindex = _mm_setr_epi64x(1, 0);
         let src = _mm_setr_ps(2., 1., -1., -1.);
-        _mm_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        unsafe {
+            _mm_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr(), 0b01, vindex, src);
+        }
         let expected = [0., 2.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_i32scatter_epi32() {
+    fn test_mm256_i32scatter_epi32() {
         let mut base_addr: [i32; 8] = [0; 8];
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
         let src = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1);
-        _mm256_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm256_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1, 2, 3, 4, 5, 6, 7, 8];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_i32scatter_epi32() {
+    fn test_mm256_mask_i32scatter_epi32() {
         let mut base_addr: [i32; 8] = [0; 8];
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
         let src = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1);
-        _mm256_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
+        unsafe {
+            _mm256_mask_i32scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
+        }
         let expected = [0, 2, 0, 4, 0, 6, 0, 8];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_i32scatter_epi64() {
+    fn test_mm256_i32scatter_epi64() {
         let mut base_addr: [i64; 4] = [0; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm256_setr_epi64x(2, 3, 4, 1);
-        _mm256_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm256_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1, 2, 3, 4];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_i32scatter_epi64() {
+    fn test_mm256_mask_i32scatter_epi64() {
         let mut base_addr: [i64; 4] = [0; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm256_setr_epi64x(2, 3, 4, 1);
-        _mm256_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        unsafe {
+            _mm256_mask_i32scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        }
         let expected = [0, 2, 0, 4];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_i32scatter_pd() {
+    fn test_mm256_i32scatter_pd() {
         let mut base_addr: [f64; 4] = [0.; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm256_setr_pd(2., 3., 4., 1.);
-        _mm256_i32scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm256_i32scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1., 2., 3., 4.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_i32scatter_pd() {
+    fn test_mm256_mask_i32scatter_pd() {
         let mut base_addr: [f64; 4] = [0.; 4];
         let vindex = _mm_setr_epi32(1, 2, 3, 0);
         let src = _mm256_setr_pd(2., 3., 4., 1.);
-        _mm256_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        unsafe {
+            _mm256_mask_i32scatter_pd::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        }
         let expected = [0., 2., 0., 4.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_i32scatter_ps() {
+    fn test_mm256_i32scatter_ps() {
         let mut base_addr: [f32; 8] = [0.; 8];
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
         let src = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.);
-        _mm256_i32scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm256_i32scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1., 2., 3., 4., 5., 6., 7., 8.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_i32scatter_ps() {
+    fn test_mm256_mask_i32scatter_ps() {
         let mut base_addr: [f32; 8] = [0.; 8];
         let vindex = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
         let src = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.);
-        _mm256_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
+        unsafe {
+            _mm256_mask_i32scatter_ps::<4>(base_addr.as_mut_ptr(), 0b01010101, vindex, src);
+        }
         let expected = [0., 2., 0., 4., 0., 6., 0., 8.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_i64scatter_epi32() {
+    fn test_mm256_i64scatter_epi32() {
         let mut base_addr: [i32; 4] = [0; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm_setr_epi32(2, 3, 4, 1);
-        _mm256_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm256_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1, 2, 3, 4];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_i64scatter_epi32() {
+    fn test_mm256_mask_i64scatter_epi32() {
         let mut base_addr: [i32; 4] = [0; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm_setr_epi32(2, 3, 4, 1);
-        _mm256_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        unsafe {
+            _mm256_mask_i64scatter_epi32::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        }
         let expected = [0, 2, 0, 4];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_i64scatter_epi64() {
+    fn test_mm256_i64scatter_epi64() {
         let mut base_addr: [i64; 4] = [0; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm256_setr_epi64x(2, 3, 4, 1);
-        _mm256_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm256_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1, 2, 3, 4];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_i64scatter_epi64() {
+    fn test_mm256_mask_i64scatter_epi64() {
         let mut base_addr: [i64; 4] = [0; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm256_setr_epi64x(2, 3, 4, 1);
-        _mm256_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        unsafe {
+            _mm256_mask_i64scatter_epi64::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        }
         let expected = [0, 2, 0, 4];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_i64scatter_pd() {
+    fn test_mm256_i64scatter_pd() {
         let mut base_addr: [f64; 4] = [0.; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm256_setr_pd(2., 3., 4., 1.);
-        _mm256_i64scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm256_i64scatter_pd::<8>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1., 2., 3., 4.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_i64scatter_pd() {
+    fn test_mm256_mask_i64scatter_pd() {
         let mut base_addr: [f64; 4] = [0.; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm256_setr_pd(2., 3., 4., 1.);
-        _mm256_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        unsafe {
+            _mm256_mask_i64scatter_pd::<8>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        }
         let expected = [0., 2., 0., 4.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_i64scatter_ps() {
+    fn test_mm256_i64scatter_ps() {
         let mut base_addr: [f32; 4] = [0.; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm_setr_ps(2., 3., 4., 1.);
-        _mm256_i64scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
+        unsafe {
+            _mm256_i64scatter_ps::<4>(base_addr.as_mut_ptr(), vindex, src);
+        }
         let expected = [1., 2., 3., 4.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_i64scatter_ps() {
+    fn test_mm256_mask_i64scatter_ps() {
         let mut base_addr: [f32; 4] = [0.; 4];
         let vindex = _mm256_setr_epi64x(1, 2, 3, 0);
         let src = _mm_setr_ps(2., 3., 4., 1.);
-        _mm256_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        unsafe {
+            _mm256_mask_i64scatter_ps::<4>(base_addr.as_mut_ptr(), 0b0101, vindex, src);
+        }
         let expected = [0., 2., 0., 4.];
         assert_eq!(expected, base_addr);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rol_epi64() {
+    const fn test_mm512_rol_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -8325,7 +8441,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rol_epi64() {
+    const fn test_mm512_mask_rol_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -8343,7 +8459,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rol_epi64() {
+    const fn test_mm512_maskz_rol_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -8357,7 +8473,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rol_epi64() {
+    const fn test_mm256_rol_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let r = _mm256_rol_epi64::<1>(a);
         let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33);
@@ -8365,7 +8481,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rol_epi64() {
+    const fn test_mm256_mask_rol_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let r = _mm256_mask_rol_epi64::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -8375,7 +8491,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rol_epi64() {
+    const fn test_mm256_maskz_rol_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let r = _mm256_maskz_rol_epi64::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -8385,7 +8501,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rol_epi64() {
+    const fn test_mm_rol_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let r = _mm_rol_epi64::<1>(a);
         let e = _mm_set_epi64x(1 << 0, 1 << 33);
@@ -8393,7 +8509,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rol_epi64() {
+    const fn test_mm_mask_rol_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let r = _mm_mask_rol_epi64::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -8403,7 +8519,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rol_epi64() {
+    const fn test_mm_maskz_rol_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let r = _mm_maskz_rol_epi64::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -8413,7 +8529,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_ror_epi64() {
+    const fn test_mm512_ror_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0,  1 << 32, 1 << 32, 1 << 32,
@@ -8429,7 +8545,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_ror_epi64() {
+    const fn test_mm512_mask_ror_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0,  1 << 32, 1 << 32, 1 << 32,
@@ -8447,7 +8563,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_ror_epi64() {
+    const fn test_mm512_maskz_ror_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -8461,7 +8577,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_ror_epi64() {
+    const fn test_mm256_ror_epi64() {
         let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32);
         let r = _mm256_ror_epi64::<1>(a);
         let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31);
@@ -8469,7 +8585,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_ror_epi64() {
+    const fn test_mm256_mask_ror_epi64() {
         let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32);
         let r = _mm256_mask_ror_epi64::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -8479,7 +8595,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_ror_epi64() {
+    const fn test_mm256_maskz_ror_epi64() {
         let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32);
         let r = _mm256_maskz_ror_epi64::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -8489,7 +8605,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_ror_epi64() {
+    const fn test_mm_ror_epi64() {
         let a = _mm_set_epi64x(1 << 0, 1 << 32);
         let r = _mm_ror_epi64::<1>(a);
         let e = _mm_set_epi64x(1 << 63, 1 << 31);
@@ -8497,7 +8613,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_ror_epi64() {
+    const fn test_mm_mask_ror_epi64() {
         let a = _mm_set_epi64x(1 << 0, 1 << 32);
         let r = _mm_mask_ror_epi64::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -8507,7 +8623,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_ror_epi64() {
+    const fn test_mm_maskz_ror_epi64() {
         let a = _mm_set_epi64x(1 << 0, 1 << 32);
         let r = _mm_maskz_ror_epi64::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -8517,7 +8633,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_slli_epi64() {
+    const fn test_mm512_slli_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -8533,7 +8649,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_slli_epi64() {
+    const fn test_mm512_mask_slli_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -8551,7 +8667,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_slli_epi64() {
+    const fn test_mm512_maskz_slli_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -8565,7 +8681,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_slli_epi64() {
+    const fn test_mm256_mask_slli_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let r = _mm256_mask_slli_epi64::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -8575,7 +8691,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_slli_epi64() {
+    const fn test_mm256_maskz_slli_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let r = _mm256_maskz_slli_epi64::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -8585,7 +8701,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_slli_epi64() {
+    const fn test_mm_mask_slli_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let r = _mm_mask_slli_epi64::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -8595,7 +8711,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_slli_epi64() {
+    const fn test_mm_maskz_slli_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let r = _mm_maskz_slli_epi64::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -8605,7 +8721,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srli_epi64() {
+    const fn test_mm512_srli_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0, 1 << 32, 1 << 32, 1 << 32,
@@ -8621,7 +8737,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srli_epi64() {
+    const fn test_mm512_mask_srli_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0, 1 << 32, 1 << 32, 1 << 32,
@@ -8639,7 +8755,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srli_epi64() {
+    const fn test_mm512_maskz_srli_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -8653,7 +8769,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srli_epi64() {
+    const fn test_mm256_mask_srli_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let r = _mm256_mask_srli_epi64::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -8663,7 +8779,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srli_epi64() {
+    const fn test_mm256_maskz_srli_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let r = _mm256_maskz_srli_epi64::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -8673,7 +8789,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srli_epi64() {
+    const fn test_mm_mask_srli_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let r = _mm_mask_srli_epi64::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -8683,7 +8799,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srli_epi64() {
+    const fn test_mm_maskz_srli_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let r = _mm_maskz_srli_epi64::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -8693,7 +8809,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rolv_epi64() {
+    const fn test_mm512_rolv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 63, 1 << 32, 1 << 32,
@@ -8710,7 +8826,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rolv_epi64() {
+    const fn test_mm512_mask_rolv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 63, 1 << 32, 1 << 32,
@@ -8729,7 +8845,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rolv_epi64() {
+    const fn test_mm512_maskz_rolv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -8744,7 +8860,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rolv_epi64() {
+    const fn test_mm256_rolv_epi64() {
         let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32);
         let b = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_rolv_epi64(a, b);
@@ -8753,7 +8869,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rolv_epi64() {
+    const fn test_mm256_mask_rolv_epi64() {
         let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32);
         let b = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_mask_rolv_epi64(a, 0, a, b);
@@ -8764,7 +8880,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rolv_epi64() {
+    const fn test_mm256_maskz_rolv_epi64() {
         let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32);
         let b = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_maskz_rolv_epi64(0, a, b);
@@ -8775,7 +8891,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rolv_epi64() {
+    const fn test_mm_rolv_epi64() {
         let a = _mm_set_epi64x(1 << 32, 1 << 63);
         let b = _mm_set_epi64x(0, 1);
         let r = _mm_rolv_epi64(a, b);
@@ -8784,7 +8900,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rolv_epi64() {
+    const fn test_mm_mask_rolv_epi64() {
         let a = _mm_set_epi64x(1 << 32, 1 << 63);
         let b = _mm_set_epi64x(0, 1);
         let r = _mm_mask_rolv_epi64(a, 0, a, b);
@@ -8795,7 +8911,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rolv_epi64() {
+    const fn test_mm_maskz_rolv_epi64() {
         let a = _mm_set_epi64x(1 << 32, 1 << 63);
         let b = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_rolv_epi64(0, a, b);
@@ -8806,7 +8922,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_rorv_epi64() {
+    const fn test_mm512_rorv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 0, 1 << 32, 1 << 32,
@@ -8823,7 +8939,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_rorv_epi64() {
+    const fn test_mm512_mask_rorv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 0, 1 << 32, 1 << 32,
@@ -8842,7 +8958,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_rorv_epi64() {
+    const fn test_mm512_maskz_rorv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -8857,7 +8973,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_rorv_epi64() {
+    const fn test_mm256_rorv_epi64() {
         let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32);
         let b = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_rorv_epi64(a, b);
@@ -8866,7 +8982,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_rorv_epi64() {
+    const fn test_mm256_mask_rorv_epi64() {
         let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32);
         let b = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_mask_rorv_epi64(a, 0, a, b);
@@ -8877,7 +8993,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_rorv_epi64() {
+    const fn test_mm256_maskz_rorv_epi64() {
         let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32);
         let b = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_maskz_rorv_epi64(0, a, b);
@@ -8888,7 +9004,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_rorv_epi64() {
+    const fn test_mm_rorv_epi64() {
         let a = _mm_set_epi64x(1 << 32, 1 << 0);
         let b = _mm_set_epi64x(0, 1);
         let r = _mm_rorv_epi64(a, b);
@@ -8897,7 +9013,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_rorv_epi64() {
+    const fn test_mm_mask_rorv_epi64() {
         let a = _mm_set_epi64x(1 << 32, 1 << 0);
         let b = _mm_set_epi64x(0, 1);
         let r = _mm_mask_rorv_epi64(a, 0, a, b);
@@ -8908,7 +9024,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_rorv_epi64() {
+    const fn test_mm_maskz_rorv_epi64() {
         let a = _mm_set_epi64x(1 << 32, 1 << 0);
         let b = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_rorv_epi64(0, a, b);
@@ -8919,7 +9035,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sllv_epi64() {
+    const fn test_mm512_sllv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 63, 1 << 32, 1 << 32,
@@ -8936,7 +9052,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sllv_epi64() {
+    const fn test_mm512_mask_sllv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 63, 1 << 32,
@@ -8955,7 +9071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sllv_epi64() {
+    const fn test_mm512_maskz_sllv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -8970,7 +9086,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sllv_epi64() {
+    const fn test_mm256_mask_sllv_epi64() {
         let a = _mm256_set_epi64x(1 << 32, 1 << 32, 1 << 63, 1 << 32);
         let count = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_mask_sllv_epi64(a, 0, a, count);
@@ -8981,7 +9097,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sllv_epi64() {
+    const fn test_mm256_maskz_sllv_epi64() {
         let a = _mm256_set_epi64x(1 << 32, 1 << 32, 1 << 63, 1 << 32);
         let count = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_maskz_sllv_epi64(0, a, count);
@@ -8992,7 +9108,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sllv_epi64() {
+    const fn test_mm_mask_sllv_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let count = _mm_set_epi64x(2, 3);
         let r = _mm_mask_sllv_epi64(a, 0, a, count);
@@ -9003,7 +9119,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sllv_epi64() {
+    const fn test_mm_maskz_sllv_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let count = _mm_set_epi64x(2, 3);
         let r = _mm_maskz_sllv_epi64(0, a, count);
@@ -9014,7 +9130,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srlv_epi64() {
+    const fn test_mm512_srlv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 0, 1 << 32, 1 << 32,
@@ -9031,7 +9147,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srlv_epi64() {
+    const fn test_mm512_mask_srlv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 0, 1 << 32, 1 << 32,
@@ -9050,7 +9166,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srlv_epi64() {
+    const fn test_mm512_maskz_srlv_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -9065,7 +9181,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srlv_epi64() {
+    const fn test_mm256_mask_srlv_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm256_set1_epi64x(1);
         let r = _mm256_mask_srlv_epi64(a, 0, a, count);
@@ -9076,7 +9192,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srlv_epi64() {
+    const fn test_mm256_maskz_srlv_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm256_set1_epi64x(1);
         let r = _mm256_maskz_srlv_epi64(0, a, count);
@@ -9087,7 +9203,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srlv_epi64() {
+    const fn test_mm_mask_srlv_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set1_epi64x(1);
         let r = _mm_mask_srlv_epi64(a, 0, a, count);
@@ -9098,7 +9214,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srlv_epi64() {
+    const fn test_mm_maskz_srlv_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set1_epi64x(1);
         let r = _mm_maskz_srlv_epi64(0, a, count);
@@ -9109,7 +9225,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sll_epi64() {
+    fn test_mm512_sll_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -9129,7 +9245,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sll_epi64() {
+    fn test_mm512_mask_sll_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 63, 1 << 32, 1 << 32, 1 << 32,
@@ -9148,7 +9264,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sll_epi64() {
+    fn test_mm512_maskz_sll_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -9163,7 +9279,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sll_epi64() {
+    fn test_mm256_mask_sll_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_mask_sll_epi64(a, 0, a, count);
@@ -9174,7 +9290,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sll_epi64() {
+    fn test_mm256_maskz_sll_epi64() {
         let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_maskz_sll_epi64(0, a, count);
@@ -9185,7 +9301,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sll_epi64() {
+    fn test_mm_mask_sll_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_mask_sll_epi64(a, 0, a, count);
@@ -9196,7 +9312,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sll_epi64() {
+    fn test_mm_maskz_sll_epi64() {
         let a = _mm_set_epi64x(1 << 63, 1 << 32);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_sll_epi64(0, a, count);
@@ -9207,7 +9323,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srl_epi64() {
+    fn test_mm512_srl_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0, 1 << 32, 1 << 32, 1 << 32,
@@ -9224,7 +9340,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srl_epi64() {
+    fn test_mm512_mask_srl_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 0, 1 << 32, 1 << 32, 1 << 32,
@@ -9243,7 +9359,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srl_epi64() {
+    fn test_mm512_maskz_srl_epi64() {
         #[rustfmt::skip]
         let a = _mm512_set_epi64(
             1 << 32, 1 << 32, 1 << 32, 1 << 32,
@@ -9258,7 +9374,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srl_epi64() {
+    fn test_mm256_mask_srl_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_mask_srl_epi64(a, 0, a, count);
@@ -9269,7 +9385,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srl_epi64() {
+    fn test_mm256_maskz_srl_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_maskz_srl_epi64(0, a, count);
@@ -9280,7 +9396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srl_epi64() {
+    fn test_mm_mask_srl_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_mask_srl_epi64(a, 0, a, count);
@@ -9291,7 +9407,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srl_epi64() {
+    fn test_mm_maskz_srl_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_srl_epi64(0, a, count);
@@ -9302,7 +9418,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_sra_epi64() {
+    fn test_mm512_sra_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm_set_epi64x(0, 2);
         let r = _mm512_sra_epi64(a, count);
@@ -9311,7 +9427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_sra_epi64() {
+    fn test_mm512_mask_sra_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm_set_epi64x(0, 2);
         let r = _mm512_mask_sra_epi64(a, 0, a, count);
@@ -9322,7 +9438,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_sra_epi64() {
+    fn test_mm512_maskz_sra_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm_set_epi64x(0, 2);
         let r = _mm512_maskz_sra_epi64(0, a, count);
@@ -9333,7 +9449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_sra_epi64() {
+    fn test_mm256_sra_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_sra_epi64(a, count);
@@ -9342,7 +9458,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_sra_epi64() {
+    fn test_mm256_mask_sra_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_mask_sra_epi64(a, 0, a, count);
@@ -9353,7 +9469,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_sra_epi64() {
+    fn test_mm256_maskz_sra_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm256_maskz_sra_epi64(0, a, count);
@@ -9364,7 +9480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_sra_epi64() {
+    fn test_mm_sra_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_sra_epi64(a, count);
@@ -9373,7 +9489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_sra_epi64() {
+    fn test_mm_mask_sra_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_mask_sra_epi64(a, 0, a, count);
@@ -9384,7 +9500,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_sra_epi64() {
+    fn test_mm_maskz_sra_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_sra_epi64(0, a, count);
@@ -9395,7 +9511,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srav_epi64() {
+    const fn test_mm512_srav_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1);
         let r = _mm512_srav_epi64(a, count);
@@ -9404,7 +9520,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srav_epi64() {
+    const fn test_mm512_mask_srav_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1);
         let r = _mm512_mask_srav_epi64(a, 0, a, count);
@@ -9415,7 +9531,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srav_epi64() {
+    const fn test_mm512_maskz_srav_epi64() {
         let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
         let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1);
         let r = _mm512_maskz_srav_epi64(0, a, count);
@@ -9426,7 +9542,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_srav_epi64() {
+    const fn test_mm256_srav_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm256_set1_epi64x(1);
         let r = _mm256_srav_epi64(a, count);
@@ -9435,7 +9551,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srav_epi64() {
+    const fn test_mm256_mask_srav_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm256_set1_epi64x(1);
         let r = _mm256_mask_srav_epi64(a, 0, a, count);
@@ -9446,7 +9562,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srav_epi64() {
+    const fn test_mm256_maskz_srav_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let count = _mm256_set1_epi64x(1);
         let r = _mm256_maskz_srav_epi64(0, a, count);
@@ -9457,7 +9573,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_srav_epi64() {
+    const fn test_mm_srav_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set1_epi64x(1);
         let r = _mm_srav_epi64(a, count);
@@ -9466,7 +9582,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srav_epi64() {
+    const fn test_mm_mask_srav_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set1_epi64x(1);
         let r = _mm_mask_srav_epi64(a, 0, a, count);
@@ -9477,7 +9593,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srav_epi64() {
+    const fn test_mm_maskz_srav_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let count = _mm_set1_epi64x(1);
         let r = _mm_maskz_srav_epi64(0, a, count);
@@ -9488,7 +9604,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_srai_epi64() {
+    const fn test_mm512_srai_epi64() {
         let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16);
         let r = _mm512_srai_epi64::<2>(a);
         let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4);
@@ -9496,7 +9612,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_srai_epi64() {
+    const fn test_mm512_mask_srai_epi64() {
         let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16);
         let r = _mm512_mask_srai_epi64::<2>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -9506,7 +9622,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_srai_epi64() {
+    const fn test_mm512_maskz_srai_epi64() {
         let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16);
         let r = _mm512_maskz_srai_epi64::<2>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -9516,7 +9632,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_srai_epi64() {
+    const fn test_mm256_srai_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let r = _mm256_srai_epi64::<1>(a);
         let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
@@ -9524,7 +9640,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_srai_epi64() {
+    const fn test_mm256_mask_srai_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let r = _mm256_mask_srai_epi64::<1>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -9534,7 +9650,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_srai_epi64() {
+    const fn test_mm256_maskz_srai_epi64() {
         let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
         let r = _mm256_maskz_srai_epi64::<1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -9544,7 +9660,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_srai_epi64() {
+    const fn test_mm_srai_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let r = _mm_srai_epi64::<1>(a);
         let e = _mm_set_epi64x(1 << 4, 0);
@@ -9552,7 +9668,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_srai_epi64() {
+    const fn test_mm_mask_srai_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let r = _mm_mask_srai_epi64::<1>(a, 0, a);
         assert_eq_m128i(r, a);
@@ -9562,7 +9678,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_srai_epi64() {
+    const fn test_mm_maskz_srai_epi64() {
         let a = _mm_set_epi64x(1 << 5, 0);
         let r = _mm_maskz_srai_epi64::<1>(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -9572,7 +9688,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permute_pd() {
+    const fn test_mm512_permute_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_permute_pd::<0b11_11_11_11>(a);
         let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.);
@@ -9580,7 +9696,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permute_pd() {
+    const fn test_mm512_mask_permute_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_mask_permute_pd::<0b11_11_11_11>(a, 0, a);
         assert_eq_m512d(r, a);
@@ -9590,7 +9706,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permute_pd() {
+    const fn test_mm512_maskz_permute_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_maskz_permute_pd::<0b11_11_11_11>(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -9600,7 +9716,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permute_pd() {
+    const fn test_mm256_mask_permute_pd() {
         let a = _mm256_set_pd(3., 2., 1., 0.);
         let r = _mm256_mask_permute_pd::<0b11_11>(a, 0, a);
         assert_eq_m256d(r, a);
@@ -9610,7 +9726,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permute_pd() {
+    const fn test_mm256_maskz_permute_pd() {
         let a = _mm256_set_pd(3., 2., 1., 0.);
         let r = _mm256_maskz_permute_pd::<0b11_11>(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -9620,7 +9736,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_permute_pd() {
+    const fn test_mm_mask_permute_pd() {
         let a = _mm_set_pd(1., 0.);
         let r = _mm_mask_permute_pd::<0b11>(a, 0, a);
         assert_eq_m128d(r, a);
@@ -9630,7 +9746,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_permute_pd() {
+    const fn test_mm_maskz_permute_pd() {
         let a = _mm_set_pd(1., 0.);
         let r = _mm_maskz_permute_pd::<0b11>(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -9640,7 +9756,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutex_epi64() {
+    const fn test_mm512_permutex_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_permutex_epi64::<0b11_11_11_11>(a);
         let e = _mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7);
@@ -9648,7 +9764,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutex_epi64() {
+    const fn test_mm512_mask_permutex_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_mask_permutex_epi64::<0b11_11_11_11>(a, 0, a);
         assert_eq_m512i(r, a);
@@ -9658,7 +9774,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutex_epi64() {
+    const fn test_mm512_maskz_permutex_epi64() {
         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_maskz_permutex_epi64::<0b11_11_11_11>(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -9668,7 +9784,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutex_epi64() {
+    const fn test_mm256_permutex_epi64() {
         let a = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_permutex_epi64::<0b11_11_11_11>(a);
         let e = _mm256_set_epi64x(3, 3, 3, 3);
@@ -9676,7 +9792,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutex_epi64() {
+    const fn test_mm256_mask_permutex_epi64() {
         let a = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_mask_permutex_epi64::<0b11_11_11_11>(a, 0, a);
         assert_eq_m256i(r, a);
@@ -9685,8 +9801,8 @@ mod tests {
         assert_eq_m256i(r, e);
     }
 
-    #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm256_maskz_permutex_epi64() {
+    #[simd_test(enable = "avx512f,avx512vl")]
+    const fn test_mm256_maskz_permutex_epi64() {
         let a = _mm256_set_epi64x(3, 2, 1, 0);
         let r = _mm256_maskz_permutex_epi64::<0b11_11_11_11>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -9696,7 +9812,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutex_pd() {
+    const fn test_mm512_permutex_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_permutex_pd::<0b11_11_11_11>(a);
         let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.);
@@ -9704,7 +9820,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutex_pd() {
+    const fn test_mm512_mask_permutex_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_mask_permutex_pd::<0b11_11_11_11>(a, 0, a);
         assert_eq_m512d(r, a);
@@ -9714,7 +9830,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutex_pd() {
+    const fn test_mm512_maskz_permutex_pd() {
         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_maskz_permutex_pd::<0b11_11_11_11>(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -9724,7 +9840,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutex_pd() {
+    const fn test_mm256_permutex_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_permutex_pd::<0b11_11_11_11>(a);
         let e = _mm256_set_pd(0., 0., 0., 0.);
@@ -9732,7 +9848,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutex_pd() {
+    const fn test_mm256_mask_permutex_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_mask_permutex_pd::<0b11_11_11_11>(a, 0, a);
         assert_eq_m256d(r, a);
@@ -9742,7 +9858,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutex_pd() {
+    const fn test_mm256_maskz_permutex_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_maskz_permutex_pd::<0b11_11_11_11>(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -9752,7 +9868,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutevar_pd() {
+    fn test_mm512_permutevar_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_set1_epi64(0b1);
         let r = _mm512_permutevar_pd(a, b);
@@ -9761,7 +9877,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutevar_pd() {
+    fn test_mm512_mask_permutevar_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_set1_epi64(0b1);
         let r = _mm512_mask_permutevar_pd(a, 0, a, b);
@@ -9772,7 +9888,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutevar_pd() {
+    fn test_mm512_maskz_permutevar_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let b = _mm512_set1_epi64(0b1);
         let r = _mm512_maskz_permutevar_pd(0, a, b);
@@ -9783,7 +9899,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutevar_pd() {
+    fn test_mm256_mask_permutevar_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let b = _mm256_set1_epi64x(0b1);
         let r = _mm256_mask_permutevar_pd(a, 0, a, b);
@@ -9794,7 +9910,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutevar_pd() {
+    fn test_mm256_maskz_permutevar_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let b = _mm256_set1_epi64x(0b1);
         let r = _mm256_maskz_permutevar_pd(0, a, b);
@@ -9805,7 +9921,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_permutevar_pd() {
+    fn test_mm_mask_permutevar_pd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set1_epi64x(0b1);
         let r = _mm_mask_permutevar_pd(a, 0, a, b);
@@ -9816,7 +9932,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_permutevar_pd() {
+    fn test_mm_maskz_permutevar_pd() {
         let a = _mm_set_pd(0., 1.);
         let b = _mm_set1_epi64x(0b1);
         let r = _mm_maskz_permutevar_pd(0, a, b);
@@ -9827,7 +9943,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutexvar_epi64() {
+    fn test_mm512_permutexvar_epi64() {
         let idx = _mm512_set1_epi64(1);
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_permutexvar_epi64(idx, a);
@@ -9836,7 +9952,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutexvar_epi64() {
+    fn test_mm512_mask_permutexvar_epi64() {
         let idx = _mm512_set1_epi64(1);
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_mask_permutexvar_epi64(a, 0, idx, a);
@@ -9847,7 +9963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutexvar_epi64() {
+    fn test_mm512_maskz_permutexvar_epi64() {
         let idx = _mm512_set1_epi64(1);
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_maskz_permutexvar_epi64(0, idx, a);
@@ -9858,7 +9974,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutexvar_epi64() {
+    fn test_mm256_permutexvar_epi64() {
         let idx = _mm256_set1_epi64x(1);
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_permutexvar_epi64(idx, a);
@@ -9867,7 +9983,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutexvar_epi64() {
+    fn test_mm256_mask_permutexvar_epi64() {
         let idx = _mm256_set1_epi64x(1);
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_mask_permutexvar_epi64(a, 0, idx, a);
@@ -9878,7 +9994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutexvar_epi64() {
+    fn test_mm256_maskz_permutexvar_epi64() {
         let idx = _mm256_set1_epi64x(1);
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_maskz_permutexvar_epi64(0, idx, a);
@@ -9889,7 +10005,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutexvar_pd() {
+    fn test_mm512_permutexvar_pd() {
         let idx = _mm512_set1_epi64(1);
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_permutexvar_pd(idx, a);
@@ -9898,7 +10014,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutexvar_pd() {
+    fn test_mm512_mask_permutexvar_pd() {
         let idx = _mm512_set1_epi64(1);
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_mask_permutexvar_pd(a, 0, idx, a);
@@ -9909,7 +10025,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutexvar_pd() {
+    fn test_mm512_maskz_permutexvar_pd() {
         let idx = _mm512_set1_epi64(1);
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_maskz_permutexvar_pd(0, idx, a);
@@ -9920,7 +10036,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutexvar_pd() {
+    fn test_mm256_permutexvar_pd() {
         let idx = _mm256_set1_epi64x(1);
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_permutexvar_pd(idx, a);
@@ -9929,7 +10045,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutexvar_pd() {
+    fn test_mm256_mask_permutexvar_pd() {
         let idx = _mm256_set1_epi64x(1);
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_mask_permutexvar_pd(a, 0, idx, a);
@@ -9940,7 +10056,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutexvar_pd() {
+    fn test_mm256_maskz_permutexvar_pd() {
         let idx = _mm256_set1_epi64x(1);
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_maskz_permutexvar_pd(0, idx, a);
@@ -9951,7 +10067,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutex2var_epi64() {
+    fn test_mm512_permutex2var_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm512_set1_epi64(100);
@@ -9961,7 +10077,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutex2var_epi64() {
+    fn test_mm512_mask_permutex2var_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm512_set1_epi64(100);
@@ -9973,7 +10089,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutex2var_epi64() {
+    fn test_mm512_maskz_permutex2var_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm512_set1_epi64(100);
@@ -9985,7 +10101,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask2_permutex2var_epi64() {
+    fn test_mm512_mask2_permutex2var_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let idx = _mm512_set_epi64(1000, 1 << 3, 2000, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm512_set1_epi64(100);
@@ -9997,7 +10113,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutex2var_epi64() {
+    fn test_mm256_permutex2var_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
         let b = _mm256_set1_epi64x(100);
@@ -10007,7 +10123,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutex2var_epi64() {
+    fn test_mm256_mask_permutex2var_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
         let b = _mm256_set1_epi64x(100);
@@ -10019,7 +10135,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutex2var_epi64() {
+    fn test_mm256_maskz_permutex2var_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
         let b = _mm256_set1_epi64x(100);
@@ -10031,7 +10147,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask2_permutex2var_epi64() {
+    fn test_mm256_mask2_permutex2var_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
         let b = _mm256_set1_epi64x(100);
@@ -10043,7 +10159,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_permutex2var_epi64() {
+    fn test_mm_permutex2var_epi64() {
         let a = _mm_set_epi64x(0, 1);
         let idx = _mm_set_epi64x(1, 1 << 1);
         let b = _mm_set1_epi64x(100);
@@ -10053,7 +10169,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_permutex2var_epi64() {
+    fn test_mm_mask_permutex2var_epi64() {
         let a = _mm_set_epi64x(0, 1);
         let idx = _mm_set_epi64x(1, 1 << 1);
         let b = _mm_set1_epi64x(100);
@@ -10065,7 +10181,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_permutex2var_epi64() {
+    fn test_mm_maskz_permutex2var_epi64() {
         let a = _mm_set_epi64x(0, 1);
         let idx = _mm_set_epi64x(1, 1 << 1);
         let b = _mm_set1_epi64x(100);
@@ -10077,7 +10193,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask2_permutex2var_epi64() {
+    fn test_mm_mask2_permutex2var_epi64() {
         let a = _mm_set_epi64x(0, 1);
         let idx = _mm_set_epi64x(1, 1 << 1);
         let b = _mm_set1_epi64x(100);
@@ -10089,7 +10205,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_permutex2var_pd() {
+    fn test_mm512_permutex2var_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm512_set1_pd(100.);
@@ -10099,7 +10215,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_permutex2var_pd() {
+    fn test_mm512_mask_permutex2var_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm512_set1_pd(100.);
@@ -10111,7 +10227,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_permutex2var_pd() {
+    fn test_mm512_maskz_permutex2var_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm512_set1_pd(100.);
@@ -10123,7 +10239,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask2_permutex2var_pd() {
+    fn test_mm512_mask2_permutex2var_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
         let b = _mm512_set1_pd(100.);
@@ -10135,7 +10251,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_permutex2var_pd() {
+    fn test_mm256_permutex2var_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
         let b = _mm256_set1_pd(100.);
@@ -10145,7 +10261,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_permutex2var_pd() {
+    fn test_mm256_mask_permutex2var_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
         let b = _mm256_set1_pd(100.);
@@ -10157,7 +10273,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_permutex2var_pd() {
+    fn test_mm256_maskz_permutex2var_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
         let b = _mm256_set1_pd(100.);
@@ -10169,7 +10285,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask2_permutex2var_pd() {
+    fn test_mm256_mask2_permutex2var_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
         let b = _mm256_set1_pd(100.);
@@ -10181,7 +10297,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_permutex2var_pd() {
+    fn test_mm_permutex2var_pd() {
         let a = _mm_set_pd(0., 1.);
         let idx = _mm_set_epi64x(1, 1 << 1);
         let b = _mm_set1_pd(100.);
@@ -10191,7 +10307,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_permutex2var_pd() {
+    fn test_mm_mask_permutex2var_pd() {
         let a = _mm_set_pd(0., 1.);
         let idx = _mm_set_epi64x(1, 1 << 1);
         let b = _mm_set1_pd(100.);
@@ -10203,7 +10319,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_permutex2var_pd() {
+    fn test_mm_maskz_permutex2var_pd() {
         let a = _mm_set_pd(0., 1.);
         let idx = _mm_set_epi64x(1, 1 << 1);
         let b = _mm_set1_pd(100.);
@@ -10215,7 +10331,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask2_permutex2var_pd() {
+    fn test_mm_mask2_permutex2var_pd() {
         let a = _mm_set_pd(0., 1.);
         let idx = _mm_set_epi64x(1, 1 << 1);
         let b = _mm_set1_pd(100.);
@@ -10227,7 +10343,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_shuffle_pd() {
+    const fn test_mm256_mask_shuffle_pd() {
         let a = _mm256_set_pd(1., 4., 5., 8.);
         let b = _mm256_set_pd(2., 3., 6., 7.);
         let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
@@ -10238,7 +10354,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_shuffle_pd() {
+    const fn test_mm256_maskz_shuffle_pd() {
         let a = _mm256_set_pd(1., 4., 5., 8.);
         let b = _mm256_set_pd(2., 3., 6., 7.);
         let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
@@ -10249,7 +10365,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_shuffle_pd() {
+    const fn test_mm_mask_shuffle_pd() {
         let a = _mm_set_pd(1., 4.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
@@ -10260,7 +10376,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_shuffle_pd() {
+    const fn test_mm_maskz_shuffle_pd() {
         let a = _mm_set_pd(1., 4.);
         let b = _mm_set_pd(2., 3.);
         let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
@@ -10271,7 +10387,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_shuffle_i64x2() {
+    const fn test_mm512_shuffle_i64x2() {
         let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm512_shuffle_i64x2::<0b00_00_00_00>(a, b);
@@ -10280,7 +10396,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_shuffle_i64x2() {
+    const fn test_mm512_mask_shuffle_i64x2() {
         let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0, a, b);
@@ -10291,7 +10407,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_shuffle_i64x2() {
+    const fn test_mm512_maskz_shuffle_i64x2() {
         let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16);
         let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15);
         let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0, a, b);
@@ -10302,7 +10418,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_shuffle_i64x2() {
+    const fn test_mm256_shuffle_i64x2() {
         let a = _mm256_set_epi64x(1, 4, 5, 8);
         let b = _mm256_set_epi64x(2, 3, 6, 7);
         let r = _mm256_shuffle_i64x2::<0b00>(a, b);
@@ -10311,7 +10427,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_shuffle_i64x2() {
+    const fn test_mm256_mask_shuffle_i64x2() {
         let a = _mm256_set_epi64x(1, 4, 5, 8);
         let b = _mm256_set_epi64x(2, 3, 6, 7);
         let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0, a, b);
@@ -10322,7 +10438,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_shuffle_i64x2() {
+    const fn test_mm256_maskz_shuffle_i64x2() {
         let a = _mm256_set_epi64x(1, 4, 5, 8);
         let b = _mm256_set_epi64x(2, 3, 6, 7);
         let r = _mm256_maskz_shuffle_i64x2::<0b00>(0, a, b);
@@ -10333,7 +10449,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_shuffle_f64x2() {
+    const fn test_mm512_shuffle_f64x2() {
         let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm512_shuffle_f64x2::<0b00_00_00_00>(a, b);
@@ -10342,7 +10458,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_shuffle_f64x2() {
+    const fn test_mm512_mask_shuffle_f64x2() {
         let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0, a, b);
@@ -10353,7 +10469,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_shuffle_f64x2() {
+    const fn test_mm512_maskz_shuffle_f64x2() {
         let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.);
         let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.);
         let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0, a, b);
@@ -10364,7 +10480,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_shuffle_f64x2() {
+    const fn test_mm256_shuffle_f64x2() {
         let a = _mm256_set_pd(1., 4., 5., 8.);
         let b = _mm256_set_pd(2., 3., 6., 7.);
         let r = _mm256_shuffle_f64x2::<0b00>(a, b);
@@ -10373,7 +10489,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_shuffle_f64x2() {
+    const fn test_mm256_mask_shuffle_f64x2() {
         let a = _mm256_set_pd(1., 4., 5., 8.);
         let b = _mm256_set_pd(2., 3., 6., 7.);
         let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0, a, b);
@@ -10384,7 +10500,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_shuffle_f64x2() {
+    const fn test_mm256_maskz_shuffle_f64x2() {
         let a = _mm256_set_pd(1., 4., 5., 8.);
         let b = _mm256_set_pd(2., 3., 6., 7.);
         let r = _mm256_maskz_shuffle_f64x2::<0b00>(0, a, b);
@@ -10395,7 +10511,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_movedup_pd() {
+    const fn test_mm512_movedup_pd() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_movedup_pd(a);
         let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.);
@@ -10403,7 +10519,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_movedup_pd() {
+    const fn test_mm512_mask_movedup_pd() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_mask_movedup_pd(a, 0, a);
         assert_eq_m512d(r, a);
@@ -10413,7 +10529,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_movedup_pd() {
+    const fn test_mm512_maskz_movedup_pd() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_movedup_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -10423,7 +10539,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_movedup_pd() {
+    const fn test_mm256_mask_movedup_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_mask_movedup_pd(a, 0, a);
         assert_eq_m256d(r, a);
@@ -10433,7 +10549,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_movedup_pd() {
+    const fn test_mm256_maskz_movedup_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let r = _mm256_maskz_movedup_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -10443,7 +10559,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_movedup_pd() {
+    const fn test_mm_mask_movedup_pd() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_mask_movedup_pd(a, 0, a);
         assert_eq_m128d(r, a);
@@ -10453,7 +10569,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_movedup_pd() {
+    const fn test_mm_maskz_movedup_pd() {
         let a = _mm_set_pd(1., 2.);
         let r = _mm_maskz_movedup_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -10463,7 +10579,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_inserti64x4() {
+    const fn test_mm512_inserti64x4() {
         let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_setr_epi64x(17, 18, 19, 20);
         let r = _mm512_inserti64x4::<1>(a, b);
@@ -10472,7 +10588,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_inserti64x4() {
+    const fn test_mm512_mask_inserti64x4() {
         let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_setr_epi64x(17, 18, 19, 20);
         let r = _mm512_mask_inserti64x4::<1>(a, 0, a, b);
@@ -10483,7 +10599,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_inserti64x4() {
+    const fn test_mm512_maskz_inserti64x4() {
         let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm256_setr_epi64x(17, 18, 19, 20);
         let r = _mm512_maskz_inserti64x4::<1>(0, a, b);
@@ -10494,7 +10610,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_insertf64x4() {
+    const fn test_mm512_insertf64x4() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_setr_pd(17., 18., 19., 20.);
         let r = _mm512_insertf64x4::<1>(a, b);
@@ -10503,7 +10619,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_insertf64x4() {
+    const fn test_mm512_mask_insertf64x4() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_setr_pd(17., 18., 19., 20.);
         let r = _mm512_mask_insertf64x4::<1>(a, 0, a, b);
@@ -10514,7 +10630,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_insertf64x4() {
+    const fn test_mm512_maskz_insertf64x4() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm256_setr_pd(17., 18., 19., 20.);
         let r = _mm512_maskz_insertf64x4::<1>(0, a, b);
@@ -10525,21 +10641,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castpd128_pd512() {
+    const fn test_mm512_castpd128_pd512() {
         let a = _mm_setr_pd(17., 18.);
         let r = _mm512_castpd128_pd512(a);
         assert_eq_m128d(_mm512_castpd512_pd128(r), a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castpd256_pd512() {
+    const fn test_mm512_castpd256_pd512() {
         let a = _mm256_setr_pd(17., 18., 19., 20.);
         let r = _mm512_castpd256_pd512(a);
         assert_eq_m256d(_mm512_castpd512_pd256(r), a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_zextpd128_pd512() {
+    const fn test_mm512_zextpd128_pd512() {
         let a = _mm_setr_pd(17., 18.);
         let r = _mm512_zextpd128_pd512(a);
         let e = _mm512_setr_pd(17., 18., 0., 0., 0., 0., 0., 0.);
@@ -10547,7 +10663,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_zextpd256_pd512() {
+    const fn test_mm512_zextpd256_pd512() {
         let a = _mm256_setr_pd(17., 18., 19., 20.);
         let r = _mm512_zextpd256_pd512(a);
         let e = _mm512_setr_pd(17., 18., 19., 20., 0., 0., 0., 0.);
@@ -10555,7 +10671,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castpd512_pd128() {
+    const fn test_mm512_castpd512_pd128() {
         let a = _mm512_setr_pd(17., 18., -1., -1., -1., -1., -1., -1.);
         let r = _mm512_castpd512_pd128(a);
         let e = _mm_setr_pd(17., 18.);
@@ -10563,7 +10679,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castpd512_pd256() {
+    const fn test_mm512_castpd512_pd256() {
         let a = _mm512_setr_pd(17., 18., 19., 20., -1., -1., -1., -1.);
         let r = _mm512_castpd512_pd256(a);
         let e = _mm256_setr_pd(17., 18., 19., 20.);
@@ -10571,7 +10687,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castpd_ps() {
+    const fn test_mm512_castpd_ps() {
         let a = _mm512_set1_pd(1.);
         let r = _mm512_castpd_ps(a);
         let e = _mm512_set_ps(
@@ -10582,7 +10698,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castpd_si512() {
+    const fn test_mm512_castpd_si512() {
         let a = _mm512_set1_pd(1.);
         let r = _mm512_castpd_si512(a);
         let e = _mm512_set_epi32(
@@ -10593,21 +10709,21 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castsi128_si512() {
+    const fn test_mm512_castsi128_si512() {
         let a = _mm_setr_epi64x(17, 18);
         let r = _mm512_castsi128_si512(a);
         assert_eq_m128i(_mm512_castsi512_si128(r), a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castsi256_si512() {
+    const fn test_mm512_castsi256_si512() {
         let a = _mm256_setr_epi64x(17, 18, 19, 20);
         let r = _mm512_castsi256_si512(a);
         assert_eq_m256i(_mm512_castsi512_si256(r), a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_zextsi128_si512() {
+    const fn test_mm512_zextsi128_si512() {
         let a = _mm_setr_epi64x(17, 18);
         let r = _mm512_zextsi128_si512(a);
         let e = _mm512_setr_epi64(17, 18, 0, 0, 0, 0, 0, 0);
@@ -10615,7 +10731,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_zextsi256_si512() {
+    const fn test_mm512_zextsi256_si512() {
         let a = _mm256_setr_epi64x(17, 18, 19, 20);
         let r = _mm512_zextsi256_si512(a);
         let e = _mm512_setr_epi64(17, 18, 19, 20, 0, 0, 0, 0);
@@ -10623,7 +10739,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castsi512_si128() {
+    const fn test_mm512_castsi512_si128() {
         let a = _mm512_setr_epi64(17, 18, -1, -1, -1, -1, -1, -1);
         let r = _mm512_castsi512_si128(a);
         let e = _mm_setr_epi64x(17, 18);
@@ -10631,7 +10747,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castsi512_si256() {
+    const fn test_mm512_castsi512_si256() {
         let a = _mm512_setr_epi64(17, 18, 19, 20, -1, -1, -1, -1);
         let r = _mm512_castsi512_si256(a);
         let e = _mm256_setr_epi64x(17, 18, 19, 20);
@@ -10639,7 +10755,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castsi512_ps() {
+    const fn test_mm512_castsi512_ps() {
         let a = _mm512_set1_epi64(1 << 62);
         let r = _mm512_castsi512_ps(a);
         let e = _mm512_set_ps(
@@ -10649,7 +10765,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_castsi512_pd() {
+    const fn test_mm512_castsi512_pd() {
         let a = _mm512_set1_epi64(1 << 62);
         let r = _mm512_castsi512_pd(a);
         let e = _mm512_set_pd(2., 2., 2., 2., 2., 2., 2., 2.);
@@ -10657,7 +10773,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_broadcastq_epi64() {
+    const fn test_mm512_broadcastq_epi64() {
         let a = _mm_setr_epi64x(17, 18);
         let r = _mm512_broadcastq_epi64(a);
         let e = _mm512_set1_epi64(17);
@@ -10665,7 +10781,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_broadcastq_epi64() {
+    const fn test_mm512_mask_broadcastq_epi64() {
         let src = _mm512_set1_epi64(18);
         let a = _mm_setr_epi64x(17, 18);
         let r = _mm512_mask_broadcastq_epi64(src, 0, a);
@@ -10676,7 +10792,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_broadcastq_epi64() {
+    const fn test_mm512_maskz_broadcastq_epi64() {
         let a = _mm_setr_epi64x(17, 18);
         let r = _mm512_maskz_broadcastq_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -10686,7 +10802,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_broadcastq_epi64() {
+    const fn test_mm256_mask_broadcastq_epi64() {
         let src = _mm256_set1_epi64x(18);
         let a = _mm_set_epi64x(17, 18);
         let r = _mm256_mask_broadcastq_epi64(src, 0, a);
@@ -10697,7 +10813,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcastq_epi64() {
+    const fn test_mm256_maskz_broadcastq_epi64() {
         let a = _mm_set_epi64x(17, 18);
         let r = _mm256_maskz_broadcastq_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -10707,7 +10823,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_broadcastq_epi64() {
+    const fn test_mm_mask_broadcastq_epi64() {
         let src = _mm_set1_epi64x(18);
         let a = _mm_set_epi64x(17, 18);
         let r = _mm_mask_broadcastq_epi64(src, 0, a);
@@ -10718,7 +10834,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_broadcastq_epi64() {
+    const fn test_mm_maskz_broadcastq_epi64() {
         let a = _mm_set_epi64x(17, 18);
         let r = _mm_maskz_broadcastq_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -10728,7 +10844,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_broadcastsd_pd() {
+    const fn test_mm512_broadcastsd_pd() {
         let a = _mm_set_pd(17., 18.);
         let r = _mm512_broadcastsd_pd(a);
         let e = _mm512_set1_pd(18.);
@@ -10736,7 +10852,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_broadcastsd_pd() {
+    const fn test_mm512_mask_broadcastsd_pd() {
         let src = _mm512_set1_pd(18.);
         let a = _mm_set_pd(17., 18.);
         let r = _mm512_mask_broadcastsd_pd(src, 0, a);
@@ -10747,7 +10863,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_broadcastsd_pd() {
+    const fn test_mm512_maskz_broadcastsd_pd() {
         let a = _mm_set_pd(17., 18.);
         let r = _mm512_maskz_broadcastsd_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -10757,7 +10873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_broadcastsd_pd() {
+    const fn test_mm256_mask_broadcastsd_pd() {
         let src = _mm256_set1_pd(18.);
         let a = _mm_set_pd(17., 18.);
         let r = _mm256_mask_broadcastsd_pd(src, 0, a);
@@ -10768,7 +10884,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_broadcastsd_pd() {
+    const fn test_mm256_maskz_broadcastsd_pd() {
         let a = _mm_set_pd(17., 18.);
         let r = _mm256_maskz_broadcastsd_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -10778,7 +10894,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_broadcast_i64x4() {
+    const fn test_mm512_broadcast_i64x4() {
         let a = _mm256_set_epi64x(17, 18, 19, 20);
         let r = _mm512_broadcast_i64x4(a);
         let e = _mm512_set_epi64(17, 18, 19, 20, 17, 18, 19, 20);
@@ -10786,7 +10902,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_broadcast_i64x4() {
+    const fn test_mm512_mask_broadcast_i64x4() {
         let src = _mm512_set1_epi64(18);
         let a = _mm256_set_epi64x(17, 18, 19, 20);
         let r = _mm512_mask_broadcast_i64x4(src, 0, a);
@@ -10797,7 +10913,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_broadcast_i64x4() {
+    const fn test_mm512_maskz_broadcast_i64x4() {
         let a = _mm256_set_epi64x(17, 18, 19, 20);
         let r = _mm512_maskz_broadcast_i64x4(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -10807,7 +10923,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_broadcast_f64x4() {
+    const fn test_mm512_broadcast_f64x4() {
         let a = _mm256_set_pd(17., 18., 19., 20.);
         let r = _mm512_broadcast_f64x4(a);
         let e = _mm512_set_pd(17., 18., 19., 20., 17., 18., 19., 20.);
@@ -10815,7 +10931,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_broadcast_f64x4() {
+    const fn test_mm512_mask_broadcast_f64x4() {
         let src = _mm512_set1_pd(18.);
         let a = _mm256_set_pd(17., 18., 19., 20.);
         let r = _mm512_mask_broadcast_f64x4(src, 0, a);
@@ -10826,7 +10942,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_broadcast_f64x4() {
+    const fn test_mm512_maskz_broadcast_f64x4() {
         let a = _mm256_set_pd(17., 18., 19., 20.);
         let r = _mm512_maskz_broadcast_f64x4(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -10836,7 +10952,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_blend_epi64() {
+    const fn test_mm512_mask_blend_epi64() {
         let a = _mm512_set1_epi64(1);
         let b = _mm512_set1_epi64(2);
         let r = _mm512_mask_blend_epi64(0b11110000, a, b);
@@ -10845,7 +10961,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_blend_epi64() {
+    const fn test_mm256_mask_blend_epi64() {
         let a = _mm256_set1_epi64x(1);
         let b = _mm256_set1_epi64x(2);
         let r = _mm256_mask_blend_epi64(0b00001111, a, b);
@@ -10854,7 +10970,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_blend_epi64() {
+    const fn test_mm_mask_blend_epi64() {
         let a = _mm_set1_epi64x(1);
         let b = _mm_set1_epi64x(2);
         let r = _mm_mask_blend_epi64(0b00000011, a, b);
@@ -10863,7 +10979,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_blend_pd() {
+    const fn test_mm512_mask_blend_pd() {
         let a = _mm512_set1_pd(1.);
         let b = _mm512_set1_pd(2.);
         let r = _mm512_mask_blend_pd(0b11110000, a, b);
@@ -10872,7 +10988,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_blend_pd() {
+    const fn test_mm256_mask_blend_pd() {
         let a = _mm256_set1_pd(1.);
         let b = _mm256_set1_pd(2.);
         let r = _mm256_mask_blend_pd(0b00001111, a, b);
@@ -10881,7 +10997,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_blend_pd() {
+    const fn test_mm_mask_blend_pd() {
         let a = _mm_set1_pd(1.);
         let b = _mm_set1_pd(2.);
         let r = _mm_mask_blend_pd(0b00000011, a, b);
@@ -10890,7 +11006,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_unpackhi_epi64() {
+    const fn test_mm512_unpackhi_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_unpackhi_epi64(a, b);
@@ -10899,7 +11015,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_unpackhi_epi64() {
+    const fn test_mm512_mask_unpackhi_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_mask_unpackhi_epi64(a, 0, a, b);
@@ -10910,7 +11026,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_unpackhi_epi64() {
+    const fn test_mm512_maskz_unpackhi_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_maskz_unpackhi_epi64(0, a, b);
@@ -10921,7 +11037,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_unpackhi_epi64() {
+    const fn test_mm256_mask_unpackhi_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm256_set_epi64x(17, 18, 19, 20);
         let r = _mm256_mask_unpackhi_epi64(a, 0, a, b);
@@ -10932,7 +11048,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_unpackhi_epi64() {
+    const fn test_mm256_maskz_unpackhi_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm256_set_epi64x(17, 18, 19, 20);
         let r = _mm256_maskz_unpackhi_epi64(0, a, b);
@@ -10943,7 +11059,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_unpackhi_epi64() {
+    const fn test_mm_mask_unpackhi_epi64() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_epi64x(17, 18);
         let r = _mm_mask_unpackhi_epi64(a, 0, a, b);
@@ -10954,7 +11070,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_unpackhi_epi64() {
+    const fn test_mm_maskz_unpackhi_epi64() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_epi64x(17, 18);
         let r = _mm_maskz_unpackhi_epi64(0, a, b);
@@ -10965,7 +11081,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_unpackhi_pd() {
+    const fn test_mm512_unpackhi_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm512_unpackhi_pd(a, b);
@@ -10974,7 +11090,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_unpackhi_pd() {
+    const fn test_mm512_mask_unpackhi_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm512_mask_unpackhi_pd(a, 0, a, b);
@@ -10985,7 +11101,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_unpackhi_pd() {
+    const fn test_mm512_maskz_unpackhi_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm512_maskz_unpackhi_pd(0, a, b);
@@ -10996,7 +11112,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_unpackhi_pd() {
+    const fn test_mm256_mask_unpackhi_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_pd(17., 18., 19., 20.);
         let r = _mm256_mask_unpackhi_pd(a, 0, a, b);
@@ -11007,7 +11123,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_unpackhi_pd() {
+    const fn test_mm256_maskz_unpackhi_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_pd(17., 18., 19., 20.);
         let r = _mm256_maskz_unpackhi_pd(0, a, b);
@@ -11018,7 +11134,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_unpackhi_pd() {
+    const fn test_mm_mask_unpackhi_pd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(17., 18.);
         let r = _mm_mask_unpackhi_pd(a, 0, a, b);
@@ -11029,7 +11145,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_unpackhi_pd() {
+    const fn test_mm_maskz_unpackhi_pd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(17., 18.);
         let r = _mm_maskz_unpackhi_pd(0, a, b);
@@ -11040,7 +11156,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_unpacklo_epi64() {
+    const fn test_mm512_unpacklo_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_unpacklo_epi64(a, b);
@@ -11049,7 +11165,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_unpacklo_epi64() {
+    const fn test_mm512_mask_unpacklo_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_mask_unpacklo_epi64(a, 0, a, b);
@@ -11060,7 +11176,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_unpacklo_epi64() {
+    const fn test_mm512_maskz_unpacklo_epi64() {
         let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
         let r = _mm512_maskz_unpacklo_epi64(0, a, b);
@@ -11071,7 +11187,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_unpacklo_epi64() {
+    const fn test_mm256_mask_unpacklo_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm256_set_epi64x(17, 18, 19, 20);
         let r = _mm256_mask_unpacklo_epi64(a, 0, a, b);
@@ -11082,7 +11198,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_unpacklo_epi64() {
+    const fn test_mm256_maskz_unpacklo_epi64() {
         let a = _mm256_set_epi64x(1, 2, 3, 4);
         let b = _mm256_set_epi64x(17, 18, 19, 20);
         let r = _mm256_maskz_unpacklo_epi64(0, a, b);
@@ -11093,7 +11209,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_unpacklo_epi64() {
+    const fn test_mm_mask_unpacklo_epi64() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_epi64x(17, 18);
         let r = _mm_mask_unpacklo_epi64(a, 0, a, b);
@@ -11104,7 +11220,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_unpacklo_epi64() {
+    const fn test_mm_maskz_unpacklo_epi64() {
         let a = _mm_set_epi64x(1, 2);
         let b = _mm_set_epi64x(17, 18);
         let r = _mm_maskz_unpacklo_epi64(0, a, b);
@@ -11115,7 +11231,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_unpacklo_pd() {
+    const fn test_mm512_unpacklo_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm512_unpacklo_pd(a, b);
@@ -11124,7 +11240,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_unpacklo_pd() {
+    const fn test_mm512_mask_unpacklo_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm512_mask_unpacklo_pd(a, 0, a, b);
@@ -11135,7 +11251,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_unpacklo_pd() {
+    const fn test_mm512_maskz_unpacklo_pd() {
         let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
         let r = _mm512_maskz_unpacklo_pd(0, a, b);
@@ -11146,7 +11262,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_unpacklo_pd() {
+    const fn test_mm256_mask_unpacklo_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_pd(17., 18., 19., 20.);
         let r = _mm256_mask_unpacklo_pd(a, 0, a, b);
@@ -11157,7 +11273,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_unpacklo_pd() {
+    const fn test_mm256_maskz_unpacklo_pd() {
         let a = _mm256_set_pd(1., 2., 3., 4.);
         let b = _mm256_set_pd(17., 18., 19., 20.);
         let r = _mm256_maskz_unpacklo_pd(0, a, b);
@@ -11168,7 +11284,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_unpacklo_pd() {
+    const fn test_mm_mask_unpacklo_pd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(17., 18.);
         let r = _mm_mask_unpacklo_pd(a, 0, a, b);
@@ -11179,7 +11295,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_unpacklo_pd() {
+    const fn test_mm_maskz_unpacklo_pd() {
         let a = _mm_set_pd(1., 2.);
         let b = _mm_set_pd(17., 18.);
         let r = _mm_maskz_unpacklo_pd(0, a, b);
@@ -11190,7 +11306,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_alignr_epi64() {
+    const fn test_mm512_alignr_epi64() {
         let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1);
         let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9);
         let r = _mm512_alignr_epi64::<0>(a, b);
@@ -11203,7 +11319,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_alignr_epi64() {
+    const fn test_mm512_mask_alignr_epi64() {
         let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1);
         let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9);
         let r = _mm512_mask_alignr_epi64::<1>(a, 0, a, b);
@@ -11214,7 +11330,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_alignr_epi64() {
+    const fn test_mm512_maskz_alignr_epi64() {
         let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1);
         let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9);
         let r = _mm512_maskz_alignr_epi64::<1>(0, a, b);
@@ -11225,7 +11341,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_alignr_epi64() {
+    const fn test_mm256_alignr_epi64() {
         let a = _mm256_set_epi64x(4, 3, 2, 1);
         let b = _mm256_set_epi64x(8, 7, 6, 5);
         let r = _mm256_alignr_epi64::<0>(a, b);
@@ -11240,7 +11356,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_alignr_epi64() {
+    const fn test_mm256_mask_alignr_epi64() {
         let a = _mm256_set_epi64x(4, 3, 2, 1);
         let b = _mm256_set_epi64x(8, 7, 6, 5);
         let r = _mm256_mask_alignr_epi64::<1>(a, 0, a, b);
@@ -11251,7 +11367,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_alignr_epi64() {
+    const fn test_mm256_maskz_alignr_epi64() {
         let a = _mm256_set_epi64x(4, 3, 2, 1);
         let b = _mm256_set_epi64x(8, 7, 6, 5);
         let r = _mm256_maskz_alignr_epi64::<1>(0, a, b);
@@ -11262,7 +11378,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_alignr_epi64() {
+    const fn test_mm_alignr_epi64() {
         let a = _mm_set_epi64x(2, 1);
         let b = _mm_set_epi64x(4, 3);
         let r = _mm_alignr_epi64::<0>(a, b);
@@ -11271,7 +11387,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_alignr_epi64() {
+    const fn test_mm_mask_alignr_epi64() {
         let a = _mm_set_epi64x(2, 1);
         let b = _mm_set_epi64x(4, 3);
         let r = _mm_mask_alignr_epi64::<1>(a, 0, a, b);
@@ -11282,7 +11398,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_alignr_epi64() {
+    const fn test_mm_maskz_alignr_epi64() {
         let a = _mm_set_epi64x(2, 1);
         let b = _mm_set_epi64x(4, 3);
         let r = _mm_maskz_alignr_epi64::<1>(0, a, b);
@@ -11293,7 +11409,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_and_epi64() {
+    const fn test_mm512_and_epi64() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_and_epi64(a, b);
@@ -11302,7 +11418,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_and_epi64() {
+    const fn test_mm512_mask_and_epi64() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_mask_and_epi64(a, 0, a, b);
@@ -11313,7 +11429,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_and_epi64() {
+    const fn test_mm512_maskz_and_epi64() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_maskz_and_epi64(0, a, b);
@@ -11324,7 +11440,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_and_epi64() {
+    const fn test_mm256_mask_and_epi64() {
         let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm256_set1_epi64x(1 << 0);
         let r = _mm256_mask_and_epi64(a, 0, a, b);
@@ -11335,7 +11451,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_and_epi64() {
+    const fn test_mm256_maskz_and_epi64() {
         let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm256_set1_epi64x(1 << 0);
         let r = _mm256_maskz_and_epi64(0, a, b);
@@ -11346,7 +11462,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_and_epi64() {
+    const fn test_mm_mask_and_epi64() {
         let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm_set1_epi64x(1 << 0);
         let r = _mm_mask_and_epi64(a, 0, a, b);
@@ -11357,7 +11473,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_and_epi64() {
+    const fn test_mm_maskz_and_epi64() {
         let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm_set1_epi64x(1 << 0);
         let r = _mm_maskz_and_epi64(0, a, b);
@@ -11368,7 +11484,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_and_si512() {
+    const fn test_mm512_and_si512() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_and_epi64(a, b);
@@ -11377,7 +11493,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_or_epi64() {
+    const fn test_mm512_or_epi64() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_or_epi64(a, b);
@@ -11390,7 +11506,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_or_epi64() {
+    const fn test_mm512_mask_or_epi64() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_mask_or_epi64(a, 0, a, b);
@@ -11405,7 +11521,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_or_epi64() {
+    const fn test_mm512_maskz_or_epi64() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_maskz_or_epi64(0, a, b);
@@ -11416,7 +11532,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_or_epi64() {
+    const fn test_mm256_or_epi64() {
         let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm256_set1_epi64x(1 << 13);
         let r = _mm256_or_epi64(a, b);
@@ -11425,7 +11541,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_or_epi64() {
+    const fn test_mm256_mask_or_epi64() {
         let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm256_set1_epi64x(1 << 13);
         let r = _mm256_mask_or_epi64(a, 0, a, b);
@@ -11436,7 +11552,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_or_epi64() {
+    const fn test_mm256_maskz_or_epi64() {
         let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm256_set1_epi64x(1 << 13);
         let r = _mm256_maskz_or_epi64(0, a, b);
@@ -11447,7 +11563,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_or_epi64() {
+    const fn test_mm_or_epi64() {
         let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm_set1_epi64x(1 << 13);
         let r = _mm_or_epi64(a, b);
@@ -11456,7 +11572,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_or_epi64() {
+    const fn test_mm_mask_or_epi64() {
         let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm_set1_epi64x(1 << 13);
         let r = _mm_mask_or_epi64(a, 0, a, b);
@@ -11467,7 +11583,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_or_epi64() {
+    const fn test_mm_maskz_or_epi64() {
         let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm_set1_epi64x(1 << 13);
         let r = _mm_maskz_or_epi64(0, a, b);
@@ -11478,7 +11594,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_or_si512() {
+    const fn test_mm512_or_si512() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_or_epi64(a, b);
@@ -11491,7 +11607,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_xor_epi64() {
+    const fn test_mm512_xor_epi64() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_xor_epi64(a, b);
@@ -11500,7 +11616,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_xor_epi64() {
+    const fn test_mm512_mask_xor_epi64() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_mask_xor_epi64(a, 0, a, b);
@@ -11511,7 +11627,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_xor_epi64() {
+    const fn test_mm512_maskz_xor_epi64() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_maskz_xor_epi64(0, a, b);
@@ -11522,7 +11638,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_xor_epi64() {
+    const fn test_mm256_xor_epi64() {
         let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm256_set1_epi64x(1 << 13);
         let r = _mm256_xor_epi64(a, b);
@@ -11531,7 +11647,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_xor_epi64() {
+    const fn test_mm256_mask_xor_epi64() {
         let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm256_set1_epi64x(1 << 13);
         let r = _mm256_mask_xor_epi64(a, 0, a, b);
@@ -11542,7 +11658,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_xor_epi64() {
+    const fn test_mm256_maskz_xor_epi64() {
         let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm256_set1_epi64x(1 << 13);
         let r = _mm256_maskz_xor_epi64(0, a, b);
@@ -11553,7 +11669,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_xor_epi64() {
+    const fn test_mm_xor_epi64() {
         let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm_set1_epi64x(1 << 13);
         let r = _mm_xor_epi64(a, b);
@@ -11562,7 +11678,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_xor_epi64() {
+    const fn test_mm_mask_xor_epi64() {
         let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm_set1_epi64x(1 << 13);
         let r = _mm_mask_xor_epi64(a, 0, a, b);
@@ -11573,7 +11689,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_xor_epi64() {
+    const fn test_mm_maskz_xor_epi64() {
         let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
         let b = _mm_set1_epi64x(1 << 13);
         let r = _mm_maskz_xor_epi64(0, a, b);
@@ -11584,7 +11700,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_xor_si512() {
+    const fn test_mm512_xor_si512() {
         let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
         let r = _mm512_xor_epi64(a, b);
@@ -11593,7 +11709,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_andnot_epi64() {
+    const fn test_mm512_andnot_epi64() {
         let a = _mm512_set1_epi64(0);
         let b = _mm512_set1_epi64(1 << 3 | 1 << 4);
         let r = _mm512_andnot_epi64(a, b);
@@ -11602,7 +11718,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_andnot_epi64() {
+    const fn test_mm512_mask_andnot_epi64() {
         let a = _mm512_set1_epi64(1 << 1 | 1 << 2);
         let b = _mm512_set1_epi64(1 << 3 | 1 << 4);
         let r = _mm512_mask_andnot_epi64(a, 0, a, b);
@@ -11613,7 +11729,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_andnot_epi64() {
+    const fn test_mm512_maskz_andnot_epi64() {
         let a = _mm512_set1_epi64(1 << 1 | 1 << 2);
         let b = _mm512_set1_epi64(1 << 3 | 1 << 4);
         let r = _mm512_maskz_andnot_epi64(0, a, b);
@@ -11628,7 +11744,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_andnot_epi64() {
+    const fn test_mm256_mask_andnot_epi64() {
         let a = _mm256_set1_epi64x(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi64x(1 << 3 | 1 << 4);
         let r = _mm256_mask_andnot_epi64(a, 0, a, b);
@@ -11639,7 +11755,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_andnot_epi64() {
+    const fn test_mm256_maskz_andnot_epi64() {
         let a = _mm256_set1_epi64x(1 << 1 | 1 << 2);
         let b = _mm256_set1_epi64x(1 << 3 | 1 << 4);
         let r = _mm256_maskz_andnot_epi64(0, a, b);
@@ -11650,7 +11766,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_andnot_epi64() {
+    const fn test_mm_mask_andnot_epi64() {
         let a = _mm_set1_epi64x(1 << 1 | 1 << 2);
         let b = _mm_set1_epi64x(1 << 3 | 1 << 4);
         let r = _mm_mask_andnot_epi64(a, 0, a, b);
@@ -11661,7 +11777,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_andnot_epi64() {
+    const fn test_mm_maskz_andnot_epi64() {
         let a = _mm_set1_epi64x(1 << 1 | 1 << 2);
         let b = _mm_set1_epi64x(1 << 3 | 1 << 4);
         let r = _mm_maskz_andnot_epi64(0, a, b);
@@ -11672,7 +11788,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_andnot_si512() {
+    const fn test_mm512_andnot_si512() {
         let a = _mm512_set1_epi64(0);
         let b = _mm512_set1_epi64(1 << 3 | 1 << 4);
         let r = _mm512_andnot_si512(a, b);
@@ -11681,175 +11797,175 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_add_epi64() {
+    const fn test_mm512_reduce_add_epi64() {
         let a = _mm512_set1_epi64(1);
         let e: i64 = _mm512_reduce_add_epi64(a);
         assert_eq!(8, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_add_epi64() {
+    const fn test_mm512_mask_reduce_add_epi64() {
         let a = _mm512_set1_epi64(1);
         let e: i64 = _mm512_mask_reduce_add_epi64(0b11110000, a);
         assert_eq!(4, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_add_pd() {
+    const fn test_mm512_reduce_add_pd() {
         let a = _mm512_set1_pd(1.);
         let e: f64 = _mm512_reduce_add_pd(a);
         assert_eq!(8., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_add_pd() {
+    const fn test_mm512_mask_reduce_add_pd() {
         let a = _mm512_set1_pd(1.);
         let e: f64 = _mm512_mask_reduce_add_pd(0b11110000, a);
         assert_eq!(4., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_mul_epi64() {
+    const fn test_mm512_reduce_mul_epi64() {
         let a = _mm512_set1_epi64(2);
         let e: i64 = _mm512_reduce_mul_epi64(a);
         assert_eq!(256, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_mul_epi64() {
+    const fn test_mm512_mask_reduce_mul_epi64() {
         let a = _mm512_set1_epi64(2);
         let e: i64 = _mm512_mask_reduce_mul_epi64(0b11110000, a);
         assert_eq!(16, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_mul_pd() {
+    const fn test_mm512_reduce_mul_pd() {
         let a = _mm512_set1_pd(2.);
         let e: f64 = _mm512_reduce_mul_pd(a);
         assert_eq!(256., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_mul_pd() {
+    const fn test_mm512_mask_reduce_mul_pd() {
         let a = _mm512_set1_pd(2.);
         let e: f64 = _mm512_mask_reduce_mul_pd(0b11110000, a);
         assert_eq!(16., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_max_epi64() {
+    const fn test_mm512_reduce_max_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let e: i64 = _mm512_reduce_max_epi64(a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_max_epi64() {
+    const fn test_mm512_mask_reduce_max_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let e: i64 = _mm512_mask_reduce_max_epi64(0b11110000, a);
         assert_eq!(3, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_max_epu64() {
+    const fn test_mm512_reduce_max_epu64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let e: u64 = _mm512_reduce_max_epu64(a);
         assert_eq!(7, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_max_epu64() {
+    const fn test_mm512_mask_reduce_max_epu64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let e: u64 = _mm512_mask_reduce_max_epu64(0b11110000, a);
         assert_eq!(3, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_max_pd() {
+    fn test_mm512_reduce_max_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let e: f64 = _mm512_reduce_max_pd(a);
         assert_eq!(7., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_max_pd() {
+    fn test_mm512_mask_reduce_max_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let e: f64 = _mm512_mask_reduce_max_pd(0b11110000, a);
         assert_eq!(3., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_min_epi64() {
+    const fn test_mm512_reduce_min_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let e: i64 = _mm512_reduce_min_epi64(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_min_epi64() {
+    const fn test_mm512_mask_reduce_min_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let e: i64 = _mm512_mask_reduce_min_epi64(0b11110000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_min_epu64() {
+    const fn test_mm512_reduce_min_epu64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let e: u64 = _mm512_reduce_min_epu64(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_min_epu64() {
+    const fn test_mm512_mask_reduce_min_epu64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let e: u64 = _mm512_mask_reduce_min_epu64(0b11110000, a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_min_pd() {
+    fn test_mm512_reduce_min_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let e: f64 = _mm512_reduce_min_pd(a);
         assert_eq!(0., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_min_pd() {
+    fn test_mm512_mask_reduce_min_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let e: f64 = _mm512_mask_reduce_min_pd(0b11110000, a);
         assert_eq!(0., e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_and_epi64() {
+    const fn test_mm512_reduce_and_epi64() {
         let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2);
         let e: i64 = _mm512_reduce_and_epi64(a);
         assert_eq!(0, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_and_epi64() {
+    const fn test_mm512_mask_reduce_and_epi64() {
         let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2);
         let e: i64 = _mm512_mask_reduce_and_epi64(0b11110000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_reduce_or_epi64() {
+    const fn test_mm512_reduce_or_epi64() {
         let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2);
         let e: i64 = _mm512_reduce_or_epi64(a);
         assert_eq!(3, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_reduce_or_epi64() {
+    const fn test_mm512_mask_reduce_or_epi64() {
         let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2);
         let e: i64 = _mm512_mask_reduce_or_epi64(0b11110000, a);
         assert_eq!(1, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_extractf64x4_pd() {
+    const fn test_mm512_extractf64x4_pd() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_extractf64x4_pd::<1>(a);
         let e = _mm256_setr_pd(5., 6., 7., 8.);
@@ -11857,7 +11973,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_extractf64x4_pd() {
+    const fn test_mm512_mask_extractf64x4_pd() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let src = _mm256_set1_pd(100.);
         let r = _mm512_mask_extractf64x4_pd::<1>(src, 0, a);
@@ -11868,7 +11984,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_extractf64x4_pd() {
+    const fn test_mm512_maskz_extractf64x4_pd() {
         let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
         let r = _mm512_maskz_extractf64x4_pd::<1>(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -11878,7 +11994,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_extracti64x4_epi64() {
+    const fn test_mm512_extracti64x4_epi64() {
         let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_extracti64x4_epi64::<0x1>(a);
         let e = _mm256_setr_epi64x(5, 6, 7, 8);
@@ -11886,7 +12002,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_extracti64x4_epi64() {
+    const fn test_mm512_mask_extracti64x4_epi64() {
         let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let src = _mm256_set1_epi64x(100);
         let r = _mm512_mask_extracti64x4_epi64::<0x1>(src, 0, a);
@@ -11897,7 +12013,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_extracti64x4_epi64() {
+    const fn test_mm512_maskz_extracti64x4_epi64() {
         let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
         let r = _mm512_maskz_extracti64x4_epi64::<0x1>(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -11907,7 +12023,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_compress_epi64() {
+    fn test_mm512_mask_compress_epi64() {
         let src = _mm512_set1_epi64(200);
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_mask_compress_epi64(src, 0, a);
@@ -11918,7 +12034,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_compress_epi64() {
+    fn test_mm512_maskz_compress_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_maskz_compress_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -11928,7 +12044,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_compress_epi64() {
+    fn test_mm256_mask_compress_epi64() {
         let src = _mm256_set1_epi64x(200);
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_mask_compress_epi64(src, 0, a);
@@ -11939,7 +12055,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_compress_epi64() {
+    fn test_mm256_maskz_compress_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_maskz_compress_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -11949,7 +12065,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_compress_epi64() {
+    fn test_mm_mask_compress_epi64() {
         let src = _mm_set1_epi64x(200);
         let a = _mm_set_epi64x(0, 1);
         let r = _mm_mask_compress_epi64(src, 0, a);
@@ -11960,7 +12076,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_compress_epi64() {
+    fn test_mm_maskz_compress_epi64() {
         let a = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_compress_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -11970,7 +12086,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_compress_pd() {
+    fn test_mm512_mask_compress_pd() {
         let src = _mm512_set1_pd(200.);
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_mask_compress_pd(src, 0, a);
@@ -11981,7 +12097,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_compress_pd() {
+    fn test_mm512_maskz_compress_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_maskz_compress_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -11991,7 +12107,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_compress_pd() {
+    fn test_mm256_mask_compress_pd() {
         let src = _mm256_set1_pd(200.);
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_mask_compress_pd(src, 0, a);
@@ -12002,7 +12118,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_compress_pd() {
+    fn test_mm256_maskz_compress_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_maskz_compress_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -12012,7 +12128,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_compress_pd() {
+    fn test_mm_mask_compress_pd() {
         let src = _mm_set1_pd(200.);
         let a = _mm_set_pd(0., 1.);
         let r = _mm_mask_compress_pd(src, 0, a);
@@ -12023,7 +12139,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_compress_pd() {
+    fn test_mm_maskz_compress_pd() {
         let a = _mm_set_pd(0., 1.);
         let r = _mm_maskz_compress_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -12033,7 +12149,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_expand_epi64() {
+    fn test_mm512_mask_expand_epi64() {
         let src = _mm512_set1_epi64(200);
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_mask_expand_epi64(src, 0, a);
@@ -12044,7 +12160,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_expand_epi64() {
+    fn test_mm512_maskz_expand_epi64() {
         let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
         let r = _mm512_maskz_expand_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -12054,7 +12170,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_expand_epi64() {
+    fn test_mm256_mask_expand_epi64() {
         let src = _mm256_set1_epi64x(200);
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_mask_expand_epi64(src, 0, a);
@@ -12065,7 +12181,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_expand_epi64() {
+    fn test_mm256_maskz_expand_epi64() {
         let a = _mm256_set_epi64x(0, 1, 2, 3);
         let r = _mm256_maskz_expand_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -12075,7 +12191,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_expand_epi64() {
+    fn test_mm_mask_expand_epi64() {
         let src = _mm_set1_epi64x(200);
         let a = _mm_set_epi64x(0, 1);
         let r = _mm_mask_expand_epi64(src, 0, a);
@@ -12086,7 +12202,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_expand_epi64() {
+    fn test_mm_maskz_expand_epi64() {
         let a = _mm_set_epi64x(0, 1);
         let r = _mm_maskz_expand_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -12096,7 +12212,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_expand_pd() {
+    fn test_mm512_mask_expand_pd() {
         let src = _mm512_set1_pd(200.);
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_mask_expand_pd(src, 0, a);
@@ -12107,7 +12223,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_expand_pd() {
+    fn test_mm512_maskz_expand_pd() {
         let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
         let r = _mm512_maskz_expand_pd(0, a);
         assert_eq_m512d(r, _mm512_setzero_pd());
@@ -12117,7 +12233,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_expand_pd() {
+    fn test_mm256_mask_expand_pd() {
         let src = _mm256_set1_pd(200.);
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_mask_expand_pd(src, 0, a);
@@ -12128,7 +12244,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_expand_pd() {
+    fn test_mm256_maskz_expand_pd() {
         let a = _mm256_set_pd(0., 1., 2., 3.);
         let r = _mm256_maskz_expand_pd(0, a);
         assert_eq_m256d(r, _mm256_setzero_pd());
@@ -12138,7 +12254,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_expand_pd() {
+    fn test_mm_mask_expand_pd() {
         let src = _mm_set1_pd(200.);
         let a = _mm_set_pd(0., 1.);
         let r = _mm_mask_expand_pd(src, 0, a);
@@ -12149,7 +12265,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_expand_pd() {
+    fn test_mm_maskz_expand_pd() {
         let a = _mm_set_pd(0., 1.);
         let r = _mm_maskz_expand_pd(0, a);
         assert_eq_m128d(r, _mm_setzero_pd());
@@ -12159,100 +12275,116 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_loadu_epi64() {
+    const fn test_mm512_loadu_epi64() {
         let a = &[4, 3, 2, 5, -8, -9, -64, -50];
         let p = a.as_ptr();
-        let r = _mm512_loadu_epi64(black_box(p));
+        let r = unsafe { _mm512_loadu_epi64(black_box(p)) };
         let e = _mm512_setr_epi64(4, 3, 2, 5, -8, -9, -64, -50);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_loadu_epi64() {
+    const fn test_mm256_loadu_epi64() {
         let a = &[4, 3, 2, 5];
         let p = a.as_ptr();
-        let r = _mm256_loadu_epi64(black_box(p));
+        let r = unsafe { _mm256_loadu_epi64(black_box(p)) };
         let e = _mm256_setr_epi64x(4, 3, 2, 5);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_loadu_epi64() {
+    const fn test_mm_loadu_epi64() {
         let a = &[4, 3];
         let p = a.as_ptr();
-        let r = _mm_loadu_epi64(black_box(p));
+        let r = unsafe { _mm_loadu_epi64(black_box(p)) };
         let e = _mm_setr_epi64x(4, 3);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi64_storeu_epi16() {
+    fn test_mm512_mask_cvtepi64_storeu_epi16() {
         let a = _mm512_set1_epi64(9);
         let mut r = _mm_undefined_si128();
-        _mm512_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm512_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set1_epi16(9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi64_storeu_epi16() {
+    fn test_mm256_mask_cvtepi64_storeu_epi16() {
         let a = _mm256_set1_epi64x(9);
         let mut r = _mm_set1_epi16(0);
-        _mm256_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi64_storeu_epi16() {
+    fn test_mm_mask_cvtepi64_storeu_epi16() {
         let a = _mm_set1_epi64x(9);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi64_storeu_epi16() {
+    fn test_mm512_mask_cvtsepi64_storeu_epi16() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm_undefined_si128();
-        _mm512_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm512_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set1_epi16(i16::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi64_storeu_epi16() {
+    fn test_mm256_mask_cvtsepi64_storeu_epi16() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm256_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi64_storeu_epi16() {
+    fn test_mm_mask_cvtsepi64_storeu_epi16() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi64_storeu_epi16() {
+    fn test_mm512_mask_cvtusepi64_storeu_epi16() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm_undefined_si128();
-        _mm512_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm512_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set1_epi16(u16::MAX as i16);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi64_storeu_epi16() {
+    fn test_mm256_mask_cvtusepi64_storeu_epi16() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm256_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set_epi16(
             0,
             0,
@@ -12267,46 +12399,56 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi64_storeu_epi16() {
+    fn test_mm_mask_cvtusepi64_storeu_epi16() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
+        }
         let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi64_storeu_epi8() {
+    fn test_mm512_mask_cvtepi64_storeu_epi8() {
         let a = _mm512_set1_epi64(9);
         let mut r = _mm_set1_epi8(0);
-        _mm512_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm512_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi64_storeu_epi8() {
+    fn test_mm256_mask_cvtepi64_storeu_epi8() {
         let a = _mm256_set1_epi64x(9);
         let mut r = _mm_set1_epi8(0);
-        _mm256_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi64_storeu_epi8() {
+    fn test_mm_mask_cvtepi64_storeu_epi8() {
         let a = _mm_set1_epi64x(9);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi64_storeu_epi8() {
+    fn test_mm512_mask_cvtsepi64_storeu_epi8() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm512_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm512_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
@@ -12318,10 +12460,12 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi64_storeu_epi8() {
+    fn test_mm256_mask_cvtsepi64_storeu_epi8() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm256_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
@@ -12333,19 +12477,23 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi64_storeu_epi8() {
+    fn test_mm_mask_cvtsepi64_storeu_epi8() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi64_storeu_epi8() {
+    fn test_mm512_mask_cvtusepi64_storeu_epi8() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm512_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm512_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
@@ -12357,10 +12505,12 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi64_storeu_epi8() {
+    fn test_mm256_mask_cvtusepi64_storeu_epi8() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm256_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
@@ -12372,10 +12522,12 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi64_storeu_epi8() {
+    fn test_mm_mask_cvtusepi64_storeu_epi8() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi8(0);
-        _mm_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+        }
         #[rustfmt::skip]
         let e = _mm_set_epi8(
             0, 0, 0, 0,
@@ -12387,112 +12539,136 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtepi64_storeu_epi32() {
+    fn test_mm512_mask_cvtepi64_storeu_epi32() {
         let a = _mm512_set1_epi64(9);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        unsafe {
+            _mm512_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        }
         let e = _mm256_set1_epi32(9);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtepi64_storeu_epi32() {
+    fn test_mm256_mask_cvtepi64_storeu_epi32() {
         let a = _mm256_set1_epi64x(9);
         let mut r = _mm_set1_epi32(0);
-        _mm256_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        unsafe {
+            _mm256_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        }
         let e = _mm_set_epi32(9, 9, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtepi64_storeu_epi32() {
+    fn test_mm_mask_cvtepi64_storeu_epi32() {
         let a = _mm_set1_epi64x(9);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        unsafe {
+            _mm_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        }
         let e = _mm_set_epi32(0, 0, 9, 9);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtsepi64_storeu_epi32() {
+    fn test_mm512_mask_cvtsepi64_storeu_epi32() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        unsafe {
+            _mm512_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        }
         let e = _mm256_set1_epi32(i32::MAX);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtsepi64_storeu_epi32() {
+    fn test_mm256_mask_cvtsepi64_storeu_epi32() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi32(0);
-        _mm256_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00001111, a);
+        unsafe {
+            _mm256_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00001111, a);
+        }
         let e = _mm_set1_epi32(i32::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtsepi64_storeu_epi32() {
+    fn test_mm_mask_cvtsepi64_storeu_epi32() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00000011, a);
+        unsafe {
+            _mm_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00000011, a);
+        }
         let e = _mm_set_epi32(0, 0, i32::MAX, i32::MAX);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_cvtusepi64_storeu_epi32() {
+    fn test_mm512_mask_cvtusepi64_storeu_epi32() {
         let a = _mm512_set1_epi64(i64::MAX);
         let mut r = _mm256_undefined_si256();
-        _mm512_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        unsafe {
+            _mm512_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b11111111, a);
+        }
         let e = _mm256_set1_epi32(u32::MAX as i32);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_cvtusepi64_storeu_epi32() {
+    fn test_mm256_mask_cvtusepi64_storeu_epi32() {
         let a = _mm256_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi32(0);
-        _mm256_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00001111, a);
+        unsafe {
+            _mm256_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00001111, a);
+        }
         let e = _mm_set1_epi32(u32::MAX as i32);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_cvtusepi64_storeu_epi32() {
+    fn test_mm_mask_cvtusepi64_storeu_epi32() {
         let a = _mm_set1_epi64x(i64::MAX);
         let mut r = _mm_set1_epi16(0);
-        _mm_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00000011, a);
+        unsafe {
+            _mm_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i32, 0b00000011, a);
+        }
         let e = _mm_set_epi32(0, 0, u32::MAX as i32, u32::MAX as i32);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_storeu_epi64() {
+    const fn test_mm512_storeu_epi64() {
         let a = _mm512_set1_epi64(9);
         let mut r = _mm512_set1_epi64(0);
-        _mm512_storeu_epi64(&mut r as *mut _ as *mut i64, a);
+        unsafe {
+            _mm512_storeu_epi64(&mut r as *mut _ as *mut i64, a);
+        }
         assert_eq_m512i(r, a);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_storeu_epi64() {
+    const fn test_mm256_storeu_epi64() {
         let a = _mm256_set1_epi64x(9);
         let mut r = _mm256_set1_epi64x(0);
-        _mm256_storeu_epi64(&mut r as *mut _ as *mut i64, a);
+        unsafe {
+            _mm256_storeu_epi64(&mut r as *mut _ as *mut i64, a);
+        }
         assert_eq_m256i(r, a);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_storeu_epi64() {
+    const fn test_mm_storeu_epi64() {
         let a = _mm_set1_epi64x(9);
         let mut r = _mm_set1_epi64x(0);
-        _mm_storeu_epi64(&mut r as *mut _ as *mut i64, a);
+        unsafe {
+            _mm_storeu_epi64(&mut r as *mut _ as *mut i64, a);
+        }
         assert_eq_m128i(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_load_epi64() {
+    const fn test_mm512_load_epi64() {
         #[repr(align(64))]
         struct Align {
             data: [i64; 8], // 64 bytes
@@ -12501,63 +12677,69 @@ mod tests {
             data: [4, 3, 2, 5, -8, -9, -64, -50],
         };
         let p = (a.data).as_ptr();
-        let r = _mm512_load_epi64(black_box(p));
+        let r = unsafe { _mm512_load_epi64(black_box(p)) };
         let e = _mm512_setr_epi64(4, 3, 2, 5, -8, -9, -64, -50);
         assert_eq_m512i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_load_epi64() {
+    const fn test_mm256_load_epi64() {
         #[repr(align(64))]
         struct Align {
             data: [i64; 4],
         }
         let a = Align { data: [4, 3, 2, 5] };
         let p = (a.data).as_ptr();
-        let r = _mm256_load_epi64(black_box(p));
+        let r = unsafe { _mm256_load_epi64(black_box(p)) };
         let e = _mm256_set_epi64x(5, 2, 3, 4);
         assert_eq_m256i(r, e);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_load_epi64() {
+    const fn test_mm_load_epi64() {
         #[repr(align(64))]
         struct Align {
             data: [i64; 2],
         }
         let a = Align { data: [4, 3] };
         let p = (a.data).as_ptr();
-        let r = _mm_load_epi64(black_box(p));
+        let r = unsafe { _mm_load_epi64(black_box(p)) };
         let e = _mm_set_epi64x(3, 4);
         assert_eq_m128i(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_store_epi64() {
+    const fn test_mm512_store_epi64() {
         let a = _mm512_set1_epi64(9);
         let mut r = _mm512_set1_epi64(0);
-        _mm512_store_epi64(&mut r as *mut _ as *mut i64, a);
+        unsafe {
+            _mm512_store_epi64(&mut r as *mut _ as *mut i64, a);
+        }
         assert_eq_m512i(r, a);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_store_epi64() {
+    const fn test_mm256_store_epi64() {
         let a = _mm256_set1_epi64x(9);
         let mut r = _mm256_set1_epi64x(0);
-        _mm256_store_epi64(&mut r as *mut _ as *mut i64, a);
+        unsafe {
+            _mm256_store_epi64(&mut r as *mut _ as *mut i64, a);
+        }
         assert_eq_m256i(r, a);
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_store_epi64() {
+    const fn test_mm_store_epi64() {
         let a = _mm_set1_epi64x(9);
         let mut r = _mm_set1_epi64x(0);
-        _mm_store_epi64(&mut r as *mut _ as *mut i64, a);
+        unsafe {
+            _mm_store_epi64(&mut r as *mut _ as *mut i64, a);
+        }
         assert_eq_m128i(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_load_pd() {
+    const fn test_mm512_load_pd() {
         #[repr(align(64))]
         struct Align {
             data: [f64; 8], // 64 bytes
@@ -12566,21 +12748,23 @@ mod tests {
             data: [4., 3., 2., 5., -8., -9., -64., -50.],
         };
         let p = (a.data).as_ptr();
-        let r = _mm512_load_pd(black_box(p));
+        let r = unsafe { _mm512_load_pd(black_box(p)) };
         let e = _mm512_setr_pd(4., 3., 2., 5., -8., -9., -64., -50.);
         assert_eq_m512d(r, e);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_store_pd() {
+    const fn test_mm512_store_pd() {
         let a = _mm512_set1_pd(9.);
         let mut r = _mm512_undefined_pd();
-        _mm512_store_pd(&mut r as *mut _ as *mut f64, a);
+        unsafe {
+            _mm512_store_pd(&mut r as *mut _ as *mut f64, a);
+        }
         assert_eq_m512d(r, a);
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_test_epi64_mask() {
+    const fn test_mm512_test_epi64_mask() {
         let a = _mm512_set1_epi64(1 << 0);
         let b = _mm512_set1_epi64(1 << 0 | 1 << 1);
         let r = _mm512_test_epi64_mask(a, b);
@@ -12589,7 +12773,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_test_epi64_mask() {
+    const fn test_mm512_mask_test_epi64_mask() {
         let a = _mm512_set1_epi64(1 << 0);
         let b = _mm512_set1_epi64(1 << 0 | 1 << 1);
         let r = _mm512_mask_test_epi64_mask(0, a, b);
@@ -12600,7 +12784,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_test_epi64_mask() {
+    const fn test_mm256_test_epi64_mask() {
         let a = _mm256_set1_epi64x(1 << 0);
         let b = _mm256_set1_epi64x(1 << 0 | 1 << 1);
         let r = _mm256_test_epi64_mask(a, b);
@@ -12609,7 +12793,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_test_epi64_mask() {
+    const fn test_mm256_mask_test_epi64_mask() {
         let a = _mm256_set1_epi64x(1 << 0);
         let b = _mm256_set1_epi64x(1 << 0 | 1 << 1);
         let r = _mm256_mask_test_epi64_mask(0, a, b);
@@ -12620,7 +12804,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_test_epi64_mask() {
+    const fn test_mm_test_epi64_mask() {
         let a = _mm_set1_epi64x(1 << 0);
         let b = _mm_set1_epi64x(1 << 0 | 1 << 1);
         let r = _mm_test_epi64_mask(a, b);
@@ -12629,7 +12813,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_test_epi64_mask() {
+    const fn test_mm_mask_test_epi64_mask() {
         let a = _mm_set1_epi64x(1 << 0);
         let b = _mm_set1_epi64x(1 << 0 | 1 << 1);
         let r = _mm_mask_test_epi64_mask(0, a, b);
@@ -12640,7 +12824,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_testn_epi64_mask() {
+    const fn test_mm512_testn_epi64_mask() {
         let a = _mm512_set1_epi64(1 << 0);
         let b = _mm512_set1_epi64(1 << 0 | 1 << 1);
         let r = _mm512_testn_epi64_mask(a, b);
@@ -12649,7 +12833,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_testn_epi64_mask() {
+    const fn test_mm512_mask_testn_epi64_mask() {
         let a = _mm512_set1_epi64(1 << 0);
         let b = _mm512_set1_epi64(1 << 1);
         let r = _mm512_mask_testn_epi64_mask(0, a, b);
@@ -12660,7 +12844,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_testn_epi64_mask() {
+    const fn test_mm256_testn_epi64_mask() {
         let a = _mm256_set1_epi64x(1 << 0);
         let b = _mm256_set1_epi64x(1 << 1);
         let r = _mm256_testn_epi64_mask(a, b);
@@ -12669,7 +12853,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_testn_epi64_mask() {
+    const fn test_mm256_mask_testn_epi64_mask() {
         let a = _mm256_set1_epi64x(1 << 0);
         let b = _mm256_set1_epi64x(1 << 1);
         let r = _mm256_mask_testn_epi64_mask(0, a, b);
@@ -12680,7 +12864,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_testn_epi64_mask() {
+    const fn test_mm_testn_epi64_mask() {
         let a = _mm_set1_epi64x(1 << 0);
         let b = _mm_set1_epi64x(1 << 1);
         let r = _mm_testn_epi64_mask(a, b);
@@ -12689,7 +12873,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_testn_epi64_mask() {
+    const fn test_mm_mask_testn_epi64_mask() {
         let a = _mm_set1_epi64x(1 << 0);
         let b = _mm_set1_epi64x(1 << 1);
         let r = _mm_mask_testn_epi64_mask(0, a, b);
@@ -12700,7 +12884,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_mask_set1_epi64() {
+    const fn test_mm512_mask_set1_epi64() {
         let src = _mm512_set1_epi64(2);
         let a: i64 = 11;
         let r = _mm512_mask_set1_epi64(src, 0, a);
@@ -12711,7 +12895,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm512_maskz_set1_epi64() {
+    const fn test_mm512_maskz_set1_epi64() {
         let a: i64 = 11;
         let r = _mm512_maskz_set1_epi64(0, a);
         assert_eq_m512i(r, _mm512_setzero_si512());
@@ -12721,7 +12905,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_mask_set1_epi64() {
+    const fn test_mm256_mask_set1_epi64() {
         let src = _mm256_set1_epi64x(2);
         let a: i64 = 11;
         let r = _mm256_mask_set1_epi64(src, 0, a);
@@ -12732,7 +12916,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm256_maskz_set1_epi64() {
+    const fn test_mm256_maskz_set1_epi64() {
         let a: i64 = 11;
         let r = _mm256_maskz_set1_epi64(0, a);
         assert_eq_m256i(r, _mm256_setzero_si256());
@@ -12742,7 +12926,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_mask_set1_epi64() {
+    const fn test_mm_mask_set1_epi64() {
         let src = _mm_set1_epi64x(2);
         let a: i64 = 11;
         let r = _mm_mask_set1_epi64(src, 0, a);
@@ -12753,7 +12937,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f,avx512vl")]
-    unsafe fn test_mm_maskz_set1_epi64() {
+    const fn test_mm_maskz_set1_epi64() {
         let a: i64 = 11;
         let r = _mm_maskz_set1_epi64(0, a);
         assert_eq_m128i(r, _mm_setzero_si128());
@@ -12763,7 +12947,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtsd_i64() {
+    fn test_mm_cvtsd_i64() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtsd_i64(a);
         let e: i64 = -2;
@@ -12771,7 +12955,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtss_i64() {
+    fn test_mm_cvtss_i64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtss_i64(a);
         let e: i64 = -2;
@@ -12779,7 +12963,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundi64_ss() {
+    fn test_mm_cvt_roundi64_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: i64 = 9;
         let r = _mm_cvt_roundi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -12788,7 +12972,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsi64_ss() {
+    fn test_mm_cvt_roundsi64_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: i64 = 9;
         let r = _mm_cvt_roundsi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -12797,7 +12981,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvti64_ss() {
+    const fn test_mm_cvti64_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: i64 = 9;
         let r = _mm_cvti64_ss(a, b);
@@ -12806,7 +12990,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvti64_sd() {
+    const fn test_mm_cvti64_sd() {
         let a = _mm_set_pd(1., -1.5);
         let b: i64 = 9;
         let r = _mm_cvti64_sd(a, b);
@@ -12815,7 +12999,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsd_si64() {
+    fn test_mm_cvt_roundsd_si64() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvt_roundsd_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: i64 = -1;
@@ -12823,7 +13007,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsd_i64() {
+    fn test_mm_cvt_roundsd_i64() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvt_roundsd_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: i64 = -1;
@@ -12831,7 +13015,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsd_u64() {
+    fn test_mm_cvt_roundsd_u64() {
         let a = _mm_set_pd(1., f64::MAX);
         let r = _mm_cvt_roundsd_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: u64 = u64::MAX;
@@ -12839,7 +13023,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtsd_u64() {
+    fn test_mm_cvtsd_u64() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtsd_u64(a);
         let e: u64 = u64::MAX;
@@ -12847,7 +13031,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundss_i64() {
+    fn test_mm_cvt_roundss_i64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvt_roundss_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: i64 = -1;
@@ -12855,7 +13039,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundss_si64() {
+    fn test_mm_cvt_roundss_si64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvt_roundss_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: i64 = -1;
@@ -12863,7 +13047,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundss_u64() {
+    fn test_mm_cvt_roundss_u64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvt_roundss_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
         let e: u64 = u64::MAX;
@@ -12871,7 +13055,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtss_u64() {
+    fn test_mm_cvtss_u64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtss_u64(a);
         let e: u64 = u64::MAX;
@@ -12879,7 +13063,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvttsd_i64() {
+    fn test_mm_cvttsd_i64() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvttsd_i64(a);
         let e: i64 = -1;
@@ -12887,7 +13071,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundsd_i64() {
+    fn test_mm_cvtt_roundsd_i64() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtt_roundsd_i64::<_MM_FROUND_NO_EXC>(a);
         let e: i64 = -1;
@@ -12895,7 +13079,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundsd_si64() {
+    fn test_mm_cvtt_roundsd_si64() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtt_roundsd_si64::<_MM_FROUND_NO_EXC>(a);
         let e: i64 = -1;
@@ -12903,7 +13087,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundsd_u64() {
+    fn test_mm_cvtt_roundsd_u64() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvtt_roundsd_u64::<_MM_FROUND_NO_EXC>(a);
         let e: u64 = u64::MAX;
@@ -12911,7 +13095,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvttsd_u64() {
+    fn test_mm_cvttsd_u64() {
         let a = _mm_set_pd(1., -1.5);
         let r = _mm_cvttsd_u64(a);
         let e: u64 = u64::MAX;
@@ -12919,7 +13103,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvttss_i64() {
+    fn test_mm_cvttss_i64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvttss_i64(a);
         let e: i64 = -1;
@@ -12927,7 +13111,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundss_i64() {
+    fn test_mm_cvtt_roundss_i64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtt_roundss_i64::<_MM_FROUND_NO_EXC>(a);
         let e: i64 = -1;
@@ -12935,7 +13119,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundss_si64() {
+    fn test_mm_cvtt_roundss_si64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtt_roundss_si64::<_MM_FROUND_NO_EXC>(a);
         let e: i64 = -1;
@@ -12943,7 +13127,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtt_roundss_u64() {
+    fn test_mm_cvtt_roundss_u64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvtt_roundss_u64::<_MM_FROUND_NO_EXC>(a);
         let e: u64 = u64::MAX;
@@ -12951,7 +13135,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvttss_u64() {
+    fn test_mm_cvttss_u64() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let r = _mm_cvttss_u64(a);
         let e: u64 = u64::MAX;
@@ -12959,7 +13143,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtu64_ss() {
+    const fn test_mm_cvtu64_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: u64 = 9;
         let r = _mm_cvtu64_ss(a, b);
@@ -12968,7 +13152,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvtu64_sd() {
+    const fn test_mm_cvtu64_sd() {
         let a = _mm_set_pd(1., -1.5);
         let b: u64 = 9;
         let r = _mm_cvtu64_sd(a, b);
@@ -12977,7 +13161,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundu64_ss() {
+    fn test_mm_cvt_roundu64_ss() {
         let a = _mm_set_ps(0., -0.5, 1., -1.5);
         let b: u64 = 9;
         let r = _mm_cvt_roundu64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -12986,7 +13170,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundu64_sd() {
+    fn test_mm_cvt_roundu64_sd() {
         let a = _mm_set_pd(1., -1.5);
         let b: u64 = 9;
         let r = _mm_cvt_roundu64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -12995,7 +13179,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundi64_sd() {
+    fn test_mm_cvt_roundi64_sd() {
         let a = _mm_set_pd(1., -1.5);
         let b: i64 = 9;
         let r = _mm_cvt_roundi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
@@ -13004,7 +13188,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512f")]
-    unsafe fn test_mm_cvt_roundsi64_sd() {
+    fn test_mm_cvt_roundsi64_sd() {
         let a = _mm_set_pd(1., -1.5);
         let b: i64 = 9;
         let r = _mm_cvt_roundsi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
diff --git a/crates/core_arch/src/x86_64/avx512fp16.rs b/crates/core_arch/src/x86_64/avx512fp16.rs
index 955c6ccc75..2a511328bb 100644
--- a/crates/core_arch/src/x86_64/avx512fp16.rs
+++ b/crates/core_arch/src/x86_64/avx512fp16.rs
@@ -10,7 +10,7 @@ use stdarch_test::assert_instr;
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsi2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
     unsafe { vcvtsi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -32,7 +32,7 @@ pub fn _mm_cvti64_sh(a: __m128h, b: i64) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsi2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -48,7 +48,7 @@ pub fn _mm_cvt_roundi64_sh<const ROUNDING: i32>(a: __m128h, b: i64) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtusi2sh))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
     unsafe { vcvtusi642sh(a, b, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -70,7 +70,7 @@ pub fn _mm_cvtu64_sh(a: __m128h, b: u64) -> __m128h {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtusi2sh, ROUNDING = 8))]
 #[rustc_legacy_const_generics(2)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -85,7 +85,7 @@ pub fn _mm_cvt_roundu64_sh<const ROUNDING: i32>(a: __m128h, b: u64) -> __m128h {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2si))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtsh_i64(a: __m128h) -> i64 {
     unsafe { vcvtsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -106,7 +106,7 @@ pub fn _mm_cvtsh_i64(a: __m128h) -> i64 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2si, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -121,7 +121,7 @@ pub fn _mm_cvt_roundsh_i64<const ROUNDING: i32>(a: __m128h) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2usi))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtsh_u64(a: __m128h) -> u64 {
     unsafe { vcvtsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -142,7 +142,7 @@ pub fn _mm_cvtsh_u64(a: __m128h) -> u64 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvtsh2usi, ROUNDING = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
     unsafe {
         static_assert_rounding!(ROUNDING);
@@ -157,7 +157,7 @@ pub fn _mm_cvt_roundsh_u64<const ROUNDING: i32>(a: __m128h) -> u64 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttsh2si))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttsh_i64(a: __m128h) -> i64 {
     unsafe { vcvttsh2si64(a, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -172,7 +172,7 @@ pub fn _mm_cvttsh_i64(a: __m128h) -> i64 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttsh2si, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
     unsafe {
         static_assert_sae!(SAE);
@@ -187,7 +187,7 @@ pub fn _mm_cvtt_roundsh_i64<const SAE: i32>(a: __m128h) -> i64 {
 #[inline]
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttsh2usi))]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvttsh_u64(a: __m128h) -> u64 {
     unsafe { vcvttsh2usi64(a, _MM_FROUND_CUR_DIRECTION) }
 }
@@ -202,7 +202,7 @@ pub fn _mm_cvttsh_u64(a: __m128h) -> u64 {
 #[target_feature(enable = "avx512fp16")]
 #[cfg_attr(test, assert_instr(vcvttsh2usi, SAE = 8))]
 #[rustc_legacy_const_generics(1)]
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub fn _mm_cvtt_roundsh_u64<const SAE: i32>(a: __m128h) -> u64 {
     unsafe {
         static_assert_sae!(SAE);
@@ -232,7 +232,7 @@ mod tests {
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvti64_sh() {
+    fn test_mm_cvti64_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvti64_sh(a, 10);
         let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -240,7 +240,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvt_roundi64_sh() {
+    fn test_mm_cvt_roundi64_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundi64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
         let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -248,7 +248,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvtu64_sh() {
+    fn test_mm_cvtu64_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtu64_sh(a, 10);
         let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -256,7 +256,7 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16,avx512vl")]
-    unsafe fn test_mm_cvt_roundu64_sh() {
+    fn test_mm_cvt_roundu64_sh() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundu64_sh::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 10);
         let e = _mm_setr_ph(10.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
@@ -264,56 +264,56 @@ mod tests {
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtsh_i64() {
+    fn test_mm_cvtsh_i64() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtsh_i64(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvt_roundsh_i64() {
+    fn test_mm_cvt_roundsh_i64() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundsh_i64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtsh_u64() {
+    fn test_mm_cvtsh_u64() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtsh_u64(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvt_roundsh_u64() {
+    fn test_mm_cvt_roundsh_u64() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvt_roundsh_u64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvttsh_i64() {
+    fn test_mm_cvttsh_i64() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvttsh_i64(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtt_roundsh_i64() {
+    fn test_mm_cvtt_roundsh_i64() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtt_roundsh_i64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvttsh_u64() {
+    fn test_mm_cvttsh_u64() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvttsh_u64(a);
         assert_eq!(r, 1);
     }
 
     #[simd_test(enable = "avx512fp16")]
-    unsafe fn test_mm_cvtt_roundsh_u64() {
+    fn test_mm_cvtt_roundsh_u64() {
         let a = _mm_setr_ph(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
         let r = _mm_cvtt_roundsh_u64::<_MM_FROUND_NO_EXC>(a);
         assert_eq!(r, 1);
diff --git a/crates/core_arch/src/x86_64/bmi.rs b/crates/core_arch/src/x86_64/bmi.rs
index 5d204d51ae..8d2b22089a 100644
--- a/crates/core_arch/src/x86_64/bmi.rs
+++ b/crates/core_arch/src/x86_64/bmi.rs
@@ -6,7 +6,7 @@
 //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
 //! available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 //! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
 
 #[cfg(test)]
@@ -48,7 +48,8 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(andn))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _andn_u64(a: u64, b: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _andn_u64(a: u64, b: u64) -> u64 {
     !a & b
 }
 
@@ -60,7 +61,8 @@ pub fn _andn_u64(a: u64, b: u64) -> u64 {
 #[cfg_attr(test, assert_instr(blsi))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsi_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsi_u64(x: u64) -> u64 {
     x & x.wrapping_neg()
 }
 
@@ -72,7 +74,8 @@ pub fn _blsi_u64(x: u64) -> u64 {
 #[cfg_attr(test, assert_instr(blsmsk))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsmsk_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsmsk_u64(x: u64) -> u64 {
     x ^ (x.wrapping_sub(1_u64))
 }
 
@@ -86,7 +89,8 @@ pub fn _blsmsk_u64(x: u64) -> u64 {
 #[cfg_attr(test, assert_instr(blsr))]
 #[cfg(not(target_arch = "x86"))] // generates lots of instructions
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsr_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsr_u64(x: u64) -> u64 {
     x & (x.wrapping_sub(1))
 }
 
@@ -99,7 +103,8 @@ pub fn _blsr_u64(x: u64) -> u64 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(tzcnt))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _tzcnt_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _tzcnt_u64(x: u64) -> u64 {
     x.trailing_zeros() as u64
 }
 
@@ -112,7 +117,8 @@ pub fn _tzcnt_u64(x: u64) -> u64 {
 #[target_feature(enable = "bmi1")]
 #[cfg_attr(test, assert_instr(tzcnt))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_tzcnt_64(x: u64) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_tzcnt_64(x: u64) -> i64 {
     x.trailing_zeros() as i64
 }
 
@@ -123,18 +129,19 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::{x86::*, x86_64::*};
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_bextr_u64() {
+    fn test_bextr_u64() {
         let r = _bextr_u64(0b0101_0000u64, 4, 4);
         assert_eq!(r, 0b0000_0101u64);
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_andn_u64() {
+    const fn test_andn_u64() {
         assert_eq!(_andn_u64(0, 0), 0);
         assert_eq!(_andn_u64(0, 1), 1);
         assert_eq!(_andn_u64(1, 0), 0);
@@ -157,25 +164,25 @@ mod tests {
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_blsi_u64() {
+    const fn test_blsi_u64() {
         assert_eq!(_blsi_u64(0b1101_0000u64), 0b0001_0000u64);
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_blsmsk_u64() {
+    const fn test_blsmsk_u64() {
         let r = _blsmsk_u64(0b0011_0000u64);
         assert_eq!(r, 0b0001_1111u64);
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_blsr_u64() {
+    const fn test_blsr_u64() {
         // TODO: test the behavior when the input is `0`.
         let r = _blsr_u64(0b0011_0000u64);
         assert_eq!(r, 0b0010_0000u64);
     }
 
     #[simd_test(enable = "bmi1")]
-    unsafe fn test_tzcnt_u64() {
+    const fn test_tzcnt_u64() {
         assert_eq!(_tzcnt_u64(0b0000_0001u64), 0u64);
         assert_eq!(_tzcnt_u64(0b0000_0000u64), 64u64);
         assert_eq!(_tzcnt_u64(0b1001_0000u64), 4u64);
diff --git a/crates/core_arch/src/x86_64/bmi2.rs b/crates/core_arch/src/x86_64/bmi2.rs
index ea9daf8857..6151eee8bd 100644
--- a/crates/core_arch/src/x86_64/bmi2.rs
+++ b/crates/core_arch/src/x86_64/bmi2.rs
@@ -6,7 +6,7 @@
 //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
 //! available.
 //!
-//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
 //! [wikipedia_bmi]:
 //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
 
@@ -24,7 +24,8 @@ use stdarch_test::assert_instr;
 #[target_feature(enable = "bmi2")]
 #[cfg(not(target_arch = "x86"))] // calls an intrinsic
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
     let result: u128 = (a as u128) * (b as u128);
     *hi = (result >> 64) as u64;
     result as u64
@@ -79,12 +80,13 @@ unsafe extern "C" {
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86_64::*;
 
     #[simd_test(enable = "bmi2")]
-    unsafe fn test_pext_u64() {
+    fn test_pext_u64() {
         let n = 0b1011_1110_1001_0011u64;
 
         let m0 = 0b0110_0011_1000_0101u64;
@@ -98,7 +100,7 @@ mod tests {
     }
 
     #[simd_test(enable = "bmi2")]
-    unsafe fn test_pdep_u64() {
+    fn test_pdep_u64() {
         let n = 0b1011_1110_1001_0011u64;
 
         let m0 = 0b0110_0011_1000_0101u64;
@@ -112,7 +114,7 @@ mod tests {
     }
 
     #[simd_test(enable = "bmi2")]
-    unsafe fn test_bzhi_u64() {
+    fn test_bzhi_u64() {
         let n = 0b1111_0010u64;
         let s = 0b0001_0010u64;
         assert_eq!(_bzhi_u64(n, 5), s);
@@ -120,7 +122,7 @@ mod tests {
 
     #[simd_test(enable = "bmi2")]
     #[rustfmt::skip]
-    unsafe fn test_mulx_u64() {
+    const fn test_mulx_u64() {
         let a: u64 = 9_223_372_036_854_775_800;
         let b: u64 = 100;
         let mut hi = 0;
diff --git a/crates/core_arch/src/x86_64/bswap.rs b/crates/core_arch/src/x86_64/bswap.rs
index 4e2d8b96ea..1b1d739a62 100644
--- a/crates/core_arch/src/x86_64/bswap.rs
+++ b/crates/core_arch/src/x86_64/bswap.rs
@@ -11,16 +11,20 @@ use stdarch_test::assert_instr;
 #[inline]
 #[cfg_attr(test, assert_instr(bswap))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _bswap64(x: i64) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _bswap64(x: i64) -> i64 {
     x.swap_bytes()
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
+    use stdarch_test::simd_test;
+
     use super::*;
 
-    #[test]
-    fn test_bswap64() {
+    #[simd_test]
+    const fn test_bswap64() {
         assert_eq!(_bswap64(0x0EADBEEFFADECA0E), 0x0ECADEFAEFBEAD0E);
         assert_eq!(_bswap64(0x0000000000000000), 0x0000000000000000);
     }
diff --git a/crates/core_arch/src/x86_64/fxsr.rs b/crates/core_arch/src/x86_64/fxsr.rs
index a24b44fb1f..28bf195116 100644
--- a/crates/core_arch/src/x86_64/fxsr.rs
+++ b/crates/core_arch/src/x86_64/fxsr.rs
@@ -77,12 +77,14 @@ mod tests {
 
     #[simd_test(enable = "fxsr")]
     #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
-    unsafe fn test_fxsave64() {
+    fn test_fxsave64() {
         let mut a = FxsaveArea::new();
         let mut b = FxsaveArea::new();
 
-        fxsr::_fxsave64(a.ptr());
-        fxsr::_fxrstor64(a.ptr());
-        fxsr::_fxsave64(b.ptr());
+        unsafe {
+            fxsr::_fxsave64(a.ptr());
+            fxsr::_fxrstor64(a.ptr());
+            fxsr::_fxsave64(b.ptr());
+        }
     }
 }
diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs
index 7d681882be..46384176e0 100644
--- a/crates/core_arch/src/x86_64/mod.rs
+++ b/crates/core_arch/src/x86_64/mod.rs
@@ -75,9 +75,13 @@ mod bt;
 pub use self::bt::*;
 
 mod avx512fp16;
-#[unstable(feature = "stdarch_x86_avx512_f16", issue = "127213")]
+#[stable(feature = "stdarch_x86_avx512fp16", since = "1.94.0")]
 pub use self::avx512fp16::*;
 
 mod amx;
 #[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
 pub use self::amx::*;
+
+mod movrs;
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub use self::movrs::*;
diff --git a/crates/core_arch/src/x86_64/movrs.rs b/crates/core_arch/src/x86_64/movrs.rs
new file mode 100644
index 0000000000..fc669bbb1c
--- /dev/null
+++ b/crates/core_arch/src/x86_64/movrs.rs
@@ -0,0 +1,94 @@
+//! Read-shared Move instructions
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+unsafe extern "unadjusted" {
+    #[link_name = "llvm.x86.movrsqi"]
+    fn movrsqi(src: *const i8) -> i8;
+    #[link_name = "llvm.x86.movrshi"]
+    fn movrshi(src: *const i16) -> i16;
+    #[link_name = "llvm.x86.movrssi"]
+    fn movrssi(src: *const i32) -> i32;
+    #[link_name = "llvm.x86.movrsdi"]
+    fn movrsdi(src: *const i64) -> i64;
+}
+
+/// Moves a byte from the source to the destination, with an indication that the source memory
+/// location is likely to become read-shared by multiple processors, i.e., read in the future by at
+/// least one other processor before it is written, assuming it is ever written in the future.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(movrs))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub unsafe fn _movrs_i8(src: *const i8) -> i8 {
+    movrsqi(src)
+}
+
+/// Moves a 16-bit word from the source to the destination, with an indication that the source memory
+/// location is likely to become read-shared by multiple processors, i.e., read in the future by at
+/// least one other processor before it is written, assuming it is ever written in the future.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(movrs))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub unsafe fn _movrs_i16(src: *const i16) -> i16 {
+    movrshi(src)
+}
+
+/// Moves a 32-bit doubleword from the source to the destination, with an indication that the source
+/// memory location is likely to become read-shared by multiple processors, i.e., read in the future
+/// by at least one other processor before it is written, assuming it is ever written in the future.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(movrs))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub unsafe fn _movrs_i32(src: *const i32) -> i32 {
+    movrssi(src)
+}
+
+/// Moves a 64-bit quadword from the source to the destination, with an indication that the source
+/// memory location is likely to become read-shared by multiple processors, i.e., read in the future
+/// by at least one other processor before it is written, assuming it is ever written in the future.
+#[inline]
+#[target_feature(enable = "movrs")]
+#[cfg_attr(all(test, not(target_vendor = "apple")), assert_instr(movrs))]
+#[unstable(feature = "movrs_target_feature", issue = "137976")]
+pub unsafe fn _movrs_i64(src: *const i64) -> i64 {
+    movrsdi(src)
+}
+
+#[cfg(test)]
+mod tests {
+    use stdarch_test::simd_test;
+
+    use super::*;
+
+    #[simd_test(enable = "movrs")]
+    fn test_movrs_i8() {
+        let x: i8 = 42;
+        let y = unsafe { _movrs_i8(&x) };
+        assert_eq!(x, y);
+    }
+
+    #[simd_test(enable = "movrs")]
+    fn test_movrs_i16() {
+        let x: i16 = 42;
+        let y = unsafe { _movrs_i16(&x) };
+        assert_eq!(x, y);
+    }
+
+    #[simd_test(enable = "movrs")]
+    fn test_movrs_i32() {
+        let x: i32 = 42;
+        let y = unsafe { _movrs_i32(&x) };
+        assert_eq!(x, y);
+    }
+
+    #[simd_test(enable = "movrs")]
+    fn test_movrs_i64() {
+        let x: i64 = 42;
+        let y = unsafe { _movrs_i64(&x) };
+        assert_eq!(x, y);
+    }
+}
diff --git a/crates/core_arch/src/x86_64/sse.rs b/crates/core_arch/src/x86_64/sse.rs
index 6bd7ec83ec..81e1070b55 100644
--- a/crates/core_arch/src/x86_64/sse.rs
+++ b/crates/core_arch/src/x86_64/sse.rs
@@ -62,17 +62,19 @@ pub fn _mm_cvttss_si64(a: __m128) -> i64 {
 #[target_feature(enable = "sse")]
 #[cfg_attr(test, assert_instr(cvtsi2ss))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 {
     unsafe { simd_insert!(a, 0, b as f32) }
 }
 
 #[cfg(test)]
 mod tests {
     use crate::core_arch::arch::x86_64::*;
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cvtss_si64() {
+    fn test_mm_cvtss_si64() {
         let inputs = &[
             (42.0f32, 42i64),
             (-31.4, -31),
@@ -96,7 +98,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cvttss_si64() {
+    fn test_mm_cvttss_si64() {
         let inputs = &[
             (42.0f32, 42i64),
             (-31.4, -31),
@@ -123,21 +125,31 @@ mod tests {
     }
 
     #[simd_test(enable = "sse")]
-    unsafe fn test_mm_cvtsi64_ss() {
-        let inputs = &[
-            (4555i64, 4555.0f32),
-            (322223333, 322223330.0),
-            (-432, -432.0),
-            (-322223333, -322223330.0),
-            (9223372036854775807, 9.223372e18),
-            (-9223372036854775808, -9.223372e18),
-        ];
+    const fn test_mm_cvtsi64_ss() {
+        let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
 
-        for &(x, f) in inputs {
-            let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
-            let r = _mm_cvtsi64_ss(a, x);
-            let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
-            assert_eq_m128(e, r);
-        }
+        let r = _mm_cvtsi64_ss(a, 4555);
+        let e = _mm_setr_ps(4555.0, 6.0, 7.0, 8.0);
+        assert_eq_m128(e, r);
+
+        let r = _mm_cvtsi64_ss(a, 322223333);
+        let e = _mm_setr_ps(322223333.0, 6.0, 7.0, 8.0);
+        assert_eq_m128(e, r);
+
+        let r = _mm_cvtsi64_ss(a, -432);
+        let e = _mm_setr_ps(-432.0, 6.0, 7.0, 8.0);
+        assert_eq_m128(e, r);
+
+        let r = _mm_cvtsi64_ss(a, -322223333);
+        let e = _mm_setr_ps(-322223333.0, 6.0, 7.0, 8.0);
+        assert_eq_m128(e, r);
+
+        let r = _mm_cvtsi64_ss(a, 9223372036854775807);
+        let e = _mm_setr_ps(9.223372e18, 6.0, 7.0, 8.0);
+        assert_eq_m128(e, r);
+
+        let r = _mm_cvtsi64_ss(a, -9223372036854775808);
+        let e = _mm_setr_ps(-9.223372e18, 6.0, 7.0, 8.0);
+        assert_eq_m128(e, r);
     }
 }
diff --git a/crates/core_arch/src/x86_64/sse2.rs b/crates/core_arch/src/x86_64/sse2.rs
index 0894aa9810..08dabf053d 100644
--- a/crates/core_arch/src/x86_64/sse2.rs
+++ b/crates/core_arch/src/x86_64/sse2.rs
@@ -95,7 +95,8 @@ pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi64_si128(a: i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi64_si128(a: i64) -> __m128i {
     _mm_set_epi64x(0, a)
 }
 
@@ -107,7 +108,8 @@ pub fn _mm_cvtsi64_si128(a: i64) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi64x_si128(a: i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi64x_si128(a: i64) -> __m128i {
     _mm_cvtsi64_si128(a)
 }
 
@@ -118,7 +120,8 @@ pub fn _mm_cvtsi64x_si128(a: i64) -> __m128i {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi128_si64(a: __m128i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi128_si64(a: __m128i) -> i64 {
     unsafe { simd_extract!(a.as_i64x2(), 0) }
 }
 
@@ -129,7 +132,8 @@ pub fn _mm_cvtsi128_si64(a: __m128i) -> i64 {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(movq))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi128_si64x(a: __m128i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi128_si64x(a: __m128i) -> i64 {
     _mm_cvtsi128_si64(a)
 }
 
@@ -141,7 +145,8 @@ pub fn _mm_cvtsi128_si64x(a: __m128i) -> i64 {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsi2sd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d {
     unsafe { simd_insert!(a, 0, b as f64) }
 }
 
@@ -153,19 +158,21 @@ pub fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d {
 #[target_feature(enable = "sse2")]
 #[cfg_attr(test, assert_instr(cvtsi2sd))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d {
     _mm_cvtsi64_sd(a, b)
 }
 
 #[cfg(test)]
 mod tests {
     use crate::core_arch::arch::x86_64::*;
+    use crate::core_arch::assert_eq_const as assert_eq;
     use std::boxed;
     use std::ptr;
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsd_si64() {
+    fn test_mm_cvtsd_si64() {
         let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0));
         assert_eq!(r, -2_i64);
 
@@ -174,20 +181,20 @@ mod tests {
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsd_si64x() {
+    fn test_mm_cvtsd_si64x() {
         let r = _mm_cvtsd_si64x(_mm_setr_pd(f64::NAN, f64::NAN));
         assert_eq!(r, i64::MIN);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvttsd_si64() {
+    fn test_mm_cvttsd_si64() {
         let a = _mm_setr_pd(-1.1, 2.2);
         let r = _mm_cvttsd_si64(a);
         assert_eq!(r, -1_i64);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvttsd_si64x() {
+    fn test_mm_cvttsd_si64x() {
         let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
         let r = _mm_cvttsd_si64x(a);
         assert_eq!(r, i64::MIN);
@@ -197,28 +204,30 @@ mod tests {
     // Miri cannot support this until it is clear how it fits in the Rust memory model
     // (non-temporal store)
     #[cfg_attr(miri, ignore)]
-    unsafe fn test_mm_stream_si64() {
+    fn test_mm_stream_si64() {
         let a: i64 = 7;
         let mut mem = boxed::Box::<i64>::new(-1);
-        _mm_stream_si64(ptr::addr_of_mut!(*mem), a);
+        unsafe {
+            _mm_stream_si64(ptr::addr_of_mut!(*mem), a);
+        }
         _mm_sfence();
         assert_eq!(a, *mem);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsi64_si128() {
+    const fn test_mm_cvtsi64_si128() {
         let r = _mm_cvtsi64_si128(5);
         assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsi128_si64() {
+    const fn test_mm_cvtsi128_si64() {
         let r = _mm_cvtsi128_si64(_mm_setr_epi64x(5, 0));
         assert_eq!(r, 5);
     }
 
     #[simd_test(enable = "sse2")]
-    unsafe fn test_mm_cvtsi64_sd() {
+    const fn test_mm_cvtsi64_sd() {
         let a = _mm_set1_pd(3.5);
         let r = _mm_cvtsi64_sd(a, 5);
         assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
diff --git a/crates/core_arch/src/x86_64/sse41.rs b/crates/core_arch/src/x86_64/sse41.rs
index 4b7d25f214..7732264e20 100644
--- a/crates/core_arch/src/x86_64/sse41.rs
+++ b/crates/core_arch/src/x86_64/sse41.rs
@@ -13,7 +13,8 @@ use stdarch_test::assert_instr;
 #[cfg_attr(test, assert_instr(pextrq, IMM1 = 1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_extract_epi64<const IMM1: i32>(a: __m128i) -> i64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_extract_epi64<const IMM1: i32>(a: __m128i) -> i64 {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe { simd_extract!(a.as_i64x2(), IMM1 as u32) }
 }
@@ -27,7 +28,8 @@ pub fn _mm_extract_epi64<const IMM1: i32>(a: __m128i) -> i64 {
 #[cfg_attr(test, assert_instr(pinsrq, IMM1 = 0))]
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _mm_insert_epi64<const IMM1: i32>(a: __m128i, i: i64) -> __m128i {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _mm_insert_epi64<const IMM1: i32>(a: __m128i, i: i64) -> __m128i {
     static_assert_uimm_bits!(IMM1, 1);
     unsafe { transmute(simd_insert!(a.as_i64x2(), IMM1 as u32, i)) }
 }
@@ -35,10 +37,11 @@ pub fn _mm_insert_epi64<const IMM1: i32>(a: __m128i, i: i64) -> __m128i {
 #[cfg(test)]
 mod tests {
     use crate::core_arch::arch::x86_64::*;
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_extract_epi64() {
+    const fn test_mm_extract_epi64() {
         let a = _mm_setr_epi64x(0, 1);
         let r = _mm_extract_epi64::<1>(a);
         assert_eq!(r, 1);
@@ -47,7 +50,7 @@ mod tests {
     }
 
     #[simd_test(enable = "sse4.1")]
-    unsafe fn test_mm_insert_epi64() {
+    const fn test_mm_insert_epi64() {
         let a = _mm_set1_epi64x(0);
         let e = _mm_setr_epi64x(0, 32);
         let r = _mm_insert_epi64::<1>(a, 32);
diff --git a/crates/core_arch/src/x86_64/sse42.rs b/crates/core_arch/src/x86_64/sse42.rs
index 64a23b2b19..cd32c149af 100644
--- a/crates/core_arch/src/x86_64/sse42.rs
+++ b/crates/core_arch/src/x86_64/sse42.rs
@@ -28,7 +28,7 @@ mod tests {
     use stdarch_test::simd_test;
 
     #[simd_test(enable = "sse4.2")]
-    unsafe fn test_mm_crc32_u64() {
+    fn test_mm_crc32_u64() {
         let crc = 0x7819dccd3e824;
         let v = 0x2a22b845fed;
         let i = _mm_crc32_u64(crc, v);
diff --git a/crates/core_arch/src/x86_64/tbm.rs b/crates/core_arch/src/x86_64/tbm.rs
index f4bba709f6..fe12538b07 100644
--- a/crates/core_arch/src/x86_64/tbm.rs
+++ b/crates/core_arch/src/x86_64/tbm.rs
@@ -6,7 +6,7 @@
 //! [Wikipedia][wikipedia_bmi] provides a quick overview of the available
 //! instructions.
 //!
-//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [amd64_ref]: https://docs.amd.com/v/u/en-US/24594_3.37
 //! [wikipedia_bmi]:
 //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
 
@@ -42,7 +42,8 @@ pub fn _bextri_u64<const CONTROL: u64>(a: u64) -> u64 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcfill))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blcfill_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blcfill_u64(x: u64) -> u64 {
     x & x.wrapping_add(1)
 }
 
@@ -53,7 +54,8 @@ pub fn _blcfill_u64(x: u64) -> u64 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blci))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blci_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blci_u64(x: u64) -> u64 {
     x | !x.wrapping_add(1)
 }
 
@@ -64,7 +66,8 @@ pub fn _blci_u64(x: u64) -> u64 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcic))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blcic_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blcic_u64(x: u64) -> u64 {
     !x & x.wrapping_add(1)
 }
 
@@ -76,7 +79,8 @@ pub fn _blcic_u64(x: u64) -> u64 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcmsk))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blcmsk_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blcmsk_u64(x: u64) -> u64 {
     x ^ x.wrapping_add(1)
 }
 
@@ -87,7 +91,8 @@ pub fn _blcmsk_u64(x: u64) -> u64 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blcs))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blcs_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blcs_u64(x: u64) -> u64 {
     x | x.wrapping_add(1)
 }
 
@@ -98,7 +103,8 @@ pub fn _blcs_u64(x: u64) -> u64 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blsfill))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsfill_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsfill_u64(x: u64) -> u64 {
     x | x.wrapping_sub(1)
 }
 
@@ -109,7 +115,8 @@ pub fn _blsfill_u64(x: u64) -> u64 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(blsic))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _blsic_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _blsic_u64(x: u64) -> u64 {
     !x | x.wrapping_sub(1)
 }
 
@@ -121,7 +128,8 @@ pub fn _blsic_u64(x: u64) -> u64 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(t1mskc))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _t1mskc_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _t1mskc_u64(x: u64) -> u64 {
     !x | x.wrapping_add(1)
 }
 
@@ -133,29 +141,31 @@ pub fn _t1mskc_u64(x: u64) -> u64 {
 #[target_feature(enable = "tbm")]
 #[cfg_attr(test, assert_instr(tzmsk))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
-pub fn _tzmsk_u64(x: u64) -> u64 {
+#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
+pub const fn _tzmsk_u64(x: u64) -> u64 {
     !x & x.wrapping_sub(1)
 }
 
 #[cfg(test)]
 mod tests {
+    use crate::core_arch::assert_eq_const as assert_eq;
     use stdarch_test::simd_test;
 
     use crate::core_arch::x86_64::*;
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_bextri_u64() {
+    fn test_bextri_u64() {
         assert_eq!(_bextri_u64::<0x0404>(0b0101_0000u64), 0b0000_0101u64);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blcfill_u64() {
+    const fn test_blcfill_u64() {
         assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
         assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blci_u64() {
+    const fn test_blci_u64() {
         assert_eq!(
             _blci_u64(0b0101_0000u64),
             0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64
@@ -167,25 +177,25 @@ mod tests {
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blcic_u64() {
+    const fn test_blcic_u64() {
         assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
         assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blcmsk_u64() {
+    const fn test_blcmsk_u64() {
         assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
         assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blcs_u64() {
+    const fn test_blcs_u64() {
         assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
         assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blsfill_u64() {
+    const fn test_blsfill_u64() {
         assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
         assert_eq!(
             _blsfill_u64(0u64),
@@ -194,7 +204,7 @@ mod tests {
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_blsic_u64() {
+    const fn test_blsic_u64() {
         assert_eq!(
             _blsic_u64(0b0101_0100u64),
             0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64
@@ -206,7 +216,7 @@ mod tests {
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_t1mskc_u64() {
+    const fn test_t1mskc_u64() {
         assert_eq!(
             _t1mskc_u64(0b0101_0111u64),
             0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64
@@ -218,7 +228,7 @@ mod tests {
     }
 
     #[simd_test(enable = "tbm")]
-    unsafe fn test_tzmsk_u64() {
+    const fn test_tzmsk_u64() {
         assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
         assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
     }
diff --git a/crates/core_arch/src/x86_64/xsave.rs b/crates/core_arch/src/x86_64/xsave.rs
index fa1454a822..30a7123315 100644
--- a/crates/core_arch/src/x86_64/xsave.rs
+++ b/crates/core_arch/src/x86_64/xsave.rs
@@ -132,37 +132,43 @@ mod tests {
 
     #[simd_test(enable = "xsave")]
     #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
-    unsafe fn test_xsave64() {
+    fn test_xsave64() {
         let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
         let mut a = XsaveArea::new();
         let mut b = XsaveArea::new();
 
-        _xsave64(a.ptr(), m);
-        _xrstor64(a.ptr(), m);
-        _xsave64(b.ptr(), m);
+        unsafe {
+            _xsave64(a.ptr(), m);
+            _xrstor64(a.ptr(), m);
+            _xsave64(b.ptr(), m);
+        }
     }
 
     #[simd_test(enable = "xsave,xsaveopt")]
     #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
-    unsafe fn test_xsaveopt64() {
+    fn test_xsaveopt64() {
         let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
         let mut a = XsaveArea::new();
         let mut b = XsaveArea::new();
 
-        _xsaveopt64(a.ptr(), m);
-        _xrstor64(a.ptr(), m);
-        _xsaveopt64(b.ptr(), m);
+        unsafe {
+            _xsaveopt64(a.ptr(), m);
+            _xrstor64(a.ptr(), m);
+            _xsaveopt64(b.ptr(), m);
+        }
     }
 
     #[simd_test(enable = "xsave,xsavec")]
     #[cfg_attr(miri, ignore)] // Register saving/restoring is not supported in Miri
-    unsafe fn test_xsavec64() {
+    fn test_xsavec64() {
         let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
         let mut a = XsaveArea::new();
         let mut b = XsaveArea::new();
 
-        _xsavec64(a.ptr(), m);
-        _xrstor64(a.ptr(), m);
-        _xsavec64(b.ptr(), m);
+        unsafe {
+            _xsavec64(a.ptr(), m);
+            _xrstor64(a.ptr(), m);
+            _xsavec64(b.ptr(), m);
+        }
     }
 }
diff --git a/crates/intrinsic-test/missing_aarch64.txt b/crates/intrinsic-test/missing_aarch64.txt
index bbcfc40c69..f0c9eeb6ce 100644
--- a/crates/intrinsic-test/missing_aarch64.txt
+++ b/crates/intrinsic-test/missing_aarch64.txt
@@ -1,35 +1,19 @@
-# Not implemented in stdarch yet
-vbfdot_f32
-vbfdot_lane_f32
-vbfdot_laneq_f32
-vbfdotq_f32
-vbfdotq_lane_f32
-vbfdotq_laneq_f32
-vbfmlalbq_f32
-vbfmlalbq_lane_f32
-vbfmlalbq_laneq_f32
-vbfmlaltq_f32
-vbfmlaltq_lane_f32
-vbfmlaltq_laneq_f32
-vbfmmlaq_f32
-
-
-# Implemented in stdarch, but missing in Clang.
-vrnd32xq_f64
-vrnd32zq_f64
-vrnd64xq_f64
-vrnd64zq_f64
+# Not supported by qemu (will throw illegal instruction)
+vamin_f16
+vaminq_f16
 vamin_f32
 vaminq_f32
 vaminq_f64
+vamax_f16
+vamaxq_f16
 vamax_f32
 vamaxq_f32
 vamaxq_f64
-# LLVM select error, and missing in Clang.
-vrnd32x_f64
-vrnd32z_f64
-vrnd64x_f64
-vrnd64z_f64
+vscale_f16
+vscale_f32
+vscaleq_f16
+vscaleq_f32
+vscaleq_f64
 vluti2_lane_p16
 vluti2_lane_p8
 vluti2_lane_s16
@@ -41,6 +25,23 @@ vluti2q_lane_p8
 vluti2q_lane_s16
 vluti2q_lane_s8
 vluti2q_lane_u16
+vluti2_laneq_f16
+vluti2_lane_f16
+vluti2_laneq_f16
+vluti2_laneq_p16
+vluti2_laneq_p8
+vluti2_laneq_s16
+vluti2_laneq_s8
+vluti2_laneq_u16
+vluti2_laneq_u8
+vluti2q_lane_f16
+vluti2q_laneq_f16
+vluti2q_laneq_p16
+vluti2q_laneq_p8
+vluti2q_laneq_s16
+vluti2q_laneq_s8
+vluti2q_laneq_u16
+vluti2q_laneq_u8
 vluti2q_lane_u8
 vluti4q_lane_f16_x2
 vluti4q_lane_p16_x2
@@ -57,8 +58,27 @@ vluti4q_laneq_s8
 vluti4q_laneq_u16_x2
 vluti4q_laneq_u8
 
-# Broken in Clang
+# Not implemented in stdarch yet
+vcvtad_s32_f64
+vcvtad_u32_f64
+vcvtd_s32_f64
+vcvtd_u32_f64
+vcvtmd_s32_f64
+vcvtmd_u32_f64
+vcvtnd_s32_f64
+vcvtnd_u32_f64
+vcvtpd_s32_f64
+vcvtpd_u32_f64
+vcvts_s64_f32
+vcvts_u64_f32
+vcvtas_s64_f32
+vcvtas_u64_f32
+vcvtms_s64_f32
+vcvtms_u64_f32
+vcvtns_s64_f32
+vcvtns_u64_f32
+vcvtps_s64_f32
+vcvtps_u64_f32
+
+# Broken in Clang (fixed in https://github.com/llvm/llvm-project/pull/156029)
 vcvth_s16_f16
-# FIXME: Broken output due to missing f16 printing support in Rust, see git blame for this line
-vmulh_lane_f16
-vmulh_laneq_f16
diff --git a/crates/intrinsic-test/missing_aarch64_be.txt b/crates/intrinsic-test/missing_aarch64_be.txt
new file mode 100644
index 0000000000..9163aaa1c8
--- /dev/null
+++ b/crates/intrinsic-test/missing_aarch64_be.txt
@@ -0,0 +1,124 @@
+# Bad LLVM codegen for BE in O2 in clang, and release in rust (https://github.com/llvm/llvm-project/issues/166190)
+vcmla_lane_f16
+vcmla_laneq_f16
+vcmla_rot180_lane_f16
+vcmla_rot180_laneq_f16
+vcmla_rot270_lane_f16
+vcmla_rot270_laneq_f16
+vcmla_rot90_lane_f16
+vcmla_rot90_laneq_f16
+vcmlaq_lane_f16
+vcmlaq_laneq_f16
+vcmlaq_laneq_f32
+vcmlaq_rot180_lane_f16
+vcmlaq_rot180_laneq_f16
+vcmlaq_rot180_laneq_f32
+vcmlaq_rot270_lane_f16
+vcmlaq_rot270_laneq_f16
+vcmlaq_rot270_laneq_f32
+vcmlaq_rot90_lane_f16
+vcmlaq_rot90_laneq_f16
+vcmlaq_rot90_laneq_f32
+# Bad codegen for BE in O2 in clang, correct in rust. Same cause as above issue.
+vdot_lane_s32
+vdot_lane_u32
+vdot_laneq_s32
+vdot_laneq_u32
+vdotq_lane_s32
+vdotq_lane_u32
+vdotq_laneq_s32
+vdotq_laneq_u32
+vsudot_lane_s32
+vsudot_laneq_s32
+vsudotq_lane_s32
+vsudotq_laneq_s32
+vusdot_lane_s32
+vusdot_laneq_s32
+vusdotq_lane_s32
+vusdotq_laneq_s32
+
+# Below are in common to missing_aarch64.txt
+# Not supported by qemu (will throw illegal instruction)
+vamin_f16
+vaminq_f16
+vamin_f32
+vaminq_f32
+vaminq_f64
+vamax_f16
+vamaxq_f16
+vamax_f32
+vamaxq_f32
+vamaxq_f64
+vscale_f16
+vscale_f32
+vscaleq_f16
+vscaleq_f32
+vscaleq_f64
+vluti2_lane_p16
+vluti2_lane_p8
+vluti2_lane_s16
+vluti2_lane_s8
+vluti2_lane_u16
+vluti2_lane_u8
+vluti2q_lane_p16
+vluti2q_lane_p8
+vluti2q_lane_s16
+vluti2q_lane_s8
+vluti2q_lane_u16
+vluti2_laneq_f16
+vluti2_lane_f16
+vluti2_laneq_f16
+vluti2_laneq_p16
+vluti2_laneq_p8
+vluti2_laneq_s16
+vluti2_laneq_s8
+vluti2_laneq_u16
+vluti2_laneq_u8
+vluti2q_lane_f16
+vluti2q_laneq_f16
+vluti2q_laneq_p16
+vluti2q_laneq_p8
+vluti2q_laneq_s16
+vluti2q_laneq_s8
+vluti2q_laneq_u16
+vluti2q_laneq_u8
+vluti2q_lane_u8
+vluti4q_lane_f16_x2
+vluti4q_lane_p16_x2
+vluti4q_lane_p8
+vluti4q_lane_s16_x2
+vluti4q_lane_s8
+vluti4q_lane_u16_x2
+vluti4q_lane_u8
+vluti4q_laneq_f16_x2
+vluti4q_laneq_p16_x2
+vluti4q_laneq_p8
+vluti4q_laneq_s16_x2
+vluti4q_laneq_s8
+vluti4q_laneq_u16_x2
+vluti4q_laneq_u8
+
+# Not implemented in stdarch yet
+vcvtad_s32_f64
+vcvtad_u32_f64
+vcvtd_s32_f64
+vcvtd_u32_f64
+vcvtmd_s32_f64
+vcvtmd_u32_f64
+vcvtnd_s32_f64
+vcvtnd_u32_f64
+vcvtpd_s32_f64
+vcvtpd_u32_f64
+vcvts_s64_f32
+vcvts_u64_f32
+vcvtas_s64_f32
+vcvtas_u64_f32
+vcvtms_s64_f32
+vcvtms_u64_f32
+vcvtns_s64_f32
+vcvtns_u64_f32
+vcvtps_s64_f32
+vcvtps_u64_f32
+
+# Broken in Clang
+vcvth_s16_f16
diff --git a/crates/intrinsic-test/missing_arm.txt b/crates/intrinsic-test/missing_arm.txt
index 04c09a27d9..165b45d50c 100644
--- a/crates/intrinsic-test/missing_arm.txt
+++ b/crates/intrinsic-test/missing_arm.txt
@@ -1,18 +1,3 @@
-# Not implemented in stdarch yet
-vbfdot_f32
-vbfdot_lane_f32
-vbfdot_laneq_f32
-vbfdotq_f32
-vbfdotq_lane_f32
-vbfdotq_laneq_f32
-vbfmlalbq_f32
-vbfmlalbq_lane_f32
-vbfmlalbq_laneq_f32
-vbfmlaltq_f32
-vbfmlaltq_lane_f32
-vbfmlaltq_laneq_f32
-vbfmmlaq_f32
-
 # Implemented in Clang and stdarch for A64 only even though CSV claims A32 support
 vaddq_p64
 vbsl_p64
@@ -134,18 +119,6 @@ vcvth_f16_u32
 vcvth_u32_f16
 vcvth_n_f16_u32
 vcvth_n_u32_f16
-vcvtah_s32_f16
-vcvtah_u32_f16
-vcvtmh_s32_f16
-vcvtmh_u32_f16
-vcvtpq_s16_f16
-vcvtpq_u16_f16
-vcvtp_s16_f16
-vcvtp_u16_f16
-vcvtph_s32_f16
-vcvtph_u32_f16
-vcvtnh_u32_f16
-vcvtnh_s32_f16
 vfmlsl_low_f16
 vfmlslq_low_f16
 vfmlsl_high_f16
@@ -318,3 +291,25 @@ vcvta_s16_f16
 vcvta_u16_f16
 vceqz_f16
 vceqzq_f16
+vcvtah_s32_f16
+vcvtah_u32_f16
+vcvtmh_s32_f16
+vcvtmh_u32_f16
+vcvtpq_s16_f16
+vcvtpq_u16_f16
+vcvtp_s16_f16
+vcvtp_u16_f16
+vcvtph_s32_f16
+vcvtph_u32_f16
+vcvtnh_u32_f16
+vcvtnh_s32_f16
+
+# Missing in Clang
+vusdot_laneq_s32
+vsudot_laneq_s32
+vusdotq_laneq_s32
+vsudotq_laneq_s32
+vdot_laneq_s32
+vdot_laneq_u32
+vdotq_laneq_s32
+vdotq_laneq_u32
diff --git a/crates/intrinsic-test/missing_x86.txt b/crates/intrinsic-test/missing_x86.txt
index 58e37b92a1..f88a125bfd 100644
--- a/crates/intrinsic-test/missing_x86.txt
+++ b/crates/intrinsic-test/missing_x86.txt
@@ -3,17 +3,9 @@
 #__bswap_64
 _bswap64
 
-# Provides pointer to allocated memory, which is difficult to test
-_mm_malloc
-
-# requires target feature 'waitpkg', but would be inlined into function that is compiled without support for 'waitpkg'
-_tpause
-_umwait
-
-# `use of undeclared identifier` error in Clang
+# not present in Clang and Rust
 _bit_scan_forward
 _bit_scan_reverse
-_bswap
 _castf32_u32
 _castf64_u64
 _castu32_f32
@@ -22,569 +14,6 @@ _lrotl
 _lrotr
 _may_i_use_cpu_feature
 _may_i_use_cpu_feature_ext
-_mm256_acos_pd
-_mm256_acos_ph
-_mm256_acos_ps
-_mm256_acosh_pd
-_mm256_acosh_ph
-_mm256_acosh_ps
-_mm256_asin_pd
-_mm256_asin_ph
-_mm256_asin_ps
-_mm256_asinh_pd
-_mm256_asinh_ph
-_mm256_asinh_ps
-_mm256_atan_pd
-_mm256_atan_ps
-_mm256_atan_ph
-_mm256_atan2_pd
-_mm256_atan2_ph
-_mm256_atan2_ps
-_mm256_atanh_pd
-_mm256_atanh_ph
-_mm256_atanh_ps
-_mm256_cbrt_pd
-_mm256_cbrt_ph
-_mm256_cbrt_ps
-_mm256_cdfnorm_pd
-_mm256_cdfnorm_ph
-_mm256_cdfnorm_ps
-_mm256_cdfnorminv_pd
-_mm256_cdfnorminv_ph
-_mm256_cdfnorminv_ps
-_mm256_cexp_ps
-_mm256_cos_pd
-_mm256_cos_ph
-_mm256_cos_ps
-_mm256_cosd_pd
-_mm256_cosd_ph
-_mm256_cosd_ps
-_mm256_cosh_pd
-_mm256_cosh_ph
-_mm256_cosh_ps
-_mm256_csqrt_ps
-_mm256_div_epi16
-_mm256_div_epi32
-_mm256_div_epi64
-_mm256_div_epi8
-_mm256_div_epu16
-_mm256_div_epu32
-_mm256_div_epu64
-_mm256_div_epu8
-_mm256_dpbssd_epi32
-_mm256_dpbssds_epi32
-_mm256_dpbsud_epi32
-_mm256_dpbsuds_epi32
-_mm256_dpbuud_epi32
-_mm256_dpbuuds_epi32
-_mm256_dpwsud_epi32
-_mm256_dpwsuds_epi32
-_mm256_dpwusd_epi32
-_mm256_dpwusds_epi32
-_mm256_dpwuud_epi32
-_mm256_dpwuuds_epi32
-_mm256_erf_pd
-_mm256_erf_ps
-_mm256_erfc_pd
-_mm256_erfc_ph
-_mm256_erfc_ps
-_mm256_erfcinv_pd
-_mm256_erfcinv_ph
-_mm256_erfcinv_ps
-_mm256_erfinv_pd
-_mm256_erfinv_ph
-_mm256_erfinv_ps
-_mm256_exp10_pd
-_mm256_exp10_ph
-_mm256_exp10_ps
-_mm256_exp2_pd
-_mm256_exp2_ph
-_mm256_exp2_ps
-_mm256_exp_pd
-_mm256_exp_ph
-_mm256_exp_ps
-_mm256_expm1_pd
-_mm256_expm1_ph
-_mm256_expm1_ps
-_mm256_hypot_pd
-_mm256_hypot_ph
-_mm256_hypot_ps
-_mm256_idiv_epi32
-_mm256_invcbrt_pd
-_mm256_invcbrt_ph
-_mm256_invcbrt_ps
-_mm256_invsqrt_pd
-_mm256_invsqrt_ph
-_mm256_invsqrt_ps
-_mm256_irem_epi32
-_mm256_log10_pd
-_mm256_log10_ph
-_mm256_log10_ps
-_mm256_log1p_pd
-_mm256_log1p_ph
-_mm256_log1p_ps
-_mm256_log2_pd
-_mm256_log2_ph
-_mm256_log2_ps
-_mm256_log_pd
-_mm256_log_ph
-_mm256_log_ps
-_mm256_logb_pd
-_mm256_logb_ph
-_mm256_logb_ps
-_mm256_clog_ps
-_mm256_madd52hi_avx_epu64
-_mm256_madd52lo_avx_epu64
-_mm256_erf_ph
-_mm256_mask_reduce_add_epi16
-_mm256_mask_reduce_add_epi8
-_mm256_mask_reduce_and_epi16
-_mm256_mask_reduce_and_epi8
-_mm256_mask_reduce_max_epi16
-_mm256_mask_reduce_max_epi8
-_mm256_mask_reduce_max_epu16
-_mm256_mask_reduce_max_epu8
-_mm256_mask_reduce_min_epi16
-_mm256_mask_reduce_min_epi8
-_mm256_mask_reduce_min_epu16
-_mm256_mask_reduce_min_epu8
-_mm256_mask_reduce_mul_epi16
-_mm256_mask_reduce_mul_epi8
-_mm256_mask_reduce_or_epi16
-_mm256_mask_reduce_or_epi8
-_mm512_cosd_ph
-_mm512_cosd_ps
-_mm512_cosh_pd
-_mm512_cosh_ph
-_mm512_cosh_ps
-_mm512_div_epi16
-_mm512_div_epi32
-_mm512_div_epi64
-_mm512_div_epi8
-_mm512_div_epu16
-_mm512_div_epu32
-_mm512_div_epu64
-_mm512_div_epu8
-_mm512_erf_pd
-_mm512_erf_ph
-_mm512_erf_ps
-_mm512_erfc_pd
-_mm512_erfc_ph
-_mm512_erfc_ps
-_mm512_erfcinv_pd
-_mm512_erfcinv_ph
-_mm512_erfcinv_ps
-_mm512_erfinv_pd
-_mm512_erfinv_ph
-_mm512_erfinv_ps
-_mm512_exp10_pd
-_mm512_exp10_ph
-_mm512_exp10_ps
-_mm512_exp2_pd
-_mm512_exp2_ph
-_mm512_exp2_ps
-_mm512_exp_pd
-_mm512_exp_ph
-_mm512_exp_ps
-_mm512_expm1_pd
-_mm512_expm1_ph
-_mm512_expm1_ps
-_mm512_floor_ph
-_mm512_hypot_pd
-_mm512_hypot_ph
-_mm512_hypot_ps
-_mm512_invsqrt_pd
-_mm512_invsqrt_ph
-_mm512_invsqrt_ps
-_mm512_log10_pd
-_mm512_log10_ph
-_mm512_log10_ps
-_mm512_log1p_pd
-_mm512_log1p_ph
-_mm512_log1p_ps
-_mm512_log2_pd
-_mm512_log2_ph
-_mm512_log2_ps
-_mm512_log_pd
-_mm512_log_ph
-_mm512_log_ps
-_mm512_logb_pd
-_mm512_logb_ph
-_mm512_logb_ps
-_mm512_mask_acos_pd
-_mm512_mask_acos_ph
-_mm512_mask_acos_ps
-_mm512_mask_acosh_pd
-_mm512_mask_acosh_ph
-_mm512_mask_acosh_ps
-_mm512_mask_asin_pd
-_mm512_mask_asin_ph
-_mm512_mask_asin_ps
-_mm512_mask_asinh_pd
-_mm512_mask_asinh_ph
-_mm512_mask_asinh_ps
-_mm512_mask_atan2_pd
-_mm512_mask_atan2_ps
-_mm512_mask_atan_pd
-_mm512_mask_atan_ph
-_mm512_mask_atan_ph
-_mm512_mask_atanh_pd
-_mm512_mask_atanh_ph
-_mm512_mask_atanh_ps
-_mm512_mask_cbrt_pd
-_mm512_mask_cbrt_ph
-_mm512_mask_cbrt_ps
-_mm512_mask_cdfnorm_pd
-_mm512_mask_cdfnorm_ph
-_mm512_mask_cdfnorm_ps
-_mm512_mask_cdfnorminv_pd
-_mm512_mask_cdfnorminv_ph
-_mm512_mask_cdfnorminv_ps
-_mm512_mask_ceil_ph
-_mm512_mask_cos_pd
-_mm512_mask_cos_ph
-_mm512_mask_cos_ps
-_mm512_mask_cosd_pd
-_mm512_mask_cosd_ph
-_mm512_mask_cosd_ps
-_mm512_mask_cosh_pd
-_mm512_mask_cosh_ph
-_mm512_mask_cosh_ps
-_mm512_mask_atan_ps
-_mm512_cosd_pd
-_mm512_cos_ps
-_mm512_cos_ph
-_mm512_cos_pd
-_mm512_mask_div_epi32
-_mm512_mask_div_epu32
-_mm512_mask_erf_pd
-_mm512_mask_erf_ph
-_mm512_mask_erf_ps
-_mm512_mask_erfc_pd
-_mm512_mask_erfc_ph
-_mm512_mask_erfc_ps
-_mm512_mask_erfcinv_pd
-_mm512_mask_erfcinv_ph
-_mm512_mask_erfcinv_ps
-_mm512_mask_erfinv_pd
-_mm512_mask_erfinv_ph
-_mm512_mask_erfinv_ps
-_mm512_mask_exp10_pd
-_mm512_mask_exp10_ph
-_mm512_mask_exp10_ps
-_mm512_mask_exp2_pd
-_mm512_mask_exp2_ph
-_mm512_mask_exp2_ps
-_mm512_mask_exp_pd
-_mm512_mask_exp_ph
-_mm512_mask_exp_ps
-_mm512_mask_expm1_pd
-_mm512_mask_expm1_ph
-_mm512_mask_expm1_ps
-_mm512_mask_floor_ph
-_mm512_mask_hypot_pd
-_mm512_mask_hypot_ps
-_mm512_mask_invsqrt_pd
-_mm512_mask_invsqrt_ph
-_mm512_mask_invsqrt_ps
-_mm512_mask_log10_pd
-_mm512_mask_log10_ph
-_mm512_mask_log10_ps
-_mm512_mask_log1p_pd
-_mm512_mask_log1p_ph
-_mm512_mask_log1p_ps
-_mm512_mask_log2_pd
-_mm512_mask_log2_ph
-_mm512_mask_log2_ps
-_mm512_mask_log_pd
-_mm512_mask_log_ph
-_mm512_mask_log_ps
-_mm512_mask_logb_pd
-_mm512_mask_logb_ph
-_mm512_mask_logb_ps
-_mm512_mask_nearbyint_pd
-_mm512_mask_nearbyint_ph
-_mm512_mask_nearbyint_ps
-_mm512_mask_pow_pd
-_mm512_mask_pow_ps
-_mm512_mask_recip_pd
-_mm512_mask_recip_ph
-_mm512_mask_recip_ps
-_mm512_mask_rem_epi32
-_mm512_mask_rem_epu32
-_mm512_mask_rint_pd
-_mm512_mask_rint_ph
-_mm512_mask_rint_ps
-_mm512_mask_sin_pd
-_mm512_mask_sin_ph
-_mm512_mask_sin_ps
-_mm512_mask_sind_pd
-_mm512_mask_sind_ph
-_mm512_mask_sind_ps
-_mm512_mask_sinh_pd
-_mm512_mask_sinh_ph
-_mm512_mask_sinh_ps
-_mm512_mask_svml_round_pd
-_mm512_mask_svml_round_ph
-_mm512_mask_tan_pd
-_mm512_mask_tan_ph
-_mm512_mask_tan_ps
-_mm512_mask_tand_pd
-_mm512_mask_tand_ph
-_mm512_mask_tand_ps
-_mm512_mask_tanh_pd
-_mm512_mask_tanh_ph
-_mm512_mask_tanh_ps
-_mm512_mask_trunc_pd
-_mm512_mask_trunc_ph
-_mm512_mask_trunc_ps
-_mm512_nearbyint_pd
-_mm512_nearbyint_ph
-_mm512_nearbyint_ps
-_mm512_pow_pd
-_mm512_pow_ph
-_mm512_pow_ps
-_mm512_recip_pd
-_mm512_recip_ph
-_mm512_recip_ps
-_mm512_rem_epi16
-_mm512_rem_epi32
-_mm512_rem_epi64
-_mm512_rem_epi8
-_mm512_rem_epu16
-_mm512_rem_epu32
-_mm512_rem_epu64
-_mm512_rem_epu8
-_mm512_rint_pd
-_mm512_rint_ph
-_mm512_rint_ps
-_mm512_sin_pd
-_mm512_sin_ph
-_mm512_sin_ps
-_mm512_sind_pd
-_mm512_sind_ph
-_mm512_sind_ps
-_mm512_sinh_pd
-_mm512_sinh_ph
-_mm512_sinh_ps
-_mm512_svml_round_pd
-_mm512_svml_round_ph
-_mm512_tan_pd
-_mm512_tan_ph
-_mm512_tan_ps
-_mm512_tand_pd
-_mm512_tand_ph
-_mm512_tand_ps
-_mm512_tanh_pd
-_mm512_tanh_ph
-_mm512_tanh_ps
-_mm512_trunc_pd
-_mm512_trunc_ph
-_mm512_trunc_ps
-_mm_acos_pd
-_mm_acos_ph
-_mm_acos_ps
-_mm_acosh_pd
-_mm_acosh_ph
-_mm_acosh_ps
-_mm_asin_pd
-_mm_asin_ph
-_mm_asin_ps
-_mm_asinh_pd
-_mm_asinh_ph
-_mm_asinh_ps
-_mm_atan2_pd
-_mm_atan2_ph
-_mm_atan2_ps
-_mm_atan_pd
-_mm_atan_ph
-_mm_atan_ps
-_mm_atanh_pd
-_mm_atanh_ph
-_mm_atanh_ps
-_mm_cbrt_pd
-_mm_cbrt_ph
-_mm_cbrt_ps
-_mm_cdfnorm_pd
-_mm_cdfnorm_ph
-_mm_cdfnorm_ps
-_mm_cdfnorminv_pd
-_mm_cdfnorminv_ph
-_mm_cdfnorminv_ps
-_mm_cexp_ps
-_mm_clog_ps
-_mm_cos_pd
-_mm_cos_ph
-_mm_cos_ps
-_mm_cosd_pd
-_mm_cosd_ph
-_mm_cosd_ps
-_mm_cosh_pd
-_mm_cosh_ph
-_mm_cosh_ps
-_mm_csqrt_ps
-_mm_cvtsd_si64x
-_mm_cvtsi128_si64x
-_mm_cvtsi64x_sd
-_mm_cvtsi64x_si128
-_mm_cvttsd_si64x
-_mm_div_epi16
-_mm_div_epi32
-_mm_div_epi64
-_mm_div_epi8
-_mm_div_epu16
-_mm_div_epu32
-_mm_div_epu64
-_mm_div_epu8
-_mm_dpbssd_epi32
-_mm_dpbssds_epi32
-_mm_dpbsud_epi32
-_mm_dpbsuds_epi32
-_mm_dpbuud_epi32
-_mm_dpbuuds_epi32
-_mm_dpwsud_epi32
-_mm_dpwsuds_epi32
-_mm_dpwusd_epi32
-_mm_dpwusds_epi32
-_mm_dpwuud_epi32
-_mm_dpwuuds_epi32
-_mm_erf_pd
-_mm_erf_ph
-_mm_erf_ps
-_mm_erfc_pd
-_mm_erfc_ph
-_mm_erfc_ps
-_mm_erfcinv_pd
-_mm_erfcinv_ph
-_mm_erfcinv_ps
-_mm_erfinv_pd
-_mm_erfinv_ph
-_mm_erfinv_ps
-_mm_exp10_pd
-_mm_exp10_ph
-_mm_exp10_ps
-_mm_exp2_pd
-_mm_exp2_ph
-_mm_exp2_ps
-_mm_exp_pd
-_mm_exp_ph
-_mm_exp_ps
-_mm_expm1_pd
-_mm_expm1_ph
-_mm_expm1_ps
-_mm_hypot_pd
-_mm_hypot_ph
-_mm_hypot_ps
-_mm_idiv_epi32
-_mm_invcbrt_pd
-_mm_invcbrt_ph
-_mm_invcbrt_ps
-_mm_invsqrt_pd
-_mm_invsqrt_ph
-_mm_invsqrt_ps
-_mm_irem_epi32
-_mm_log10_pd
-_mm_log10_ph
-_mm_log10_ps
-_mm_log1p_pd
-_mm_log1p_ph
-_mm_log1p_ps
-_mm_log2_pd
-_mm_log2_ph
-_mm_log2_ps
-_mm_log_pd
-_mm_log_ph
-_mm_log_ps
-_mm_logb_pd
-_mm_logb_ph
-_mm_logb_ps
-_mm_madd52hi_avx_epu64
-_mm_madd52lo_avx_epu64
-_mm_mask_reduce_add_epi16
-_mm_mask_reduce_add_epi8
-_mm_mask_reduce_and_epi16
-_mm_mask_reduce_and_epi8
-_mm_mask_reduce_max_epi16
-_mm_mask_reduce_max_epi8
-_mm_mask_reduce_max_epu16
-_mm_mask_reduce_max_epu8
-_mm_mask_reduce_min_epi16
-_mm_mask_reduce_min_epi8
-_mm_mask_reduce_min_epu16
-_mm_mask_reduce_min_epu8
-_mm_mask_reduce_mul_epi16
-_mm_mask_reduce_mul_epi8
-_mm_mask_reduce_or_epi16
-_mm_mask_reduce_or_epi8
-_mm_pow_pd
-_mm_pow_ph
-_mm_pow_ps
-_mm_reduce_add_epi16
-_mm_reduce_add_epi8
-_mm_reduce_and_epi16
-_mm_reduce_and_epi8
-_mm_reduce_max_epi16
-_mm_reduce_max_epi8
-_mm_reduce_max_epu16
-_mm_reduce_max_epu8
-_mm_reduce_min_epi16
-_mm_reduce_min_epi8
-_mm_reduce_min_epu16
-_mm_reduce_min_epu8
-_mm_reduce_mul_epi16
-_mm_reduce_mul_epi8
-_mm_reduce_or_epi16
-_mm_reduce_or_epi8
-_mm_rem_epi16
-_mm_rem_epi32
-_mm_rem_epi64
-_mm_rem_epi8
-_mm_rem_epu16
-_mm_rem_epu32
-_mm_rem_epu64
-_mm_rem_epu8
-_mm_sin_pd
-_mm_sin_ph
-_mm_sin_ps
-_mm_sind_pd
-_mm_sind_ph
-_mm_sind_ps
-_mm_sinh_pd
-_mm_sinh_ph
-_mm_sinh_ps
-_mm_sm3msg1_epi32
-_mm_sm3msg2_epi32
-_mm_sm3rnds2_epi32
-_mm_sm4key4_epi32
-_mm_sm4rnds4_epi32
-_mm_svml_ceil_pd
-_mm_svml_ceil_ph
-_mm_svml_ceil_ps
-_mm_svml_floor_pd
-_mm_svml_floor_ph
-_mm_svml_floor_ps
-_mm_svml_round_pd
-_mm_svml_round_ph
-_mm_svml_round_ps
-_mm_svml_sqrt_pd
-_mm_svml_sqrt_ph
-_mm_svml_sqrt_ps
-_mm_tan_pd
-_mm_tan_ph
-_mm_tan_ps
-_mm_tand_pd
-_mm_tand_ph
-_mm_tand_ps
-_mm_tanh_pd
-_mm_tanh_ph
-_mm_tanh_ps
-_mm_trunc_pd
-_mm_trunc_ph
-_mm_trunc_ps
-_mm_udiv_epi32
-_mm_urem_epi32
-_popcnt32
-_popcnt64
 _rdpmc
 _rotl
 _rotl64
@@ -594,311 +23,45 @@ _rotwl
 _rotwr
 _urdmsr
 
-# Cannot find value in this scope (in Rust testfiles)
+# not present in Clang
+_bswap
+_mm_cvtsd_si64x
+_mm_cvtsi128_si64x
+_mm_cvtsi64x_sd
+_mm_cvtsi64x_si128
+_mm_cvttsd_si64x
+_popcnt32
+_popcnt64
+
+# not present in Rust
+_cvtsh_ss
+_cvtss_sh
+_mm256_set1_pch
 _mm512_set1_pch
-_mm_abs_pi16
-_mm_abs_pi32
-_mm_abs_pi8
-_mm_add_pi16
-_mm_add_pi32
-_mm_add_pi8
-_mm_add_si64
-_mm_adds_pi16
-_mm_adds_pi8
-_mm_adds_pu16
-_mm_adds_pu8
-_mm_alignr_pi8
-_mm_and_si64
-_mm_andnot_si64
-_mm_avg_pu16
-_mm_avg_pu8
-_mm_cmpeq_pi16
-_mm_cmpeq_pi32
-_mm_cmpeq_pi8
-_mm_cmpgt_pi16
-_mm_cmpgt_pi32
-_mm_cmpgt_pi8
-_mm_cvt_pi2ps
-_mm_cvt_ps2pi
-_mm_cvtm64_si64
-_mm_cvtpd_pi32
-_mm_cvtpi16_ps
-_mm_cvtpi32_pd
-_mm_cvtpi32_ps
-_mm_cvtpi32x2_ps
-_mm_cvtpi8_ps
-_mm_cvtps_pi16
-_mm_cvtps_pi32
-_mm_cvtps_pi8
-_mm_cvtpu16_ps
-_mm_cvtpu8_ps
-_mm_cvtsi32_si64
-_mm_cvtsi64_m64
-_mm_cvtsi64_si32
-_mm_cvtt_ps2pi
-_mm_cvttpd_pi32
-_mm512_cbrt_pd
-_mm512_cbrt_ph
-_mm512_cbrt_ps
-_mm512_cdfnorm_pd
-_mm512_cdfnorm_ph
-_mm512_cdfnorm_ps
-_mm512_cdfnorminv_pd
-_mm512_cdfnorminv_ph
-_mm512_cdfnorminv_ps
-_mm512_ceil_pd
-_mm512_ceil_ph
-_mm512_ceil_ps
-_mm512_floor_pd
-_mm512_floor_ps
-_mm512_mask_ceil_pd
-_mm512_mask_ceil_ps
-_mm_max_pi16
-_mm_max_pu8
-_mm_min_pi16
-_mm_min_pu8
-_mm_movemask_pi8
-_mm_movepi64_pi64
-_mm_movpi64_epi64
-_mm_mul_su32
-_mm_mulhi_pi16
-_mm_mulhi_pu16
-_mm_mulhrs_pi16
-_mm_mullo_pi16
-_mm_or_si64
-_mm_packs_pi16
-_mm_packs_pi32
-_mm_packs_pu16
+_mm_malloc
 _mm_popcnt_u32
 _mm_popcnt_u64
-_mm_sad_pu8
-_mm_set1_epi64
 _mm_set1_pch
-_mm_set1_pi16
-_mm_set1_pi32
-_mm_set1_pi8
-_mm_set_epi64
-_mm_set_pi16
-_mm_set_pi32
-_mm_set_pi8
-_mm_setr_epi64
-_mm_setr_pi16
-_mm_setr_pi32
-_mm_setr_pi8
-_mm_shuffle_pi16
-_mm_shuffle_pi8
-_mm_sign_pi16
-_mm_sign_pi32
-_mm_sign_pi8
-_mm_sll_pi16
-_mm_sll_pi32
-_mm_sll_si64
-_mm_slli_pi16
-_mm_slli_pi32
-_mm_slli_si64
-_mm_sra_pi16
-_mm_sra_pi32
-_mm_srai_pi16
-_mm_srai_pi32
-_mm_srl_pi16
-_mm_srl_pi32
-_mm_srl_si64
-_mm_srli_pi16
-_mm_srli_pi32
-_mm_srli_si64
-_mm_sub_pi16
-_mm_sub_pi32
-_mm_sub_pi8
-_mm_sub_si64
-_mm_subs_pi16
-_mm_subs_pi8
-_mm_subs_pu16
-_mm_subs_pu8
-_mm_unpackhi_pi16
-_mm_unpackhi_pi32
-_mm_unpackhi_pi8
-_mm_unpacklo_pi16
-_mm_unpacklo_pi32
-_mm_unpacklo_pi8
-_mm_xor_si64
-_mm256_pow_pd
-_mm256_pow_ph
-_mm256_pow_ps
-_mm256_rem_epi16
-_mm256_rem_epi32
-_mm256_rem_epi64
-_mm256_rem_epi8
-_mm256_rem_epu16
-_mm256_rem_epu32
-_mm256_rem_epu64
-_mm256_rem_epu8
-_mm256_set1_pch
-_mm256_sin_pd
-_mm256_sin_ph
-_mm256_sin_ps
-_mm256_sind_pd
-_mm256_sind_ph
-_mm256_sind_ps
-_mm256_sinh_pd
-_mm256_sinh_ph
-_mm256_sinh_ps
-_mm256_svml_ceil_pd
-_mm256_svml_ceil_ph
-_mm256_svml_ceil_ps
-_mm256_svml_floor_pd
-_mm256_svml_floor_ph
-_mm256_svml_floor_ps
-_mm256_svml_round_pd
-_mm256_svml_round_ph
-_mm256_svml_round_ps
-_mm256_svml_sqrt_pd
-_mm256_svml_sqrt_ph
-_mm256_svml_sqrt_ps
-_mm256_tan_pd
-_mm256_tan_ph
-_mm256_tan_ps
-_mm256_tand_pd
-_mm256_tand_ph
-_mm256_tand_ps
-_mm256_tanh_pd
-_mm256_tanh_ph
-_mm256_tanh_ps
-_mm256_trunc_pd
-_mm256_trunc_ph
-_mm256_trunc_ps
-_mm256_udiv_epi32
-_mm256_urem_epi32
-_mm512_acos_pd
-_mm512_acos_ph
-_mm512_acos_ps
-_mm512_acosh_pd
-_mm512_acosh_ph
-_mm512_acosh_ps
-_mm_cvttps_pi32
-_mm_extract_pi16
-_mm_hadd_pi16
-_mm_hadd_pi32
-_mm_hadds_pi16
-_mm_hsub_pi16
-_mm_hsub_pi32
-_mm_hsubs_pi16
-_mm_insert_pi16
-_mm_madd_pi16
-_mm_maddubs_pi16
-_mm512_asin_pd
-_mm512_asin_ph
-_mm512_asin_ps
-_mm512_asinh_pd
-_mm512_asinh_ph
-_mm512_asinh_ps
-_mm512_atan2_pd
-_mm512_atan2_ph
-_mm512_atan2_ps
-_mm512_atan_pd
-_mm512_atan_ph
-_mm512_atan_ps
-_mm512_atanh_pd
-_mm512_atanh_ph
-_mm512_atanh_ps
-_cvtsh_ss
-_cvtss_sh
-_m_from_int
-_m_from_int64
-_m_packssdw
-_m_packsswb
-_m_packuswb
-_m_paddb
-_m_paddd
-_m_paddsb
-_m_paddsw
-_m_paddusb
-_m_paddusw
-_m_paddw
-_m_pand
-_m_pandn
-_m_pavgb
-_m_pavgw
-_m_pcmpeqb
-_m_pcmpeqd
-_m_pcmpeqw
-_m_pcmpgtb
-_m_pcmpgtd
-_m_pcmpgtw
-_m_pextrw
-_m_pinsrw
-_m_pmaddwd
-_m_pmaxsw
-_m_pmaxub
-_m_pminsw
-_m_pminub
-_m_pmovmskb
-_m_pmulhuw
-_m_pmulhw
-_m_pmullw
-_m_por
-_m_psadbw
-_m_pshufw
-_m_pslld
-_m_pslldi
-_m_psllq
-_m_psllqi
-_m_psllw
-_m_psllwi
-_m_psrad
-_m_psradi
-_m_psraw
-_m_psrawi
-_m_psrld
-_m_psrldi
-_m_psrlq
-_m_psrlqi
-_m_psrlw
-_m_psrlwi
-_m_psubb
-_m_psubd
-_m_psubsb
-_m_psubsw
-_m_psubusb
-_m_psubusw
-_m_psubw
-_m_punpckhbw
-_m_punpckhdq
-_m_punpckhwd
-_m_punpcklbw
-_m_punpckldq
-_m_punpcklwd
-_m_pxor
-_m_to_int
-_m_to_int64
-_mm512_mask_floor_pd
-_mm512_mask_floor_ps
+_tpause
+_umwait
+
+# IMM8 must be an even number in the range `0..=62`
+_mm_sm3rnds2_epi32
 
 # SDE ERROR: Cannot execute XGETBV with ECX != 0
 _xgetbv
 
-# Miscellaneous issues that can be fixed first
-_kshiftli_mask16
-_kshiftli_mask32
-_kshiftli_mask64
-_kshiftli_mask8
-_kshiftri_mask16
-_kshiftri_mask32
-_kshiftri_mask64
-_kshiftri_mask8
+# top bits are undefined, unclear how to test these
 _mm256_castsi128_si256
-_mm256_extract_epi16
-_mm256_extract_epi8
 _mm512_castsi128_si512
 _mm512_castsi256_si512
-# _mm512_conj_pch
+
+# Clang bug
+_mm256_extract_epi16
+_mm256_extract_epi8
 _mm512_mask_reduce_max_pd
 _mm512_mask_reduce_max_ps
 _mm512_mask_reduce_min_pd
 _mm512_mask_reduce_min_ps
-_mm_comineq_sh
 _mm_extract_epi16
 _mm_extract_epi8
-_mm_mask_cvtepi16_epi8
-_mm_mask_cvtpd_epi32
-_mm_mask_cvtpd_ps
-_mm_ucomineq_sh
\ No newline at end of file
diff --git a/crates/intrinsic-test/src/arm/compile.rs b/crates/intrinsic-test/src/arm/compile.rs
index 7da35f9a11..a672da2cc0 100644
--- a/crates/intrinsic-test/src/arm/compile.rs
+++ b/crates/intrinsic-test/src/arm/compile.rs
@@ -15,7 +15,7 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
         .add_extra_flags(["-ffp-contract=off", "-Wno-narrowing"]);
 
     if !config.target.contains("v7") {
-        command = command.add_arch_flags(["faminmax", "lut", "sha3"]);
+        command = command.add_arch_flags(["faminmax", "lut", "sha3", "fp8"]);
     }
 
     if !cpp_compiler.contains("clang") {
diff --git a/crates/intrinsic-test/src/arm/config.rs b/crates/intrinsic-test/src/arm/config.rs
index a634645969..60bb0ca56c 100644
--- a/crates/intrinsic-test/src/arm/config.rs
+++ b/crates/intrinsic-test/src/arm/config.rs
@@ -38,11 +38,7 @@ std::ostream& operator<<(std::ostream& os, poly128_t value) {
 #endif
 
 std::ostream& operator<<(std::ostream& os, float16_t value) {
-    uint16_t temp = 0;
-    memcpy(&temp, &value, sizeof(float16_t));
-    std::stringstream ss;
-    ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
-    os << ss.str();
+    os << static_cast<float>(value);
     return os;
 }
 
@@ -52,92 +48,7 @@ std::ostream& operator<<(std::ostream& os, uint8_t value) {
 }
 "#;
 
-// Format f16 values (and vectors containing them) in a way that is consistent with C.
-pub const PLATFORM_RUST_DEFINITIONS: &str = r#"
-/// Used to continue `Debug`ging SIMD types as `MySimd(1, 2, 3, 4)`, as they
-/// were before moving to array-based simd.
-#[inline]
-fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
-    formatter: &mut core::fmt::Formatter<'_>,
-    type_name: &str,
-    array: &[T; N],
-) -> core::fmt::Result {
-    core::fmt::Formatter::debug_tuple_fields_finish(
-        formatter,
-        type_name,
-        &core::array::from_fn::<&dyn core::fmt::Debug, N, _>(|i| &array[i]),
-    )
-}
-
-#[repr(transparent)]
-struct Hex<T>(T);
-
-impl<T: DebugHexF16> core::fmt::Debug for Hex<T> {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        <T as DebugHexF16>::fmt(&self.0, f)
-    }
-}
-
-fn debug_f16<T: DebugHexF16>(x: T) -> impl core::fmt::Debug {
-    Hex(x)
-}
-
-trait DebugHexF16 {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
-}
-
-impl DebugHexF16 for f16 {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(f, "{:#06x?}", self.to_bits())
-    }
-}
-
-impl DebugHexF16 for float16x4_t {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 4]>(*self) };
-        debug_simd_finish(f, "float16x4_t", &array)
-    }
-}
-
-impl DebugHexF16 for float16x8_t {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
-        debug_simd_finish(f, "float16x8_t", &array)
-    }
-}
-
-impl DebugHexF16 for float16x4x2_t {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        debug_simd_finish(f, "float16x4x2_t", &[Hex(self.0), Hex(self.1)])
-    }
-}
-impl DebugHexF16 for float16x4x3_t {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        debug_simd_finish(f, "float16x4x3_t", &[Hex(self.0), Hex(self.1), Hex(self.2)])
-    }
-}
-impl DebugHexF16 for float16x4x4_t {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        debug_simd_finish(f, "float16x4x4_t", &[Hex(self.0), Hex(self.1), Hex(self.2), Hex(self.3)])
-    }
-}
-
-impl DebugHexF16 for float16x8x2_t {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        debug_simd_finish(f, "float16x8x2_t", &[Hex(self.0), Hex(self.1)])
-    }
-}
-impl DebugHexF16 for float16x8x3_t {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        debug_simd_finish(f, "float16x8x3_t", &[Hex(self.0), Hex(self.1), Hex(self.2)])
-    }
-}
-impl DebugHexF16 for float16x8x4_t {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        debug_simd_finish(f, "float16x8x4_t", &[Hex(self.0), Hex(self.1), Hex(self.2), Hex(self.3)])
-    }
-}
- "#;
+pub const PLATFORM_RUST_DEFINITIONS: &str = "";
 
 pub const PLATFORM_RUST_CFGS: &str = r#"
 #![cfg_attr(target_arch = "arm", feature(stdarch_arm_neon_intrinsics))]
@@ -147,7 +58,9 @@ pub const PLATFORM_RUST_CFGS: &str = r#"
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_i8mm))]
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_sm4))]
 #![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_ftts))]
-#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_aarch64_jscvt))]
+#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_feat_lut))]
+#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(stdarch_neon_fp8))]
+#![cfg_attr(any(target_arch = "aarch64", target_arch = "arm64ec"), feature(faminmax))]
 #![feature(fmt_helpers_for_derive)]
 #![feature(stdarch_neon_f16)]
 
diff --git a/crates/intrinsic-test/src/arm/json_parser.rs b/crates/intrinsic-test/src/arm/json_parser.rs
index 65c179ef0d..c1563a7364 100644
--- a/crates/intrinsic-test/src/arm/json_parser.rs
+++ b/crates/intrinsic-test/src/arm/json_parser.rs
@@ -12,6 +12,8 @@ use std::path::Path;
 #[serde(deny_unknown_fields)]
 struct ReturnType {
     value: String,
+    #[serde(rename = "element_bit_size")]
+    _element_bit_size: Option<String>,
 }
 
 #[derive(Deserialize, Debug)]
@@ -50,6 +52,8 @@ struct JsonIntrinsic {
     args_prep: Option<HashMap<String, Value>>,
     #[serde(rename = "Architectures")]
     architectures: Vec<String>,
+    #[serde(rename = "instructions")]
+    _instructions: Option<Vec<Vec<String>>>,
 }
 
 pub fn get_neon_intrinsics(
diff --git a/crates/intrinsic-test/src/arm/mod.rs b/crates/intrinsic-test/src/arm/mod.rs
index 7fa5062e86..99c8da854c 100644
--- a/crates/intrinsic-test/src/arm/mod.rs
+++ b/crates/intrinsic-test/src/arm/mod.rs
@@ -48,8 +48,12 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
             .expect("Error parsing input file");
 
         intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
+        intrinsics.dedup();
+
+        let sample_percentage: usize = cli_options.sample_percentage as usize;
+        let sample_size = (intrinsics.len() * sample_percentage) / 100;
 
-        let mut intrinsics = intrinsics
+        let intrinsics = intrinsics
             .into_iter()
             // Not sure how we would compare intrinsic that returns void.
             .filter(|i| i.results.kind() != TypeKind::Void)
@@ -61,8 +65,8 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
             .filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128))
             .filter(|i| !cli_options.skip.contains(&i.name))
             .filter(|i| !(a32 && i.arch_tags == vec!["A64".to_string()]))
+            .take(sample_size)
             .collect::<Vec<_>>();
-        intrinsics.dedup();
 
         Self {
             intrinsics,
diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs
index 4be8d1e48b..18468bd558 100644
--- a/crates/intrinsic-test/src/arm/types.rs
+++ b/crates/intrinsic-test/src/arm/types.rs
@@ -7,11 +7,10 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
     /// Gets a string containing the typename for this type in C format.
     fn c_type(&self) -> String {
         let prefix = self.kind.c_prefix();
-        let const_prefix = if self.constant { "const " } else { "" };
 
         if let (Some(bit_len), simd_len, vec_len) = (self.bit_len, self.simd_len, self.vec_len) {
             match (simd_len, vec_len) {
-                (None, None) => format!("{const_prefix}{prefix}{bit_len}_t"),
+                (None, None) => format!("{prefix}{bit_len}_t"),
                 (Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"),
                 (Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"),
                 (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case
diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs
index 5fb7d0f210..8ae9869db0 100644
--- a/crates/intrinsic-test/src/common/argument.rs
+++ b/crates/intrinsic-test/src/common/argument.rs
@@ -1,5 +1,6 @@
 use super::cli::Language;
 use super::constraint::Constraint;
+use super::gen_rust::PASSES;
 use super::indentation::Indentation;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
@@ -30,8 +31,7 @@ where
     }
 
     pub fn to_c_type(&self) -> String {
-        let prefix = if self.ty.constant { "const " } else { "" };
-        format!("{prefix}{}", self.ty.c_type())
+        self.ty.c_type()
     }
 
     pub fn generate_name(&self) -> String {
@@ -60,9 +60,15 @@ where
     }
 
     /// The name (e.g. "A_VALS" or "a_vals") for the array of possible test inputs.
-    fn rust_vals_array_name(&self) -> impl std::fmt::Display {
+    pub(crate) fn rust_vals_array_name(&self) -> impl std::fmt::Display {
         if self.ty.is_rust_vals_array_const() {
-            format!("{}_VALS", self.name.to_uppercase())
+            let loads = crate::common::gen_rust::PASSES;
+            format!(
+                "{}_{ty}_{load_size}",
+                self.name.to_uppercase(),
+                ty = self.ty.rust_scalar_type(),
+                load_size = self.ty.num_lanes() * self.ty.num_vectors() + loads - 1,
+            )
         } else {
             format!("{}_vals", self.name.to_lowercase())
         }
@@ -134,20 +140,34 @@ where
         loads: u32,
     ) -> std::io::Result<()> {
         for arg in self.iter().filter(|&arg| !arg.has_constraint()) {
-            writeln!(
-                w,
-                "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};",
-                bind = arg.rust_vals_array_binding(),
-                name = arg.rust_vals_array_name(),
-                ty = arg.ty.rust_scalar_type(),
-                load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1,
-                values = arg.ty.populate_random(indentation, loads, &Language::Rust)
-            )?
+            // Constants are defined globally.
+            if arg.ty.is_rust_vals_array_const() {
+                continue;
+            }
+
+            Self::gen_arg_rust(arg, w, indentation, loads)?;
         }
 
         Ok(())
     }
 
+    pub fn gen_arg_rust(
+        arg: &Argument<T>,
+        w: &mut impl std::io::Write,
+        indentation: Indentation,
+        loads: u32,
+    ) -> std::io::Result<()> {
+        writeln!(
+            w,
+            "{indentation}{bind} {name}: [{ty}; {load_size}] = {values};\n",
+            bind = arg.rust_vals_array_binding(),
+            name = arg.rust_vals_array_name(),
+            ty = arg.ty.rust_scalar_type(),
+            load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1,
+            values = arg.ty.populate_random(indentation, loads, &Language::Rust)
+        )
+    }
+
     /// Creates a line for each argument that initializes the argument from an array `[arg]_vals` at
     /// an offset `i` using a load intrinsic, in C.
     /// e.g `uint8x8_t a = vld1_u8(&a_vals[i]);`
@@ -156,9 +176,10 @@ where
     pub fn load_values_c(&self, indentation: Indentation) -> String {
         self.iter()
             .filter(|&arg| !arg.has_constraint())
-            .map(|arg| {
+            .enumerate()
+            .map(|(idx, arg)| {
                 format!(
-                    "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i]));\n",
+                    "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[(i+{idx}) % {PASSES}]));\n",
                     ty = arg.to_c_type(),
                     name = arg.generate_name(),
                     load = if arg.is_simd() {
@@ -177,18 +198,22 @@ where
     pub fn load_values_rust(&self, indentation: Indentation) -> String {
         self.iter()
             .filter(|&arg| !arg.has_constraint())
-            .map(|arg| {
-                let load = if arg.is_simd() {
-                    arg.ty.get_load_function(Language::Rust)
+            .enumerate()
+            .map(|(idx, arg)| {
+                if arg.is_simd() {
+                    format!(
+                        "{indentation}let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n",
+                        name = arg.generate_name(),
+                        vals_name = arg.rust_vals_array_name(),
+                        load = arg.ty.get_load_function(Language::Rust),
+                    )
                 } else {
-                    "*".to_string()
-                };
-                let typecast = if load.len() > 2 { "as _" } else { "" };
-                format!(
-                    "{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i){typecast});\n",
-                    name = arg.generate_name(),
-                    vals_name = arg.rust_vals_array_name(),
-                )
+                    format!(
+                        "{indentation}let {name} = {vals_name}[(i+{idx}) % {PASSES}];\n",
+                        name = arg.generate_name(),
+                        vals_name = arg.rust_vals_array_name(),
+                    )
+                }
             })
             .collect()
     }
diff --git a/crates/intrinsic-test/src/common/cli.rs b/crates/intrinsic-test/src/common/cli.rs
index 461ab542ea..bed8259de8 100644
--- a/crates/intrinsic-test/src/common/cli.rs
+++ b/crates/intrinsic-test/src/common/cli.rs
@@ -7,12 +7,6 @@ pub enum Language {
     C,
 }
 
-pub enum FailureReason {
-    RunC(String),
-    RunRust(String),
-    Difference(String, String, String),
-}
-
 /// Intrinsic test tool
 #[derive(clap::Parser)]
 #[command(
@@ -47,6 +41,10 @@ pub struct Cli {
     #[arg(long, default_value_t = String::from("armv7-unknown-linux-gnueabihf"))]
     pub target: String,
 
+    /// Pass a profile (release, dev)
+    #[arg(long, default_value_t = String::from("release"))]
+    pub profile: String,
+
     /// Set the linker
     #[arg(long)]
     pub linker: Option<String>,
@@ -65,6 +63,7 @@ pub struct ProcessedCli {
     pub cpp_compiler: Option<String>,
     pub runner: String,
     pub target: String,
+    pub profile: String,
     pub linker: Option<String>,
     pub cxx_toolchain_dir: Option<String>,
     pub skip: Vec<String>,
@@ -76,6 +75,7 @@ impl ProcessedCli {
         let filename = cli_options.input;
         let runner = cli_options.runner.unwrap_or_default();
         let target = cli_options.target;
+        let profile = cli_options.profile;
         let linker = cli_options.linker;
         let cxx_toolchain_dir = cli_options.cxx_toolchain_dir;
         let sample_percentage = cli_options.sample_percentage;
@@ -109,6 +109,7 @@ impl ProcessedCli {
             cpp_compiler,
             runner,
             target,
+            profile,
             linker,
             cxx_toolchain_dir,
             skip,
diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs
index 902df94283..c1438d1bbf 100644
--- a/crates/intrinsic-test/src/common/compare.rs
+++ b/crates/intrinsic-test/src/common/compare.rs
@@ -1,7 +1,8 @@
-use super::cli::FailureReason;
+use itertools::Itertools;
 use rayon::prelude::*;
-use std::process::Command;
+use std::{collections::HashMap, process::Command};
 
+pub const INTRINSIC_DELIMITER: &str = "############";
 fn runner_command(runner: &str) -> Command {
     let mut it = runner.split_whitespace();
     let mut cmd = Command::new(it.next().unwrap());
@@ -10,82 +11,134 @@ fn runner_command(runner: &str) -> Command {
     cmd
 }
 
-pub fn compare_outputs(intrinsic_name_list: &Vec<String>, runner: &str, target: &str) -> bool {
-    let intrinsics = intrinsic_name_list
-        .par_iter()
-        .filter_map(|intrinsic_name| {
-            let c = runner_command(runner)
+pub fn compare_outputs(
+    intrinsic_name_list: &Vec<String>,
+    runner: &str,
+    target: &str,
+    profile: &str,
+) -> bool {
+    let profile_dir = match profile {
+        "dev" => "debug",
+        _ => "release",
+    };
+
+    let (c, rust) = rayon::join(
+        || {
+            runner_command(runner)
                 .arg("./intrinsic-test-programs")
-                .arg(intrinsic_name)
                 .current_dir("c_programs")
-                .output();
-
-            let rust = runner_command(runner)
-                .arg(format!("./target/{target}/release/intrinsic-test-programs"))
-                .arg(intrinsic_name)
+                .output()
+        },
+        || {
+            runner_command(runner)
+                .arg(format!(
+                    "./target/{target}/{profile_dir}/intrinsic-test-programs"
+                ))
                 .current_dir("rust_programs")
-                .output();
+                .output()
+        },
+    );
+    let (c, rust) = match (c, rust) {
+        (Ok(c), Ok(rust)) => (c, rust),
+        failure => panic!("Failed to run: {failure:#?}"),
+    };
 
-            let (c, rust) = match (c, rust) {
-                (Ok(c), Ok(rust)) => (c, rust),
-                a => panic!("{a:#?}"),
-            };
+    if !c.status.success() {
+        error!(
+            "Failed to run C program.\nstdout: {stdout}\nstderr: {stderr}",
+            stdout = std::str::from_utf8(&c.stdout).unwrap_or(""),
+            stderr = std::str::from_utf8(&c.stderr).unwrap_or(""),
+        );
+    }
 
-            if !c.status.success() {
-                error!(
-                    "Failed to run C program for intrinsic `{intrinsic_name}`\nstdout: {stdout}\nstderr: {stderr}",
-                    stdout = std::str::from_utf8(&c.stdout).unwrap_or(""),
-                    stderr = std::str::from_utf8(&c.stderr).unwrap_or(""),
-                );
-                return Some(FailureReason::RunC(intrinsic_name.clone()));
-            }
+    if !rust.status.success() {
+        error!(
+            "Failed to run Rust program.\nstdout: {stdout}\nstderr: {stderr}",
+            stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""),
+            stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""),
+        );
+    }
 
-            if !rust.status.success() {
-                error!(
-                    "Failed to run Rust program for intrinsic `{intrinsic_name}`\nstdout: {stdout}\nstderr: {stderr}",
-                    stdout = std::str::from_utf8(&rust.stdout).unwrap_or(""),
-                    stderr = std::str::from_utf8(&rust.stderr).unwrap_or(""),
-                );
-                return Some(FailureReason::RunRust(intrinsic_name.clone()));
-            }
+    info!("Completed running C++ and Rust test binaries");
+    let c = std::str::from_utf8(&c.stdout)
+        .unwrap()
+        .to_lowercase()
+        .replace("-nan", "nan");
+    let rust = std::str::from_utf8(&rust.stdout)
+        .unwrap()
+        .to_lowercase()
+        .replace("-nan", "nan");
+
+    let c_output_map = c
+        .split(INTRINSIC_DELIMITER)
+        .filter_map(|output| output.trim().split_once("\n"))
+        .collect::<HashMap<&str, &str>>();
+    let rust_output_map = rust
+        .split(INTRINSIC_DELIMITER)
+        .filter_map(|output| output.trim().split_once("\n"))
+        .collect::<HashMap<&str, &str>>();
 
-            info!("Comparing intrinsic: {intrinsic_name}");
+    assert!(!c_output_map.is_empty(), "No C intrinsic output found!");
 
-            let c = std::str::from_utf8(&c.stdout)
-                .unwrap()
-                .to_lowercase()
-                .replace("-nan", "nan");
-            let rust = std::str::from_utf8(&rust.stdout)
-                .unwrap()
-                .to_lowercase()
-                .replace("-nan", "nan");
+    let intrinsics = c_output_map
+        .keys()
+        .chain(rust_output_map.keys())
+        .unique()
+        .collect_vec();
 
-            if c == rust {
+    info!("Comparing outputs");
+    let intrinsics_diff_count = intrinsics
+        .par_iter()
+        .filter_map(|&&intrinsic| {
+            let c_output = c_output_map.get(intrinsic).unwrap();
+            let rust_output = rust_output_map.get(intrinsic).unwrap();
+            if rust_output.eq(c_output) {
                 None
             } else {
-                Some(FailureReason::Difference(intrinsic_name.clone(), c, rust))
+                let diff = diff::lines(c_output, rust_output);
+                let diffs = diff
+                    .into_iter()
+                    .filter_map(|diff| match diff {
+                        diff::Result::Left(_) | diff::Result::Right(_) => Some(diff),
+                        diff::Result::Both(_, _) => None,
+                    })
+                    .collect_vec();
+                if diffs.len() > 0 {
+                    Some((intrinsic, diffs))
+                } else {
+                    None
+                }
             }
         })
-        .collect::<Vec<_>>();
+        .inspect(|(intrinsic, diffs)| {
+            use std::io::Write;
+
+            let stdout = std::io::stdout();
+            let mut out = stdout.lock();
 
-    intrinsics.iter().for_each(|reason| match reason {
-        FailureReason::Difference(intrinsic, c, rust) => {
-            println!("Difference for intrinsic: {intrinsic}");
-            let diff = diff::lines(c, rust);
-            diff.iter().for_each(|diff| match diff {
-                diff::Result::Left(c) => println!("C: {c}"),
-                diff::Result::Right(rust) => println!("Rust: {rust}"),
-                diff::Result::Both(_, _) => (),
+            writeln!(out, "Difference for intrinsic: {intrinsic}").unwrap();
+            diffs.into_iter().for_each(|diff| match diff {
+                diff::Result::Left(c) => {
+                    writeln!(out, "C: {c}").unwrap();
+                }
+                diff::Result::Right(rust) => {
+                    writeln!(out, "Rust: {rust}").unwrap();
+                }
+                _ => (),
             });
-            println!("****************************************************************");
-        }
-        FailureReason::RunC(intrinsic) => {
-            println!("Failed to run C program for intrinsic {intrinsic}")
-        }
-        FailureReason::RunRust(intrinsic) => {
-            println!("Failed to run rust program for intrinsic {intrinsic}")
-        }
-    });
-    println!("{} differences found", intrinsics.len());
-    intrinsics.is_empty()
+            writeln!(
+                out,
+                "****************************************************************"
+            )
+            .unwrap();
+        })
+        .count();
+
+    println!(
+        "{} differences found (tested {} intrinsics)",
+        intrinsics_diff_count,
+        intrinsic_name_list.len()
+    );
+
+    intrinsics_diff_count == 0
 }
diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs
index 04741e4f80..a95b4c36b7 100644
--- a/crates/intrinsic-test/src/common/gen_c.rs
+++ b/crates/intrinsic-test/src/common/gen_c.rs
@@ -1,6 +1,7 @@
 use crate::common::intrinsic::Intrinsic;
 
 use super::argument::Argument;
+use super::compare::INTRINSIC_DELIMITER;
 use super::indentation::Indentation;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
 
@@ -147,22 +148,17 @@ pub fn write_main_cpp<'a>(
     }
 
     writeln!(w, "int main(int argc, char **argv) {{")?;
-    writeln!(w, "    std::string intrinsic_name = argv[1];")?;
-
-    writeln!(w, "    if (false) {{")?;
 
     for intrinsic in intrinsics {
-        writeln!(w, "    }} else if (intrinsic_name == \"{intrinsic}\") {{")?;
-        writeln!(w, "        return run_{intrinsic}();")?;
+        writeln!(
+            w,
+            "    std::cout << \"{INTRINSIC_DELIMITER}\" << std::endl;"
+        )?;
+        writeln!(w, "    std::cout << \"{intrinsic}\" << std::endl;")?;
+        writeln!(w, "    run_{intrinsic}();\n")?;
     }
 
-    writeln!(w, "    }} else {{")?;
-    writeln!(
-        w,
-        "        std::cerr << \"Unknown command: \" << intrinsic_name << \"\\n\";"
-    )?;
-    writeln!(w, "        return -1;")?;
-    writeln!(w, "    }}")?;
+    writeln!(w, "    return 0;")?;
 
     writeln!(w, "}}")?;
 
diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs
index 27f49a37b1..82b97701bb 100644
--- a/crates/intrinsic-test/src/common/gen_rust.rs
+++ b/crates/intrinsic-test/src/common/gen_rust.rs
@@ -1,24 +1,31 @@
 use itertools::Itertools;
 use std::process::Command;
 
-use crate::common::intrinsic::Intrinsic;
-
+use super::compare::INTRINSIC_DELIMITER;
 use super::indentation::Indentation;
 use super::intrinsic_helpers::IntrinsicTypeDefinition;
+use crate::common::argument::ArgumentList;
+use crate::common::intrinsic::Intrinsic;
 
 // The number of times each intrinsic will be called.
-const PASSES: u32 = 20;
+pub(crate) const PASSES: u32 = 20;
+
+macro_rules! concatln {
+    ($($lines:expr),* $(,)?) => {
+        concat!($( $lines, "\n" ),*)
+    };
+}
 
 fn write_cargo_toml_header(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> {
     writeln!(
         w,
-        concat!(
-            "[package]\n",
-            "name = \"{name}\"\n",
-            "version = \"{version}\"\n",
-            "authors = [{authors}]\n",
-            "license = \"{license}\"\n",
-            "edition = \"2018\"\n",
+        concatln!(
+            "[package]",
+            "name = \"{name}\"",
+            "version = \"{version}\"",
+            "authors = [{authors}]",
+            "license = \"{license}\"",
+            "edition = \"2018\"",
         ),
         name = name,
         version = env!("CARGO_PKG_VERSION"),
@@ -79,18 +86,12 @@ pub fn write_main_rs<'a>(
 
     writeln!(w, "fn main() {{")?;
 
-    writeln!(w, "    match std::env::args().nth(1).unwrap().as_str() {{")?;
-
     for binary in intrinsics {
-        writeln!(w, "        \"{binary}\" => run_{binary}(),")?;
+        writeln!(w, "    println!(\"{INTRINSIC_DELIMITER}\");")?;
+        writeln!(w, "    println!(\"{binary}\");")?;
+        writeln!(w, "    run_{binary}();\n")?;
     }
 
-    writeln!(
-        w,
-        "        other => panic!(\"unknown intrinsic `{{}}`\", other),"
-    )?;
-
-    writeln!(w, "    }}")?;
     writeln!(w, "}}")?;
 
     Ok(())
@@ -118,6 +119,20 @@ pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
 
     writeln!(w, "{definitions}")?;
 
+    let mut seen = std::collections::HashSet::new();
+
+    for intrinsic in intrinsics {
+        for arg in &intrinsic.arguments.args {
+            if !arg.has_constraint() && arg.ty.is_rust_vals_array_const() {
+                let name = arg.rust_vals_array_name().to_string();
+
+                if seen.insert(name) {
+                    ArgumentList::gen_arg_rust(arg, w, Indentation::default(), PASSES)?;
+                }
+            }
+        }
+    }
+
     for intrinsic in intrinsics {
         crate::common::gen_rust::create_rust_test_module(w, intrinsic)?;
     }
@@ -125,7 +140,12 @@ pub fn write_lib_rs<T: IntrinsicTypeDefinition>(
     Ok(())
 }
 
-pub fn compile_rust_programs(toolchain: Option<&str>, target: &str, linker: Option<&str>) -> bool {
+pub fn compile_rust_programs(
+    toolchain: Option<&str>,
+    target: &str,
+    profile: &str,
+    linker: Option<&str>,
+) -> bool {
     /* If there has been a linker explicitly set from the command line then
      * we want to set it via setting it in the RUSTFLAGS*/
 
@@ -146,7 +166,7 @@ pub fn compile_rust_programs(toolchain: Option<&str>, target: &str, linker: Opti
     if toolchain.is_some_and(|val| !val.is_empty()) {
         cargo_command.arg(toolchain.unwrap());
     }
-    cargo_command.args(["build", "--target", target, "--release"]);
+    cargo_command.args(["build", "--target", target, "--profile", profile]);
 
     let mut rust_flags = "-Cdebuginfo=0".to_string();
     if let Some(linker) = linker {
@@ -232,23 +252,23 @@ pub fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
         }
     }
 
-    let indentation2 = indentation.nested();
-    let indentation3 = indentation2.nested();
-    writeln!(
+    write!(
         w,
-        "\
-            for (id, f) in specializations {{\n\
-                for i in 0..{passes} {{\n\
-                    unsafe {{\n\
-                        {loaded_args}\
-                        let __return_value = f({args});\n\
-                        println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});\n\
-                    }}\n\
-                }}\n\
-            }}",
-        loaded_args = intrinsic.arguments.load_values_rust(indentation3),
+        concatln!(
+            "    for (id, f) in specializations {{",
+            "        for i in 0..{passes} {{",
+            "            unsafe {{",
+            "{loaded_args}",
+            "                let __return_value = f({args});",
+            "                println!(\"Result {{id}}-{{}}: {{:?}}\", i + 1, {return_value});",
+            "            }}",
+            "        }}",
+            "    }}",
+        ),
+        loaded_args = intrinsic.arguments.load_values_rust(indentation.nest_by(4)),
         args = intrinsic.arguments.as_call_param_rust(),
         return_value = intrinsic.results.print_result_rust(),
+        passes = passes,
     )
 }
 
diff --git a/crates/intrinsic-test/src/common/indentation.rs b/crates/intrinsic-test/src/common/indentation.rs
index 9ee331d7f7..9c2cc886e6 100644
--- a/crates/intrinsic-test/src/common/indentation.rs
+++ b/crates/intrinsic-test/src/common/indentation.rs
@@ -10,6 +10,10 @@ impl Indentation {
     pub fn nested(self) -> Self {
         Self(self.0 + 1)
     }
+
+    pub fn nest_by(&self, additional_levels: u32) -> Self {
+        Self(self.0 + additional_levels)
+    }
 }
 
 impl std::fmt::Display for Indentation {
diff --git a/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/crates/intrinsic-test/src/common/intrinsic_helpers.rs
index c2d66868ce..a14d7ef05f 100644
--- a/crates/intrinsic-test/src/common/intrinsic_helpers.rs
+++ b/crates/intrinsic-test/src/common/intrinsic_helpers.rs
@@ -42,7 +42,7 @@ impl FromStr for TypeKind {
             "uint" | "unsigned" | "UI8" | "UI16" | "UI32" | "UI64" => Ok(Self::Int(Sign::Unsigned)),
             "void" => Ok(Self::Void),
             "MASK" => Ok(Self::Mask),
-            "M64" | "M128" | "M256" | "M512" => Ok(Self::Vector),
+            "M128" | "M256" | "M512" => Ok(Self::Vector),
             _ => Err(format!("Impossible to parse argument kind {s}")),
         }
     }
@@ -368,19 +368,8 @@ pub trait IntrinsicTypeDefinition: Deref<Target = IntrinsicType> {
     /// Generates a std::cout for the intrinsics results that will match the
     /// rust debug output format for the return type. The generated line assumes
     /// there is an int i in scope which is the current pass number.
-    ///
-    /// The `intrinsic-test` crate compares the output of C and Rust intrinsics. Currently, It uses
-    /// a string representation of the output value to compare. In C, f16 values are currently printed
-    /// as hexadecimal integers. Since https://github.com/rust-lang/rust/pull/127013, rust does print
-    /// them as decimal floating point values. To keep the intrinsics tests working, for now, format
-    /// vectors containing f16 values like C prints them.
     fn print_result_rust(&self) -> String {
-        let return_value = match self.kind() {
-            TypeKind::Float if self.inner_size() == 16 => "debug_f16(__return_value)",
-            _ => "format_args!(\"{__return_value:.150?}\")",
-        };
-
-        String::from(return_value)
+        String::from("format_args!(\"{__return_value:.150?}\")")
     }
 
     /// To enable architecture-specific logic
diff --git a/crates/intrinsic-test/src/common/mod.rs b/crates/intrinsic-test/src/common/mod.rs
index d8f06ae238..a1062b3a87 100644
--- a/crates/intrinsic-test/src/common/mod.rs
+++ b/crates/intrinsic-test/src/common/mod.rs
@@ -79,12 +79,16 @@ pub trait SupportedArchitectureTest {
                 trace!("compiling mod_{i}.cpp");
                 if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
                     let compile_output = cpp_compiler
-                        .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"));
+                        .compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))
+                        .map_err(|e| format!("Error compiling mod_{i}.cpp: {e:?}"))?;
+
+                    assert!(
+                        compile_output.status.success(),
+                        "{}",
+                        String::from_utf8_lossy(&compile_output.stderr)
+                    );
 
                     trace!("finished compiling mod_{i}.cpp");
-                    if let Err(compile_error) = compile_output {
-                        return Err(format!("Error compiling mod_{i}.cpp: {compile_error:?}"));
-                    }
                 }
                 Ok(())
             })
@@ -142,6 +146,7 @@ pub trait SupportedArchitectureTest {
         .unwrap();
 
         let target = &self.cli_options().target;
+        let profile = &self.cli_options().profile;
         let toolchain = self.cli_options().toolchain.as_deref();
         let linker = self.cli_options().linker.as_deref();
 
@@ -174,7 +179,7 @@ pub trait SupportedArchitectureTest {
             .collect::<Result<(), std::io::Error>>()
             .unwrap();
 
-        compile_rust_programs(toolchain, target, linker)
+        compile_rust_programs(toolchain, target, profile, linker)
     }
 
     fn compare_outputs(&self) -> bool {
@@ -189,6 +194,7 @@ pub trait SupportedArchitectureTest {
                 &intrinsics_name_list,
                 &self.cli_options().runner,
                 &self.cli_options().target,
+                &self.cli_options().profile,
             )
         } else {
             true
diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs
index ed3a50067d..e5c846877c 100644
--- a/crates/intrinsic-test/src/main.rs
+++ b/crates/intrinsic-test/src/main.rs
@@ -34,7 +34,7 @@ fn run(test_environment: impl SupportedArchitectureTest) {
     if !test_environment.build_rust_file() {
         std::process::exit(3);
     }
-    info!("comaparing outputs");
+    info!("Running binaries");
     if !test_environment.compare_outputs() {
         std::process::exit(1);
     }
diff --git a/crates/intrinsic-test/src/x86/compile.rs b/crates/intrinsic-test/src/x86/compile.rs
index 60997a1278..65cd291b1b 100644
--- a/crates/intrinsic-test/src/x86/compile.rs
+++ b/crates/intrinsic-test/src/x86/compile.rs
@@ -25,14 +25,26 @@ pub fn build_cpp_compilation(config: &ProcessedCli) -> Option<CppCompilation> {
             "-mavx512cd",
             "-mavx512fp16",
             "-msha512",
+            "-msm3",
             "-msm4",
             "-mavxvnni",
+            "-mavxvnniint8",
+            "-mavxneconvert",
+            "-mavxifma",
+            "-mavxvnniint16",
+            "-mavx512bf16",
             "-mavx512bitalg",
             "-mavx512ifma",
             "-mavx512vbmi",
             "-mavx512vbmi2",
             "-mavx512vnni",
             "-mavx512vpopcntdq",
+            "-mavx512vp2intersect",
+            "-mbmi",
+            "-mbmi2",
+            "-mgfni",
+            "-mvaes",
+            "-mvpclmulqdq",
             "-ferror-limit=1000",
             "-std=c++23",
         ]);
diff --git a/crates/intrinsic-test/src/x86/config.rs b/crates/intrinsic-test/src/x86/config.rs
index 7c349e4482..491dbb5147 100644
--- a/crates/intrinsic-test/src/x86/config.rs
+++ b/crates/intrinsic-test/src/x86/config.rs
@@ -5,7 +5,7 @@ pub const NOTICE: &str = "\
 
 // Format f16 values (and vectors containing them) in a way that is consistent with C.
 pub const PLATFORM_RUST_DEFINITIONS: &str = r#"
-use std::arch::x86_64::*;
+use core_arch::arch::x86_64::*;
 
 #[inline]
 unsafe fn _mm_loadu_ph_to___m128i(mem_addr: *const f16) -> __m128i {
@@ -142,71 +142,6 @@ fn debug_simd_finish<T: core::fmt::Debug, const N: usize>(
     )
 }
 
-#[repr(transparent)]
-struct Hex<T>(T);
-
-impl<T: DebugHexF16> core::fmt::Debug for Hex<T> {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        <T as DebugHexF16>::fmt(&self.0, f)
-    }
-}
-
-fn debug_f16<T: DebugHexF16>(x: T) -> impl core::fmt::Debug {
-    Hex(x)
-}
-
-trait DebugHexF16 {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
-}
-
-impl DebugHexF16 for f16 {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        write!(f, "{:#06x?}", self.to_bits())
-    }
-}
-
-impl DebugHexF16 for __m128h {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
-        debug_simd_finish(f, "__m128h", &array)
-    }
-}
-
-impl DebugHexF16 for __m128i {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 8]>(*self) };
-        debug_simd_finish(f, "__m128i", &array)
-    }
-}
-
-impl DebugHexF16 for __m256h {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
-        debug_simd_finish(f, "__m256h", &array)
-    }
-}
-
-impl DebugHexF16 for __m256i {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 16]>(*self) };
-        debug_simd_finish(f, "__m256i", &array)
-    }
-}
-
-impl DebugHexF16 for __m512h {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
-        debug_simd_finish(f, "__m512h", &array)
-    }
-}
-
-impl DebugHexF16 for __m512i {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        let array = unsafe { core::mem::transmute::<_, [Hex<f16>; 32]>(*self) };
-        debug_simd_finish(f, "__m512i", &array)
-    }
-}
-
 trait DebugAs<T> {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result;
 }
@@ -232,7 +167,7 @@ macro_rules! impl_debug_as {
     };
 }
 
-impl_debug_as!(__m128i, "__m128i", 128, [u8, i8, u16, i16, u32, i32, u64, i64]);
+impl_debug_as!(__m128i, "__m128i", 128, [u8, i8, u16, i16, u32, i32, u64, i64, f16]);
 impl_debug_as!(__m256i, "__m256i", 256, [u8, i8, u16, i16, u32, i32, u64, i64]);
 impl_debug_as!(__m512i, "__m512i", 512, [u8, i8, u16, i16, u32, i32, u64, i64]);
 impl_debug_as!(__m128h, "__m128h", 128, [f32]);
@@ -280,12 +215,6 @@ pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#"
     
     #define _mm512_extract_intrinsic_test_epi64(m, lane) \
         _mm_extract_epi64(_mm512_extracti64x2_epi64((m), (lane) / 2), (lane) % 2)
-    
-    #define _mm64_extract_intrinsic_test_epi8(m, lane) \
-        ((_mm_extract_pi16((m), (lane) / 2) >> (((lane) % 2) * 8)) & 0xFF)
-    
-    #define _mm64_extract_intrinsic_test_epi32(m, lane) \
-        _mm_cvtsi64_si32(_mm_srli_si64(m, (lane) * 32))
         
     // Load f16 (__m128h) and cast to integer (__m128i)
     #define _mm_loadu_ph_to___m128i(mem_addr) _mm_castph_si128(_mm_loadu_ph(mem_addr))
@@ -342,11 +271,7 @@ pub const PLATFORM_C_FORWARD_DECLARATIONS: &str = r#"
 pub const PLATFORM_C_DEFINITIONS: &str = r#"
 
 std::ostream& operator<<(std::ostream& os, _Float16 value) {
-    uint16_t temp = 0;
-    memcpy(&temp, &value, sizeof(_Float16));
-    std::stringstream ss;
-    ss << "0x" << std::setfill('0') << std::setw(4) << std::hex << temp;
-    os << ss.str();
+    os << static_cast<float>(value);
     return os;
 }
 
diff --git a/crates/intrinsic-test/src/x86/mod.rs b/crates/intrinsic-test/src/x86/mod.rs
index 956e51836f..f2baf07071 100644
--- a/crates/intrinsic-test/src/x86/mod.rs
+++ b/crates/intrinsic-test/src/x86/mod.rs
@@ -11,7 +11,6 @@ use crate::common::compile_c::CppCompilation;
 use crate::common::intrinsic::Intrinsic;
 use crate::common::intrinsic_helpers::TypeKind;
 use intrinsic::X86IntrinsicType;
-use itertools::Itertools;
 use xml_parser::get_xml_intrinsics;
 
 pub struct X86ArchitectureTest {
@@ -44,12 +43,16 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
     const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS;
 
     fn create(cli_options: ProcessedCli) -> Self {
-        let intrinsics =
+        let mut intrinsics =
             get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file");
 
+        intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
+        intrinsics.dedup_by(|a, b| a.name == b.name);
+
         let sample_percentage: usize = cli_options.sample_percentage as usize;
+        let sample_size = (intrinsics.len() * sample_percentage) / 100;
 
-        let mut intrinsics = intrinsics
+        let intrinsics = intrinsics
             .into_iter()
             // Not sure how we would compare intrinsic that returns void.
             .filter(|i| i.results.kind() != TypeKind::Void)
@@ -61,13 +64,9 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
             .filter(|i| !i.arguments.iter().any(|a| a.is_ptr()))
             .filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128))
             .filter(|i| !cli_options.skip.contains(&i.name))
-            .unique_by(|i| i.name.clone())
+            .take(sample_size)
             .collect::<Vec<_>>();
 
-        let sample_size = (intrinsics.len() * sample_percentage) / 100;
-        intrinsics.truncate(sample_size);
-
-        intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
         Self {
             intrinsics: intrinsics,
             cli_options: cli_options,
diff --git a/crates/intrinsic-test/src/x86/types.rs b/crates/intrinsic-test/src/x86/types.rs
index 87932fcb3e..2391ee9c2d 100644
--- a/crates/intrinsic-test/src/x86/types.rs
+++ b/crates/intrinsic-test/src/x86/types.rs
@@ -115,9 +115,6 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
             // if "type" starts with __m<num>{h/i/<null>},
             // then use either _mm_set1_epi64,
             // _mm256_set1_epi64 or _mm512_set1_epi64
-            if type_value.contains("__m64") {
-                return String::from("*(__m64*)");
-            }
 
             let type_val_filtered = type_value
                 .chars()
@@ -175,29 +172,7 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
     /// rust debug output format for the return type. The generated line assumes
     /// there is an int i in scope which is the current pass number.
     fn print_result_c(&self, indentation: Indentation, additional: &str) -> String {
-        let lanes = if self.num_vectors() > 1 {
-            (0..self.num_vectors())
-                .map(|vector| {
-                    format!(
-                        r#""{ty}(" << {lanes} << ")""#,
-                        ty = self.c_single_vector_type(),
-                        lanes = (0..self.num_lanes())
-                            .map(move |idx| -> std::string::String {
-                                format!(
-                                    "{cast}{lane_fn}(__return_value.val[{vector}], {lane})",
-                                    cast = self.generate_final_type_cast(),
-                                    lane_fn = self.get_lane_function(),
-                                    lane = idx,
-                                    vector = vector,
-                                )
-                            })
-                            .collect::<Vec<_>>()
-                            .join(r#" << ", " << "#)
-                    )
-                })
-                .collect::<Vec<_>>()
-                .join(r#" << ", " << "#)
-        } else if self.num_lanes() > 1 {
+        let lanes = if self.num_lanes() > 1 {
             (0..self.num_lanes())
                 .map(|idx| -> std::string::String {
                     let cast_type = self.c_promotion();
@@ -262,9 +237,6 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
             (Some(16), Some(512)) => String::from("(uint16_t)_mm512_extract_intrinsic_test_epi16"),
             (Some(32), Some(512)) => String::from("(uint32_t)_mm512_extract_intrinsic_test_epi32"),
             (Some(64), Some(512)) => String::from("(uint64_t)_mm512_extract_intrinsic_test_epi64"),
-            (Some(8), Some(64)) => String::from("(uint8_t)_mm64_extract_intrinsic_test_epi8"),
-            (Some(16), Some(64)) => String::from("(uint16_t)_mm_extract_pi16"),
-            (Some(32), Some(64)) => String::from("(uint32_t)_mm64_extract_intrinsic_test_epi32"),
             _ => unreachable!(
                 "invalid length for vector argument: {:?}, {:?}",
                 self.bit_len, self.simd_len
@@ -289,12 +261,9 @@ impl IntrinsicTypeDefinition for X86IntrinsicType {
 
     fn print_result_rust(&self) -> String {
         let return_value = match self.kind() {
-            TypeKind::Float if self.inner_size() == 16 => "debug_f16(__return_value)".to_string(),
-            TypeKind::Float
-                if self.inner_size() == 32
-                    && ["__m512h"].contains(&self.param.type_data.as_str()) =>
-            {
-                "debug_as::<_, f32>(__return_value)".to_string()
+            // `_mm{256}_cvtps_ph` has return type __m128i but contains f16 values
+            TypeKind::Float if self.param.type_data == "__m128i" => {
+                "format_args!(\"{:.150?}\", debug_as::<_, f16>(__return_value))".to_string()
             }
             TypeKind::Int(_)
                 if ["__m128i", "__m256i", "__m512i"].contains(&self.param.type_data.as_str()) =>
@@ -469,6 +438,17 @@ impl X86IntrinsicType {
                     }
                 }
 
+                // a few intrinsics have wrong `etype` field in the XML
+                // - _mm512_reduce_add_ph
+                // - _mm512_reduce_mul_ph
+                // - _mm512_reduce_min_ph
+                // - _mm512_reduce_max_ph
+                // - _mm512_conj_pch
+                if param.type_data == "__m512h" && param.etype == "FP32" {
+                    data.bit_len = Some(16);
+                    data.simd_len = Some(32);
+                }
+
                 let mut result = X86IntrinsicType {
                     data,
                     param: param.clone(),
diff --git a/crates/intrinsic-test/src/x86/xml_parser.rs b/crates/intrinsic-test/src/x86/xml_parser.rs
index af85118b8a..681b1a3c52 100644
--- a/crates/intrinsic-test/src/x86/xml_parser.rs
+++ b/crates/intrinsic-test/src/x86/xml_parser.rs
@@ -30,8 +30,8 @@ struct XMLIntrinsic {
     pub return_data: Parameter,
     #[serde(rename = "@name")]
     pub name: String,
-    // #[serde(rename = "@tech")]
-    // tech: String,
+    #[serde(rename = "@tech")]
+    tech: String,
     #[serde(rename = "CPUID", default)]
     cpuid: Vec<String>,
     #[serde(rename = "parameter", default)]
@@ -65,6 +65,16 @@ pub fn get_xml_intrinsics(
     let parsed_intrinsics: Vec<Intrinsic<X86IntrinsicType>> = data
         .intrinsics
         .into_iter()
+        .filter(|intrinsic| {
+            intrinsic.tech != "SVML"
+                && intrinsic.tech != "MMX"
+                && !intrinsic.cpuid.contains(&"MPX".to_string())
+                && intrinsic.return_data.type_data != "__m64"
+                && !intrinsic
+                    .parameters
+                    .iter()
+                    .any(|param| param.type_data.contains("__m64"))
+        })
         .filter_map(|intr| {
             // Some(xml_to_intrinsic(intr, target).expect("Couldn't parse XML properly!"))
             xml_to_intrinsic(intr).ok()
diff --git a/crates/simd-test-macro/src/lib.rs b/crates/simd-test-macro/src/lib.rs
index b18e2d6b63..9219540a10 100644
--- a/crates/simd-test-macro/src/lib.rs
+++ b/crates/simd-test-macro/src/lib.rs
@@ -7,55 +7,62 @@
 #[macro_use]
 extern crate quote;
 
-use proc_macro2::{Ident, Literal, Span, TokenStream, TokenTree};
+use proc_macro2::{Ident, Span, TokenStream, TokenTree};
 use quote::ToTokens;
 use std::env;
 
-fn string(s: &str) -> TokenTree {
-    Literal::string(s).into()
-}
-
 #[proc_macro_attribute]
 pub fn simd_test(
     attr: proc_macro::TokenStream,
     item: proc_macro::TokenStream,
 ) -> proc_macro::TokenStream {
     let tokens = TokenStream::from(attr).into_iter().collect::<Vec<_>>();
-    if tokens.len() != 3 {
-        panic!("expected #[simd_test(enable = \"feature\")]");
-    }
-    match &tokens[0] {
-        TokenTree::Ident(tt) if *tt == "enable" => {}
-        _ => panic!("expected #[simd_test(enable = \"feature\")]"),
-    }
-    match &tokens[1] {
-        TokenTree::Punct(tt) if tt.as_char() == '=' => {}
-        _ => panic!("expected #[simd_test(enable = \"feature\")]"),
-    }
-    let enable_feature = match &tokens[2] {
-        TokenTree::Literal(tt) => tt.to_string(),
-        _ => panic!("expected #[simd_test(enable = \"feature\")]"),
-    };
-    let enable_feature = enable_feature.trim_start_matches('"').trim_end_matches('"');
-    let target_features: Vec<String> = enable_feature
-        .replace('+', "")
-        .split(',')
-        .map(String::from)
-        .collect();
-
-    let enable_feature = string(enable_feature);
-    let mut item = syn::parse_macro_input!(item as syn::ItemFn);
-    let item_attrs = std::mem::take(&mut item.attrs);
-    let name = &item.sig.ident;
 
     let target = env::var("TARGET").expect(
         "TARGET environment variable should be set for rustc (e.g. TARGET=x86_64-apple-darwin cargo test)"
     );
-    let macro_test = match target
+    let target_arch = target
         .split('-')
         .next()
-        .unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"))
-    {
+        .unwrap_or_else(|| panic!("target triple contained no \"-\": {target}"));
+
+    let (target_features, target_feature_attr) = match &tokens[..] {
+        [] => (Vec::new(), TokenStream::new()),
+        [
+            TokenTree::Ident(enable),
+            TokenTree::Punct(equals),
+            TokenTree::Literal(literal),
+        ] if enable == "enable" && equals.as_char() == '=' => {
+            let mut enable_feature = literal
+                .to_string()
+                .trim_start_matches('"')
+                .trim_end_matches('"')
+                .to_string();
+            let target_features: Vec<_> = enable_feature
+                .replace('+', "")
+                .split(',')
+                .map(String::from)
+                .collect();
+            // Allows using `#[simd_test(enable = "neon")]` on aarch64/armv7 shared tests.
+            if target_arch == "armv7" && target_features.iter().any(|feat| feat == "neon") {
+                enable_feature.push_str(",v7");
+            }
+
+            (
+                target_features,
+                quote! {
+                    #[target_feature(enable = #enable_feature)]
+                },
+            )
+        }
+        _ => panic!("expected #[simd_test(enable = \"feature\")] or #[simd_test]"),
+    };
+
+    let mut item = syn::parse_macro_input!(item as syn::ItemFn);
+    let item_attrs = std::mem::take(&mut item.attrs);
+    let name = &item.sig.ident;
+
+    let macro_test = match target_arch {
         "i686" | "x86_64" | "i586" => "is_x86_feature_detected",
         "arm" | "armv7" | "thumbv7neon" => "is_arm_feature_detected",
         "aarch64" | "arm64ec" | "aarch64_be" => "is_aarch64_feature_detected",
@@ -64,6 +71,7 @@ pub fn simd_test(
         "powerpc64" | "powerpc64le" => "is_powerpc64_feature_detected",
         "loongarch32" | "loongarch64" => "is_loongarch_feature_detected",
         "s390x" => "is_s390x_feature_detected",
+        "mips64" | "mips64el" => "is_mips64_feature_detected",
         t => panic!("unknown target: {t}"),
     };
     let macro_test = Ident::new(macro_test, Span::call_site());
@@ -87,10 +95,20 @@ pub fn simd_test(
 
     let mut detect_missing_features = TokenStream::new();
     for feature in target_features {
-        let q = quote_spanned! {
-            proc_macro2::Span::call_site() =>
-            if !::std::arch::#macro_test!(#feature) {
-                missing_features.push(#feature);
+        let q = if target_arch == "armv7" && feature == "fp16" {
+            // "fp16" cannot be checked at runtime
+            quote_spanned! {
+                proc_macro2::Span::call_site() =>
+                if !cfg!(target_feature = #feature) {
+                    missing_features.push(#feature);
+                }
+            }
+        } else {
+            quote_spanned! {
+                proc_macro2::Span::call_site() =>
+                if !::std::arch::#macro_test!(#feature) {
+                    missing_features.push(#feature);
+                }
             }
         };
         q.to_tokens(&mut detect_missing_features);
@@ -102,6 +120,19 @@ pub fn simd_test(
         TokenStream::new()
     };
 
+    let (const_test, const_stability) = if item.sig.constness.is_some() {
+        (
+            quote! {
+                const _: () = unsafe { #name() };
+            },
+            quote! {
+                #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")]
+            },
+        )
+    } else {
+        (TokenStream::new(), TokenStream::new())
+    };
+
     let ret: TokenStream = quote_spanned! {
         proc_macro2::Span::call_site() =>
         #[allow(non_snake_case)]
@@ -109,6 +140,8 @@ pub fn simd_test(
         #maybe_ignore
         #(#item_attrs)*
         fn #name() {
+            #const_test
+
             let mut missing_features = ::std::vec::Vec::new();
             #detect_missing_features
             if missing_features.is_empty() {
@@ -118,7 +151,8 @@ pub fn simd_test(
                 ::stdarch_test::assert_skip_test_ok(stringify!(#name), &missing_features);
             }
 
-            #[target_feature(enable = #enable_feature)]
+            #target_feature_attr
+            #const_stability
             #item
         }
     };
diff --git a/crates/stdarch-gen-arm/README.md b/crates/stdarch-gen-arm/README.md
new file mode 100644
index 0000000000..970721681c
--- /dev/null
+++ b/crates/stdarch-gen-arm/README.md
@@ -0,0 +1,297 @@
+# stdarch-gen-arm generator guide
+## Running the generator
+
+Run: `cargo +nightly run --bin=stdarch-gen-arm -- crates/stdarch-gen-arm/spec crates/core_arch/src`
+
+NOTE: If you are running this from rust-lang/rust, you must be in the `library/stdarch`
+working directory.
+
+## Input/Output
+### Input files (intrinsic YAML definitions)
+ - `crates/stdarch-gen-arm/spec/<feature>/*.spec.yml`
+### Output files
+ - Generated intrinsics:
+	 - `crates/core_arch/src/<arch>/<feature>/generated.rs`
+ - Generated load/store tests:
+	 - `crates/core_arch/src/<arch>/<feature>/ld_st_tests_<arch>.rs`
+	 - Only generated when `test: { load: <idx> }` or `test: { store: <idx> }` is set for SVE/SVE2 intrinsics.
+## `.spec.yml` file anatomy
+```
+---
+Configs
+---
+Variable definitions
+---
+
+Intrinsic definitions
+
+---
+```
+- If you're new to YAML syntax, consider [reviewing](https://quickref.me/yaml.html) some of the less obvious syntax and features.
+- For example, mapping an attribute to a sequence can be done in two different ways:
+```yaml
+attribute: [item_a, item_b, item_c]
+```
+or
+```yaml
+attribute:
+    - item_a
+    - item_b
+    - item_c
+``` 
+## Configs
+- Mappings defining top-level settings applied to all intrinsics:
+- `arch_cfgs`
+    - Sequence of mappings specifying `arch_name`, `target_feature` (sequence), and `llvm_prefix`.
+- `uses_neon_types`(_Optional_)
+    - A boolean specifying whether to emit NEON type imports in generated code.
+- `auto_big_endian`(_Optional_)
+    - A boolean specifying whether to auto-generate big-endian shuffles when possible.
+- `auto_llvm_sign_conversion`(_Optional_)
+    - A boolean specifying whether to auto-convert LLVM wrapper args to signed types.
+## Variable definitions
+- Defines YAML anchors/variables to avoid repetition.
+- Commonly used for stability attributes, cfgs and target features.
+## Intrinsic definitions
+### Example
+```yaml
+    - name: "vtst{neon_type[0].no}"
+      doc: "Signed compare bitwise Test bits nonzero"
+      arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+      return_type: "{neon_type[1]}"
+      attr:
+        - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]]
+        - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      safety: safe
+      types:
+        - [int64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)']
+        - [int64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)']
+        - [poly64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)']
+        - [poly64x2_t, uint64x2_t, 'i64x2', 'i64x2::new(0, 0)']
+      compose:
+        - Let: [c, "{neon_type[0]}", {FnCall: [simd_and, [a, b]]}]
+        - Let: [d, "{type[2]}", "{type[3]}"]
+        - FnCall: [simd_ne, [c, {FnCall: [transmute, [d]]}]]
+```
+
+### Explanation of fields
+- `name`
+    - The name of the intrinsic
+    - Often built from a base name followed by a type suffix
+- `doc` (_Optional_)
+    - A string explaining the purpose of the intrinsic
+- `static_defs` (_Optional_)
+    - A sequence of const generics of the format `"const <NAME>: <type>"`
+- `arguments`
+    - A sequence of strings in the format `"<argname>: <argtype>"`
+- `return_type` (_Optional_)
+    - A string specifying the return type. If omitted, the intrinsic returns `()`.
+- `attr` (_Optional_)
+    - A sequence of items defining the attributes to be applied to the intrinsic. Often stability attributes, target features, or `assert_instr` tests. At least one of `attr` or `assert_instr` must be set.
+- `target_features` (_Optional_)
+    - A sequence of target features to enable for this intrinsic (merged with any global `arch_cfgs` settings).
+- `assert_instr` (_Optional_)
+    - A sequence of strings expected to be found in the assembly. Required if `attr` is not set.
+- `safety` (_Optional_)
+    - Use `safe`, or map `unsafe:` to a sequence of unsafety comments:
+        - `custom: "<string>"`
+        - `uninitialized`
+        - `pointer_offset`, `pointer_offset_vnum`, or `dereference` (optionally qualified with `predicated`, `predicated_non_faulting`, or `predicated_first_faulting`)
+        - `unpredictable_on_fault`
+        - `non_temporal`
+        - `neon`
+        - `no_provenance: "<string>"`
+- `substitutions` (_Optional_)
+    - Mappings of custom wildcard names to either `MatchSize` or `MatchKind` expressions
+- `types`
+    - A sequence or sequence of sequences specifying the types to use when producing each intrinsic variant. These sequences can then be indexed by wildcards.
+- `constraints` (_Optional_)
+    - A sequence of mappings. Each specifies a variable and a constraint. The available mappings are:
+    - Assert a variable's value exists in a sequence of i32's
+        - Usage: `{ variable: <name>, any_values: [<i32>,...] }`
+    - Assert a variable's value exists in a range (inclusive)
+        - Usage: `{ variable: <name>, range: [<i32>, <i32>] }`
+    - Assert a variable's value exists in a range via a match (inclusive)
+        - Usage: `{ variable: <name>, range: <MatchSize returning [i32,i32]> }`
+    - Assert a variable's value does not exceed the number of elements in a SVE type `<type>`.
+        - Usage: `{ variable: <name>, sve_max_elems_type: <type> }`
+    - Assert a variable's value does not exceed the number of elements in a vector type `<type>`.
+        - Usage: `{ variable: <name>, vec_max_elems_type: <type> }`
+- `predication_methods` (_Optional_)
+    - Configuration for predicate-form variants. Only used when the intrinsic name includes an `_m*_` wildcard (e.g., `{_mx}`, `{_mxz}`).
+    - `zeroing_method`: Required when requesting `_z`; either `{ drop: <arg> }` to remove an argument and replace it with a zero initialiser, or `{ select: <predicate_var> }` to select zeros into a predicate.
+    - `dont_care_method`: How `_x` should be implemented (`inferred`, `as_zeroing`, or `as_merging`).
+- `compose`
+    - A sequence of expressions that make up the body of the intrinsic
+- `big_endian_inverse` (_Optional_)
+    - A boolean, default false. If true, generates two implementations of each intrinsic variant, one for each endianness, and attempts to automatically generate the required bit swizzles
+- `visibility` (_Optional_)
+    - Function visibility. One of `public` (default) or `private`.
+- `n_variant_op` (_Optional_)
+    - Enables generation of an `_n` variant when the intrinsic name includes the `{_n}` wildcard. Set to the operand name that should be splattered for the `_n` form.
+- `test` (_Optional_)
+	- When set, load/store tests are automatically generated.
+    - A mapping of either `load` or `store` to a number that indexes `types` to specify the type that the test should be addressing in memory. 
+### Expressions
+#### Common
+- `Let`
+    - Defines a variable
+    - Usage: `Let: [<variable>, <type(optional)>, <expression>]`
+- `Const`
+    - Defines a const
+    - Usage: `Const: [<variable>, <type>, <expression>]`
+- `Assign`
+    - Performs variable assignment
+    - Usage: `Assign: [<variable>, <expression>]`
+- `FnCall`
+    - Performs a function call
+    - Usage: `FnCall: [<function pointer: expression>, [<argument: expression>, ... ], [<turbofish argument: expression>, ...](optional), <unsafe wrapper(optional): bool>]`
+- `MacroCall`
+    - Performs a macro call
+    - Usage: `MacroCall: [<macro name>, <token stream>]`
+- `MethodCall`
+    - Performs a method call
+    - Usage: `MethodCall: [<object: expression>, <method name>, [<argument: expression>, ... ]]`
+- `LLVMLink`
+    - Creates an LLVM link and stores the function's name in the wildcard `{llvm_link}` for later use in subsequent expressions.
+    - If left unset, the arguments and return type inherit from the intrinsic's signature by default. The links will also be set automatically if unset.
+    - Usage:
+```yaml
+LLVMLink:
+    name: <name>
+    arguments: [<expression>, ... ](optional)
+    return_type: <return type>(optional)
+    links: (optional)
+        - link: <link>
+          arch: <arch>
+        - ...
+```
+- `Identifier`
+    - Emits a symbol. Prepend with a `$` to treat it as a scope variable, which engages variable tracking and enables inference. For example, `my_function_name` for a generic symbol or `$my_variable` for a variable.
+    - Usage `Identifier: [<symbol name>, <Variable|Symbol>]`
+- `CastAs`
+    - Casts an expression to an unchecked type
+    - Usage: `CastAs: [<expression>, <type>]`
+- `MatchSize`
+    - Allows for conditional generation depending on the size of a specified type
+    - Usage:
+```yaml
+MatchSize:
+    - <type>
+    - default: <expression>
+      byte(optional): <expression>
+      halfword(optional): <expression>
+      doubleword(optional): <expression>
+```
+- `MatchKind`
+    - Allows for conditional generation depending on the kind of a specified type
+```yaml
+MatchKind:
+    - <type>
+    - default: <expression>
+      float(optional): <expression>
+      unsigned(optional): <expression>
+```
+#### Rarely Used
+- `IntConstant`
+    - Constant signed integer expression
+    - Usage: `IntConstant: <i32>`
+- `FloatConstant`
+    - Constant floating-point expression
+    - Usage: `FloatConstant: <f32>`
+- `BoolConstant`
+    - Constant boolean expression
+    - Usage: `BoolConstant: <bool>`
+- `Array`
+    - An array of expressions
+    - Usage: `Array: [<expression>, ...]`
+- `Multiply`
+    - Simply `*`
+    - Usage: `Multiply: [<expression>, <expression>]`
+- `Xor`
+    - Simply `^`
+    - Usage: `Xor: [<expression>, <expression>]`
+- `ConvertConst`
+    - Converts the specified constant to the specified type's kind
+    - Usage: `ConvertConst: [<type>, <i32>]`
+- `Type`
+    - Yields the given type in the Rust representation
+    - Usage: `Type: [<type>]`
+
+### Wildstrings
+- Wildstrings let you take advantage of wildcards.
+- For example, they are often used in intrinsic names `name: "vtst{neon_type[0].no}"`
+- As shown above, wildcards are identified by the surrounding curly brackets.
+- Double curly brackets can be used to escape wildcard functionality if you need literal curly brackets in the generated intrinsic.
+### Wildcards
+Wildcards are heavily used in the spec. They let you write generalised definitions for a group of intrinsics that generate multiple variants. The wildcard itself is replaced with the relevant string in each variant.
+Ignoring endianness, for each row in the `types` field of an intrinsic in the spec, a variant of the intrinsic will be generated. That row's contents can be indexed by the wildcards. Below is the behaviour of each wildcard.
+- `type[<index: usize>]`
+    - Replaced in each variant with the value in the indexed position in the relevant row of the `types` field.
+    - For unnested sequences of `types` (i.e., `types` is a sequence where each element is a single item, not another sequence), the square brackets can be omitted. Simply: `type`
+- `neon_type[<index: usize>]`
+    - Extends the behaviour of `type` with some NEON-specific features and inference.
+    - Tuples: This wildcard can also be written as `neon_type_x<n>` where `n` is in the set `{2,3,4}`. This generates the `n`-tuple variant of the (inferred) NEON type.
+    - Suffixes: These modify the behaviour of the wildcard from simple substitution.
+	    - `no` -  normal behaviour. Tries to do as much work as it can for you, inferring when to emit:
+            - Regular type-size suffixes: `_s8`, `_u16`, `_f32`, ...
+            - `q` variants for double-width (128b) vector types: `q_s8`, `q_u16`, `q_f32`, ...
+            - `_x<n>` variants for tuple vector types: `_s8_x2`, `_u32_x3`, `_f64_x4`, ...
+            - As well as any combination of the above: `q_s16_x16` ...
+    - Most of the other suffixes modify the normal behaviour by disabling features or adding new ones. (See table below)
+- `sve_type[<index: usize>]`
+    - Similar to `neon_type`, but without the suffixes.
+- `size[<index: usize>]`
+    - The size (in bits) of the indexed type.
+- `size_minus_one[<index: usize>]`
+    - Emits the size (in bits) of the indexed type minus one.
+- `size_literal[<index: usize>]`
+    - The literal representation of the indexed type.
+    - `b`: byte, `h`: halfword, `w`: word, or `d`: double.
+- `type_kind[<index: usize>]`
+    - The literal representation of the indexed type's kind.
+    - `f`: float, `s`: signed, `u`: unsigned, `p`: polynomial, `b`: boolean.
+- `size_in_bytes_log2[<index: usize>]`
+    - Log2 of the size of the indexed type in *bytes*.
+- `predicate[<index: usize>]`
+    - SVE predicate vector type inferred from the indexed type.
+- `max_predicate`
+    - The same as predicate, but uses the largest type in the relevant `types` sequence/row.
+- `_n`
+    - Emits the current N-variant suffix when `n_variant_op` is configured.
+- `<wildcard> as <type>`
+    - If `<wildcard>` evaluates to a vector, it produces a vector of the same shape, but with `<type>` as the base type.
+- `llvm_link`
+    - If the `LLVMLink` mapping has been set for an intrinsic, this will give the name of the link.
+- `_m*`
+    - Predicate form masks. Use wildcards such as `{_mx}` or `{_mxz}` to expand merging/don't-care/zeroing variants according to the mask.
+- `<custom>`
+    - You may simply call upon wildcards defined under `substitutions`.
+### neon_type suffixes
+
+| suffix            | implication                                   |
+| ----------------- | --------------------------------------------- |
+| `.no`             | Normal                                        |
+| `.noq`            | Never include `q`s                            |
+| `.nox`            | Never include `_x<n>`s                        |
+| `.N`              | Include `_n_`, e.g., `_n_s8`                  |
+| `.noq_N`          | Include `_n_`, but never `q`s                 |
+| `.dup`            | Include `_dup_`, e.g., `_dup_s8`              |
+| `.dup_nox`        | Include `_dup_` but never `_x<n>`s            |
+| `.lane`           | Include `_lane_`, e.g., `_lane_s8`            |
+| `.lane_nox`       | Include `_lane_`, but never `_x<n>`s          |
+| `.rot90`          | Include `_rot90_`, e.g., `_rot90_s8`          |
+| `.rot180`         | Include `_rot180_`, e.g., `_rot180_s8`        |
+| `.rot270`         | Include `_rot270_`, e.g., `_rot270_s8`        |
+| `.rot90_lane`     | Include `_rot90_lane_`                        |
+| `.rot180_lane`    | Include `_rot180_lane_`                       |
+| `.rot270_lane`    | Include `_rot270_lane_`                       |
+| `.rot90_laneq`    | Include `_rot90_laneq_`                       |
+| `.rot180_laneq`   | Include `_rot180_laneq_`                      |
+| `.rot270_laneq`   | Include `_rot270_laneq_`                      |
+| `.base`           | Produce only the size, e.g., `8`, `16`        |
+| `.u`              | Produce the type's unsigned equivalent        |
+| `.laneq_nox`      | Include `_laneq_`, but never `_x<n>`s         |
+| `.tuple`          | Produce only the size of the tuple, e.g., `3` |
+| `.base_byte_size` | Produce only the size in bytes.               |
+ 
diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
index fe64f9d786..2f7f2fc2b0 100644
--- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
+++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml
@@ -13,6 +13,10 @@ auto_llvm_sign_conversion: false
 neon-stable: &neon-stable
   FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
 
+# #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+neon-stable-fp16: &neon-stable-fp16
+  FnCall: [stable, ['feature = "stdarch_neon_fp16"', 'since = "1.94.0"']]
+
 # #[cfg(not(target_arch = "arm64ec"))]
 target-not-arm64ec: &target-not-arm64ec
   FnCall: [cfg, [{ FnCall: [not, ['target_arch = "arm64ec"']]}]]
@@ -59,17 +63,37 @@ neon-unstable-f16: &neon-unstable-f16
 neon-unstable-feat-lut: &neon-unstable-feat-lut
   FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']]
 
-aarch64-unstable-jscvt: &aarch64-unstable-jscvt
-  FnCall: [unstable, ['feature = "stdarch_aarch64_jscvt"', 'issue = "147555"']]
+aarch64-stable-jscvt: &aarch64-stable-jscvt
+  FnCall: [stable, ['feature = "stdarch_aarch64_jscvt"', 'since = "1.95.0"']]
+
+# #[unstable(feature = "stdarch_neon_feat_lrcpc3", issue = "none")]
+neon-unstable-feat-lrcpc3: &neon-unstable-feat-lrcpc3
+  FnCall: [unstable, ['feature = "stdarch_neon_feat_lrcpc3"', 'issue = "none"']]
+
+# #[cfg(target_has_atomic = "64")]
+cfg-target-has-atomic-64: &cfg-target-has-atomic-64
+  FnCall: [cfg, ['target_has_atomic = "64"']]
+
+# #[unstable(feature = "stdarch_neon_fp8", issue = "none")]
+neon-unstable-fp8: &neon-unstable-fp8
+  FnCall: [unstable, ['feature = "stdarch_neon_fp8"', 'issue = "none"']]
+
+# all(test, target_endian = "little")
+all-test-little-endian: &all-test-little-endian
+  FnCall: [all, [test, 'target_endian = "little"']]
 
 # #[cfg(target_endian = "little")]
-little-endian: &little-endian
+cfg-little-endian: &cfg-little-endian
   FnCall: [cfg, ['target_endian = "little"']]
 
 # #[cfg(target_endian = "big")]
-big-endian: &big-endian
+cfg-big-endian: &cfg-big-endian
   FnCall: [cfg, ['target_endian = "big"']]
 
+# all(test, not(target_env = "msvc"), target_endian = "big")
+cfg-test-not-msvc-little-endian: &cfg-test-not-msvc-little-endian
+  FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}, 'target_endian = "little"']]
+
 intrinsics:
   - name: "vaddd_{type}"
     doc: Add
@@ -158,12 +182,12 @@ intrinsics:
       - ['d_f64', 'f64']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vabd_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - 0
+          - - 0
 
   - name: "vabd{type[0]}"
     doc: "Floating-point absolute difference"
@@ -179,125 +203,28 @@ intrinsics:
       - ['h_f16', 'f16']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vabd_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - 0
-
-  - name: "vabdl_high{neon_type[0].noq}"
-    doc: Signed Absolute difference Long
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [sabdl2]
-    safety: safe
-    types:
-      - [int8x16_t, int16x8_t, int8x8_t, uint8x8_t]
-    compose:
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - [8, 9, 10, 11, 12, 13, 14, 15]
-      - Let:
-          - d
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - [8, 9, 10, 11, 12, 13, 14, 15]
-      - Let:
-          - e
-          - "{neon_type[3]}"
-          - FnCall:
-              - simd_cast
-              - - FnCall:
-                    - "vabd_{neon_type[0]}"
-                    - - c
-                      - d
-      - FnCall:
-          - simd_cast
-          - - e
+          - - 0
 
   - name: "vabdl_high{neon_type[0].noq}"
     doc: Signed Absolute difference Long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall:
-          - stable
-          - - 'feature = "neon_intrinsics"'
-            - 'since = "1.59.0"'
-    assert_instr: [sabdl2]
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sabdl2]]}]]
     safety: safe
     types:
+      - [int8x16_t, int16x8_t, int8x8_t, uint8x8_t]
       - [int16x8_t, int32x4_t, int16x4_t, uint16x4_t]
-    compose:
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - [4, 5, 6, 7]
-      - Let:
-          - d
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - [4, 5, 6, 7]
-      - Let:
-          - e
-          - "{neon_type[3]}"
-          - FnCall:
-              - simd_cast
-              - - FnCall:
-                    - "vabd_{neon_type[0]}"
-                    - - c
-                      - d
-      - FnCall:
-          - simd_cast
-          - - e
-
-  - name: "vabdl_high{neon_type[0].noq}"
-    doc: Signed Absolute difference Long
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[1]}"
-    attr:
-      - FnCall:
-          - stable
-          - - 'feature = "neon_intrinsics"'
-            - 'since = "1.59.0"'
-    assert_instr: [sabdl2]
-    safety: safe
-    types:
       - [int32x4_t, int64x2_t, int32x2_t, uint32x2_t]
     compose:
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - [2, 3]
-      - Let:
-          - d
-          - "{neon_type[2]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - [2, 3]
+      - Let: [c, FnCall: ['vget_high_{neon_type[0]}', [a]]]
+      - Let: [d, FnCall: ['vget_high_{neon_type[0]}', [b]]]
       - Let:
           - e
           - "{neon_type[3]}"
@@ -317,7 +244,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint64x1_t, uint64x1_t]
@@ -335,7 +262,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -349,19 +276,19 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vceq_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - 0
 
 
   - name: "vceq{type[0]}"
@@ -378,12 +305,12 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vceq_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - 0
 
   - name: "vceqd_{type[0]}"
     doc: "Compare bitwise equal"
@@ -391,7 +318,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64", "s64"]
@@ -410,7 +337,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t, 'i64x1', 'i64x1::new(0)']
@@ -428,7 +355,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tst]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64", "s64"]
@@ -447,7 +374,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_s32", "i32", "u32"]
@@ -465,19 +392,19 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [suqadd]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["b_s8", "i8", "u8", "s8"]
       - ["h_s16", "i16", "u16", "s16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[3]}'
           - - FnCall:
                 - "vuqadd_{type[3]}"
                 - - FnCall: ["vdup_n_{type[3]}", [a]]
                   - FnCall: ["vdup_n_{type[2]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vabs{neon_type.no}"
     doc: "Floating-point absolute value"
@@ -485,7 +412,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fabs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -499,7 +426,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t]
@@ -513,7 +440,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - uint64x1_t
@@ -527,7 +454,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -541,19 +468,19 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgt_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
 
   - name: "vcgt{type[0]}"
@@ -570,12 +497,12 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgt_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vclt{neon_type[0].no}"
     doc: "Compare signed less than"
@@ -583,7 +510,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t]
@@ -597,7 +524,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t]
@@ -611,7 +538,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -625,19 +552,19 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcle_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
 
   - name: "vcle{type[0]}"
@@ -654,12 +581,12 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcle_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vcge{neon_type[0].no}"
     doc: "Compare signed greater than or equal"
@@ -667,7 +594,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, uint64x1_t]
@@ -681,7 +608,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -702,7 +629,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64"]
@@ -719,7 +646,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmle]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -743,7 +670,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmle]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
@@ -763,18 +690,18 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vclez_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vclez{type[0]}"
     doc: "Floating-point compare less than or equal to zero"
@@ -790,11 +717,11 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vclez_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcltz{neon_type[0].no}"
     doc: "Compare signed less than zero"
@@ -802,7 +729,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmlt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -826,7 +753,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmlt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
@@ -846,18 +773,18 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcltz_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcltz{type[0]}"
     doc: "Floating-point compare less than zero"
@@ -873,11 +800,11 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcltz_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcltzd_s64"
     doc: "Compare less than zero"
@@ -885,7 +812,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [asr]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64"]
@@ -902,7 +829,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -920,7 +847,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32", i32]
@@ -959,7 +886,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -977,7 +904,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32", i32]
@@ -1017,7 +944,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -1031,7 +958,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -1060,7 +987,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -1074,7 +1001,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [facge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
@@ -1103,7 +1030,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int64x1_t, float64x1_t]
@@ -1117,7 +1044,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "i32", "f32", s32]
@@ -1131,7 +1058,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint64x1_t, float64x1_t]
@@ -1145,7 +1072,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u32", "f32", "s_f32"]
@@ -1160,7 +1087,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1298,7 +1225,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [scvtf, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1324,7 +1251,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1349,7 +1276,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ucvtf, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1373,7 +1300,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", "s_s32_f32", "32"]
@@ -1387,7 +1314,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", "s_u32_f32"]
@@ -1472,7 +1399,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, float64x2_t]
@@ -1484,30 +1411,21 @@ intrinsics:
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtl2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [float32x4_t, float64x2_t]
     compose:
-      - Let:
-          - b
-          - float32x2_t
-          - FnCall:
-              - simd_shuffle!
-              - - a
-                - a
-                - '[2, 3]'
-      - FnCall: [simd_cast, [b]]
+      - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]]
 
   - name: "vcvt_high_f16_f32"
     doc: "Floating-point convert to lower precision"
     arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn2]]}]]
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -1523,9 +1441,8 @@ intrinsics:
     arguments: ["a: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtl2]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtl2]]}]]
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -1541,8 +1458,8 @@ intrinsics:
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [float64x2_t, float32x2_t]
@@ -1554,25 +1471,24 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtn2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, float64x2_t, float32x4_t]
     compose:
       - FnCall:
-          - simd_shuffle!
+          - vcombine_f32
           - - a
-            - FnCall: [simd_cast, [b]]
-            - '[0, 1, 2, 3]'
+            - FnCall: [vcvt_f32_f64, [b]]
 
   - name: "vcvtx_f32_f64"
     doc: "Floating-point convert to lower precision narrow, rounding to odd"
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtxn]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [float64x2_t, float32x2_t]
@@ -1589,34 +1505,33 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f64", "f32"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - vcvtx_f32_f64
                 - - FnCall: [vdupq_n_f64, [a]]
-            - '0'
+          - - '0'
 
   - name: "vcvtx_high_f32_f64"
     doc: "Floating-point convert to lower precision narrow, rounding to odd"
     arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
     return_type: "{type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtxn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fcvtxn2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, float64x2_t, float32x4_t]
     compose:
       - FnCall:
-          - simd_shuffle!
+          - vcombine_f32
           - - a
             - FnCall: [vcvtx_f32_f64, [b]]
-            - '[0, 1, 2, 3]'
 
   - name: "vcvt{type[2]}"
     doc: "Floating-point convert to fixed-point, rounding toward zero"
@@ -1625,7 +1540,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1648,7 +1563,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1742,7 +1657,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1765,7 +1680,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -1787,7 +1702,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, int32x2_t, _s32_f32]
@@ -1809,7 +1724,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -1828,7 +1743,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", 's_s32_f32']
@@ -1852,9 +1767,9 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "u16", 'h_u16_f16']
       - ["f16", "u32", 'h_u32_f16']
       - ["f16", "u64", 'h_u64_f16']
-
     compose:
       - LLVMLink:
           name: "vcvta{type[2]}"
@@ -1874,6 +1789,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "i16", 'h_s16_f16']
       - ["f16", "i32", 'h_s32_f16']
       - ["f16", "i64", 'h_s64_f16']
     compose:
@@ -1884,44 +1800,13 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtas.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-
-  - name: "vcvta{type[2]}"
-    doc: "Floating-point convert to integer, rounding to nearest with ties to away"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtas]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h_s16_f16', 's32']
-    compose:
-      - 'vcvtah_{type[3]}_f16(a) as i16'
-
   - name: "vcvta{type[2]}"
     doc: "Floating-point convert to integer, rounding to nearest with ties to away"
     arguments: ["a: {type[0]}"]
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h_u16_f16', 'u32']
-    compose:
-      - 'vcvtah_{type[3]}_f16(a) as u16'
-
-  - name: "vcvta{type[2]}"
-    doc: "Floating-point convert to integer, rounding to nearest with ties to away"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", 's_u32_f32']
@@ -1939,7 +1824,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, int32x2_t]
@@ -1959,7 +1844,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", 's_s32_f32']
@@ -1979,7 +1864,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -2000,7 +1885,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -2024,6 +1909,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "i16", 'h']
       - ["f16", "i32", 'h']
       - ["f16", "i64", 'h']
     compose:
@@ -2034,22 +1920,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtns.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding to nearest with ties to even"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtns]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h', 'i32']
-    compose:
-      - 'vcvtnh_{type[3]}_f16(a) as i16'
-
-
   - name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
     doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
     arguments: ["a: {type[0]}"]
@@ -2061,6 +1931,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "u16", 'h']
       - ["f16", "u32", 'h']
       - ["f16", "u64", 'h']
     compose:
@@ -2071,28 +1942,13 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtnu.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtn{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to even"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h', 'u32']
-    compose:
-      - 'vcvtnh_{type[3]}_f16(a) as u16'
-
   - name: "vcvtm{neon_type[1].no}_{neon_type[0]}"
     doc: "Floating-point convert to signed integer, rounding toward minus infinity"
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, int32x2_t]
@@ -2114,7 +1970,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -2135,7 +1991,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -2155,7 +2011,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", 's_s32_f32']
@@ -2173,7 +2029,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, int32x2_t]
@@ -2193,7 +2049,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "i32", 's_s32_f32']
@@ -2211,7 +2067,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t]
@@ -2231,7 +2087,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtnu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", 's_u32_f32']
@@ -2249,7 +2105,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t]
@@ -2269,7 +2125,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", s_u32_f32]
@@ -2287,7 +2143,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t]
@@ -2307,7 +2163,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", s_u32_f32, 'i32']
@@ -2330,7 +2186,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -2351,7 +2207,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -2376,6 +2232,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "i16", 'h']
       - ["f16", "i32", 'h']
       - ["f16", "i64", 'h']
     compose:
@@ -2386,21 +2243,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtps.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding to plus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtps]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h', 'i32']
-    compose:
-      - 'vcvtph_{type[3]}_f16(a) as i16'
-
   - name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
     doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
     arguments: ["a: {type[0]}"]
@@ -2412,6 +2254,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "u16", 'h']
       - ["f16", "u32", 'h']
       - ["f16", "u64", 'h']
     compose:
@@ -2422,29 +2265,14 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtpu.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtp{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to unsigned integer, rounding to plus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtpu]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h', 'u32']
-    compose:
-      - 'vcvtph_{type[3]}_f16(a) as u16'
-
-  - name: "vdup{neon_type.laneq_nox}"
-    doc: "Set all vector lanes to the same value"
-    arguments: ["a: {neon_type}"]
-    return_type: "{neon_type}"
+  - name: "vdup{neon_type.laneq_nox}"
+    doc: "Set all vector lanes to the same value"
+    arguments: ["a: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2461,7 +2289,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [dup, 'N = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2478,7 +2306,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2495,7 +2323,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2504,7 +2332,7 @@ intrinsics:
       - [float64x1_t, "f64"]
     compose:
       - FnCall: [static_assert!, ['N == 0']]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
   - name: "vdup_laneq_{neon_type[0]}"
     doc: "Set all vector lanes to the same value"
@@ -2513,7 +2341,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2522,8 +2350,8 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
       - FnCall:
-          - "transmute::<{type[2]}, _>"
-          - - FnCall: [simd_extract!, [a, 'N as u32']]
+          - transmute
+          - - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
   - name: "vdup{type[2]}"
     doc: "Set all vector lanes to the same value"
@@ -2532,7 +2360,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2544,7 +2372,7 @@ intrinsics:
       - [float64x2_t, "f64", d_laneq_f64]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
   - name: "vdup{type[2]}"
     doc: "Set all vector lanes to the same value"
@@ -2553,7 +2381,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 4']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2565,7 +2393,7 @@ intrinsics:
       - [poly16x8_t, "p16", h_laneq_p16]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 3]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
 
   - name: "vdup{type[2]}"
@@ -2584,7 +2412,7 @@ intrinsics:
       - [float16x4_t, "f16", h_lane_f16]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 2]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
 
   - name: "vdup{type[2]}"
@@ -2603,7 +2431,7 @@ intrinsics:
       - [float16x8_t, "f16", h_laneq_f16]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 4]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
 
   - name: "vdup{type[2]}"
@@ -2613,7 +2441,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 8']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2622,7 +2450,7 @@ intrinsics:
       - [poly8x16_t, "p8", b_laneq_p8]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 4]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
   - name: "vdup{type[2]}"
     doc: "Set all vector lanes to the same value"
@@ -2631,7 +2459,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -2643,24 +2471,24 @@ intrinsics:
       - [float32x4_t, "f32", s_laneq_f32]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 2]]
-      - FnCall: [simd_extract!, [a, 'N as u32']]
+      - FnCall: ['vget{neon_type[0].lane_nox}', [a], [N]]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ext, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [poly64x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }']
-      - [float64x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }']
+      - poly64x2_t
+      - float64x2_t
     compose:
-      - Identifier: ["{type[1]}", Symbol]
-      - Identifier: ["{type[2]}", Symbol]
+      - FnCall: [static_assert_uimm_bits!, [N, 1]]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']]
 
   - name: "vmla{neon_type.no}"
     doc: "Floating-point multiply-add to accumulator"
@@ -2668,7 +2496,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -2681,16 +2509,16 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2]]}]]
+      - *neon-stable
     safety: safe
     types:
-      - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]']
-      - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]', '[2, 3]']
+      - [int16x8_t, int8x16_t, int8x8_t]
+      - [int32x4_t, int16x8_t, int16x4_t]
+      - [int64x2_t, int32x4_t, int32x2_t]
     compose:
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
-      - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
+      - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
       - FnCall: ["vmlal_{neon_type[2]}", [a, b, c]]
 
   - name: "vmlal_high_{neon_type[1]}"
@@ -2698,22 +2526,16 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2]]}]]
+      - *neon-stable
     safety: safe
     types:
-      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
+      - [uint16x8_t, uint8x16_t, uint8x8_t]
+      - [uint32x4_t, uint16x8_t, uint16x4_t]
+      - [uint64x2_t, uint32x4_t, uint32x2_t]
     compose:
-      - Let:
-          - b
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
+      - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
       - FnCall: ["vmlal_{neon_type[1]}", [a, b, c]]
 
   - name: "vmlsl_high_{neon_type[1]}"
@@ -2721,22 +2543,16 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2]]}]]
+      - *neon-stable
     safety: safe
     types:
-      - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]']
+      - [int16x8_t, int8x16_t, int8x8_t]
+      - [int32x4_t, int16x8_t, int16x4_t]
+      - [int64x2_t, int32x4_t, int32x2_t]
     compose:
-      - Let:
-          - b
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
-      - Let:
-          - c
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
+      - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
       - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]]
 
   - name: "vmlsl_high_{neon_type[1]}"
@@ -2744,44 +2560,38 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2]]}]]
+      - *neon-stable
     safety: safe
     types:
-      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
+      - [uint16x8_t, uint8x16_t, uint8x8_t]
+      - [uint32x4_t, uint16x8_t, uint16x4_t]
+      - [uint64x2_t, uint32x4_t, uint32x2_t]
     compose:
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
-      - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[1]}', [b]]}]
+      - Let: [c, {FnCall: ['vget_high_{neon_type[1]}', [c]]}]
       - FnCall: ["vmlsl_{neon_type[1]}", [a, b, c]]
 
   - name: "vmovn_high{neon_type[1].noq}"
     doc: Extract narrow
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
-    attr: [*neon-stable]
-    assert_instr: [xtn2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [xtn2]]}]]
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int32x4_t]
+      - [uint8x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint32x4_t]
     compose:
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall:
-              - simd_cast
-              - - b
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
-            - c
-            - "{type[3]}"
+            - FnCall: ['simd_cast', [b]]
 
   - name: "vneg{neon_type.no}"
     doc: Negate
@@ -2859,11 +2669,11 @@ intrinsics:
       - [i64, 'd_s64', 's64']
     compose:
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - 'vqneg_{type[2]}'
                 - - FnCall: ['vdup_n_{type[2]}', [a]]
-            - 0
+          - - 0
 
   - name: "vqneg{neon_type[0].no}"
     doc: Signed saturating negate
@@ -2940,12 +2750,12 @@ intrinsics:
               - "vdup_n_{type[2]}"
               - - b
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqsub_{type[2]}"
                 - - a
                   - b
-            - "0"
+          - - "0"
 
   - name: "vqsub{type[3]}"
     doc: Saturating subtract
@@ -2971,12 +2781,12 @@ intrinsics:
               - "vdup_n_{type[2]}"
               - - b
       - FnCall:
-          - 'simd_extract!'
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqsub_{type[2]}"
                 - - a
                   - b
-            - "0"
+          - - "0"
 
   - name: "vrbit{neon_type.no}"
     doc: Reverse bit order
@@ -3034,7 +2844,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [frintx]
     safety: safe
@@ -3083,7 +2893,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [frinta]
     safety: safe
@@ -3183,7 +2993,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [frintm]
     safety: safe
@@ -3233,7 +3043,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [frintp]
     safety: safe
@@ -3279,7 +3089,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [frintz]
     safety: safe
@@ -3332,7 +3142,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [frinti]
     safety: safe
@@ -3425,12 +3235,12 @@ intrinsics:
               - "vdup_n_{type[0]}"
               - - b
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[0]}'
           - - FnCall:
                 - "vqadd_{type[0]}"
                 - - a
                   - b
-            - "0"
+          - - "0"
 
   - name: "vqadd{type[2]}"
     doc: Saturating add
@@ -3456,19 +3266,19 @@ intrinsics:
               - "vdup_n_{type[0]}"
               - - b
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[0]}'
           - - FnCall:
                 - "vqadd_{type[0]}"
                 - - a
                   - b
-            - "0"
+          - - "0"
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers"
-    arguments: ["a: {type[0]}"]
+    arguments: ["ptr: {type[0]}"]
     return_type: "{neon_type[1]}"
     attr: [*neon-stable]
-    assert_instr: [ld1]
+    assert_instr: [ld]
     safety:
       unsafe: [neon]
     types:
@@ -3479,11 +3289,12 @@ intrinsics:
       - ["*const f64", float64x1x4_t]
       - ["*const f64", float64x2x4_t]
     compose:
-      - LLVMLink:
-          name: "vld1{neon_type[1].no}"
-          links:
-            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0"
-              arch: aarch64,arm64ec
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - ptr
+                - cast
+                - []
 
   - name: "vld2{neon_type[1].lane_nox}"
     doc: Load multiple 2-element structures to two registers
@@ -3684,16 +3495,12 @@ intrinsics:
     types:
       - ["*const f64", float64x1x2_t, f64, float64x1_t]
     compose:
-      - LLVMLink:
-          name: "vld2.{neon_type[1]}"
-          arguments:
-            - "ptr: *const {neon_type[3]}"
-          links:
-            - link: "llvm.aarch64.neon.ld2.v{neon_type[1].lane}{type[2]}.p0"
-              arch: aarch64,arm64ec
       - FnCall:
-          - "_vld2{neon_type[1].nox}"
-          - - "a as _"
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld2{neon_type[1].nox}"
     doc: Load multiple 2-element structures to two registers
@@ -3810,7 +3617,7 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - ['*const i8', int8x16x3_t, int8x16_t, i8, '3']
+      - ['*const i8', int8x16x3_t, int8x16_t, i8, '4']
       - ['*const i64', int64x2x3_t, int64x2_t, i64, '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
@@ -4019,17 +3826,10 @@ intrinsics:
       unsafe: [neon]
     assert_instr: [ld3]
     types:
-      - ['*const i64', int64x2x3_t, '*const int64x2_t', i64]
-      - ['*const f64', float64x2x3_t, '*const float64x2_t', f64]
+      - ['*const i64', int64x2x3_t, i64, "2"]
+      - ['*const f64', float64x2x3_t, f64, "2"]
     compose:
-      - LLVMLink:
-          name: 'vld3{neon_type[1].nox}'
-          arguments:
-            - 'ptr: {type[2]}'
-          links:
-            - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']]
+      - FnCall: ["crate::core_arch::macros::deinterleaving_load!", [{ Type: "{type[2]}" }, "{type[3]}", "3", a], [], true]
 
   - name: "vld3{neon_type[1].nox}"
     doc: Load multiple 3-element structures to three registers
@@ -4042,14 +3842,12 @@ intrinsics:
     types:
       - ['*const f64', float64x1x3_t, '*const float64x1_t', f64]
     compose:
-      - LLVMLink:
-          name: 'vld3{neon_type[1].nox}'
-          arguments:
-            - 'ptr: {type[2]}'
-          links:
-            - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']]
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld3{neon_type[1].nox}"
     doc: Load multiple 3-element structures to three registers
@@ -4163,17 +3961,11 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - ['*const f64', float64x2x4_t, f64, '*const float64x2_t']
-      - ['*const i64', int64x2x4_t, i64, '*const int64x2_t']
+      - ['*const f64', float64x2x4_t, f64, "2"]
+      - ['*const i64', int64x2x4_t, i64, "2"]
     compose:
-      - LLVMLink:
-          name: 'vld4{neon_type[1].nox}'
-          arguments:
-            - 'ptr: {type[3]}'
-          links:
-            - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']]
+      - FnCall: ["crate::core_arch::macros::deinterleaving_load!", [{ Type: "{type[2]}" }, "{type[3]}", "4", a], [], true]
+
 
   - name: "vld4{neon_type[1].nox}"
     doc: Load multiple 4-element structures to four registers
@@ -4187,14 +3979,12 @@ intrinsics:
     types:
       - ['*const f64', float64x1x4_t, f64, '*const float64x1_t']
     compose:
-      - LLVMLink:
-          name: 'vld4{neon_type[1].nox}'
-          arguments:
-            - 'ptr: {type[3]}'
-          links:
-            - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']]
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld4{neon_type[1].nox}"
     doc: Load multiple 4-element structures to four registers
@@ -4249,7 +4039,7 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - ['*const i8', int8x16x4_t, int8x16_t, i8, '3']
+      - ['*const i8', int8x16x4_t, int8x16_t, i8, '4']
       - ['*const i64', int64x2x4_t, int64x2_t, i64, '1']
       - ['*const f64', float64x2x4_t, float64x2_t, f64, '1']
     compose:
@@ -4396,6 +4186,124 @@ intrinsics:
                 - - FnCall: [transmute, [a]]
                   - FnCall: [transmute, [b]]
 
+  - name: "vldap1{neon_type[1].lane_nox}"
+    doc: "Load-acquire RCpc one single-element structure to one lane of one register"
+    arguments: ["ptr: {type[0]}", "src: {type[1]}"]
+    static_defs: ["const LANE: i32"]
+    return_type: "{type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,rcpc3"']]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [ldap1, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ["2"]]
+      - *neon-unstable-feat-lrcpc3
+      - *cfg-target-has-atomic-64
+    types:
+      - ['*const i64', int64x1_t, 'static_assert!', 'LANE == 0']
+      - ['*const i64', int64x2_t,'static_assert_uimm_bits!', 'LANE, 1']
+    compose:
+      - FnCall: ['{type[2]}', ['{type[3]}']]
+      - Let:
+          - "atomic_src"
+          - FnCall: ["crate::sync::atomic::AtomicI64::from_ptr", ['ptr as *mut i64']]
+      - Identifier: [';', Symbol]
+      - FnCall:
+        - simd_insert!
+        - - src
+          - "LANE as u32"
+          - MethodCall:
+             - "atomic_src"
+             - load
+             - ["crate::sync::atomic::Ordering::Acquire"]
+
+  - name: "vldap1{neon_type[1].lane_nox}"
+    doc: "Load-acquire RCpc one single-element structure to one lane of one register"
+    arguments: ["ptr: {type[0]}","src: {type[1]}"]
+    static_defs: ["const LANE: i32"]
+    return_type: "{type[1]}"
+    safety:
+      unsafe: [neon]
+    attr:
+      - FnCall: [rustc_legacy_const_generics, ["2"]]
+      - FnCall: [target_feature, ['enable = "neon,rcpc3"']]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [ldap1, 'LANE = 0']]}]]
+      - *neon-unstable-feat-lrcpc3
+      - *cfg-target-has-atomic-64
+    types:
+      - ['*const u64', uint64x1_t,'static_assert!', 'LANE == 0','']
+      #- ['*const f64', float64x1_t,'static_assert!', 'LANE == 0',''] # Fails due to bad IR gen from rust
+      - ['*const p64', poly64x1_t,'static_assert!', 'LANE == 0','']
+      - ['*const u64', uint64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
+      - ['*const f64', float64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
+      - ['*const p64', poly64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
+    compose:
+      - FnCall: ['{type[2]}', ['{type[3]}']]
+      - FnCall:
+        - transmute
+        - - FnCall:
+            - 'vldap1{type[4]}_lane_s64::<LANE>'
+            - - "ptr as *mut i64"
+              - FnCall: [transmute,[src]]
+
+  - name: "vstl1{neon_type[1].lane_nox}"
+    doc: "Store-Release a single-element structure from one lane of one register."
+    arguments: ["ptr: {type[0]}", "val: {neon_type[1]}"]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe:
+        - pointer_write: ptr
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,rcpc3"']]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [stl1, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ["2"]]
+      - *neon-unstable-feat-lrcpc3
+      - *cfg-target-has-atomic-64
+    types:
+      - ['*mut i64', int64x1_t,'static_assert!', 'LANE == 0']
+      - ['*mut i64', int64x2_t,'static_assert_uimm_bits!', 'LANE, 1']
+    compose:
+      - FnCall: ['{type[2]}', ['{type[3]}']]
+      - Let:
+          - "atomic_dst"
+          - "ptr as *mut crate::sync::atomic::AtomicI64"
+      - Identifier: [';', Symbol]
+      - Let:
+        - "lane"
+        - i64
+        - FnCall: ['vget{neon_type[1].lane_nox}', [val], [LANE]]
+      - MethodCall:
+        - "(*atomic_dst)"
+        - store
+        - [FnCall: [transmute, [lane]],"crate::sync::atomic::Ordering::Release"]
+
+  - name: "vstl1{neon_type[1].lane_nox}"
+    doc: "Store-Release a single-element structure from one lane of one register."
+    arguments: ["ptr: {type[0]}", "val: {neon_type[1]}"]
+    static_defs: ["const LANE: i32"]
+    safety:
+      unsafe:
+        - pointer_write: ptr
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,rcpc3"']]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [stl1, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ["2"]]
+      - *neon-unstable-feat-lrcpc3
+      - *cfg-target-has-atomic-64
+    types:
+      - ['*mut u64', uint64x1_t, 'static_assert!', 'LANE == 0','']
+      - ['*mut f64', float64x1_t,'static_assert!', 'LANE == 0','']
+      - ['*mut p64', poly64x1_t, 'static_assert!', 'LANE == 0','']
+      - ['*mut u64', uint64x2_t ,'static_assert_uimm_bits!', 'LANE, 1','q']
+      - ['*mut f64', float64x2_t,'static_assert_uimm_bits!', 'LANE, 1','q']
+      - ['*mut p64', poly64x2_t ,'static_assert_uimm_bits!', 'LANE, 1','q']
+    compose:
+      - FnCall: ['{type[2]}', ['{type[3]}']]
+      - FnCall:
+        - "vstl1{type[4]}_lane_s64::<LANE>"
+        - - "ptr as *mut i64"
+          - FnCall: [transmute, [val]]
+
   - name: "vst1{neon_type[1].lane_nox}"
     doc: "Store multiple single-element structures from one, two, three, or four registers"
     arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
@@ -4441,20 +4349,11 @@ intrinsics:
       unsafe: [neon]
     attr:
       - *neon-stable
-    assert_instr: [st1]
+    assert_instr: [stp]
     types:
-      - ['f64', float64x1x2_t, float64x1_t]
+      - ['f64', float64x1x2_t]
     compose:
-      - LLVMLink:
-          name: 'st2.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst2{neon_type[1].nox}"
     doc: "Store multiple 2-element structures from two registers"
@@ -4465,19 +4364,10 @@ intrinsics:
       - *neon-stable
     assert_instr: [st2]
     types:
-      - [i64, int64x2x2_t, int64x2_t]
-      - [f64, float64x2x2_t, float64x2_t]
+      - [i64, int64x2x2_t, "2"]
+      - [f64, float64x2x2_t, "2"]
     compose:
-      - LLVMLink:
-          name: 'st2.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true]
 
   - name: "vst2{neon_type[1].lane_nox}"
     doc: "Store multiple 2-element structures from two registers"
@@ -4655,19 +4545,9 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [f64, float64x1x3_t, float64x1_t]
+      - [f64, float64x1x3_t]
     compose:
-      - LLVMLink:
-          name: 'st3.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst3{neon_type[1].lane_nox}"
     doc: "Store multiple 3-element structures from three registers"
@@ -4734,20 +4614,10 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i64, int64x2x3_t, int64x2_t]
-      - [f64, float64x2x3_t, float64x2_t]
+      - [i64, int64x2x3_t, "2"]
+      - [f64, float64x2x3_t, "2"]
     compose:
-      - LLVMLink:
-          name: 'st3.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true]
 
   - name: "vst3{neon_type[1].nox}"
     doc: "Store multiple 3-element structures from three registers"
@@ -4869,20 +4739,9 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [f64, float64x1x4_t, float64x1_t]
+      - [f64, float64x1x4_t]
     compose:
-      - LLVMLink:
-          name: 'st4.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst4{neon_type[1].lane_nox}"
     doc: "Store multiple 4-element structures from four registers"
@@ -4949,21 +4808,10 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i64, int64x2x4_t, int64x2_t]
-      - [f64, float64x2x4_t, float64x2_t]
+      - [i64, int64x2x4_t, "2"]
+      - [f64, float64x2x4_t, "2"]
     compose:
-      - LLVMLink:
-          name: 'st4.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true]
 
   - name: "vst4{neon_type[1].nox}"
     doc: "Store multiple 4-element structures from four registers"
@@ -5079,52 +4927,6 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall: ['_vst4{neon_type[1].lane_nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'LANE as i64', 'a as _']]
 
-  - name: "vusdot{neon_type[0].laneq_nox}"
-    doc: "Dot product index form with unsigned and signed integers"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
-    return_type: "{neon_type[0]}"
-    attr:
-      - *neon-i8mm
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usdot, 'LANE = 3']]}]]
-      - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
-    static_defs: ["const LANE: i32"]
-    safety: safe
-    types:
-      - [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]']
-      - [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-    compose:
-      - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
-      - Let: [c, int32x4_t, {FnCall: [transmute, [c]]}]
-      - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}]
-      - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]]
-
-  - name: "vsudot{neon_type[0].laneq_nox}"
-    doc: "Dot product index form with signed and unsigned integers"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
-    return_type: "{neon_type[0]}"
-    attr:
-      - *neon-i8mm
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
-      - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
-    static_defs: ["const LANE: i32"]
-    safety: safe
-    types:
-      - [int32x2_t, int8x8_t, uint8x16_t, '[LANE as u32, LANE as u32]', uint32x2_t]
-      - [int32x4_t, int8x16_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t]
-    compose:
-      - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
-      - Let:
-          - c
-          - uint32x4_t
-          - FnCall: [transmute, [c]]
-      - Let:
-          - c
-          - "{type[4]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
-      - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]]
-
   - name: "vmul{neon_type.no}"
     doc: Multiply
     arguments: ["a: {neon_type}", "b: {neon_type}"]
@@ -5141,45 +4943,35 @@ intrinsics:
   - name: "vmull_high{neon_type[0].noq}"
     doc: Signed multiply long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[3]}"
-    attr: [*neon-stable]
-    assert_instr: [smull2]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2]]}]]
     safety: safe
     types:
-      - [int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int16x8_t]
-      - [int16x8_t, int16x4_t, '[4, 5, 6, 7]', int32x4_t]
-      - [int32x4_t, int32x2_t, '[2, 3]', int64x2_t]
+      - [int8x16_t, int16x8_t]
+      - [int16x8_t, int32x4_t]
+      - [int32x4_t, int64x2_t]
     compose:
-      - Let:
-          - a
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - b
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
       - FnCall: ["vmull_{neon_type[0]}", [a, b]]
 
   - name: "vmull_high{neon_type[0].noq}"
     doc: "Unsigned multiply long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[3]}"
-    attr: [*neon-stable]
-    assert_instr: [umull2]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2]]}]]
     safety: safe
     types:
-      - [uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint16x8_t]
-      - [uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', uint32x4_t]
-      - [uint32x4_t, uint32x2_t, '[2, 3]', uint64x2_t]
+      - [uint8x16_t, uint16x8_t]
+      - [uint16x8_t, uint32x4_t]
+      - [uint32x4_t, uint64x2_t]
     compose:
-      - Let:
-          - a
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - b
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
       - FnCall: ["vmull_{neon_type[0]}", [a, b]]
 
   - name: "vmull_p64"
@@ -5205,22 +4997,16 @@ intrinsics:
   - name: "vmull_high{neon_type[0].noq}"
     doc: "Polynomial multiply long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[3]}"
+    return_type: "{neon_type[1]}"
     attr:
       - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [pmull2]]}]]
     safety: safe
-    assert_instr: [pmull2]
     types:
-      - [poly8x16_t, poly8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', poly16x8_t]
+      - [poly8x16_t, poly16x8_t]
     compose:
-      - Let:
-          - a
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - b
-          - "{neon_type[1]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
       - FnCall: ["vmull_{neon_type[0]}", [a, b]]
 
   - name: "vmull_high{neon_type[0].noq}"
@@ -5230,15 +5016,15 @@ intrinsics:
     attr:
       - *neon-aes
       - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [pmull2]]}]]
     safety: safe
-    assert_instr: [pmull2]
     types:
       - [poly64x2_t, "p128"]
     compose:
       - FnCall:
           - "vmull_{neon_type[0]}"
-          - - FnCall: [simd_extract!, [a, '1']]
-            - FnCall: [simd_extract!, [b, '1']]
+          - - FnCall: ['vget{neon_type[0].lane_nox}', [a], [1]]
+            - FnCall: ['vget{neon_type[0].lane_nox}', [b], [1]]
 
   - name: "vmulx{neon_type.no}"
     doc: Floating-point multiply extended
@@ -5266,7 +5052,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fmulx]
     safety: safe
@@ -5337,11 +5123,8 @@ intrinsics:
           - vmulx_f64
           - - a
             - FnCall:
-                - 'transmute::<f64, _>'
-                - - FnCall:
-                      - "simd_extract!"
-                      - - b
-                        - 'LANE as u32'
+                - 'transmute'
+                - - FnCall: ['vget{neon_type.lane_nox}', [b], [LANE]]
 
   - name: "vmulx{type[0]}"
     doc: Floating-point multiply extended
@@ -5354,17 +5137,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ["q_lane_f64", float64x2_t, float64x1_t, "q_f64", '[LANE as u32, LANE as u32]']
+      - ["q_lane_f64", float64x2_t, float64x1_t, "q_f64", '[LANE as u32; 2]']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
       - FnCall:
           - "vmulx{type[3]}"
           - - a
-            - FnCall:
-                - "simd_shuffle!"
-                - - b
-                  - b
-                  - "{type[4]}"
+            - FnCall: ['vdup{type[0]}', [b], [LANE]]
 
   - name: "vmulx{type[0]}"
     doc: Floating-point multiply extended
@@ -5377,16 +5156,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ["d_lane_f64", "f64", float64x1_t, "d_f64", 'LANE as u32']
+      - ["d_lane_f64", "f64", float64x1_t, "d_f64"]
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
       - FnCall:
           - "vmulx{type[3]}"
           - - a
-            - FnCall:
-                - "simd_extract!"
-                - - b
-                  - "{type[4]}"
+            - FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]]
 
   - name: "vmulx_laneq_f64"
     doc: Floating-point multiply extended
@@ -5406,11 +5182,8 @@ intrinsics:
           - vmulx_f64
           - - a
             - FnCall:
-                - 'transmute::<f64, _>'
-                - - FnCall:
-                      - "simd_extract!"
-                      - - b
-                        - 'LANE as u32'
+                - 'transmute'
+                - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
 
   - name: "vmulx{type[0]}"
     doc: Floating-point multiply extended
@@ -5423,21 +5196,17 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32, LANE as u32]']
-      - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32, LANE as u32]']
-      - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32, LANE as u32]']
+      - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32']
+      - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32']
+      - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32']
+      - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32']
+      - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
       - FnCall:
           - "vmulx{type[4]}"
           - - a
-            - FnCall:
-                - "simd_shuffle!"
-                - - b
-                  - b
-                  - "{type[5]}"
+            - FnCall: ['vdup{type[0]}', [b], [LANE]]
 
 
   - name: "vmulx{type[0]}"
@@ -5448,25 +5217,21 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmulx, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16']
+      - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16']
+      - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16']
+      - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
       - FnCall:
           - "vmulx{type[4]}"
           - - a
-            - FnCall:
-                - "simd_shuffle!"
-                - - b
-                  - b
-                  - "{type[5]}"
+            - FnCall: ['vdup{type[0]}', [b], [LANE]]
 
 
   - name: "vmulx{type[0]}"
@@ -5480,18 +5245,15 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['s_lane_f32', f32, float32x2_t, '1', 's_f32', 'LANE as u32']
-      - ['s_laneq_f32', f32, float32x4_t, '2', 's_f32', 'LANE as u32']
-      - ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64', 'LANE as u32']
+      - ['s_lane_f32', f32, float32x2_t, '1', 's_f32']
+      - ['s_laneq_f32', f32, float32x4_t, '2', 's_f32']
+      - ['d_laneq_f64', f64, float64x2_t, '1', 'd_f64']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
       - FnCall:
           - "vmulx{type[4]}"
           - - a
-            - FnCall:
-                - "simd_extract!"
-                - - b
-                  - "{type[5]}"
+            - FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]]
 
 
   - name: "vmulx{type[0]}"
@@ -5514,10 +5276,7 @@ intrinsics:
       - FnCall:
           - "vmulx{type[4]}"
           - - a
-            - FnCall:
-                - "simd_extract!"
-                - - b
-                  - "{type[5]}"
+            - FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE]]
 
 
   - name: "vmulx{neon_type[0].N}"
@@ -5649,7 +5408,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fdiv]
     safety: safe
@@ -5667,7 +5426,7 @@ intrinsics:
       - *neon-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
-    assert_instr: [nop]
+    assert_instr: [fdiv]
     safety: safe
     types:
       - [f16, 'h']
@@ -5692,7 +5451,7 @@ intrinsics:
     arguments: ["a: {type[1]}", "b: {type[1]}"]
     return_type: "{type[1]}"
     attr: [*neon-stable]
-    assert_instr: [nop]
+    assert_instr: [sub]
     safety: safe
     types:
       - ['d_s64', 'i64']
@@ -5708,7 +5467,7 @@ intrinsics:
       - *neon-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
-    assert_instr: [nop]
+    assert_instr: [fsub]
     safety: safe
     types:
       - ['h_f16', 'f16']
@@ -5811,18 +5570,18 @@ intrinsics:
     doc: Signed Subtract Wide
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
-    attr: [*neon-stable]
-    assert_instr: [ssubw2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubw2]]}]]
     safety: safe
     types:
-      - [int16x8_t, int8x16_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int32x4_t, int16x8_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int64x2_t, int32x4_t, int32x2_t, '[2, 3]']
+      - [int16x8_t, int8x16_t]
+      - [int32x4_t, int16x8_t]
+      - [int64x2_t, int32x4_t]
     compose:
       - Let:
           - c
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - FnCall:
           - simd_sub
           - - a
@@ -5832,18 +5591,18 @@ intrinsics:
     doc: Unsigned Subtract Wide
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
-    attr: [*neon-stable]
-    assert_instr: [usubw2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubw2]]}]]
     safety: safe
     types:
-      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]']
+      - [uint16x8_t, uint8x16_t]
+      - [uint32x4_t, uint16x8_t]
+      - [uint64x2_t, uint32x4_t]
     compose:
       - Let:
           - c
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - FnCall:
           - simd_sub
           - - a
@@ -5853,61 +5612,47 @@ intrinsics:
     doc: "Signed Subtract Long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [ssubl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ssubl2]]}]]
     safety: safe
     types:
-      - [int8x16_t, int16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t]
-      - [int16x8_t, int32x4_t, '[4, 5, 6, 7]', int16x4_t]
-      - [int32x4_t, int64x2_t, '[2, 3]', int32x2_t]
+      - [int8x16_t, int16x8_t]
+      - [int32x4_t, int64x2_t]
+      - [int16x8_t, int32x4_t]
     compose:
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - d
           - "{neon_type[1]}"
-          - FnCall: [simd_cast, [c]]
-      - Let:
-          - e
-          - "{neon_type[3]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
+          - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]]
       - Let:
-          - f
+          - d
           - "{neon_type[1]}"
-          - FnCall: [simd_cast, [e]]
-      - FnCall: [simd_sub, [d, f]]
+          - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [b]]}]]
+      - FnCall: [simd_sub, [c, d]]
 
   - name: "vsubl_high{neon_type[0].noq}"
     doc: "Unsigned Subtract Long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [usubl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [usubl2]]}]]
     safety: safe
     types:
-      - [uint8x16_t, uint16x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', uint8x8_t]
-      - [uint16x8_t, uint32x4_t, '[4, 5, 6, 7]', uint16x4_t]
-      - [uint32x4_t, uint64x2_t, '[2, 3]', uint32x2_t]
+      - [uint8x16_t, uint16x8_t]
+      - [uint16x8_t, uint32x4_t]
+      - [uint32x4_t, uint64x2_t]
     compose:
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[2]}"]]
-      - Let:
-          - d
           - "{neon_type[1]}"
-          - FnCall: [simd_cast, [c]]
+          - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [a]]}]]
       - Let:
-          - e
-          - "{neon_type[3]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[2]}"]]
-      - Let:
-          - f
+          - d
           - "{neon_type[1]}"
-          - FnCall: [simd_cast, [e]]
-      - FnCall: [simd_sub, [d, f]]
+          - FnCall: [simd_cast, [{FnCall: ['vget_high_{neon_type[0]}', [b]]}]]
+      - FnCall: [simd_sub, [c, d]]
 
   - name: "vbcax{neon_type.no}"
     doc: Bit clear and exclusive OR
@@ -5998,7 +5743,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *enable-fcma
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     assert_instr: [fcadd]
     safety: safe
@@ -6019,7 +5764,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *enable-fcma
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     assert_instr: [fcadd]
     safety: safe
@@ -6060,7 +5805,7 @@ intrinsics:
     attr:
       - FnCall: [target_feature, ['enable = "neon,fcma"']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
@@ -6101,7 +5846,7 @@ intrinsics:
     attr:
       - FnCall: [target_feature, ['enable = "neon,fcma"']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
@@ -6143,7 +5888,7 @@ intrinsics:
     attr:
       - FnCall: [target_feature, ['enable = "neon,fcma"']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
@@ -6169,14 +5914,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x4_t, '']
+      - [float32x4_t, float32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].laneq_nox}"
@@ -6188,19 +5932,18 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x8_t, '']
+      - [float16x8_t, float16x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot90_laneq}"
@@ -6215,14 +5958,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x4_t, '']
+      - [float32x4_t, float32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot90_laneq}"
@@ -6234,19 +5976,18 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x8_t, '']
+      - [float16x8_t, float16x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot90_lane}"
@@ -6261,14 +6002,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x2_t, '']
+      - [float32x4_t, float32x2_t, 'q']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot90_lane}"
@@ -6280,19 +6020,18 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x4_t, '']
+      - [float16x8_t, float16x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot90}", [a, b, c]]
 
   - name: "vcmla{neon_type.rot180}"
@@ -6323,7 +6062,7 @@ intrinsics:
     attr:
       - FnCall: [target_feature, ['enable = "neon,fcma"']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     assert_instr: [fcmla]
     safety: safe
@@ -6350,14 +6089,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x4_t, '']
+      - [float32x4_t, float32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot180_laneq}"
@@ -6369,24 +6107,21 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x8_t,
-        '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]'
-        ]
+      - [float16x4_t, float16x8_t, '']
+      - [float16x8_t, float16x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
-  - name: "vcmla{type[3]}"
+  - name: "vcmla{neon_type[0].rot180_lane}"
     doc: Floating-point complex multiply accumulate
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
@@ -6398,17 +6133,16 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f32']
-      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f32']
+      - [float32x2_t, float32x2_t, '']
+      - [float32x4_t, float32x2_t, 'q']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
-  - name: "vcmla{type[3]}"
+  - name: "vcmla{neon_type[0].rot180_lane}"
     doc: Floating-point complex multiply accumulate
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
@@ -6417,21 +6151,18 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', '_rot180_lane_f16']
-      - [float16x8_t, float16x4_t,
-          '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]', 'q_rot180_lane_f16'
-        ]
+      - [float16x4_t, float16x4_t, '']
+      - [float16x8_t, float16x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot180}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot270_laneq}"
@@ -6446,14 +6177,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x4_t, '']
+      - [float32x4_t, float32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot270_laneq}"
@@ -6465,19 +6195,18 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x8_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x8_t, '']
+      - [float16x8_t, float16x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpretq_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_laneq_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].lane_nox}"
@@ -6492,14 +6221,13 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x2_t, '']
+      - [float32x4_t, float32x2_t, 'q']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
 
 
@@ -6512,19 +6240,18 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x4_t, '']
+      - [float16x8_t, float16x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]
+      - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].no}", [a, b, c]]
 
   - name: "vcmla{neon_type[0].rot270_lane}"
@@ -6539,14 +6266,15 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float32x4_t, float32x2_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float32x2_t, float32x2_t, '']
+      - [float32x4_t, float32x2_t, 'q']
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
+      - Let: [c, {FnCall: [vreinterpret_u64_f32, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u64', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f32_u64', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
 
-
   - name: "vcmla{neon_type[0].rot270_lane}"
     doc: Floating-point complex multiply accumulate
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
@@ -6556,78 +6284,20 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-unstable-fcma
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
-      - [float16x8_t, float16x4_t, '[2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1, 2 * LANE as u32, 2 * LANE as u32 + 1]']
+      - [float16x4_t, float16x4_t, '']
+      - [float16x8_t, float16x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, 1]]
-      - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[2]}"]]}]
+      - Let: [c, {FnCall: [vreinterpret_u32_f16, [c]]}]
+      - Let: [c, {FnCall: ['vdup{type[2]}_lane_u32', [c], [LANE]]}]
+      - Let: [c, {FnCall: ['vreinterpret{type[2]}_f16_u32', [c]]}]
       - FnCall: ["vcmla{neon_type[0].rot270}", [a, b, c]]
 
-  - name: "vdot{neon_type[0].laneq_nox}"
-    doc: Dot product arithmetic (indexed)
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
-    return_type: "{neon_type[0]}"
-    static_defs: ["const LANE: i32"]
-    attr:
-      - FnCall: [target_feature, ['enable = "neon,dotprod"']]
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]]
-      - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
-    safety: safe
-    types:
-      - [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]']
-      - [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-    compose:
-      - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
-      - Let:
-          - c
-          - "{neon_type[3]}"
-          - FnCall: [transmute, [c]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
-      - FnCall:
-          - "vdot{neon_type[0].no}"
-          - - a
-            - b
-            - FnCall: [transmute, [c]]
-
-  - name: "vdot{neon_type[0].laneq_nox}"
-    doc: Dot product arithmetic (indexed)
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
-    return_type: "{neon_type[0]}"
-    static_defs: ["const LANE: i32"]
-    attr:
-      - FnCall: [target_feature, ['enable = "neon,dotprod"']]
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]]
-      - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
-    safety: safe
-    types:
-      - [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-    compose:
-      - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
-      - Let:
-          - c
-          - "{neon_type[3]}"
-          - FnCall: [transmute, [c]]
-      - Let:
-          - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
-      - FnCall:
-          - "vdot{neon_type[0].no}"
-          - - a
-            - b
-            - FnCall: [transmute, [c]]
-
   - name: "vmax{neon_type.no}"
     doc: Maximum (vector)
     arguments: ["a: {neon_type}", "b: {neon_type}"]
@@ -6666,6 +6336,7 @@ intrinsics:
               arch: aarch64,arm64ec
 
 
+
   - name: "vmaxnm{neon_type.no}"
     doc: Floating-point Maximum Number (vector)
     arguments: ["a: {neon_type}", "b: {neon_type}"]
@@ -6677,7 +6348,11 @@ intrinsics:
       - float64x1_t
       - float64x2_t
     compose:
-      - FnCall: [simd_fmax, [a, b]]
+      - LLVMLink:
+          name: "fmaxnm.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
+              arch: aarch64,arm64ec
 
 
   - name: "vmaxnmh_{type}"
@@ -6693,7 +6368,11 @@ intrinsics:
     types:
       - f16
     compose:
-      - FnCall: ["f16::max", [a, b]]
+      - LLVMLink:
+          name: "vmaxh.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnm.{type}"
+              arch: aarch64,arm64ec
 
 
   - name: "vminnmh_{type}"
@@ -6709,7 +6388,11 @@ intrinsics:
     types:
       - f16
     compose:
-      - FnCall: ["f16::min", [a, b]]
+      - LLVMLink:
+          name: "vminh.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.fminnm.{type}"
+              arch: aarch64,arm64ec
 
 
   - name: "vmaxnmv{neon_type[0].no}"
@@ -6723,7 +6406,11 @@ intrinsics:
       - [float32x2_t, f32]
       - [float64x2_t, f64]
     compose:
-      - FnCall: [simd_reduce_max, [a]]
+      - LLVMLink:
+          name: "fmaxnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
 
   - name: "vmaxnmv{neon_type[0].no}"
     doc: Floating-point maximum number across vector
@@ -6735,7 +6422,11 @@ intrinsics:
     types:
       - [float32x4_t, f32]
     compose:
-      - FnCall: [simd_reduce_max, [a]]
+      - LLVMLink:
+          name: "fmaxnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
 
 
   - name: "vmaxnmv{neon_type[0].no}"
@@ -6752,7 +6443,11 @@ intrinsics:
       - [float16x4_t, f16]
       - [float16x8_t, f16]
     compose:
-      - FnCall: [simd_reduce_max, [a]]
+      - LLVMLink:
+          name: "fmaxnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fmaxnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
 
 
   - name: "vminnmv{neon_type[0].no}"
@@ -6769,7 +6464,11 @@ intrinsics:
       - [float16x4_t, f16]
       - [float16x8_t, f16]
     compose:
-      - FnCall: [simd_reduce_min, [a]]
+      - LLVMLink:
+          name: "fminnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
 
 
   - name: "vmaxv{neon_type[0].no}"
@@ -6878,7 +6577,11 @@ intrinsics:
       - float64x1_t
       - float64x2_t
     compose:
-      - FnCall: [simd_fmin, [a, b]]
+      - LLVMLink:
+          name: "fminnm.{neon_type}"
+          links:
+            - link: "llvm.aarch64.neon.fminnm.{neon_type}"
+              arch: aarch64,arm64ec
 
   - name: "vminnmv{neon_type[0].no}"
     doc: "Floating-point minimum number across vector"
@@ -6886,13 +6589,17 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, "f32"]
       - [float64x2_t, "f64"]
     compose:
-      - FnCall: [simd_reduce_min, [a]]
+      - LLVMLink:
+          name: "vminnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
 
   - name: "vminnmv{neon_type[0].no}"
     doc: "Floating-point minimum number across vector"
@@ -6900,86 +6607,92 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmv]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x4_t, "f32"]
     compose:
-      - FnCall: [simd_reduce_min, [a]]
+      - LLVMLink:
+          name: "vminnmv.{neon_type[0]}"
+          links:
+            - link: "llvm.aarch64.neon.fminnmv.{type[1]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
 
   - name: "vmovl_high{neon_type[0].noq}"
     doc: Vector move
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [sxtl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sxtl2]]}]]
     safety: safe
     types:
-      - [int8x16_t, int16x8_t, int8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]']
+      - [int8x16_t, int16x8_t]
+      - [int16x8_t, int32x4_t]
+      - [int32x4_t, int64x2_t]
     compose:
       - Let:
           - a
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[0]}', [a]]
       - FnCall: ["vmovl{neon_type[0].noq}", [a]]
 
   - name: "vmovl_high{neon_type[0].noq}"
     doc: Vector move
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
-    attr: [*neon-stable]
-    assert_instr: [uxtl2]
+    attr:
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uxtl2]]}]]
     safety: safe
     types:
-      - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]']
+      - [uint8x16_t, uint16x8_t]
+      - [uint16x8_t, uint32x4_t]
+      - [uint32x4_t, uint64x2_t]
     compose:
       - Let:
           - a
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[0]}', [a]]
       - FnCall: ["vmovl{neon_type[0].noq}", [a]]
 
-  - name: "vpadd{neon_type.no}"
-    doc: Floating-point add pairwise
-    arguments: ["a: {neon_type}", "b: {neon_type}"]
-    return_type: "{type}"
+  - name: "vpadd{neon_type[0].no}"
+    doc: "Floating-point add pairwise"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
     attr: [*neon-stable]
     assert_instr: [faddp]
     safety: safe
     types:
-      - float32x4_t
-      - float64x2_t
+      - [float32x4_t, "4"]
+      - [float64x2_t, "2"]
     compose:
-      - LLVMLink:
-          name: "faddp.{neon_type}"
-          links:
-            - link: "llvm.aarch64.neon.faddp.{neon_type}"
-              arch: aarch64,arm64ec
-
+      - Let:
+        - even
+        - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::even::<{type[1]}>()"]]
+      - Let:
+        - odd
+        - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::odd::<{type[1]}>()"]]
+      - FnCall: [simd_add, [even, odd]]
 
-  - name: "vpadd{neon_type.no}"
+  - name: "vpadd{neon_type[0].no}"
     doc: Floating-point add pairwise
-    arguments: ["a: {neon_type}", "b: {neon_type}"]
-    return_type: "{type}"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [faddp]
     safety: safe
     types:
-      - float16x8_t
+      - [float16x8_t, "8"]
     compose:
-      - LLVMLink:
-          name: "faddp.{neon_type}"
-          links:
-            - link: "llvm.aarch64.neon.faddp.{neon_type}"
-              arch: aarch64,arm64ec
-
+      - Let:
+        - even
+        - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::even::<{type[1]}>()"]]
+      - Let:
+        - odd
+        - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::odd::<{type[1]}>()"]]
+      - FnCall: [simd_add, [even, odd]]
 
   - name: "vpmax{neon_type.no}"
     doc: Floating-point add pairwise
@@ -6987,7 +6700,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fmaxp]
     safety: safe
@@ -7008,7 +6721,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fmaxnmp]
     safety: safe
@@ -7029,7 +6742,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fminp]
     safety: safe
@@ -7050,7 +6763,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fminnmp]
     safety: safe
@@ -7079,11 +6792,11 @@ intrinsics:
       - Let:
           - a1
           - "{type[2]}"
-          - FnCall: [simd_extract!, [a, '0']]
+          - FnCall: ['vget{neon_type[1].lane_nox}', [a], [0]]
       - Let:
           - a2
           - "{type[2]}"
-          - FnCall: [simd_extract!, [a, '1']]
+          - FnCall: ['vget{neon_type[1].lane_nox}', [a], [1]]
       - Identifier: ['a1 + a2', Symbol]
 
   - name: "vpmin{type[0]}"
@@ -7109,14 +6822,14 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", "i32"]
     compose:
       - Let: [a, int16x4_t, {FnCall: [vdup_n_s16, [a]]}]
       - Let: [b, int16x4_t, {FnCall: [vdup_n_s16, [b]]}]
-      - FnCall: [simd_extract!, [{FnCall: [vqdmull_s16, [a, b]]}, '0']]
+      - FnCall: ['vgetq_lane_{type[1]}', [{FnCall: [vqdmull_s16, [a, b]]}], ['0']]
 
   - name: "vqdmulls_s32"
     doc: "Signed saturating doubling multiply long"
@@ -7124,7 +6837,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i32", "i64"]
@@ -7140,15 +6853,15 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2]]}]]
+      - *neon-stable
     safety: safe
     types:
-      - [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int32x4_t, int64x2_t, int32x2_t, '[2, 3]']
+      - [int16x8_t, int32x4_t]
+      - [int32x4_t, int64x2_t]
     compose:
-      - Let: [a, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, '{type[3]}']]}]
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, '{type[3]}']]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull_high_n_{type[1]}"
@@ -7156,15 +6869,15 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2]]}]]
+      - *neon-stable
     safety: safe
     types:
-      - [int16x8_t, "i16", int32x4_t, int16x4_t, '[4, 5, 6, 7]']
-      - [int32x4_t, "i32", int64x2_t, int32x2_t, '[2, 3]']
+      - [int16x8_t, "i16", int32x4_t]
+      - [int32x4_t, "i32", int64x2_t]
     compose:
-      - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull{type[3]}"
@@ -7174,7 +6887,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7182,7 +6895,7 @@ intrinsics:
       - ["i32", int32x4_t, "i64", 's_laneq_s32', 's_s32']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 2]]
-      - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ["vqdmull{type[4]}", [a, b]]
 
   - name: "vqdmullh_laneq_s16"
@@ -7192,14 +6905,14 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, N = 4]]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
       - ["i16", int16x8_t, "i32"]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 3]]
-      - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ["vqdmullh_s16", [a, b]]
 
   - name: "vqdmulls_lane_s32"
@@ -7209,33 +6922,33 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
       - ["i32", int32x2_t, "i64"]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
-      - Let: [b, "{type[0]}", {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ["vqdmulls_s32", [a, b]]
 
-  - name: "vqdmull{type[6]}"
+  - name: "vqdmull{type[3]}"
     doc: "Signed saturating doubling multiply long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x8_t, int16x4_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]', '_high_lane_s16']
-      - [int32x4_t, int32x4_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]', '_high_laneq_s32']
+      - [int16x8_t, int16x4_t, int32x4_t, '_high_lane_s16']
+      - [int32x4_t, int32x4_t, int64x2_t, '_high_laneq_s32']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '2']]
-      - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull_high_lane_s32"
@@ -7243,17 +6956,17 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int32x4_t, int32x2_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]']
+      - [int32x4_t, int32x2_t, int64x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '1']]
-      - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull_high_laneq_s16"
@@ -7261,17 +6974,17 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmull2, N = 4]]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x8_t, int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]']
+      - [int16x8_t, int16x8_t, int32x4_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '3']]
-      - Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
+      - Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
 
   - name: "vqdmull_laneq_s16"
@@ -7281,14 +6994,14 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 4']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x4_t, int16x8_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]']
+      - [int16x4_t, int16x8_t, int32x4_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '3']]
-      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: [vqdmull_s16, [a, b]]
 
   - name: "vqdmull_laneq_s32"
@@ -7298,14 +7011,14 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmull, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int32x2_t, int32x4_t, int64x2_t, '[N as u32, N as u32]']
+      - [int32x2_t, int32x4_t, int64x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '2']]
-      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
       - FnCall: [vqdmull_s32, [a, b]]
 
   - name: "vqdmlal{type[4]}"
@@ -7313,8 +7026,8 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16]
@@ -7329,9 +7042,9 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal2, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7349,13 +7062,13 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i32", "i16", "s16"]
     compose:
       - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}]
-      - FnCall: [vqadds_s32, [a, {FnCall: [simd_extract!, [x, 0]]}]]
+      - FnCall: [vqadds_s32, [a, {FnCall: ['vgetq_lane_s32', [x], [0]]}]]
 
   - name: "vqdmlals_s32"
     doc: "Signed saturating doubling multiply-add long"
@@ -7363,7 +7076,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "i32", "i32", "i64"]
@@ -7378,7 +7091,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -7388,16 +7101,16 @@ intrinsics:
       - ["i64", "i32", int32x4_t, "i64", s_laneq_s32, '2', s_s32]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
-      - FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+      - FnCall: ["vqdmlal{type[6]}", [a, b, {FnCall: ['vget{neon_type[2].lane_nox}', [c], [LANE]]}]]
 
   - name: "vqdmlal_laneq_s16"
     doc: "Vector widening saturating doubling multiply accumulate with scalar"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7411,9 +7124,9 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7427,8 +7140,8 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [int32x4_t, int16x8_t, int16x8_t, int32x4_t, _high_s16]
@@ -7443,9 +7156,9 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl2, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7463,13 +7176,13 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i32", "i16"]
     compose:
       - Let: [x, int32x4_t, {FnCall: [vqdmull_s16, [{FnCall: [vdup_n_s16, [b]]}, {FnCall: [vdup_n_s16, [c]]}]]}]
-      - FnCall: [vqsubs_s32, [a, {FnCall: [simd_extract!, [x, '0']]}]]
+      - FnCall: [vqsubs_s32, [a, {FnCall: ['vgetq_lane_s32', [x], [0]]}]]
 
   - name: "vqdmlsls_s32"
     doc: "Signed saturating doubling multiply-subtract long"
@@ -7477,7 +7190,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "i32", "i32", "i64"]
@@ -7492,7 +7205,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -7502,16 +7215,16 @@ intrinsics:
       - ["i64", "i32", int32x4_t, "i64", 's_laneq_s32', '2', 's_s32']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
-      - FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+      - FnCall: ["vqdmlsl{type[6]}", [a, b, {FnCall: ['vget{neon_type[2].lane_nox}', [c], [LANE]]}]]
 
   - name: "vqdmlsl_laneq_s16"
     doc: "Vector widening saturating doubling multiply subtract with scalar"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7525,9 +7238,9 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7542,7 +7255,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", "i16", "i16", int16x4_t, 'h_s16']
@@ -7550,7 +7263,7 @@ intrinsics:
     compose:
       - Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [a]]}]
       - Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[3].no}", [b]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}, '0']]
+      - FnCall: ['vget{neon_type[3].lane_nox}', [{FnCall: ["vqdmulh{neon_type[3].no}", [a, b]]}], ['0']]
 
   - name: "vqdmulhh{type[3]}"
     doc: "Signed saturating doubling multiply returning high half"
@@ -7559,7 +7272,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7567,7 +7280,7 @@ intrinsics:
       - ["i16", int16x8_t, "i16", '_laneq_s16', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]]
-      - Let: [b, 'i16', {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, 'i16', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ['vqdmulhh_s16', [a, b]]
 
   - name: "vqdmulhs{type[3]}"
@@ -7577,7 +7290,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7585,7 +7298,7 @@ intrinsics:
       - ["i32", int32x4_t, "i32", "_laneq_s32", '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[4]}"]]
-      - Let: [b, 'i32', {FnCall: [simd_extract!, [b, 'N as u32']]}]
+      - Let: [b, 'i32', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
       - FnCall: ['vqdmulhs_s32', [a, b]]
 
   - name: "vqmovn_high{neon_type[1].noq}"
@@ -7593,30 +7306,30 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqxtn2]]}]]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int32x4_t]
     compose:
-      - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}]]
 
   - name: "vqmovn_high{neon_type[1].noq}"
     doc: "Signed saturating extract narrow"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqxtn2]]}]]
+      - *neon-stable
     safety: safe
     types:
-      - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [uint8x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint32x4_t]
     compose:
-      - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}, "{type[3]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovn{neon_type[1].noq}", [b]]}]]
 
   - name: "vqmovn{type[2]}"
     doc: "Saturating extract narrow"
@@ -7624,13 +7337,13 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", "i8", 'h_s16', s16]
       - ["i32", "i16", 's_s32', s32]
     compose:
-      - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']]
+      - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']]
 
   - name: "vqmovn{type[2]}"
     doc: "Saturating extract narrow"
@@ -7638,13 +7351,13 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u16", "u8", 'h_u16', 'u16']
       - ["u32", "u16", 's_u32', 'u32']
     compose:
-      - FnCall: [simd_extract!, [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']]
+      - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovn_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']]
 
   - name: "vqmovnd_s64"
     doc: "Saturating extract narrow"
@@ -7652,7 +7365,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "i32"]
@@ -7669,7 +7382,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqxtn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u64", "u32"]
@@ -7686,29 +7399,29 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", "u8", 'h_s16', s16]
       - ["i32", "u16", 's_s32', s32]
       - ["i64", "u32", 'd_s64', s64]
     compose:
-      - FnCall: [simd_extract!, [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}, '0']]
+      - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqmovun_{type[3]}", [{FnCall: ["vdupq_n_{type[3]}", [a]]}]]}], ['0']]
 
   - name: "vqmovun_high_{neon_type[1]}"
     doc: "Signed saturating extract unsigned narrow"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqxtun2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqxtun2]]}]]
+      - *neon-stable
     safety: safe
     types:
-      - [uint8x8_t, int16x8_t, uint8x16_t, s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, int32x4_t, uint16x8_t, s32, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, int64x2_t, uint32x4_t, s64, '[0, 1, 2, 3]']
+      - [uint8x8_t, int16x8_t, uint8x16_t, s16]
+      - [uint16x4_t, int32x4_t, uint16x8_t, s32]
+      - [uint32x2_t, int64x2_t, uint32x4_t, s64]
     compose:
-      - FnCall: [simd_shuffle!, [a, {FnCall: ["vqmovun_{type[3]}", [b]]}, "{type[4]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqmovun_{type[3]}", [b]]}]]
 
   - name: "vqrdmulh{type[1]}"
     doc: "Signed saturating rounding doubling multiply returning high half"
@@ -7716,13 +7429,13 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i16", 'h_s16', 's16']
       - ["i32", 's_s32', 's32']
     compose:
-      - FnCall: [simd_extract!, [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}, '0']]
+      - FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrdmulh_{type[2]}", [{FnCall: ["vdup_n_{type[2]}", [a]]}, {FnCall: ["vdup_n_{type[2]}", [b]]}]]}], ['0']]
 
   - name: "vqrdmulh{type[2]}"
     doc: "Signed saturating rounding doubling multiply returning high half"
@@ -7731,7 +7444,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrdmulh, LANE = 1]]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -7741,7 +7454,7 @@ intrinsics:
       - ["i32", int32x4_t, 's_laneq_s32', 's_s32', '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]]
-      - FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
+      - FnCall: ["vqrdmulh{type[3]}", [a, {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]]
 
   - name: "vqrdmlah{neon_type.no}"
     doc: "Signed saturating rounding doubling multiply accumulate returning high half"
@@ -7780,7 +7493,7 @@ intrinsics:
       - Let: [a, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [a]]}]
       - Let: [b, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [b]]}]
       - Let: [c, "{neon_type[1]}", {FnCall: ["vdup_n_{type[2]}", [c]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}, '0']]
+      - FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrdmlah_{type[2]}", [a, b, c]]}], ['0']]
 
   - name: "vqrdmlah{type[0]}"
     doc: "Signed saturating rounding doubling multiply accumulate returning high half"
@@ -7794,17 +7507,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3']
+      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
-      - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
+      - Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}]
       - FnCall: ["vqrdmlah{neon_type[2].no}", [a, b, c]]
 
   - name: "vqrdmlah{type[4]}"
@@ -7825,7 +7538,7 @@ intrinsics:
       - ["i32", int32x4_t, '2', "s_s32", s_laneq_s32, s_s32]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]]
-      - FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+      - FnCall: ["vqrdmlah{type[5]}", [a, b, {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
   - name: "vqrdmlsh{neon_type.no}"
     doc: "Signed saturating rounding doubling multiply subtract returning high half"
@@ -7864,7 +7577,7 @@ intrinsics:
       - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}]
       - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}]
       - Let: [c, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [c]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}, '0']]
+      - FnCall: ['vget{neon_type[2].lane_nox}', [{FnCall: ["vqrdmlsh_{type[3]}", [a, b, c]]}], ['0']]
 
   - name: "vqrdmlsh{type[0]}"
     doc: "Signed saturating rounding doubling multiply subtract returning high half"
@@ -7878,17 +7591,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3']
+      - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']]
-      - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}]
+      - Let: [c, {FnCall: ['vdup{type[0]}', [c], [LANE]]}]
       - FnCall: ["vqrdmlsh{neon_type[2].no}", [a, b, c]]
 
   - name: "vqrdmlsh{type[3]}"
@@ -7909,7 +7622,7 @@ intrinsics:
       - ["i32", int32x4_t, '2', s_laneq_s32, s_s32]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]]
-      - FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+      - FnCall: ["vqrdmlsh{type[4]}", [a, b, {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
   - name: "vqrshl{type[0]}"
     doc: "Signed saturating rounding shift left"
@@ -7917,7 +7630,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['s_s32', "i32"]
@@ -7935,7 +7648,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i8", 'b_s8', int8x8_t, s8]
@@ -7943,7 +7656,7 @@ intrinsics:
     compose:
       - Let: [a, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [a]]}]
       - Let: [b, "{neon_type[2]}", {FnCall: ["vdup_n_{type[3]}", [b]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[3]}", [a, b]]}, '0']]
+      - FnCall: ['vget{neon_type[2].lane_nox}', [{FnCall: ["vqrshl_{type[3]}", [a, b]]}], ['0']]
 
   - name: "vqrshl{type[2]}"
     doc: "Unsigned signed saturating rounding shift left"
@@ -7951,7 +7664,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u32", "i32", 's_u32']
@@ -7969,7 +7682,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u8", "i8", "b_u8", uint8x8_t, int8x8_t, s8]
@@ -7977,7 +7690,7 @@ intrinsics:
     compose:
       - Let: [a, "{neon_type[3]}", {FnCall: ["vdup_n_{type[0]}", [a]]}]
       - Let: [b, "{neon_type[4]}", {FnCall: ["vdup_n_{type[5]}", [b]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrshl_{type[0]}", [a, b]]}, '0']]
+      - FnCall: ['vget{neon_type[3].lane_nox}', [{FnCall: ["vqrshl_{type[0]}", [a, b]]}], ['0']]
 
   - name: "vqrshrn{type[2]}"
     doc: "Signed saturating rounded shift right narrow"
@@ -7986,7 +7699,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -7996,25 +7709,25 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrshrn_n{neon_type[4].noq}::<N>", [a]]}, '0']]
+      - FnCall: ['vget_lane_{type[1]}', [{FnCall: ["vqrshrn_n{neon_type[4].noq}::<N>", [a]]}], ['0']]
 
   - name: "vqrshrn{type[3]}"
     doc: "Signed saturating rounded shift right narrow"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqrshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', 'N >= 1 && N <= 8']
-      - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', '[0, 1, 2, 3, 4, 5, 6, 7]', 'N >= 1 && N <= 16']
-      - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', '[0, 1, 2, 3]', 'N >= 1 && N <= 32']
+      - [int8x8_t, int16x8_t, int8x16_t, '_high_n_s16', 'N >= 1 && N <= 8']
+      - [int16x4_t, int32x4_t, int16x8_t, '_high_n_s32', 'N >= 1 && N <= 16']
+      - [int32x2_t, int64x2_t, int32x4_t, '_high_n_s64', 'N >= 1 && N <= 32']
     compose:
-      - FnCall: [static_assert!, ["{type[5]}"]]
-      - FnCall: [simd_shuffle!, [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::<N>", [b]]}, "{type[4]}"]]
+      - FnCall: [static_assert!, ["{type[4]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vqrshrn_n{neon_type[1].noq}::<N>", [b]]}]]
 
   - name: "vqrshrn{type[0]}"
     doc: "Unsigned saturating rounded shift right narrow"
@@ -8023,7 +7736,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8033,31 +7746,30 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ['{type[3]}']]
       - Let: [a, "{neon_type[4]}", {FnCall: ["vdup{type[5]}", [a]]}]
-      - FnCall: [simd_extract!, [{FnCall: ["vqrshrn{type[6]}::<N>", [a]]}, '0']]
+      - FnCall: ['vget_lane_{type[2]}', [{FnCall: ["vqrshrn{type[6]}::<N>", [a]]}], ['0']]
 
   - name: "vqrshrn_high_n{neon_type[1].noq}"
     doc: "Unsigned saturating rounded shift right narrow"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqrshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ['{type[3]}']]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
             - FnCall:
                 - "vqrshrn_n{neon_type[1].noq}::<N>"
                 - - b
-            - "{type[4]}"
 
   - name: "vqrshrun{type[0]}"
     doc: "Signed saturating rounded shift right unsigned narrow"
@@ -8066,7 +7778,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8080,35 +7792,34 @@ intrinsics:
           - "{neon_type[4]}"
           - FnCall: ["vdupq_n_{type[5]}", [a]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqrshrun_n_{type[5]}::<N>"
                 - - a
-            - '0'
+          - - '0'
 
   - name: "vqrshrun_high_n{neon_type[1].noq}"
     doc: "Signed saturating rounded shift right unsigned narrow"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqrshrun2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', s16, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', s32, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', s64, '[0, 1, 2, 3]']
+      - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
             - FnCall:
-                - "vqrshrun_n_{type[4]}::<N>"
+                - "vqrshrun_n_{neon_type[1]}::<N>"
                 - - b
-            - "{type[5]}"
 
   - name: "vqshld_{type}"
     doc: "Signed saturating shift left"
@@ -8116,7 +7827,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - i64
@@ -8133,7 +7844,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [b_s8, "i8", int8x8_t]
@@ -8147,7 +7858,7 @@ intrinsics:
               - "vqshl{neon_type[2].noq}"
               - - FnCall: ["vdup_n{neon_type[2].no}", [a]]
                 - FnCall: ["vdup_n{neon_type[2].no}", [b]]
-      - FnCall: [simd_extract!, [c, '0']]
+      - FnCall: ['vget{neon_type[2].lane_nox}', [c], ['0']]
 
   - name: "vqshl{type[0]}"
     doc: "Signed saturating shift left"
@@ -8156,7 +7867,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshl, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8167,11 +7878,11 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vqshl_n_{type[3]}::<N>"
                 - - FnCall: ["vdup_n_{type[3]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vqshld_{type[0]}"
     doc: "Unsigned saturating shift left"
@@ -8179,7 +7890,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u64", "i64"]
@@ -8196,7 +7907,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [b_u8, "u8", "i8", uint8x8_t, int8x8_t]
@@ -8210,7 +7921,7 @@ intrinsics:
               - "vqshl{neon_type[3].noq}"
               - - FnCall: ["vdup{neon_type[3].N}", [a]]
                 - FnCall: ["vdup{neon_type[4].N}", [b]]
-      - FnCall: [simd_extract!, [c, '0']]
+      - FnCall: ['vget{neon_type[3].lane_nox}', [c], ['0']]
 
   - name: "vqshl{type[0]}"
     doc: "Unsigned saturating shift left"
@@ -8219,7 +7930,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshl, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8230,9 +7941,9 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[2]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall: ["vqshl_n_{type[1]}::<N>", [{FnCall: ["vdup_n_{type[1]}", [a]]}]]
-            - '0'
+          - - '0'
 
   - name: "vqshrnd_n_s64"
     doc: "Signed saturating shift right narrow"
@@ -8241,7 +7952,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8265,7 +7976,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8274,33 +7985,32 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqshrn_n_{type[4]}::<N>"
                 - - FnCall: ["vdupq_n_{type[4]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vqshrn{type[0]}"
     doc: "Signed saturating shift right narrow"
     arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
     return_type: "{neon_type[3]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]', s16]
-      - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]', s32]
-      - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]', s64]
+      - [_high_n_s16, int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8']
+      - [_high_n_s32, int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16']
+      - [_high_n_s64, int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[4]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[1]}'
           - - a
-            - FnCall: ["vqshrn_n_{type[6]}::<N>", [b]]
-            - "{type[5]}"
+            - FnCall: ["vqshrn_n_{neon_type[2]}::<N>", [b]]
 
   - name: "vqshrnd_n_u64"
     doc: "Unsigned saturating shift right narrow"
@@ -8309,7 +8019,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8333,7 +8043,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8342,33 +8052,32 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - "simd_extract!"
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqshrn_n_{type[1]}::<N>"
                 - - FnCall: ["vdupq_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vqshrn{type[0]}"
     doc: "Unsigned saturating shift right narrow"
     arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}"]
     return_type: "{neon_type[3]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uqshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [_high_n_u16, uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [_high_n_u32, uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [_high_n_u64, uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[4]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[1]}'
           - - a
             - FnCall: ["vqshrn_n_{neon_type[2]}::<N>", [b]]
-            - "{type[5]}"
 
   - name: "vqshrun{type[0]}"
     doc: "Signed saturating shift right unsigned narrow"
@@ -8377,7 +8086,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8387,33 +8096,32 @@ intrinsics:
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqshrun_n_{type[4]}::<N>"
                 - - FnCall: ["vdupq_n_{type[4]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vqshrun_high_n_{neon_type[1]}"
     doc: "Signed saturating shift right unsigned narrow"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sqshrun2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [uint8x8_t, int16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, int32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, int64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
             - FnCall: ["vqshrun_n_{neon_type[1]}::<N>", [b]]
-            - "{type[4]}"
 
   - name: "vsqadd{type[0]}"
     doc: "Unsigned saturating accumulate of signed value"
@@ -8421,19 +8129,19 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [b_u8, "u8", "i8", s8]
       - [h_u16, "u16", "i16", s16]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vsqadd_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[2]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vsqadd{type[0]}"
     doc: "Unsigned saturating accumulate of signed value"
@@ -8441,7 +8149,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [usqadd]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_u32, "u32", "i32"]
@@ -8459,7 +8167,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float32x2_t
@@ -8476,7 +8184,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fsqrt]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -8506,7 +8214,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [_f64, float64x1_t, v1f64]
@@ -8524,7 +8232,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrts]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_f32, "f32"]
@@ -8563,7 +8271,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [_f64, float64x1_t, v1f64]
@@ -8581,7 +8289,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpe]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_f32, "f32"]
@@ -8620,7 +8328,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [_f64, float64x1_t, v1f64]
@@ -8638,7 +8346,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecps]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_f32, "f32"]
@@ -8677,7 +8385,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frecpx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [s_f32, "f32"]
@@ -8709,7 +8417,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.frecpx.{type[1]}"
               arch: aarch64,arm64ec
 
-
   - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
     doc: Vector reinterpret cast operation
     arguments: ["a: {type[0]}"]
@@ -8726,54 +8433,66 @@ intrinsics:
       - [poly64x2_t, uint64x2_t]
       - [int64x2_t, poly64x2_t]
       - [uint64x2_t, poly64x2_t]
+      - [float64x1_t, int64x1_t]
+      - [float64x2_t, int64x2_t]
+      - [float64x1_t, uint64x1_t]
+      - [float64x2_t, uint64x2_t]
+      - [float64x1_t, poly64x1_t]
+      - [float64x2_t, poly64x2_t]
+      - [int64x1_t, float64x1_t]
+      - [int64x2_t, float64x2_t]
+      - [uint64x1_t, float64x1_t]
+      - [uint64x2_t, float64x2_t]
+      - [poly64x1_t, float64x1_t]
+      - [poly64x2_t, float64x2_t]
+    compose:
+      - FnCall: [transmute, [a]]
+
+  - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
+    doc: Vector reinterpret cast operation
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr: [*neon-stable]
+    assert_instr: [nop]
+    safety: safe
+    big_endian_inverse: true
+    types:
       - [float64x1_t, int8x8_t]
       - [float64x1_t, int16x4_t]
       - [float64x1_t, int32x2_t]
-      - [float64x1_t, int64x1_t]
       - [float64x2_t, int8x16_t]
       - [float64x2_t, int16x8_t]
       - [float64x2_t, int32x4_t]
-      - [float64x2_t, int64x2_t]
       - [float64x1_t, uint8x8_t]
       - [float64x1_t, uint16x4_t]
       - [float64x1_t, uint32x2_t]
-      - [float64x1_t, uint64x1_t]
       - [float64x2_t, uint8x16_t]
       - [float64x2_t, uint16x8_t]
       - [float64x2_t, uint32x4_t]
-      - [float64x2_t, uint64x2_t]
       - [float64x1_t, poly8x8_t]
       - [float64x1_t, poly16x4_t]
       - [float32x2_t, poly64x1_t]
-      - [float64x1_t, poly64x1_t]
       - [float64x2_t, poly8x16_t]
       - [float64x2_t, poly16x8_t]
       - [float32x4_t, poly64x2_t]
-      - [float64x2_t, poly64x2_t]
       - [float64x2_t, p128]
       - [int8x8_t, float64x1_t]
       - [int16x4_t, float64x1_t]
       - [int32x2_t, float64x1_t]
-      - [int64x1_t, float64x1_t]
       - [int8x16_t, float64x2_t]
       - [int16x8_t, float64x2_t]
       - [int32x4_t, float64x2_t]
-      - [int64x2_t, float64x2_t]
       - [poly8x8_t, float64x1_t]
       - [uint16x4_t, float64x1_t]
       - [uint32x2_t, float64x1_t]
-      - [uint64x1_t, float64x1_t]
       - [poly8x16_t, float64x2_t]
       - [uint16x8_t, float64x2_t]
       - [uint32x4_t, float64x2_t]
-      - [uint64x2_t, float64x2_t]
       - [uint8x8_t, float64x1_t]
       - [poly16x4_t, float64x1_t]
-      - [poly64x1_t, float64x1_t]
       - [poly64x1_t, float32x2_t]
       - [uint8x16_t, float64x2_t]
       - [poly16x8_t, float64x2_t]
-      - [poly64x2_t, float64x2_t]
       - [poly64x2_t, float32x4_t]
       - [p128, float64x2_t]
       - [float32x2_t, float64x1_t]
@@ -8789,11 +8508,11 @@ intrinsics:
     arguments: ["a: {type[0]}"]
     return_type: "{type[1]}"
     attr:
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
+    big_endian_inverse: true
     types:
       - [float64x1_t, float16x4_t]
       - [float16x4_t, float64x1_t]
@@ -8810,7 +8529,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - "i64"
@@ -8827,7 +8546,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u64", "i64"]
@@ -8845,7 +8564,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8861,7 +8580,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -8875,197 +8594,260 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [rshrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8']
+      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16']
+      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32']
+      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
             - FnCall: ["vrshrn_n_{neon_type[1]}::<N>", [b]]
-            - "{type[4]}"
 
   - name: "vrsubhn_high_{neon_type[1]}"
     doc: "Rounding subtract returning high narrow"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[3]}"
     attr:
-      - *little-endian
+      - *cfg-little-endian
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int64x2_t, int32x4_t]
+      - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t]
     compose:
-      - Let:
-          - x
-          - "{neon_type[0]}"
-          - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]
-      - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]}]]
 
   - name: "vrsubhn_high_{neon_type[1]}"
     doc: "Rounding subtract returning high narrow"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[3]}"
     attr:
-      - *big-endian
+      - *cfg-big-endian
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [rsubhn]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int64x2_t, int32x4_t]
+      - [uint8x8_t, uint16x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint64x2_t, uint32x4_t]
     compose:
-      - Let:
-          - x
-          - "{neon_type[0]}"
-          - FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]
-      - FnCall: [simd_shuffle!, [a, x, "{type[4]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, {FnCall: ["vrsubhn_{neon_type[1]}", [b, c]]}]]
 
   - name: "vcopy{neon_type[0].lane_nox}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int8x8_t, int8x8_t, int8x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [int16x4_t, int16x4_t, int16x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [int32x2_t, int32x2_t, int32x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [float32x2_t, float32x2_t, float32x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
+      - [int8x8_t, int8x8_t, int8x8_t, '3', '3']
+      - [int16x4_t, int16x4_t, int16x4_t, '2', '2']
+      - [int32x2_t, int32x2_t, int32x2_t, '1', '1']
+      - [uint8x8_t, uint8x8_t, uint8x8_t, '3', '3']
+      - [uint16x4_t, uint16x4_t, uint16x4_t, '2', '2']
+      - [uint32x2_t, uint32x2_t, uint32x2_t, '1', '1']
+      - [poly8x8_t, poly8x8_t, poly8x8_t, '3', '3']
+      - [poly16x4_t, poly16x4_t, poly16x4_t, '2', '2']
+      - [float32x2_t, float32x2_t, float32x2_t, '1', '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
-      - Identifier: ["{type[5]}", Symbol]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcopy{neon_type[0].lane_nox}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - *neon-stable
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety: safe
+    types:
+      - [int8x16_t, int8x8_t, int8x16_t, '4', '3']
+      - [int16x8_t, int16x4_t, int16x8_t, '3', '2']
+      - [int32x4_t, int32x2_t, int32x4_t, '2', '1']
+      - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3']
+      - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2']
+      - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1']
+      - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3']
+      - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
+      - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
+      - Let: [b, '{neon_type[2]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[2].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
+
+  - name: "vcopy_lane_{neon_type[0]}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["_a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE1 = {type[1]}', 'LANE2 = {type[1]}']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - *neon-stable
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety: safe
+    types:
+      - [float64x1_t, '0', 'b']
+      - [poly64x1_t, '0', 'b']
+      - [uint64x1_t, '0', 'b']
+      - [int64x1_t, '0', 'b']
+    compose:
+      - FnCall: [static_assert!, ['LANE1 == {type[1]}']]
+      - FnCall: [static_assert!, ['LANE2 == {type[1]}']]
+      - Identifier: ["{type[2]}", Symbol]
+
+  - name: "vcopy_laneq_{neon_type[0]}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["_a: {neon_type[1]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE1 = {type[2]}', 'LANE2 = {type[3]}']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - *neon-stable
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety: safe
+    types:
+      - [float64x2_t, float64x1_t, '0', '1']
+      - [poly64x2_t, poly64x1_t, '0', '1']
+      - [uint64x2_t, uint64x1_t, '0', '1']
+      - [int64x2_t, int64x1_t, '0', '1']
+    compose:
+      - FnCall: [static_assert!, ['LANE1 == {type[2]}']]
+      - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[3]}']]
+      - FnCall: [transmute, [{FnCall: ['vget{neon_type[0].lane_nox}', [b], [LANE2]]}]]
+
+  - name: "vcopy{neon_type[0].laneq_nox}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int8x16_t, int8x8_t, int8x16_t, '4', '3', ' let b: int8x16_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [int16x8_t, int16x4_t, int16x8_t, '3', '2', ' let b: int16x8_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [int32x4_t, int32x2_t, int32x4_t, '2', '1', ' let b: int32x4_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint8x16_t, uint8x8_t, uint8x16_t, '4', '3', ' let b: uint8x16_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint16x8_t, uint16x4_t, uint16x8_t, '3', '2', ' let b: uint16x8_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint32x4_t, uint32x2_t, uint32x4_t, '2', '1', ' let b: uint32x4_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly8x16_t, poly8x8_t, poly8x16_t, '4', '3', ' let b: poly8x16_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly16x8_t, poly16x4_t, poly16x8_t, '3', '2', ' let b: poly16x8_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
+      - [int8x16_t, int8x16_t, int8x16_t, '4', '4']
+      - [int16x8_t, int16x8_t, int16x8_t, '3', '3']
+      - [int32x4_t, int32x4_t, int32x4_t, '2', '2']
+      - [int64x2_t, int64x2_t, int64x2_t, '1', '1']
+      - [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4']
+      - [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3']
+      - [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2']
+      - [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1']
+      - [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4']
+      - [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3']
+      - [float32x4_t, float32x4_t, float32x4_t, '2', '2']
+      - [float64x2_t, float64x2_t, float64x2_t, '1', '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
-      - Identifier: ["{type[5]}", Symbol]
-      - Identifier: ["{type[6]}", Symbol]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcopy{neon_type[0].laneq_nox}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int8x16_t, int8x16_t, int8x16_t, '4', '4', ' unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [int16x8_t, int16x8_t, int16x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [int32x4_t, int32x4_t, int32x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [int64x2_t, int64x2_t, int64x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint8x16_t, uint8x16_t, uint8x16_t, '4', '4', ' unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint16x8_t, uint16x8_t, uint16x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint32x4_t, uint32x4_t, uint32x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint64x2_t, uint64x2_t, uint64x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly8x16_t, poly8x16_t, poly8x16_t, '4', '4', ' unsafe { match LANE1 & 0b1111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32, 8, 9, 10, 11, 12, 13, 14, 15]), 8 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 16 + LANE2 as u32, 9, 10, 11, 12, 13, 14, 15]), 9 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 16 + LANE2 as u32, 10, 11, 12, 13, 14, 15]), 10 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 16 + LANE2 as u32, 11, 12, 13, 14, 15]), 11 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 16 + LANE2 as u32, 12, 13, 14, 15]), 12 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16 + LANE2 as u32, 13, 14, 15]), 13 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16 + LANE2 as u32, 14, 15]), 14 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16 + LANE2 as u32, 15]), 15 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly16x8_t, poly16x8_t, poly16x8_t, '3', '3', ' unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 8 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 8 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [float32x4_t, float32x4_t, float32x4_t, '2', '2', ' unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [float64x2_t, float64x2_t, float64x2_t, '1', '1', ' unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
+      - [poly64x2_t, poly64x2_t, poly64x2_t, '1', '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
-      - Identifier: ["{type[5]}", Symbol]
+      - FnCall: [simd_insert!, [a, LANE1 as u32, {FnCall: [simd_extract!, [b, LANE2 as u32, p64]]}]]
 
   - name: "vcopy{neon_type[0].laneq_nox}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 0', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int8x8_t, int8x16_t, int8x8_t, '3', '4', ' let a: int8x16_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [int16x4_t, int16x8_t, int16x4_t, '2', '3', ' let a: int16x8_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [int32x2_t, int32x4_t, int32x2_t, '1', '2', ' let a: int32x4_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4', ' let a: uint8x16_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3', ' let a: uint16x8_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2', 'let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4', ' let a: poly8x16_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) };', 'unsafe { match LANE1 & 0b111 { 0 => simd_shuffle!(a, b, [16 + LANE2 as u32, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [0, 16 + LANE2 as u32, 2, 3, 4, 5, 6, 7]), 2 => simd_shuffle!(a, b, [0, 1, 16 + LANE2 as u32, 3, 4, 5, 6, 7]), 3 => simd_shuffle!(a, b, [0, 1, 2, 16 + LANE2 as u32, 4, 5, 6, 7]), 4 => simd_shuffle!(a, b, [0, 1, 2, 3, 16 + LANE2 as u32, 5, 6, 7]), 5 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 16 + LANE2 as u32, 6, 7]), 6 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 16 + LANE2 as u32, 7]), 7 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 16 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3', ' let a: poly16x8_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [8 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 8 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 8 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 8 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [float32x2_t, float32x4_t, float32x2_t, '1', '2', ' let a: float32x4_t = unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
+      - [int8x8_t, int8x16_t, int8x8_t, '3', '4']
+      - [int16x4_t, int16x8_t, int16x4_t, '2', '3']
+      - [int32x2_t, int32x4_t, int32x2_t, '1', '2']
+      - [uint8x8_t, uint8x16_t, uint8x8_t, '3', '4']
+      - [uint16x4_t, uint16x8_t, uint16x4_t, '2', '3']
+      - [uint32x2_t, uint32x4_t, uint32x2_t, '1', '2']
+      - [poly8x8_t, poly8x16_t, poly8x8_t, '3', '4']
+      - [poly16x4_t, poly16x8_t, poly16x4_t, '2', '3']
+      - [float32x2_t, float32x4_t, float32x2_t, '1', '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '{type[3]}']]
       - FnCall: [static_assert_uimm_bits!, [LANE2, '{type[4]}']]
-      - Identifier: ["{type[5]}", Symbol]
-      - Identifier: ["{type[6]}", Symbol]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcopyq_lane_{neon_type[0]}"
     doc: "Insert vector element from another vector element"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['1', '3']]
+      - *neon-stable
+    static_defs: ['const LANE1: i32, const LANE2: i32']
+    safety: safe
+    types:
+      - [int64x2_t, int64x1_t]
+      - [uint64x2_t, uint64x1_t]
+      - [float64x2_t, float64x1_t]
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE1, '1']]
+      - FnCall: [static_assert!, ['LANE2 == 0']]
+      - Let: [b, '{neon_type[0]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[0].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
+
+  - name: "vcopyq_lane_{neon_type[0]}"
+    doc: "Insert vector element from another vector element"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [int64x2_t, int64x1_t, ' let b: int64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [uint64x2_t, uint64x1_t, ' let b: uint64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [poly64x2_t, poly64x1_t, ' let b: poly64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
-      - [float64x2_t, float64x1_t, ' let b: float64x2_t = unsafe { simd_shuffle!(b, b, [0, 1]) };', 'unsafe { match LANE1 & 0b1 { 0 => simd_shuffle!(a, b, [2 + LANE2 as u32, 1]), 1 => simd_shuffle!(a, b, [0, 2 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
+      - [poly64x2_t, poly64x1_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, '1']]
       - FnCall: [static_assert!, ['LANE2 == 0']]
-      - Identifier: ['{type[2]}', Symbol]
-      - Identifier: ['{type[3]}', Symbol]
+      - Let: [b, '{neon_type[0]}', {FnCall: ['vcombine{neon_type[1].no}', [b, b]]}]
+      - FnCall: [simd_insert!, [a, LANE1 as u32, {FnCall: [simd_extract!, [b, LANE2 as u32, p64]]}]]
 
   - name: "vcopyq_lane_f32"
     doc: "Insert vector element from another vector element"
@@ -9074,16 +8856,15 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [mov, 'LANE1 = 1', 'LANE2 = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1', '3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE1: i32, const LANE2: i32']
     safety: safe
     types:
-      - [float32x4_t, float32x2_t, ' let b: float32x4_t = unsafe { simd_shuffle!(b, b, [0, 1, 2, 3]) };', 'unsafe { match LANE1 & 0b11 { 0 => simd_shuffle!(a, b, [4 + LANE2 as u32, 1, 2, 3]), 1 => simd_shuffle!(a, b, [0, 4 + LANE2 as u32, 2, 3]), 2 => simd_shuffle!(a, b, [0, 1, 4 + LANE2 as u32, 3]), 3 => simd_shuffle!(a, b, [0, 1, 2, 4 + LANE2 as u32]), _ => unreachable_unchecked(), } }']
+      - [float32x4_t, float32x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE1, 2]]
       - FnCall: [static_assert_uimm_bits!, [LANE2, 1]]
-      - Identifier: ["{type[2]}", Symbol]
-      - Identifier: ["{type[3]}", Symbol]
+      - FnCall: ['vset{neon_type[0].lane_nox}', [{FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE2]]}, a], [LANE1]]
 
   - name: "vcreate_f64"
     doc: "Insert vector element from another vector element"
@@ -9091,8 +8872,9 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
+    big_endian_inverse: true
     types:
       - ["u64", float64x1_t]
     compose:
@@ -9105,7 +8887,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -9121,7 +8903,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -9136,7 +8918,7 @@ intrinsics:
     return_type: "{type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - "i64"
@@ -9154,7 +8936,7 @@ intrinsics:
     return_type: "{type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["u64", "i64"]
@@ -9171,63 +8953,62 @@ intrinsics:
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [sshll2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]']
-      - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32', '[2, 3]']
+      - [int8x16_t, int16x8_t, int8x8_t, 'N >= 0 && N <= 8']
+      - [int16x8_t, int32x4_t, int16x4_t, 'N >= 0 && N <= 16']
+      - [int32x4_t, int64x2_t, int32x2_t, 'N >= 0 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - FnCall: ["vshll_n_{neon_type[2]}::<N>", [b]]
+      - Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - FnCall: ["vshll_n_{neon_type[2]}", [b], [N]]
 
   - name: "vshll_high_n_{neon_type[0]}"
     doc: "Signed shift left long"
     arguments: ["a: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [ushll2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16', '[4, 5, 6, 7]']
-      - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32', '[2, 3]']
+      - [uint8x16_t, uint16x8_t, uint8x8_t, 'N >= 0 && N <= 8']
+      - [uint16x8_t, uint32x4_t, uint16x4_t, 'N >= 0 && N <= 16']
+      - [uint32x4_t, uint64x2_t, uint32x2_t, 'N >= 0 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
-      - Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
-      - FnCall: ["vshll_n_{neon_type[2]}::<N>", [b]]
+      - Let: [b, "{neon_type[2]}", {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
+      - FnCall: ["vshll_n_{neon_type[2]}", [b], [N]]
 
   - name: "vshrn_high_n_{neon_type[1]}"
     doc: "Shift right narrow"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [shrn2, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32', '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t, 'N >= 1 && N <= 8']
+      - [int16x4_t, int32x4_t, int16x8_t, 'N >= 1 && N <= 16']
+      - [int32x2_t, int64x2_t, int32x4_t, 'N >= 1 && N <= 32']
+      - [uint8x8_t, uint16x8_t, uint8x16_t, 'N >= 1 && N <= 8']
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'N >= 1 && N <= 16']
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'N >= 1 && N <= 32']
     compose:
       - FnCall: [static_assert!, ["{type[3]}"]]
       - FnCall:
-          - simd_shuffle!
+          - 'vcombine_{neon_type[0]}'
           - - a
-            - FnCall: ["vshrn_n_{neon_type[1]}::<N>", [b]]
-            - "{type[4]}"
+            - FnCall: ["vshrn_n_{neon_type[1]}", [b], [N]]
 
   - name: "vsm3partw1{neon_type.no}"
     doc: "SM3PARTW1"
@@ -9522,7 +9303,7 @@ intrinsics:
           - transmute
           - - FnCall:
                 - _vrnd32x_f64
-                - - FnCall: [simd_extract!, [a, 0]]
+                - - FnCall: ['vget{neon_type.lane_nox}', [a], [0]]
 
   - name: "vrnd32z{neon_type.no}"
     doc: "Floating-point round to 32-bit integer toward zero"
@@ -9566,7 +9347,7 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall:
           - transmute
-          - - FnCall: [_vrnd32z_f64, [{FnCall: [simd_extract!, [a, 0]]}]]
+          - - FnCall: [_vrnd32z_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]]
 
   - name: "vrnd64x{neon_type.no}"
     doc: "Floating-point round to 64-bit integer, using current rounding mode"
@@ -9610,7 +9391,7 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall:
           - transmute
-          - - FnCall: [_vrnd64x_f64, [{FnCall: [simd_extract!, [a, 0]]}]]
+          - - FnCall: [_vrnd64x_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]]
 
   - name: "vrnd64z{neon_type.no}"
     doc: "Floating-point round to 64-bit integer toward zero"
@@ -9654,7 +9435,7 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall:
           - transmute
-          - - FnCall: [_vrnd64z_f64, [{FnCall: [simd_extract!, [a, 0]]}]]
+          - - FnCall: [_vrnd64z_f64, [{FnCall: ['vget{neon_type.lane_nox}', [a], [0]]}]]
 
   - name: "vtrn1{neon_type[0].no}"
     doc: Transpose vectors
@@ -9662,7 +9443,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]]
     safety: safe
     types:
       - [int8x8_t, '[0, 8, 2, 10, 4, 12, 6, 14]']
@@ -9690,9 +9471,9 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn1]]}]]
     safety: safe
     types:
       - [float16x4_t, '[0, 4, 2, 6]']
@@ -9706,7 +9487,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
     types:
       - [int32x2_t, '[0, 2]']
@@ -9725,7 +9506,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]]
     safety: safe
     types:
       - [int8x8_t, '[1, 9, 3, 11, 5, 13, 7, 15]']
@@ -9752,9 +9533,9 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [trn2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [trn2]]}]]
     safety: safe
     types:
       - [float16x4_t, '[1, 5, 3, 7]']
@@ -9768,7 +9549,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
     types:
       - [int32x2_t, '[1, 3]']
@@ -9787,7 +9568,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
     types:
       - [int8x8_t, '[4, 12, 5, 13, 6, 14, 7, 15]']
@@ -9821,9 +9602,9 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
     types:
       - [float16x4_t, '[2, 6, 3, 7]']
@@ -9837,7 +9618,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
     types:
       - [int8x8_t, '[0, 8, 1, 9, 2, 10, 3, 11]']
@@ -9872,9 +9653,9 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
     types:
       - [float16x4_t, '[0, 4, 1, 5]']
@@ -9888,7 +9669,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip1]]}]]
     safety: safe
     types:
       - [int32x2_t, '[0, 2]']
@@ -9907,7 +9688,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]]
     safety: safe
     types:
       - [int8x8_t, '[0, 2, 4, 6, 8, 10, 12, 14]']
@@ -9934,9 +9715,9 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp1]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp1]]}]]
     safety: safe
     types:
       - [float16x4_t, '[0, 2, 4, 6]']
@@ -9950,7 +9731,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [zip2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [zip2]]}]]
     safety: safe
     types:
       - [int32x2_t, '[1, 3]']
@@ -9969,7 +9750,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]]
     safety: safe
     types:
       - [int8x8_t, '[1, 3, 5, 7, 9, 11, 13, 15]']
@@ -10000,9 +9781,9 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uzp2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uzp2]]}]]
     safety: safe
     types:
       - [float16x4_t, '[1, 3, 5, 7]']
@@ -10019,23 +9800,21 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [uabal2]]}]]
+      - *neon-stable
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [uabal2]]}]]
     safety: safe
     types:
-      - [uint16x8_t, uint8x16_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '[4, 5, 6, 7]', '[4, 5, 6, 7]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '[2, 3]', '[2, 3]']
+      - [uint16x8_t, uint8x16_t, uint8x8_t]
+      - [uint32x4_t, uint16x8_t, uint16x4_t]
+      - [uint64x2_t, uint32x4_t, uint32x2_t]
     compose:
       - Let:
           - d
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - Let:
           - e
-          - "{neon_type[2]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
-      - Let: [f, "{neon_type[2]}", {FnCall: ["vabd_{neon_type[2]}", [d, e]]}]
+          - FnCall: ['vget_high_{neon_type[1]}', [c]]
+      - Let: [f, {FnCall: ["vabd_{neon_type[2]}", [d, e]]}]
       - FnCall:
           - simd_add
           - - a
@@ -10047,39 +9826,28 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-stable
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sabal2]]}]]
+      - FnCall: [cfg_attr, [*cfg-test-not-msvc-little-endian, {FnCall: [assert_instr, [sabal2]]}]]
     safety: safe
     types:
-      - [int16x8_t, int8x16_t, int8x16_t, '[8, 9, 10, 11, 12, 13, 14, 15]', int8x8_t, uint8x8_t]
-      - [int32x4_t, int16x8_t, int16x8_t, '[4, 5, 6, 7]', int16x4_t, uint16x4_t]
-      - [int64x2_t, int32x4_t, int32x4_t, '[2, 3]', int32x2_t, uint32x2_t]
+      - [int16x8_t, int8x16_t, int8x16_t, int8x8_t, uint8x8_t]
+      - [int32x4_t, int16x8_t, int16x8_t, int16x4_t, uint16x4_t]
+      - [int64x2_t, int32x4_t, int32x4_t, int32x2_t, uint32x2_t]
     compose:
       - Let:
           - d
-          - "{neon_type[4]}"
-          - FnCall:
-              - simd_shuffle!
-              - - b
-                - b
-                - "{type[3]}"
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - Let:
           - e
-          - "{neon_type[4]}"
-          - FnCall:
-              - simd_shuffle!
-              - - c
-                - c
-                - "{type[3]}"
+          - FnCall: ['vget_high_{neon_type[2]}', [c]]
       - Let:
           - f
-          - "{neon_type[4]}"
           - FnCall:
-              - "vabd{neon_type[4].no}"
+              - "vabd{neon_type[3].no}"
               - - d
                 - e
       - Let:
           - f
-          - "{neon_type[5]}"
+          - "{neon_type[4]}"
           - FnCall:
               - simd_cast
               - - f
@@ -10146,7 +9914,7 @@ intrinsics:
     attr:
       - *neon-stable
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sri, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [bfxil, 'N = 2']]}]]
     safety: safe
     types:
       - i64
@@ -10172,7 +9940,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float32x2_t
@@ -10190,7 +9958,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -10213,7 +9981,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -10237,7 +10005,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {neon_type[1]}"]
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st1]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -10264,7 +10032,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10279,7 +10047,7 @@ intrinsics:
           - "vfma{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
 
   - name: "vfma{type[3]}"
@@ -10290,7 +10058,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
@@ -10305,7 +10073,7 @@ intrinsics:
           - "vfma{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
 
   # vfms lane f16
@@ -10317,7 +10085,7 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
@@ -10332,7 +10100,7 @@ intrinsics:
           - "vfms{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
 
   - name: "vfms{type[1]}"
@@ -10358,7 +10126,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10369,16 +10137,16 @@ intrinsics:
           - "vfma{neon_type.no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type.N}", [{FnCall: ['vget{neon_type.lane_nox}', [c], [LANE]]}]]
 
   - name: "vfma_laneq_f64"
     doc: "Floating-point fused multiply-add to accumulator"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10389,7 +10157,7 @@ intrinsics:
           - "vfma{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
   - name: "vfmaq_lane_f64"
     doc: "Floating-point fused multiply-add to accumulator"
@@ -10398,7 +10166,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmla, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10409,16 +10177,16 @@ intrinsics:
           - "vfma{neon_type[0].no}"
           - - a
             - b
-            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]
+            - FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]
 
   - name: "vfma{type[2]}"
     doc: "Floating-point fused multiply-add to accumulator"
     arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10427,7 +10195,7 @@ intrinsics:
       - ["f64", float64x2_t, "d_laneq_f64", '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]]
-      - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}]
+      - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]
       - FnCall: ["fma{type[0]}", [b, c, a]]
 
   - name: "vfmad_lane_f64"
@@ -10437,14 +10205,14 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
       - ["f64", float64x1_t]
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [c, 'LANE as u32']]}]
+      - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]
       - FnCall: [fmaf64, [b, c, a]]
 
 
@@ -10469,7 +10237,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmadd, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
@@ -10481,7 +10249,7 @@ intrinsics:
       - ["f16", float16x8_t, 'q_f16', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}]
+      - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [v], [LANE]]}]
       - FnCall: ["vfmah_{type[0]}", [a, b, c]]
 
   - name: "vfmsh_lane{type[2]}"
@@ -10489,7 +10257,7 @@ intrinsics:
     arguments: ["a: {type[0]}", "b: {type[0]}", "v: {neon_type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-fp16
       - *neon-unstable-f16
@@ -10501,7 +10269,7 @@ intrinsics:
       - ["f16", float16x8_t, 'q_f16', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - Let: [c, "{type[0]}", {FnCall: [simd_extract!, [v, 'LANE as u32']]}]
+      - Let: [c, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [v], [LANE]]}]
       - FnCall: ["vfmsh_{type[0]}", [a, b, c]]
 
   - name: "vfms_f64"
@@ -10510,7 +10278,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -10524,7 +10292,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x2_t
@@ -10538,7 +10306,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -10553,7 +10321,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10564,7 +10332,7 @@ intrinsics:
       - [float64x2_t, float64x2_t, '1', q_laneq_f64]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[2]}']]
-      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
+      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]]
 
   - name: "vfms_lane_f64"
     doc: "Floating-point fused multiply-subtract to accumulator"
@@ -10573,30 +10341,30 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
       - float64x1_t
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
+      - FnCall: ["vfms{neon_type.no}", [a, b, {FnCall: ["vdup{neon_type.N}", [{FnCall: ['vget{neon_type.lane_nox}', [c], [LANE]]}]]}]]
 
   - name: "vfms_laneq_f64"
     doc: "Floating-point fused multiply-subtract to accumulator"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
       - [float64x1_t, float64x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
-      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
+      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]]
 
   - name: "vfmsq_lane_f64"
     doc: "Floating-point fused multiply-subtract to accumulator"
@@ -10605,23 +10373,23 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
       - [float64x2_t, float64x1_t]
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: [simd_extract!, [c, 'LANE as u32']]}]]}]]
+      - FnCall: ["vfms{neon_type[0].no}", [a, b, {FnCall: ["vdup{neon_type[0].N}", [{FnCall: ['vget{neon_type[1].lane_nox}', [c], [LANE]]}]]}]]
 
   - name: "vfms{type[2]}"
     doc: "Floating-point fused multiply-subtract to accumulator"
     arguments: ["a: {type[0]}", "b: {type[0]}", "c: {neon_type[1]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [fmsub, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -10639,7 +10407,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, 'f32x2', 'f32x2::new(0.0, 0.0)']
@@ -10657,7 +10425,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmeq]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -10673,18 +10441,18 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", "s_f32"]
       - ["f64", "u64", "d_f64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vceqz_{type[0]}"
                 - - FnCall: ["vdup_n_{type[0]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vceqz{type[2]}"
     doc: "Floating-point compare bitwise equal to zero"
@@ -10700,11 +10468,11 @@ intrinsics:
       - ["f16", "u16", "h_f16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vceqz_{type[0]}"
                 - - FnCall: ["vdup_n_{type[0]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vceqzd_{type[2]}"
     doc: "Compare bitwise equal to zero"
@@ -10712,7 +10480,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64", "s64"]
@@ -10730,7 +10498,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -10758,7 +10526,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmeq]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, uint8x8_t, u8x8, 'u8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -10782,7 +10550,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - uint64x1_t
@@ -10796,19 +10564,19 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcge_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
 
   - name: "vcge{type[0]}"
@@ -10825,12 +10593,12 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcge_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
                   - FnCall: ["vdup_n_{type[1]}", [b]]
-            - '0'
+          - - '0'
 
   - name: "vcge{neon_type[0].no}"
     doc: "Floating-point compare greater than or equal"
@@ -10838,7 +10606,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -10852,7 +10620,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["d_s64", "i64", "u64", s64]
@@ -10871,7 +10639,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhi]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - uint64x1_t
@@ -10885,7 +10653,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s64", "i64", "u64"]
@@ -10904,7 +10672,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmtst]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint64x1_t, u64x1, 'u64x1::new(0)']
@@ -10920,7 +10688,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmge]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
@@ -10940,18 +10708,18 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgez_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
 
   - name: "vcgez{type[0]}"
@@ -10968,11 +10736,11 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgez_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vclezd_s64"
     doc: "Compare less than or equal to zero"
@@ -10980,7 +10748,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64"]
@@ -10995,7 +10763,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64", 's64']
@@ -11014,7 +10782,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, uint8x8_t, i8x8, 'i8x8::new(0, 0, 0, 0, 0, 0, 0, 0)']
@@ -11038,7 +10806,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["i64", "u64"]
@@ -11055,7 +10823,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t, f32x2, 'f32x2::new(0.0, 0.0)']
@@ -11072,18 +10840,18 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32", "u32"]
       - ["d_f64", "f64", "u64"]
     compose:
       - FnCall:
-          - "simd_extract!"
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgtz_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcgtz{type[0]}"
     doc: "Floating-point compare greater than zero"
@@ -11099,11 +10867,11 @@ intrinsics:
       - ["h_f16", "f16", "u16"]
     compose:
       - FnCall:
-          - "simd_extract!"
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vcgtz_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcvt{neon_type[1].no}_{neon_type[0]}"
     doc: "Floating-point convert to unsigned fixed-point, rounding toward zero"
@@ -11111,7 +10879,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzu]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -11129,7 +10897,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, "f64"]
@@ -11147,7 +10915,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11159,7 +10927,7 @@ intrinsics:
           - - a
             - FnCall:
                 - "transmute::<f64, _>"
-                - - FnCall: [simd_extract!, [b, 'LANE as u32']]
+                - - FnCall: ['vget{neon_type.lane_nox}', [b], [LANE]]
 
   - name: "vmulq_lane_f64"
     doc: "Floating-point multiply"
@@ -11168,7 +10936,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11178,7 +10946,7 @@ intrinsics:
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: ["simd_shuffle!", [b, b, '[LANE as u32, LANE as u32]']]
+            - FnCall: [vdupq_lane_f64, [b], [LANE]]
 
   - name: "vmuld_lane_f64"
     doc: "Floating-point multiply"
@@ -11187,14 +10955,14 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
       - ["f64", float64x1_t]
     compose:
       - FnCall: [static_assert!, ['LANE == 0']]
-      - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}]
+      - Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]
       - Identifier: ['a * b', Symbol]
 
   - name: "vmul_laneq_f64"
@@ -11204,7 +10972,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11216,7 +10984,7 @@ intrinsics:
           - - a
             - FnCall:
                 - "transmute::<f64, _>"
-                - - FnCall: [simd_extract!, [b, 'LANE as u32']]
+                - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
 
   - name: "vmulq_laneq_f64"
     doc: "Floating-point multiply"
@@ -11225,7 +10993,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11235,7 +11003,7 @@ intrinsics:
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: [simd_shuffle!, [b, b, '[LANE as u32, LANE as u32]']]
+            - FnCall: [vdupq_laneq_f64, [b], [LANE]]
 
 
   # vmulq_laneq_f16
@@ -11247,19 +11015,19 @@ intrinsics:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float16x4_t, float16x8_t, '_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32]"]
-      - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]"]
+      - [float16x4_t, float16x8_t, '_lane']
+      - [float16x8_t, float16x8_t, 'q_lane']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '3']]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]
+            - FnCall: ['vdup{neon_type[0].laneq_nox}', [b], [LANE]]
 
 
   - name: "vmul{type[1]}_{type[0]}"
@@ -11270,7 +11038,7 @@ intrinsics:
       - *neon-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
-    assert_instr: [nop]
+    assert_instr: [fmul]
     safety: safe
     types:
       - [f16, 'h']
@@ -11285,7 +11053,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmul, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11294,7 +11062,7 @@ intrinsics:
       - ["f64", float64x2_t, "d_laneq_f64", '1']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}]
+      - Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]
       - Identifier: ['a * b', Symbol]
 
 
@@ -11315,7 +11083,7 @@ intrinsics:
       - ["f16", float16x8_t, "h_laneq_f16", '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - Let: [b, '{type[0]}', {FnCall: [simd_extract!, [b, 'LANE as u32']]}]
+      - Let: [b, '{type[0]}', {FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]}]
       - Identifier: ['a * b', Symbol]
 
 
@@ -11326,7 +11094,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [srshr, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -11341,8 +11109,8 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [int32x4_t, int16x8_t, "i16"]
@@ -11355,8 +11123,8 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [uint32x4_t, uint16x8_t, "u16"]
@@ -11369,46 +11137,46 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlsl2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x4_t, int16x8_t, int16x4_t, '2']
+      - [int32x4_t, int16x8_t, int16x8_t, '3']
+      - [int64x2_t, int32x4_t, int32x2_t, '1']
+      - [int64x2_t, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmlsl_high_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlsl_high_lane{neon_type[2].no}"
     doc: "Multiply-subtract long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlsl2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x8_t, uint16x4_t, '2']
+      - [uint32x4_t, uint16x8_t, uint16x8_t, '3']
+      - [uint64x2_t, uint32x4_t, uint32x2_t, '1']
+      - [uint64x2_t, uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmlsl_high_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vclt{neon_type[0].no}"
     doc: "Floating-point compare less than"
@@ -11416,7 +11184,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmgt]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, uint64x1_t]
@@ -11430,19 +11198,19 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["f32", "u32", 's_f32']
       - ["f64", "u64", 'd_f64']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vclt_{type[0]}"
                 - - FnCall: ["vdup_n_{type[0]}", [a]]
                   - FnCall: ["vdup_n_{type[0]}", [b]]
-            - '0'
+          - - '0'
 
 
   - name: "vclt{type[2]}"
@@ -11459,29 +11227,38 @@ intrinsics:
       - ["f16", "u16", 'h_f16']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vclt_{type[0]}"
                 - - FnCall: ["vdup_n_{type[0]}", [a]]
                   - FnCall: ["vdup_n_{type[0]}", [b]]
-            - '0'
+          - - '0'
 
-  - name: "vabdl_high_{neon_type[0]}"
-    doc: "Unsigned Absolute difference Long"
+  - name: "vabdl_high{neon_type[0].noq}"
+    doc: Unsigned Absolute difference Long
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
     return_type: "{neon_type[1]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [uabdl2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [uabdl2]]}]]
     safety: safe
     types:
-      - [uint8x16_t, uint16x8_t, uint8x8_t, '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x8_t, uint32x4_t, uint16x4_t, '[4, 5, 6, 7]']
-      - [uint32x4_t, uint64x2_t, uint32x2_t, '[2, 3]']
+      - [uint8x16_t, uint16x8_t]
+      - [uint16x8_t, uint32x4_t]
+      - [uint32x4_t, uint64x2_t]
     compose:
-      - Let: [c, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, "{type[3]}"]]}]
-      - Let: [d, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
-      - FnCall: [simd_cast, [{FnCall: ["vabd_{neon_type[0]}", [c, d]]}]]
+      - Let:
+          - c
+          - FnCall: ['vget_high_{neon_type[0]}', [a]]
+      - Let:
+          - d
+          - FnCall: ['vget_high_{neon_type[0]}', [b]]
+      - FnCall:
+          - simd_cast
+          - - FnCall:
+                - "vabd_{neon_type[0]}"
+                - - c
+                  - d
 
   - name: "vfms_n_f64"
     doc: "Floating-point fused Multiply-subtract to accumulator(vector)"
@@ -11489,7 +11266,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmsub]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, "f64"]
@@ -11506,7 +11283,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmls]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x2_t, "f64"]
@@ -11547,7 +11324,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['s_f32', float32x2_t, "f32"]
@@ -11565,7 +11342,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fmaxnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['s_f32', float32x2_t, "f32"]
@@ -11583,7 +11360,7 @@ intrinsics:
     return_type: "{type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s64", "i64", "u64"]
@@ -11603,7 +11380,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqdmulh, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
@@ -11618,14 +11395,14 @@ intrinsics:
           - - a
             - FnCall:
                 - "vdup{neon_type[0].N}"
-                - - FnCall: [simd_extract!, [b, 'LANE as u32']]
+                - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
 
   - name: "vqabs{type[2]}"
     doc: "Signed saturating absolute value"
     arguments: ["a: {type[0]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]]
     safety: safe
     types:
@@ -11633,16 +11410,16 @@ intrinsics:
       - ["i16", "s16", 'h_s16']
     compose:
       - FnCall:
-          - "simd_extract!"
+          - 'vget_lane_{type[0]}'
           - - FnCall: ["vqabs_{type[1]}", [{FnCall: ["vdup_n_{type[1]}", [a]]}]]
-            - '0'
+          - - '0'
 
   - name: "vqabs{type[1]}"
     doc: "Signed saturating absolute value"
     arguments: ["a: {type[0]}"]
     return_type: "{type[0]}"
     attr:
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
       - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]}, {FnCall: [assert_instr, [sqabs]]}]]
     safety: safe
     types:
@@ -11660,8 +11437,8 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [int16x8_t, "i16", int32x4_t]
@@ -11677,8 +11454,8 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [uint16x8_t, "u16", uint32x4_t]
@@ -11694,44 +11471,44 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smull2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int16x8_t, int16x4_t, int32x4_t, '2']
+      - [int16x8_t, int16x8_t, int32x4_t, '3']
+      - [int32x4_t, int32x2_t, int64x2_t, '1']
+      - [int32x4_t, int32x4_t, int64x2_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmull_high_{neon_type[0]}"
           - - a
-            - FnCall: [simd_shuffle!, [b, b, '{type[4]}']]
+            - FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]]
 
   - name: "vmull_high_lane{neon_type[1].no}"
     doc: "Multiply long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umull2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint16x8_t, uint16x4_t, uint32x4_t, '2']
+      - [uint16x8_t, uint16x8_t, uint32x4_t, '3']
+      - [uint32x4_t, uint32x2_t, uint64x2_t, '1']
+      - [uint32x4_t, uint32x4_t, uint64x2_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmull_high_{neon_type[0]}"
           - - a
-            - FnCall: [simd_shuffle!, [b, b, '{type[4]}']]
+            - FnCall: ['vdupq_lane{neon_type[1].no}', [b], [LANE]]
 
   - name: "vrsqrte{neon_type.no}"
     doc: "Reciprocal square-root estimate."
@@ -11739,7 +11516,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float64x1_t
@@ -11757,7 +11534,7 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [frsqrte]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["s_f32", "f32"]
@@ -11796,7 +11573,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fminnmp]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - float32x2_t
@@ -11816,7 +11593,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [sqshlu, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -11827,11 +11604,11 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[2]}'
           - - FnCall:
                 - "vqshlu_n_{type[4]}::<N>"
                 - - FnCall: ["vdup_n_{type[4]}", [a]]
-            - '0'
+          - - '0'
 
   - name: "vcvta{neon_type[1].no}_{neon_type[0]}"
     doc: "Floating-point convert to unsigned integer, rounding to nearest with ties to away"
@@ -11839,7 +11616,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float32x2_t, uint32x2_t]
@@ -11861,7 +11638,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtau]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     safety: safe
     types:
@@ -11881,7 +11658,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtzs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [float64x1_t, int64x1_t]
@@ -11904,6 +11681,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "i16", 'h']
       - ["f16", "i32", 'h']
       - ["f16", "i64", 'h']
     compose:
@@ -11914,22 +11692,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtms.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding towards minus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtms]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "i16", 'h', 'i32']
-    compose:
-      - 'vcvtmh_{type[3]}_f16(a) as i16'
-
-
   - name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
     doc: "Floating-point convert to unsigned integer, rounding towards minus infinity"
     arguments: ["a: {type[0]}"]
@@ -11941,6 +11703,7 @@ intrinsics:
       - *target-not-arm64ec
     safety: safe
     types:
+      - ["f16", "u16", 'h']
       - ["f16", "u32", 'h']
       - ["f16", "u64", 'h']
     compose:
@@ -11951,28 +11714,13 @@ intrinsics:
             - link: "llvm.aarch64.neon.fcvtmu.{type[1]}.{type[0]}"
               arch: aarch64,arm64ec
 
-  - name: "vcvtm{type[2]}_{type[1]}_{type[0]}"
-    doc: "Floating-point convert to integer, rounding towards minus infinity"
-    arguments: ["a: {type[0]}"]
-    return_type: "{type[1]}"
-    attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [fcvtmu]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
-      - *target-not-arm64ec
-    safety: safe
-    types:
-      - ["f16", "u16", 'h', 'u32']
-    compose:
-      - 'vcvtmh_{type[3]}_f16(a) as u16'
-
   - name: "vmlal_high_n_{neon_type[1]}"
     doc: "Multiply-add long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [int32x4_t, int16x8_t, "i16"]
@@ -11989,8 +11737,8 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2]]}]]
+      - *neon-stable
     safety: safe
     types:
       - [uint32x4_t, uint16x8_t, "u16"]
@@ -12007,38 +11755,38 @@ intrinsics:
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [smlal2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x4_t, int16x8_t, int16x4_t, '2']
+      - [int32x4_t, int16x8_t, int16x8_t, '3']
+      - [int64x2_t, int32x4_t, int32x2_t, '1']
+      - [int64x2_t, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
+      - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]]
 
   - name: "vmlal_high_lane{neon_type[2].no}"
     doc: "Multiply-add long"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
     attr:
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-test-little-endian, {FnCall: [assert_instr, [umlal2, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x8_t, uint16x4_t, '2']
+      - [uint32x4_t, uint16x8_t, uint16x8_t, '3']
+      - [uint64x2_t, uint32x4_t, uint32x2_t, '1']
+      - [uint64x2_t, uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
-      - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]]
+      - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: ['vdupq_lane{neon_type[2].no}', [c], [LANE]]}]]
 
   - name: "vrsrad_n_u64"
     doc: "Unsigned rounding shift right and accumulate."
@@ -12047,7 +11795,7 @@ intrinsics:
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [urshr, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     static_defs: ['const N: i32']
     safety: safe
     types:
@@ -12063,7 +11811,7 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [cmhs]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - uint64x1_t
@@ -12077,7 +11825,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -12100,7 +11848,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -12117,7 +11865,7 @@ intrinsics:
     attr:
       - FnCall: [target_feature, ['enable = "neon,aes"']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ld4r]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -12133,21 +11881,29 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, int8x8x4_t]
+      - [int8x8_t, 'int8x8x4_t', 'int8x16x2', 'int8x8', 'i8x8::splat(32)']
     compose:
-      - FnCall:
-          - "vqtbx2"
-          - - FnCall: [transmute, [a]]
-            - FnCall:
-                - transmute
-                - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]]
-            - FnCall:
-                - transmute
-                - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]]
-            - FnCall: [transmute, [c]]
+      - Let:
+        - x
+        - FnCall:
+            - '{type[2]}_t'
+            - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
+              - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'b.3']]
+      - FnCall:
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[3]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[4]}"]]
+          - FnCall:
+              - 'vqtbx2{neon_type[0].no}'
+              - - a
+                - x
+                - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
+          - a
 
   - name: "vtbx4{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12155,24 +11911,30 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [uint8x8_t, uint8x8x4_t, uint8x8_t]
-      - [poly8x8_t, poly8x8x4_t, uint8x8_t]
+      - [uint8x8_t, 'uint8x8x4_t', 'uint8x8_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(32)']
+      - [poly8x8_t, 'poly8x8x4_t', 'uint8x8_t', 'poly8x16x2', 'uint8x8', 'u8x8::splat(32)']
     compose:
-      - FnCall:
-          - transmute
-          - - FnCall:
-                - "vqtbx2"
-                - - FnCall: [transmute, [a]]
-                  - FnCall:
-                      - transmute
-                      - - FnCall: ["vcombine{neon_type[0].noq}", ["b.0", "b.1"]]
-                  - FnCall:
-                      - transmute
-                      - - FnCall: ["vcombine{neon_type[0].noq}", ["b.2", "b.3"]]
-                  - c
+      - Let:
+        - x
+        - FnCall:
+            - '{type[3]}_t'
+            - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
+              - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'b.3']]
+      - FnCall:
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[4]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[5]}"]]
+          - FnCall:
+              - 'vqtbx2{neon_type[0].no}'
+              - - a
+                - x
+                - c
+          - a
 
   - name: "vtbl1{neon_type[0].no}"
     doc: "Table look-up"
@@ -12180,7 +11942,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x8_t', 'unsafe {{ transmute(b) }}']
@@ -12195,26 +11957,21 @@ intrinsics:
                   - 'unsafe {{ crate::mem::zeroed() }}'
             - Identifier: ['{type[2]}', Symbol]
 
-  - name: "vtbl2{neon_type[1].noq}"
+  - name: "vtbl2{neon_type[2].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[1]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8x2_t, 'int8x8_t']
+      - ['int8x8x2_t',  'int8x8_t',  'int8x8_t']
     compose:
       - FnCall:
-          - vqtbl1
-          - - FnCall:
-                - transmute
-                - - FnCall:
-                      - 'vcombine{neon_type[1].noq}'
-                      - - 'a.0'
-                        - 'a.1'
-            - FnCall: [transmute, [b]]
+        - 'vqtbl1{neon_type[2].noq}'
+        - - FnCall: ['vcombine{neon_type[2].noq}', ['a.0', 'a.1']]
+          - FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]]
 
   - name: "vtbl2{neon_type[2].no}"
     doc: "Table look-up"
@@ -12222,127 +11979,107 @@ intrinsics:
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [uint8x8x2_t, 'uint8x8_t', 'uint8x8_t']
-      - [poly8x8x2_t, 'uint8x8_t', 'poly8x8_t']
+      - ['uint8x8x2_t', 'uint8x8_t', 'uint8x8_t']
+      - ['poly8x8x2_t', 'uint8x8_t', 'poly8x8_t']
     compose:
       - FnCall:
-          - transmute
-          - - FnCall:
-                - vqtbl1
-                - - FnCall:
-                      - transmute
-                      - - FnCall:
-                            - 'vcombine{neon_type[2].noq}'
-                            - - 'a.0'
-                              - 'a.1'
-                  - b
+          - 'vqtbl1{neon_type[2].noq}'
+          - - FnCall: ['vcombine{neon_type[2].noq}', ['a.0', 'a.1']]
+            - b
 
   - name: "vtbl3{neon_type[1].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[1]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8x3_t, 'int8x8_t', 'int8x16x2']
+      - ['int8x8x3_t', 'int8x8_t', 'int8x8_t', 'int8x16x2']
     compose:
       - Let:
-          - x
-          - FnCall:
-              - '{type[2]}_t'
-              - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
-                - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
+        - x
+        - FnCall:
+          - '{type[3]}_t'
+          - - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']]
+            - FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
       - FnCall:
-          - transmute
-          - - FnCall:
-                - vqtbl2
-                - - FnCall: [transmute, ['x.0']]
-                  - FnCall: [transmute, ['x.1']]
-                  - FnCall: [transmute, [b]]
+        - 'vqtbl2{neon_type[2].no}'
+        - - x
+          - FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]]
 
-  - name: "vtbl3{neon_type[3].no}"
+
+  - name: "vtbl3{neon_type[2].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[3]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [uint8x8x3_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
-      - [poly8x8x3_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t']
-    big_endian_inverse: true
+      - [uint8x8x3_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2']
+      - [poly8x8x3_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2']
     compose:
       - Let:
-          - x
-          - FnCall:
-              - '{type[2]}_t'
-              - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']]
-                - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
-      - FnCall:
-          - transmute
-          - - FnCall:
-                - vqtbl2
-                - - FnCall: [transmute, ['x.0']]
-                  - FnCall: [transmute, ['x.1']]
-                  - b
+        - x
+        - FnCall:
+          - '{type[3]}_t'
+          - - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']]
+            - FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'unsafe {{ crate::mem::zeroed() }}']]
+      - FnCall:
+        - 'vqtbl2{neon_type[2].no}'
+        - - x
+          - b
 
-  - name: "vtbl4{neon_type[1].no}"
+  - name: "vtbl4{neon_type[2].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[1]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8x4_t, 'int8x8_t', 'int8x16x2']
+      - ['int8x8x4_t', 'int8x8_t', 'int8x8_t', 'int8x16x2']
     compose:
       - Let:
-          - x
-          - FnCall:
-              - '{type[2]}_t'
-              - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
-                - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']]
+        - x
+        - FnCall:
+            - '{type[3]}_t'
+            - - FnCall: ['vcombine{neon_type[1].no}', ['a.0', 'a.1']]
+              - FnCall: ['vcombine{neon_type[1].no}', ['a.2', 'a.3']]
       - FnCall:
-          - transmute
-          - - FnCall:
-                - 'vqtbl2'
-                - - FnCall: [transmute, ['x.0']]
-                  - FnCall: [transmute, ['x.1']]
-                  - FnCall: [transmute, [b]]
+        - 'vqtbl2{neon_type[2].no}'
+        - - x
+          - FnCall: ['vreinterpret_u8{neon_type[2].noq}', [b]]
 
-  - name: "vtbl4{neon_type[3].no}"
+  - name: "vtbl4{neon_type[2].no}"
     doc: "Table look-up"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
-    return_type: "{neon_type[3]}"
+    return_type: "{neon_type[2]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [uint8x8x4_t, 'uint8x8_t', 'uint8x16x2', 'uint8x8_t']
-      - [poly8x8x4_t, 'uint8x8_t', 'poly8x16x2', 'poly8x8_t']
-    big_endian_inverse: true
+      - [uint8x8x4_t, 'uint8x8_t', 'uint8x8_t', 'uint8x16x2']
+      - [poly8x8x4_t, 'uint8x8_t', 'poly8x8_t', 'poly8x16x2']
     compose:
       - Let:
-          - x
-          - FnCall:
-              - '{type[2]}_t'
-              - - FnCall: ['vcombine{neon_type[3].no}', ['a.0', 'a.1']]
-                - FnCall: ['vcombine{neon_type[3].no}', ['a.2', 'a.3']]
-      - FnCall:
-          - transmute
-          - - FnCall:
-                - 'vqtbl2'
-                - - FnCall: [transmute, ['x.0']]
-                  - FnCall: [transmute, ['x.1']]
-                  - b
+        - x
+        - FnCall:
+            - '{type[3]}_t'
+            - - FnCall: ['vcombine{neon_type[2].no}', ['a.0', 'a.1']]
+              - FnCall: ['vcombine{neon_type[2].no}', ['a.2', 'a.3']]
+      - FnCall:
+        - 'vqtbl2{neon_type[2].no}'
+        - - x
+          - b
 
   - name: "vqtbx1{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12350,7 +12087,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, int8x16_t, uint8x8_t, vqtbx1]
@@ -12364,14 +12101,13 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, "uint8x16_t", uint8x8_t, "vqtbx1", "_u8"]
       - [poly8x8_t, "poly8x16_t", uint8x8_t, "vqtbx1", "_p8"]
       - [uint8x16_t, "uint8x16_t", uint8x16_t, "vqtbx1q", "q_u8"]
       - [poly8x16_t, "poly8x16_t", uint8x16_t, "vqtbx1q", "q_p8"]
-    big_endian_inverse: false
     compose:
       - FnCall:
           - transmute
@@ -12387,29 +12123,48 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, "int8x8_t", "transmute(c)", "i8x8::splat(8)", "int8x8"]
-      - [uint8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"]
-      - [poly8x8_t, "uint8x8_t", "c", "u8x8::splat(8)", "uint8x8"]
+      - [int8x8_t, "int8x8_t", "int8x8", "i8x8::splat(8)"]
     compose:
       - FnCall:
-          - simd_select
-          - - FnCall:
-                - "simd_lt::<{type[4]}_t, int8x8_t>"
-                - - c
-                  - FnCall: [transmute, ["{type[3]}"]]
-            - FnCall:
-                - transmute
-                - - FnCall:
-                      - "vqtbx1"
-                      - - "transmute(a)"
-                        - FnCall:
-                            - transmute
-                            - - FnCall: ["vcombine{neon_type[0].no}", [b, "crate::mem::zeroed()"]]
-                        - "{type[2]}"
-            - a
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[2]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[3]}"]]
+          - FnCall:
+            - 'vqtbx1{neon_type[0].no}'
+            - - a
+              - FnCall: ['vcombine{neon_type[0].no}', [b, 'crate::mem::zeroed()']]
+              - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
+          - a
+
+  - name: "vtbx1{neon_type[0].no}"
+    doc: "Extended table look-up"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}", "c: {neon_type[1]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
+      - *neon-stable
+    safety: safe
+    types:
+      - [uint8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"]
+      - [poly8x8_t, "uint8x8_t", "uint8x8", "u8x8::splat(8)"]
+    compose:
+      - FnCall:
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[2]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[3]}"]]
+          - FnCall:
+            - 'vqtbx1{neon_type[0].no}'
+            - - a
+              - FnCall: ['vcombine{neon_type[0].no}', [b, 'crate::mem::zeroed()']]
+              - c
+          - a
 
   - name: "vtbx2{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12417,18 +12172,23 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, 'int8x8x2_t']
+      - [int8x8_t, 'int8x8x2_t', 'int8x8', 'i8x8::splat(16)']
     compose:
       - FnCall:
-          - vqtbx1
-          - - FnCall: [transmute, [a]]
-            - FnCall:
-                - transmute
-                - - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
-            - FnCall: [transmute, [c]]
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[2]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[3]}"]]
+          - FnCall:
+              - 'vqtbx1{neon_type[0].no}'
+              - - a
+                - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
+                - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
+          - a
 
   - name: "vtbx2{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12436,21 +12196,24 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [uint8x8_t, 'uint8x8x2_t', uint8x8_t]
-      - [poly8x8_t, 'poly8x8x2_t', uint8x8_t]
+      - [uint8x8_t, 'uint8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)']
+      - [poly8x8_t, 'poly8x8x2_t', uint8x8_t, 'uint8x8', 'u8x8::splat(16)']
     compose:
       - FnCall:
-          - transmute
-          - - FnCall:
-                - vqtbx1
-                - - FnCall: [transmute, [a]]
-                  - FnCall:
-                      - transmute
-                      - - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
-                  - c
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[3]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[4]}"]]
+          - FnCall:
+              - 'vqtbx1{neon_type[0].no}'
+              - - a
+                - FnCall: ["vcombine{neon_type[0].no}", ['b.0', 'b.1']]
+                - c
+          - a
 
   - name: "vtbx3{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12458,34 +12221,29 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'i8x8::splat(24)', 'int8x8']
+      - [int8x8_t, 'int8x8x3_t', 'int8x16x2', 'int8x8', 'i8x8::splat(24)']
     compose:
       - Let:
-         - x
-         - FnCall:
-             - '{type[2]}_t'
-             - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
-               - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']]
-      - FnCall:
-          - transmute
-          - - FnCall:
-                - simd_select
-                - - FnCall:
-                      - 'simd_lt::<{type[4]}_t, int8x8_t>'
-                      - - FnCall: [transmute, [c]]
-                        - FnCall: [transmute, ['{type[3]}']]
-                  - FnCall:
-                      - transmute
-                      - - FnCall:
-                            - 'vqtbx2'
-                            - - FnCall: [transmute, [a]]
-                              - FnCall: [transmute, ['x.0']]
-                              - FnCall: [transmute, ['x.1']]
-                              - FnCall: [transmute, [c]]
-                  - a
+        - x
+        - FnCall:
+            - '{type[2]}_t'
+            - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
+              - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']]
+      - FnCall:
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[3]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[4]}"]]
+          - FnCall:
+              - 'vqtbx2{neon_type[0].no}'
+              - - a
+                - x
+                - FnCall: ['vreinterpret_u8{neon_type[0].no}', [c]]
+          - a
 
   - name: "vtbx3{neon_type[0].no}"
     doc: "Extended table look-up"
@@ -12493,12 +12251,11 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
-      - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'u8x8::splat(24)', 'uint8x8']
-      - [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'u8x8::splat(24)', 'poly8x8']
-    big_endian_inverse: true
+      - [uint8x8_t, 'uint8x8x3_t', 'uint8x16x2', 'uint8x8', 'u8x8::splat(24)']
+      - [poly8x8_t, 'poly8x8x3_t', 'poly8x16x2', 'uint8x8', 'u8x8::splat(24)']
     compose:
       - Let:
          - x
@@ -12507,22 +12264,17 @@ intrinsics:
              - - FnCall: ['vcombine{neon_type[0].no}', ['b.0', 'b.1']]
                - FnCall: ['vcombine{neon_type[0].no}', ['b.2', 'unsafe {{ crate::mem::zeroed() }}']]
       - FnCall:
-          - transmute
-          - - FnCall:
-                - simd_select
-                - - FnCall:
-                      - 'simd_lt::<{type[4]}_t, int8x8_t>'
-                      - - FnCall: [transmute, [c]]
-                        - FnCall: [transmute, ['{type[3]}']]
-                  - FnCall:
-                      - transmute
-                      - - FnCall:
-                            - 'vqtbx2'
-                            - - FnCall: [transmute, [a]]
-                              - FnCall: [transmute, ['x.0']]
-                              - FnCall: [transmute, ['x.1']]
-                              - c
-                  - a
+        - simd_select
+        - - FnCall:
+              - "simd_lt::<{type[3]}_t, int8x8_t>"
+              - - c
+                - FnCall: [transmute, ["{type[4]}"]]
+          - FnCall:
+              - 'vqtbx2{neon_type[0].no}'
+              - - a
+                - x
+                - c
+          - a
 
   - name: "vqtbl1{neon_type[3].no}"
     doc: "Table look-up"
@@ -12530,7 +12282,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['int8x16_t', uint8x8_t, 'vqtbl1', 'int8x8_t']
@@ -12544,14 +12296,13 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['uint8x16_t', uint8x8_t, 'vqtbl1', 'uint8x8_t']
       - ['poly8x16_t', uint8x8_t, 'vqtbl1', 'poly8x8_t']
       - ['uint8x16_t', uint8x16_t, 'vqtbl1q', 'uint8x16_t']
       - ['poly8x16_t', uint8x16_t, 'vqtbl1q', 'poly8x16_t']
-    big_endian_inverse: false
     compose:
       - FnCall:
           - transmute
@@ -12566,7 +12317,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['int8x16x2_t', uint8x8_t, 'vqtbl2', 'int8x8_t']
@@ -12580,7 +12331,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['uint8x16x2_t', uint8x8_t, 'vqtbl2', 'uint8x8_t']
@@ -12602,7 +12353,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x16x2_t', uint8x8_t, 'vqtbx2']
@@ -12616,7 +12367,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, 'uint8x16x2_t', uint8x8_t, 'vqtbx2']
@@ -12639,7 +12390,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['int8x8_t', 'int8x16x3_t', uint8x8_t, 'vqtbl3']
@@ -12653,7 +12404,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['uint8x8_t', 'uint8x16x3_t', uint8x8_t, 'vqtbl3']
@@ -12676,7 +12427,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x16x3_t', uint8x8_t, 'vqtbx3']
@@ -12690,7 +12441,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, 'uint8x16x3_t', uint8x8_t, 'vqtbx3']
@@ -12714,7 +12465,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['int8x16x4_t', uint8x8_t, 'vqtbl4', 'int8x8_t']
@@ -12728,7 +12479,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ['uint8x16x4_t', uint8x8_t, 'vqtbl4', 'uint8x8_t']
@@ -12752,7 +12503,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [int8x8_t, 'int8x16x4_t', uint8x8_t, 'vqtbx4']
@@ -12766,7 +12517,7 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [uint8x8_t, 'uint8x16x4_t', uint8x8_t, 'vqtbx4']
@@ -12792,7 +12543,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["vqtbl1", "int8x16_t", "uint8x8_t", "int8x8_t"]
@@ -12811,7 +12562,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["vqtbl2", "int8x16_t", "uint8x8_t", "int8x8_t"]
@@ -12830,7 +12581,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["vqtbl3", int8x16_t, uint8x8_t, int8x8_t]
@@ -12849,7 +12600,7 @@ intrinsics:
     return_type: "{neon_type[3]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbl]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - ["vqtbl4", int8x16_t, uint8x8_t, int8x8_t]
@@ -12868,7 +12619,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [vqtbx1, "int8x8_t", "int8x16_t", "uint8x8_t"]
@@ -12887,7 +12638,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [vqtbx2, "int8x8_t", "int8x16_t", "uint8x8_t"]
@@ -12906,7 +12657,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [vqtbx3, "int8x8_t", "int8x16_t", "uint8x8_t"]
@@ -12925,7 +12676,7 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [tbx]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety: safe
     types:
       - [vqtbx4, "int8x8_t", "int8x16_t", "uint8x8_t"]
@@ -12944,7 +12695,7 @@ intrinsics:
     attr:
       - FnCall: [target_feature, ['enable = "{type[2]}"']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [ldr]]}]]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -13011,7 +12762,7 @@ intrinsics:
       - FnCall: [target_feature, ['enable = "{type[2]}"']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [str]]}]]
       - FnCall: [allow, ['clippy::cast_ptr_alignment']]
-      - FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
+      - *neon-stable
     safety:
       unsafe: [neon]
     types:
@@ -13205,26 +12956,6 @@ intrinsics:
             - link: "llvm.aarch64.neon.usqadd.{neon_type[1]}"
               arch: aarch64,arm64ec
 
-  - name: "vpadd{neon_type.no}"
-    doc: "Add Pairwise"
-    arguments: ["a: {neon_type}", "b: {neon_type}"]
-    return_type: "{neon_type}"
-    attr:
-      - *neon-stable
-    assert_instr: [addp]
-    safety: safe
-    types:
-      - int8x16_t
-      - int16x8_t
-      - int32x4_t
-      - int64x2_t
-    compose:
-      - LLVMLink:
-          name: "vpadd{neon_type.no}"
-          links:
-            - link: "llvm.aarch64.neon.addp.{neon_type}"
-              arch: aarch64,arm64ec
-
   - name: "vpadd{neon_type[0].no}"
     doc: "Add Pairwise"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
@@ -13234,17 +12965,22 @@ intrinsics:
     assert_instr: [addp]
     safety: safe
     types:
-      - [uint8x16_t, int8x16_t]
-      - [uint16x8_t, int16x8_t]
-      - [uint32x4_t, int32x4_t]
-      - [uint64x2_t, int64x2_t]
+      - [int8x16_t, "16"]
+      - [int16x8_t, "8"]
+      - [int32x4_t, "4"]
+      - [int64x2_t, "2"]
+      - [uint8x16_t, "16"]
+      - [uint16x8_t, "8"]
+      - [uint32x4_t, "4"]
+      - [uint64x2_t, "2"]
     compose:
-      - FnCall:
-          - transmute
-          - - FnCall:
-                - 'vpadd{neon_type[1].no}'
-                - - FnCall: [transmute, [a]]
-                  - FnCall: [transmute, [b]]
+      - Let:
+        - even
+        - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::even::<{type[1]}>()"]]
+      - Let:
+        - odd
+        - FnCall: ["simd_shuffle!", [a, b, "crate::core_arch::macros::odd::<{type[1]}>()"]]
+      - FnCall: [simd_add, [even, odd]]
 
   - name: "vpaddd_s64"
     doc: "Add pairwise"
@@ -13257,7 +12993,7 @@ intrinsics:
     types:
       - [int64x2_t, i64]
     compose:
-      - FnCall: [simd_reduce_add_unordered, [a]]
+      - FnCall: [simd_reduce_add_ordered, [a, 0]]
 
   - name: "vpaddd_u64"
     doc: "Add pairwise"
@@ -13270,7 +13006,7 @@ intrinsics:
     types:
       - [uint64x2_t, u64]
     compose:
-      - FnCall: [simd_reduce_add_unordered, [a]]
+      - FnCall: [simd_reduce_add_ordered, [a, 0]]
 
   - name: "vaddv{neon_type[0].no}"
     doc: "Add across vector"
@@ -13287,7 +13023,7 @@ intrinsics:
       - [int16x8_t, i16]
       - [int32x4_t, i32]
     compose:
-      - FnCall: [simd_reduce_add_unordered, [a]]
+      - FnCall: [simd_reduce_add_ordered, [a, 0]]
 
   - name: "vaddv{neon_type[0].no}"
     doc: "Add across vector"
@@ -13300,7 +13036,7 @@ intrinsics:
     types:
       - [int32x2_t, i32]
     compose:
-      - FnCall: [simd_reduce_add_unordered, [a]]
+      - FnCall: [simd_reduce_add_ordered, [a, 0]]
 
   - name: "vaddv{neon_type[0].no}"
     doc: "Add across vector"
@@ -13313,7 +13049,7 @@ intrinsics:
     types:
       - [int64x2_t, i64]
     compose:
-      - FnCall: [simd_reduce_add_unordered, [a]]
+      - FnCall: [simd_reduce_add_ordered, [a, 0]]
 
   - name: "vaddv{neon_type[0].no}"
     doc: "Add across vector"
@@ -13330,7 +13066,7 @@ intrinsics:
       - [uint16x8_t, u16]
       - [uint32x4_t, u32]
     compose:
-      - FnCall: [simd_reduce_add_unordered, [a]]
+      - FnCall: [simd_reduce_add_ordered, [a, 0]]
 
   - name: "vaddv{neon_type[0].no}"
     doc: "Add across vector"
@@ -13343,7 +13079,7 @@ intrinsics:
     types:
       - [uint32x2_t, u32, i32]
     compose:
-      - FnCall: [simd_reduce_add_unordered, [a]]
+      - FnCall: [simd_reduce_add_ordered, [a, 0]]
 
   - name: "vaddv{neon_type[0].no}"
     doc: "Add across vector"
@@ -13356,7 +13092,7 @@ intrinsics:
     types:
       - [uint64x2_t, u64, i64]
     compose:
-      - FnCall: [simd_reduce_add_unordered, [a]]
+      - FnCall: [simd_reduce_add_ordered, [a, 0]]
 
   - name: "vaddlv{neon_type[0].no}"
     doc: "Signed Add Long across Vector"
@@ -13376,7 +13112,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.saddlv.i32.{neon_type[0]}"
               arch: aarch64,arm64ec
-      - Identifier: ["unsafe {{ _vaddlv{neon_type[0].no}(a) as i16 }}", Symbol]
+      - Identifier: ["_vaddlv{neon_type[0].no}(a) as i16", UnsafeSymbol]
 
   - name: "vaddlv{neon_type[0].no}"
     doc: "Unsigned Add Long across Vector"
@@ -13396,7 +13132,7 @@ intrinsics:
           links:
             - link: "llvm.aarch64.neon.uaddlv.i32.{neon_type[0]}"
               arch: aarch64,arm64ec
-      - Identifier: ["unsafe {{ _vaddlv{neon_type[0].no}(a) as u16 }}", Symbol]
+      - Identifier: ["_vaddlv{neon_type[0].no}(a) as u16", UnsafeSymbol]
 
   - name: "vmaxv{neon_type[0].no}"
     doc: "Horizontal vector max."
@@ -13702,26 +13438,17 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x8_t,  'N >= 1 && N <= 8']
-      - [int8x16_t, 'N >= 1 && N <= 8']
-      - [int16x4_t, 'N >= 1 && N <= 16']
-      - [int16x8_t, 'N >= 1 && N <= 16']
-      - [int32x2_t, 'N >= 1 && N <= 32']
-      - [int32x4_t, 'N >= 1 && N <= 32']
-      - [int64x1_t, 'N >= 1 && N <= 64']
-      - [int64x2_t, 'N >= 1 && N <= 64']
+      - [int8x8_t, u8, '8', 'N >= 1 && N <= 8']
+      - [int8x16_t, u8, '16', 'N >= 1 && N <= 8']
+      - [int16x4_t, u16, '4', 'N >= 1 && N <= 16']
+      - [int16x8_t, u16, '8', 'N >= 1 && N <= 16']
+      - [int32x2_t, u32, '2', 'N >= 1 && N <= 32']
+      - [int32x4_t, u32, '4', 'N >= 1 && N <= 32']
+      - [int64x1_t, u64, '1', 'N >= 1 && N <= 64']
+      - [int64x2_t, u64, '2', 'N >= 1 && N <= 64']
     compose:
-      - FnCall: ['static_assert!', ['{type[1]}']]
-      - LLVMLink:
-          name: "vsri{neon_type[0].N}"
-          arguments:
-            - "a: {neon_type[0]}"
-            - "b: {neon_type[0]}"
-            - "n: i32"
-          links:
-            - link: "llvm.aarch64.neon.vsri.{neon_type[0]}"
-              arch: aarch64,arm64ec
-      - FnCall: ["_vsri{neon_type[0].N}", [a, b, N], [], true]
+      - FnCall: ['static_assert!', ['{type[3]}']]
+      - FnCall: ["super::shift_right_and_insert!", ['{type[1]}', '{type[2]}', N, a, b], [], true]
 
   - name: "vsri{neon_type[0].N}"
     doc: "Shift Right and Insert (immediate)"
@@ -13769,7 +13496,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *enable-fhm
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fmlal2]
     safety: safe
@@ -13784,7 +13511,7 @@ intrinsics:
               arch: aarch64,arm64ec
 
 
-  - name: "vfmlal{type[3]}{neon_type[1]}"
+  - name: "vfmlal{type[3]}_high_{neon_type[1]}"
     doc: "Floating-point fused Multiply-Add Long to accumulator (by element)."
     arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}",  "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
@@ -13793,23 +13520,22 @@ intrinsics:
       - *neon-fp16
       - *enable-fhm
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2']
-      - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3']
-      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2']
-      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3']
+      - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_high_', '2']
+      - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_high_', '3']
+      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_high_', '2']
+      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_high_', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
       - FnCall:
           - "vfmlal{type[4]}{neon_type[1]}"
           - - r
             - a
-            - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
-
+            - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
 
   - name: "vfmlal{type[2]}{neon_type[1]}"
     doc: "Floating-point fused Multiply-Add Long to accumulator (vector)."
@@ -13818,7 +13544,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *enable-fhm
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fmlal]
     safety: safe
@@ -13833,7 +13559,7 @@ intrinsics:
               arch: aarch64,arm64ec
 
 
-  - name: "vfmlal{type[3]}{neon_type[1]}"
+  - name: "vfmlal{type[3]}_low_{neon_type[1]}"
     doc: "Floating-point fused Multiply-Add Long to accumulator (by element)."
     arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}",  "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
@@ -13842,23 +13568,22 @@ intrinsics:
       - *neon-fp16
       - *enable-fhm
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2']
-      - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3']
-      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2']
-      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3']
+      - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_low_', '2']
+      - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_low_', '3']
+      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_low_', '2']
+      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_low_', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
       - FnCall:
           - "vfmlal{type[4]}{neon_type[1]}"
           - - r
             - a
-            - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
-
+            - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
 
   - name: "vfmlsl{type[2]}{neon_type[1]}"
     doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)."
@@ -13867,7 +13592,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *enable-fhm
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fmlsl2]
     safety: safe
@@ -13881,7 +13606,7 @@ intrinsics:
             - link: "llvm.aarch64.neon.fmlsl2.{neon_type[0]}.{neon_type[1]}"
               arch: aarch64,arm64ec
 
-  - name: "vfmlsl{type[3]}{neon_type[1]}"
+  - name: "vfmlsl{type[3]}_high_{neon_type[1]}"
     doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)."
     arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}",  "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
@@ -13890,23 +13615,22 @@ intrinsics:
       - *neon-fp16
       - *enable-fhm
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float16x4_t, float16x4_t, '_lane_high_', '_high_', '2']
-      - [float32x2_t, float16x4_t, float16x8_t, '_laneq_high_', '_high_', '3']
-      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_high_', 'q_high_', '2']
-      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_high_', 'q_high_', '3']
+      - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_high_', '2']
+      - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_high_', '3']
+      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_high_', '2']
+      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_high_', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
       - FnCall:
           - "vfmlsl{type[4]}{neon_type[1]}"
           - - r
             - a
-            - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
-
+            - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
 
   - name: "vfmlsl{type[2]}{neon_type[1]}"
     doc: "Floating-point fused Multiply-Subtract Long from accumulator (vector)."
@@ -13915,7 +13639,7 @@ intrinsics:
     attr:
       - *neon-fp16
       - *enable-fhm
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     assert_instr: [fmlsl]
     safety: safe
@@ -13929,7 +13653,7 @@ intrinsics:
             - link: "llvm.aarch64.neon.fmlsl.{neon_type[0]}.{neon_type[1]}"
               arch: aarch64,arm64ec
 
-  - name: "vfmlsl{type[3]}{neon_type[1]}"
+  - name: "vfmlsl{type[3]}_low_{neon_type[1]}"
     doc: "Floating-point fused Multiply-Subtract Long from accumulator (by element)."
     arguments: ["r: {neon_type[0]}", "a: {neon_type[1]}",  "b: {neon_type[2]}"]
     return_type: "{neon_type[0]}"
@@ -13938,22 +13662,22 @@ intrinsics:
       - *neon-fp16
       - *enable-fhm
       - FnCall: [rustc_legacy_const_generics, ['3']]
-      - *neon-unstable-f16
+      - *neon-stable-fp16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float16x4_t, float16x4_t, '_lane_low_', '_low_', '2']
-      - [float32x2_t, float16x4_t, float16x8_t, '_laneq_low_', '_low_', '3']
-      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane_low_', 'q_low_', '2']
-      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq_low_', 'q_low_', '3']
+      - [float32x2_t, float16x4_t, float16x4_t, '_lane', '_low_', '2']
+      - [float32x2_t, float16x4_t, float16x8_t, '_laneq', '_low_', '3']
+      - [float32x4_t, float16x8_t, float16x4_t, 'q_lane', 'q_low_', '2']
+      - [float32x4_t, float16x8_t, float16x8_t, 'q_laneq', 'q_low_', '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[5]}"]]
       - FnCall:
           - "vfmlsl{type[4]}{neon_type[1]}"
           - - r
             - a
-            - FnCall: ["vdup{neon_type[1].N}", [{FnCall: [simd_extract!, [b, 'LANE as u32']]}]]
+            - FnCall: ["vdup{type[3]}_{neon_type[2]}", [b], [LANE]]
 
   - name: "vamax{neon_type.no}"
     doc: "Multi-vector floating-point absolute maximum"
@@ -13961,10 +13685,12 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [target_feature, ['enable = "neon,faminmax"']]
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [famax]]}]]
       - FnCall: [unstable, ['feature = "faminmax"', 'issue = "137933"']]
     safety: safe
     types:
+      - float16x4_t
+      - float16x8_t
       - float32x2_t
       - float32x4_t
       - float64x2_t
@@ -13981,10 +13707,12 @@ intrinsics:
     return_type: "{neon_type}"
     attr:
       - FnCall: [target_feature, ['enable = "neon,faminmax"']]
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [famin]]}]]
       - FnCall: [unstable, ['feature = "faminmax"', 'issue = "137933"']]
     safety: safe
     types:
+      - float16x4_t
+      - float16x8_t
       - float32x2_t
       - float32x4_t
       - float64x2_t
@@ -14025,36 +13753,101 @@ intrinsics:
               arch: aarch64,arm64ec
       - FnCall: ['_vluti2{neon_type[0].lane_nox}', [a, b, LANE]]
 
+  - name: "vluti2{neon_type[0].laneq_nox}"
+    doc: "Lookup table read with 2-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = {type[4]}']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'INDEX = 1']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const INDEX: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [int8x8_t,   uint8x16_t, int8x16_t, 'INDEX >= 0 && INDEX <= 3', '"neon,lut"']
+      - [int8x16_t,  uint8x16_t, int8x16_t, 'INDEX >= 0 && INDEX <= 3', '"neon,lut"']
+      - [int16x4_t,  uint8x16_t, int16x8_t, 'INDEX >= 0 && INDEX <= 7', '"neon,lut"']
+      - [int16x8_t,  uint8x16_t, int16x8_t, 'INDEX >= 0 && INDEX <= 7', '"neon,lut"']
+    compose:
+      - FnCall: ['static_assert!', ['{type[3]}']]
+      - LLVMLink:
+          name: "vluti2{neon_type[0].laneq_nox}"
+          arguments:
+            - 'a: {neon_type[0]}'
+            - 'b: {neon_type[1]}'
+            - 'n: i32'
+          links:
+            - link: "llvm.aarch64.neon.vluti2.laneq.{neon_type[2]}.{neon_type[0]}"
+              arch: aarch64,arm64ec
+      - FnCall: ['_vluti2{neon_type[0].laneq_nox}', [a, b, INDEX]]
+
   - name: "vluti2{neon_type[0].lane_nox}"
     doc: "Lookup table read with 2-bit indices"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[2]}"
     attr:
       - FnCall: [target_feature, ['enable = "neon,lut"']]
-      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'INDEX = 1']]}]]
       - *neon-unstable-feat-lut
       - FnCall: [rustc_legacy_const_generics, ['2']]
-    static_defs: ["const LANE: i32"]
+    static_defs: ["const INDEX: i32"]
     safety:
       unsafe: [neon]
     types:
-      - [uint8x8_t,  uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t']
-      - [uint8x16_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t']
-      - [poly8x8_t,  uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t']
-      - [poly8x16_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t']
-      - [uint16x4_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t']
-      - [uint16x8_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t']
-      - [poly16x4_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t']
-      - [poly16x8_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t']
+      - [uint8x8_t,  uint8x8_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x8_t']
+      - [uint8x16_t, uint8x8_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x16_t']
+      - [poly8x8_t,  uint8x8_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x8_t']
+      - [poly8x16_t, uint8x8_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 1', 'int8x16_t']
+      - [uint16x4_t, uint8x8_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x4_t']
+      - [uint16x8_t, uint8x8_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x8_t']
+      - [poly16x4_t, uint8x8_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x4_t']
+      - [poly16x8_t, uint8x8_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x8_t']
+      - [float16x4_t, uint8x8_t, float16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x4_t']
+      - [float16x8_t, uint8x8_t, float16x8_t, 'INDEX >= 0 && INDEX <= 3', 'int16x8_t']
     compose:
       - FnCall: ['static_assert!', ['{type[3]}']]
       - FnCall:
           - transmute
           - - FnCall:
-                - 'vluti2{neon_type[4].lane_nox}::<LANE>'
+                - 'vluti2{neon_type[4].lane_nox}::<INDEX>'
                 - - FnCall: [transmute, [a]]
                   - b
 
+  - name: "vluti2{neon_type[0].laneq_nox}"
+    doc: "Lookup table read with 2-bit indices"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - FnCall: [target_feature, ['enable = "neon,lut"']]
+      - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'INDEX = 1']]}]]
+      - *neon-unstable-feat-lut
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ["const INDEX: i32"]
+    safety:
+      unsafe: [neon]
+    types:
+      - [uint8x8_t,  uint8x16_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x8_t']
+      - [uint8x16_t, uint8x16_t, uint8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x16_t']
+      - [poly8x8_t,  uint8x16_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x8_t']
+      - [poly8x16_t, uint8x16_t, poly8x16_t, 'INDEX >= 0 && INDEX <= 3', 'int8x16_t']
+      - [uint16x4_t, uint8x16_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x4_t']
+      - [uint16x8_t, uint8x16_t, uint16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x8_t']
+      - [poly16x4_t, uint8x16_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x4_t']
+      - [poly16x8_t, uint8x16_t, poly16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x8_t']
+      - [float16x4_t, uint8x16_t, float16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x4_t']
+      - [float16x8_t, uint8x16_t, float16x8_t, 'INDEX >= 0 && INDEX <= 7', 'int16x8_t']
+    compose:
+      - FnCall: ['static_assert!', ['{type[3]}']]
+      - FnCall:
+          - transmute
+          - - FnCall:
+                - 'vluti2{neon_type[4].laneq_nox}::<INDEX>'
+                - - FnCall: [transmute, [a]]
+                  - b
+
+
   - name: "vluti4{neon_type[0].lane_nox}"
     doc: "Lookup table read with 4-bit indices"
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
@@ -14263,6 +14056,28 @@ intrinsics:
                 - - FnCall: [transmute, [a]]
                   - b
 
+  - name: "vscale{neon_type[0].no}"
+    doc: "Multi-vector floating-point adjust exponent"
+    arguments: ["vn: {type[0]}", "vm: {type[1]}"]
+    return_type: "{type[0]}"
+    attr:
+      - *neon-unstable-fp8
+      - FnCall: [target_feature, ['enable = "neon,fp8"']]
+      - FnCall: [cfg_attr, [{FnCall: [all, [test, {FnCall: [not, ['target_env= "msvc"']]}]]}, {FnCall: [assert_instr, [fscale]]}]]
+    safety: safe
+    types:
+      - [float16x4_t, int16x4_t]
+      - [float16x8_t, int16x8_t]
+      - [float32x2_t, int32x2_t]
+      - [float32x4_t, int32x4_t]
+      - [float64x2_t, int64x2_t]
+    compose:
+      - LLVMLink:
+          name: "vscale{neon_type[0].no}"
+          links:
+            - link: "llvm.aarch64.neon.fp8.fscale.{neon_type[0]}"
+              arch: aarch64,arm64ec
+
   - name: "__jcvt"
     doc: "Floating-point JavaScript convert to signed fixed-point, rounding toward zero"
     arguments: ["a: {type}"]
@@ -14270,7 +14085,7 @@ intrinsics:
     attr:
       - FnCall: [target_feature, ['enable = "jsconv"']]
       - FnCall: [cfg_attr, [test, { FnCall: [assert_instr, ["fjcvtzs"]] }]]
-      - *aarch64-unstable-jscvt
+      - *aarch64-stable-jscvt
     safety: safe
     types:
       - f64
@@ -14280,3 +14095,46 @@ intrinsics:
           links:
             - link: "llvm.aarch64.fjcvtzs"
               arch: aarch64,arm64ec
+
+  - name: "{type[0]}"
+    doc: "Duplicate vector element to vector or scalar"
+    arguments: ["a: {type[1]}"]
+    return_type: "{neon_type[2]}"
+    attr:
+      - *neon-stable
+    assert_instr: ['{type[3]}']
+    safety: safe
+    types:
+      - ['vget_high_f64', 'float64x2_t', 'float64x1_t', 'fmov', 'float64x1_t([simd_extract!(a, 1)])']
+      - ['vget_low_f64', 'float64x2_t', 'float64x1_t', 'nop', 'float64x1_t([simd_extract!(a, 0)])']
+    compose:
+      - Identifier: ['{type[4]}', UnsafeSymbol]
+
+  - name: "vcombine{neon_type[0].noq}"
+    doc: Join two smaller vectors into a single larger vector
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
+    return_type: "{neon_type[1]}"
+    attr:
+      - *neon-stable
+    assert_instr: [mov]
+    safety: safe
+    types:
+      - [float64x1_t, float64x2_t, '[0, 1]']
+    compose:
+      - FnCall: [simd_shuffle!, [a, b, '{type[2]}']]
+
+  - name: "vgetq_lane_{type[1]}"
+    doc: "Duplicate vector element to vector or scalar"
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-stable
+      - FnCall: [rustc_legacy_const_generics, ['1']]
+    assert_instr: [['nop', 'IMM5 = 0']]
+    safety: safe
+    static_defs: ["const IMM5: i32"]
+    types:
+      - ['float64x2_t', 'f64']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [IMM5, "1"]]
+      - FnCall: ['simd_extract!', [a, 'IMM5 as u32'], [] , true]
diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
index 43dd3b9031..972d838f42 100644
--- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
+++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
@@ -10,6 +10,10 @@ auto_big_endian: true
 neon-stable: &neon-stable
   FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]
 
+# #[stable(feature = "stdarch_neon_fp16", since = "1.94.0")]
+neon-stable-fp16: &neon-stable-fp16
+  FnCall: [stable, ['feature = "stdarch_neon_fp16"', 'since = "1.94.0"']]
+
 # #[cfg_attr(target_arch = "arm", unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800"))]
 neon-cfg-arm-unstable: &neon-cfg-arm-unstable
   FnCall: ['cfg_attr', ['target_arch = "arm"', {FnCall: ['unstable', ['feature = "stdarch_arm_neon_intrinsics"', 'issue = "111800"']]}]]
@@ -51,6 +55,10 @@ neon-target-aarch64-arm64ec: &neon-target-aarch64-arm64ec
 neon-not-arm-stable: &neon-not-arm-stable
   FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "neon_intrinsics"', 'since = "1.59.0"']]}]]
 
+# #[cfg_attr(not(target_arch = "arm"), stable(feature = "stdarch_neon_fp16", since = "1.94.0"))]
+neon-not-arm-stable-fp16: &neon-not-arm-stable-fp16
+  FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [stable, ['feature = "stdarch_neon_fp16"', 'since = "1.94.0"']]}]]
+
 # #[cfg_attr(all(test, not(target_env = "msvc"))]
 msvc-disabled: &msvc-disabled
   FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]
@@ -71,6 +79,10 @@ neon-i8mm: &neon-i8mm
 neon-fp16: &neon-fp16
   FnCall: [target_feature, ['enable = "neon,fp16"']]
 
+# #[cfg_attr[target_arch = "arm", target_feature(enable = "neon,fp16")]
+arm-fp16: &arm-fp16
+  FnCall: [cfg_attr, ['target_arch = "arm"', {FnCall: [target_feature, ['enable = "fp16"']]}]]
+
 enable-fcma: &enable-fcma
   FnCall: [cfg_attr, [{ FnCall: [not, ['target_arch = "arm"']]}, { FnCall: [target_feature, ['enable = "fcma"']] }]]
 
@@ -92,6 +104,10 @@ aarch64-crc-stable: &aarch64-crc-stable
 neon-unstable-f16: &neon-unstable-f16
   FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']]
 
+# all(target_endian = "little")
+all-neon-target-aarch64-arm64ec-little-endian: &all-neon-target-aarch64-arm64ec-little-endian
+  FnCall: [all, [test, {FnCall: [any, ['target_arch = "aarch64"', 'target_arch = "arm64ec"']]}, 'target_endian = "little"']]
+
 intrinsics:
   - name: "vand{neon_type.no}"
     doc: Vector bitwise and
@@ -281,7 +297,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vabd.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabd]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -400,7 +417,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vceq.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmeq]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -462,7 +480,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vabs]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fabs]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -487,11 +506,11 @@ intrinsics:
       - ['h_f16', 'f16']
     compose:
       - FnCall:
-          - simd_extract!
+          - 'vget_lane_{type[1]}'
           - - FnCall:
                 - "vabs_{type[1]}"
                 - - FnCall: ["vdup_n_{type[1]}", [a]]
-            - 0
+          - - 0
 
   - name: "vcgt{neon_type[0].no}"
     doc: "Compare signed greater than"
@@ -562,7 +581,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -581,7 +601,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -660,7 +681,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -678,7 +700,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcle.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmle]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -860,7 +883,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacgt.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facgt]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -907,7 +931,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacge.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facge]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -948,7 +973,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacgt.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facgt]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -984,7 +1010,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vacge.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [facge]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -1019,7 +1046,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [scvtf]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -1054,7 +1082,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ucvtf]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -1126,7 +1155,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ucvtf, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
@@ -1158,7 +1188,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzs, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
@@ -1190,7 +1221,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzu, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
@@ -1249,7 +1281,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [scvtf, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
@@ -1410,12 +1443,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_lane_u8, uint8x8_t, uint8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_u8, uint8x8_t, uint8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_lane_p8, poly8x8_t, poly8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_p8, poly8x8_t, poly8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32; 8]']
+      - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32; 16]']
+      - [_lane_u8, uint8x8_t, uint8x8_t, '3', '[N as u32; 8]']
+      - [q_lane_u8, uint8x8_t, uint8x16_t, '3', '[N as u32; 16]']
+      - [_lane_p8, poly8x8_t, poly8x8_t, '3', '[N as u32; 8]']
+      - [q_lane_p8, poly8x8_t, poly8x16_t, '3', '[N as u32; 16]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1434,12 +1467,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_laneq_u8, uint8x16_t, uint8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_u8, uint8x16_t, uint8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [q_laneq_p8, poly8x16_t, poly8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_p8, poly8x16_t, poly8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32; 16]']
+      - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32; 8]']
+      - [q_laneq_u8, uint8x16_t, uint8x16_t, '4', '[N as u32; 16]']
+      - [_laneq_u8, uint8x16_t, uint8x8_t, '4', '[N as u32; 8]']
+      - [q_laneq_p8, poly8x16_t, poly8x16_t, '4', '[N as u32; 16]']
+      - [_laneq_p8, poly8x16_t, poly8x8_t, '4', '[N as u32; 8]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1458,12 +1491,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_u16, uint16x4_t, uint16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_lane_p16, poly16x4_t, poly16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_p16, poly16x4_t, poly16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32; 4]']
+      - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32; 8]']
+      - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[N as u32; 4]']
+      - [q_lane_u16, uint16x4_t, uint16x8_t, '2', '[N as u32; 8]']
+      - [_lane_p16, poly16x4_t, poly16x4_t, '2', '[N as u32; 4]']
+      - [q_lane_p16, poly16x4_t, poly16x8_t, '2', '[N as u32; 8]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1482,12 +1515,12 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_u16, uint16x8_t, uint16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_laneq_p16, poly16x8_t, poly16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_p16, poly16x8_t, poly16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
+      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32; 8]']
+      - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32; 4]']
+      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[N as u32; 8]']
+      - [_laneq_u16, uint16x8_t, uint16x4_t, '3', '[N as u32; 4]']
+      - [q_laneq_p16, poly16x8_t, poly16x8_t, '3', '[N as u32; 8]']
+      - [_laneq_p16, poly16x8_t, poly16x4_t, '3', '[N as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1502,14 +1535,15 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"', 'N = 4']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 4']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *arm-fp16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
-      - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]']
+      - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32; 8]']
+      - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1522,7 +1556,7 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety: safe
@@ -1541,14 +1575,15 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"', 'N = 2']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['1']]
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *arm-fp16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]']
-      - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]']
+      - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32; 4]']
+      - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32; 8]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]]
       - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]
@@ -1681,13 +1716,13 @@ intrinsics:
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, 1]]
       - FnCall:
-          - "transmute{type[3]}"
-          - - FnCall: [simd_extract!, [a, 'N as u32']]
+          - "transmute"
+          - - FnCall: ['vget{neon_type[1].lane_nox}', [a], [N]]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]]
@@ -1698,20 +1733,20 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }']
-      - [int16x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }']
-      - [uint8x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }']
-      - [uint16x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }']
-      - [poly8x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }']
-      - [poly16x8_t, ' static_assert_uimm_bits!(N, 3);', 'unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }']
+      - int8x8_t
+      - int16x8_t
+      - uint8x8_t
+      - uint16x8_t
+      - poly8x8_t
+      - poly16x8_t
     compose:
-      - Identifier: ["{type[1]}", Symbol]
-      - Identifier: ["{type[2]}", Symbol]
+      - FnCall: [static_assert_uimm_bits!, [N, 3]]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7]']]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 15']]}]]
@@ -1722,17 +1757,17 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int8x16_t, ' static_assert_uimm_bits!(N, 4);', 'unsafe { match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), } }']
-      - [uint8x16_t, ' static_assert_uimm_bits!(N, 4);', 'unsafe { match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), } }']
-      - [poly8x16_t, ' static_assert_uimm_bits!(N, 4);', 'unsafe { match N & 0b1111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), 8 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), 9 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), 10 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), 11 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), 12 => simd_shuffle!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), 13 => simd_shuffle!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), 14 => simd_shuffle!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), 15 => simd_shuffle!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), _ => unreachable_unchecked(), } }']
+      - int8x16_t
+      - uint8x16_t
+      - poly8x16_t
     compose:
-      - Identifier: ["{type[1]}", Symbol]
-      - Identifier: ["{type[2]}", Symbol]
+      - FnCall: [static_assert_uimm_bits!, [N, 4]]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7, N as u32 + 8, N as u32 + 9, N as u32 + 10, N as u32 + 11, N as u32 + 12, N as u32 + 13, N as u32 + 14, N as u32 + 15]']]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]]
@@ -1743,61 +1778,65 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x4_t, 'static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }']
-      - [int32x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }']
-      - [uint16x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }']
-      - [uint32x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }']
-      - [poly16x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }']
-      - [float32x4_t, ' static_assert_uimm_bits!(N, 2);', 'unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }']
+      - int16x4_t
+      - int32x4_t
+      - uint16x4_t
+      - uint32x4_t
+      - poly16x4_t
+      - float32x4_t
     compose:
-      - Identifier: ["{type[1]}", Symbol]
-      - Identifier: ["{type[2]}", Symbol]
+      - FnCall: [static_assert_uimm_bits!, [N, 2]]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]']]
 
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 3']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ext, 'N = 3']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [float16x4_t, ' static_assert_uimm_bits!(N, 2); unsafe { match N & 0b11 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6]), _ => unreachable_unchecked(), } }']
+      - float16x4_t
     compose:
-      - Identifier: ["{type[1]}", Symbol]
+      - FnCall: [static_assert_uimm_bits!, [N, 2]]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3]']]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 7']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ext, 'N = 7']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [float16x8_t, ' static_assert_uimm_bits!(N, 3); unsafe { match N & 0b111 { 0 => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), 1 => simd_shuffle!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), 2 => simd_shuffle!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), 3 => simd_shuffle!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), 4 => simd_shuffle!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), 5 => simd_shuffle!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), 6 => simd_shuffle!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), 7 => simd_shuffle!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), _ => unreachable_unchecked(), } }']
+      - float16x8_t
     compose:
-      - Identifier: ["{type[1]}", Symbol]
+      - FnCall: [static_assert_uimm_bits!, [N, 3]]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1, N as u32 + 2, N as u32 + 3, N as u32 + 4, N as u32 + 5, N as u32 + 6, N as u32 + 7]']]
 
 
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vext.8"', 'N = 1']]}]]
@@ -1808,17 +1847,17 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int32x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }']
-      - [uint32x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }']
-      - [float32x2_t, ' static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }']
+      - int32x2_t
+      - uint32x2_t
+      - float32x2_t
     compose:
-      - Identifier: ["{type[1]}", Symbol]
-      - Identifier: ["{type[2]}", Symbol]
+      - FnCall: [static_assert_uimm_bits!, [N, 1]]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']]
 
-  - name: "vext{neon_type[0].no}"
+  - name: "vext{neon_type.no}"
     doc: "Extract vector from pair of vectors"
-    arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
-    return_type: "{neon_type[0]}"
+    arguments: ["a: {neon_type}", "b: {neon_type}"]
+    return_type: "{neon_type}"
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmov, 'N = 1']]}]]
@@ -1829,11 +1868,11 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int64x2_t, 'static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }']
-      - [uint64x2_t, 'static_assert_uimm_bits!(N, 1);', 'unsafe { match N & 0b1 { 0 => simd_shuffle!(a, b, [0, 1]), 1 => simd_shuffle!(a, b, [1, 2]), _ => unreachable_unchecked(), } }']
+      - int64x2_t
+      - uint64x2_t
     compose:
-      - Identifier: ["{type[1]}", Symbol]
-      - Identifier: ["{type[2]}", Symbol]
+      - FnCall: [static_assert_uimm_bits!, [N, 1]]
+      - FnCall: [simd_shuffle!, [a, b, '[N as u32, N as u32 + 1]']]
 
   - name: "vmla{neon_type[0].no}"
     doc: "Multiply-add to accumulator"
@@ -1953,17 +1992,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [int64x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
+      - [int32x4_t, int16x4_t, int16x4_t, '2']
+      - [int32x4_t, int16x4_t, int16x8_t, '3']
+      - [int64x2_t, int32x2_t, int32x2_t, '1']
+      - [int64x2_t, int32x2_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmlal_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlal_lane{neon_type[2].no}"
     doc: "Vector widening multiply accumulate with scalar"
@@ -1979,17 +2018,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x4_t, uint16x4_t, '2']
+      - [uint32x4_t, uint16x4_t, uint16x8_t, '3']
+      - [uint64x2_t, uint32x2_t, uint32x2_t, '1']
+      - [uint64x2_t, uint32x2_t, uint32x4_t, '2']
     compose:
-      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]]
+      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmlal_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, '{type[5]}']]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlal_{neon_type[1]}"
     doc: "Unsigned multiply-add long"
@@ -2108,15 +2147,15 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int32x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x4_t, int16x4_t, int16x4_t, '2']
+      - [int32x4_t, int16x4_t, int16x8_t, '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmlsl_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlsl_lane{neon_type[2].no}"
     doc: "Vector widening multiply subtract with scalar"
@@ -2132,15 +2171,15 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int64x2_t, int32x2_t, int32x2_t, '[LANE as u32, LANE as u32]', '1']
-      - [int64x2_t, int32x2_t, int32x4_t, '[LANE as u32, LANE as u32]', '2']
+      - [int64x2_t, int32x2_t, int32x2_t, '1']
+      - [int64x2_t, int32x2_t, int32x4_t, '2']
     compose:
-      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[4]}"]]
+      - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmlsl_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlsl_lane{neon_type[2].no}"
     doc: "Vector widening multiply subtract with scalar"
@@ -2156,17 +2195,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint32x4_t, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [uint64x2_t, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
+      - [uint32x4_t, uint16x4_t, uint16x4_t, '2']
+      - [uint32x4_t, uint16x4_t, uint16x8_t, '3']
+      - [uint64x2_t, uint32x2_t, uint32x2_t, '1']
+      - [uint64x2_t, uint32x2_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmlsl_{neon_type[1]}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup_lane{neon_type[2].no}', [c], [LANE]]
 
   - name: "vmlsl_{neon_type[1]}"
     doc: "Unsigned multiply-subtract long"
@@ -2233,7 +2272,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vneg.{type[1]}"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fneg]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -2497,7 +2537,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrintn]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frintn]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -2568,8 +2609,8 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - *neon-v7
-      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety:
@@ -2582,13 +2623,12 @@ intrinsics:
       - ["*const f32", float32x2x4_t]
       - ["*const f32", float32x4x4_t]
     compose:
-      - LLVMLink:
-          name: "vld1x{neon_type[1].tuple}.{neon_type[1]}"
-          links:
-            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0"
-              arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0"
-              arch: arm
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers"
@@ -2596,8 +2636,8 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - *neon-v7
-      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety:
@@ -2628,13 +2668,12 @@ intrinsics:
       - ["*const i64", int64x2x3_t]
       - ["*const i64", int64x2x4_t]
     compose:
-      - LLVMLink:
-          name: "ld1x{neon_type[1].tuple}.{neon_type[1]}"
-          links:
-            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0"
-              arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}i{neon_type[1].base}.p0"
-              arch: arm
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers"
@@ -2642,8 +2681,8 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - *neon-v7
-      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety:
@@ -2687,12 +2726,11 @@ intrinsics:
       - ["*const p16", poly16x8x4_t, int16x8x4_t]
     compose:
       - FnCall:
-          - transmute
-          - - FnCall:
-                - "vld1{neon_type[2].no}"
-                - - FnCall:
-                      - transmute
-                      - - a
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers"
@@ -2702,7 +2740,7 @@ intrinsics:
       - *neon-aes
       - *neon-v8
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety:
@@ -2715,12 +2753,11 @@ intrinsics:
       - ["*const p64", poly64x2x4_t, int64x2x4_t]
     compose:
       - FnCall:
-          - transmute
-          - - FnCall:
-                - "vld1{neon_type[2].no}"
-                - - FnCall:
-                      - transmute
-                      - - a
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers"
@@ -2729,8 +2766,8 @@ intrinsics:
     attr:
       - *neon-aes
       - *neon-v8
-      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety:
@@ -2739,12 +2776,11 @@ intrinsics:
       - ["*const p64", poly64x1x2_t, int64x1x2_t]
     compose:
       - FnCall:
-          - transmute
-          - - FnCall:
-                - "vld1{neon_type[2].no}"
-                - - FnCall:
-                      - transmute
-                      - - a
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers"
@@ -2753,8 +2789,8 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1]]}]]
-      - *neon-fp16
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld]]}]]
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety:
@@ -2767,13 +2803,12 @@ intrinsics:
       - ["*const f16", float16x4x4_t]
       - ["*const f16", float16x8x4_t]
     compose:
-      - LLVMLink:
-          name: "vld1x{neon_type[1].tuple}.{neon_type[1]}"
-          links:
-            - link: "llvm.aarch64.neon.ld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0"
-              arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vld1x{neon_type[1].tuple}.v{neon_type[1].lane}f{neon_type[1].base}.p0"
-              arch: arm
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld1{type[2]}_{neon_type[1]}"
     doc: "Load one single-element structure to one lane of one register"
@@ -2785,7 +2820,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1, 'LANE = 0']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ["2"]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
@@ -2806,7 +2841,7 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vld1"]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld1r]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety:
@@ -3399,7 +3434,7 @@ intrinsics:
       - *neon-v7
       - *target-is-arm
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety:
@@ -3428,7 +3463,7 @@ intrinsics:
     attr:
       - *target-not-arm
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety:
@@ -3455,7 +3490,7 @@ intrinsics:
     attr:
       - *neon-v7
       - *target-is-arm
-      - *neon-fp16
+      - *arm-fp16
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld2]]}]]
       - *neon-unstable-f16
       - *target-not-arm64ec
@@ -3486,7 +3521,7 @@ intrinsics:
     attr:
       - *target-not-arm
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2r]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety:
@@ -3516,7 +3551,7 @@ intrinsics:
       - *target-is-arm
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld2', 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ["2"]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs:
@@ -3559,7 +3594,7 @@ intrinsics:
       - *target-not-arm
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld2, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ["2"]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs:
@@ -3600,7 +3635,7 @@ intrinsics:
       - *neon-v7
       - *target-is-arm
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety:
@@ -3629,25 +3664,17 @@ intrinsics:
     attr:
       - *target-not-arm
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety:
       unsafe: [neon]
     types:
-      - ["*const f16", float16x4x3_t, f16]
-      - ["*const f16", float16x8x3_t, f16]
+      - ["*const f16", float16x4x3_t, f16, "4"]
+      - ["*const f16", float16x8x3_t, f16, "8"]
     compose:
-      - LLVMLink:
-          name: "vld3.{neon_type[1]}"
-          arguments:
-            - "ptr: {type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[2]}.p0"
-              arch: aarch64,arm64ec
-      - FnCall:
-          - "_vld3{neon_type[1].nox}"
-          - - "a as _"
+      - FnCall: ["crate::core_arch::macros::deinterleaving_load!", [{ Type: "{type[2]}" }, "{type[3]}", "3", a], [], true]
+
 
   - name: "vld3{neon_type[1].dup_nox}"
     doc: Load single 3-element structure and replicate to all lanes of two registers
@@ -3657,7 +3684,7 @@ intrinsics:
       - *neon-v7
       - *target-is-arm
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld3]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety:
@@ -3687,7 +3714,7 @@ intrinsics:
     attr:
       - *target-not-arm
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3r]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety:
@@ -3717,7 +3744,7 @@ intrinsics:
       - *target-is-arm
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['vld3', 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ["2"]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs:
@@ -3762,7 +3789,7 @@ intrinsics:
       - *target-not-arm
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [ld3, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ["2"]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs:
@@ -3841,23 +3868,17 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - ['*const i8', int8x8x3_t, '*const int8x8_t', i8]
-      - ['*const i16', int16x4x3_t, '*const int16x4_t', i16]
-      - ['*const i32', int32x2x3_t, '*const int32x2_t', i32]
-      - ['*const i8', int8x16x3_t, '*const int8x16_t', i8]
-      - ['*const i16', int16x8x3_t, '*const int16x8_t', i16]
-      - ['*const i32', int32x4x3_t, '*const int32x4_t', i32]
-      - ['*const f32', float32x2x3_t, '*const float32x2_t', f32]
-      - ['*const f32', float32x4x3_t, '*const float32x4_t', f32]
+      - ['*const i8', int8x8x3_t, i8, "8"]
+      - ['*const i16', int16x4x3_t, i16, "4"]
+      - ['*const i32', int32x2x3_t, i32, "2"]
+      - ['*const i8', int8x16x3_t, i8, "16"]
+      - ['*const i16', int16x8x3_t, i16, "8"]
+      - ['*const i32', int32x4x3_t, i32, "4"]
+      - ['*const f32', float32x2x3_t, f32, "2"]
+      - ['*const f32', float32x4x3_t, f32, "4"]
     compose:
-      - LLVMLink:
-          name: 'vld3{neon_type[1].nox}'
-          arguments:
-            - 'ptr: {type[2]}'
-          links:
-            - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']]
+      - FnCall: ["crate::core_arch::macros::deinterleaving_load!", [{ Type: "{type[2]}" }, "{type[3]}", "3", a], [], true]
+
 
   - name: "vld3{neon_type[1].nox}"
     doc: Load multiple 3-element structures to three registers
@@ -3872,14 +3893,12 @@ intrinsics:
     types:
       - ['*const i64', int64x1x3_t, '*const int64x1_t', i64]
     compose:
-      - LLVMLink:
-          name: "vld3{neon_type[1].nox}"
-          arguments:
-            - 'ptr: {type[2]}'
-          links:
-            - link: 'llvm.aarch64.neon.ld3.v{neon_type[1].lane}{type[3]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vld3{neon_type[1].nox}', ['a as _']]
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld3{neon_type[1].nox}"
     doc: Load multiple 3-element structures to three registers
@@ -4337,23 +4356,16 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - ['*const i8', int8x8x4_t, i8, '*const int8x8_t']
-      - ['*const i32', int32x4x4_t, i32, '*const int32x4_t']
-      - ['*const i16', int16x4x4_t, i16, '*const int16x4_t']
-      - ['*const i32', int32x2x4_t, i32, '*const int32x2_t']
-      - ['*const i8', int8x16x4_t, i8, '*const int8x16_t']
-      - ['*const i16', int16x8x4_t, i16, '*const int16x8_t']
-      - ['*const f32', float32x2x4_t, f32, '*const float32x2_t']
-      - ['*const f32', float32x4x4_t, f32, '*const float32x4_t']
+      - ['*const i8', int8x8x4_t, i8, "8"]
+      - ['*const i32', int32x4x4_t, i32, "4"]
+      - ['*const i16', int16x4x4_t, i16, "4"]
+      - ['*const i32', int32x2x4_t, i32, "2"]
+      - ['*const i8', int8x16x4_t, i8, "16"]
+      - ['*const i16', int16x8x4_t, i16, "8"]
+      - ['*const f32', float32x2x4_t, f32, "2"]
+      - ['*const f32', float32x4x4_t, f32, "4"]
     compose:
-      - LLVMLink:
-          name: 'vld4{neon_type[1].nox}'
-          arguments:
-            - 'ptr: {type[3]}'
-          links:
-            - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']]
+      - FnCall: ["crate::core_arch::macros::deinterleaving_load!", [{ Type: "{type[2]}" }, "{type[3]}", "4", a], [], true]
 
   - name: "vld4{neon_type[1].nox}"
     doc: Load multiple 4-element structures to four registers
@@ -4366,14 +4378,12 @@ intrinsics:
     types:
       - ['*const i64', int64x1x4_t, i64, '*const int64x1_t']
     compose:
-      - LLVMLink:
-          name: 'vld4{neon_type[1].nox}'
-          arguments:
-            - 'ptr: {type[3]}'
-          links:
-            - link: 'llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vld4{neon_type[1].nox}', ['a as _']]
+      - FnCall:
+          - 'crate::ptr::read_unaligned'
+          - - MethodCall:
+                - a
+                - cast
+                - []
 
   - name: "vld4{neon_type[1].lane_nox}"
     doc: Load multiple 4-element structures to four registers
@@ -4744,7 +4754,7 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ["2"]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     types:
@@ -4982,7 +4992,7 @@ intrinsics:
     attr:
       - *target-is-arm
       - *neon-v7
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     assert_instr: [vst1]
@@ -5036,17 +5046,7 @@ intrinsics:
     types:
       - [i64, int64x1x2_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'vst2.{neon_type[1]}'
-          arguments:
-            - 'ptr: *mut i8'
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'size: i32'
-          links:
-            - link: 'llvm.arm.neon.vst2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: arm
-      - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', '8']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst2{neon_type[1].nox}"
     doc: "Store multiple 2-element structures from two registers"
@@ -5079,16 +5079,7 @@ intrinsics:
     types:
       - [i64, int64x1x2_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'st2.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst2{neon_type[1].nox}"
     doc: "Store multiple 2-element structures from two registers"
@@ -5100,33 +5091,23 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i8, int8x8x2_t, int8x8_t]
-      - [i16, int16x4x2_t, int16x4_t]
-      - [i32, int32x2x2_t, int32x2_t]
-      - [i8, int8x16x2_t, int8x16_t]
-      - [i16, int16x8x2_t, int16x8_t]
-      - [i32, int32x4x2_t, int32x4_t]
-      - [f32, float32x2x2_t, float32x2_t]
-      - [f32, float32x4x2_t, float32x4_t]
+      - [i8, int8x8x2_t, "8"]
+      - [i16, int16x4x2_t, "4"]
+      - [i32, int32x2x2_t, "2"]
+      - [i8, int8x16x2_t, "16"]
+      - [i16, int16x8x2_t, "8"]
+      - [i32, int32x4x2_t, "4"]
+      - [f32, float32x2x2_t, "2"]
+      - [f32, float32x4x2_t, "4"]
     compose:
-      - LLVMLink:
-          name: 'st2.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']]
-
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true]
 
   - name: "vst2{neon_type[1].nox}"
     doc: "Store multiple 2-element structures from two registers"
     arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
     attr:
       - *target-not-arm
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     assert_instr: [st2]
@@ -5217,7 +5198,7 @@ intrinsics:
       - *target-not-arm
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st2, 'LANE = 0']]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
@@ -5309,7 +5290,7 @@ intrinsics:
     attr:
       - *target-is-arm
       - *neon-v7
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     assert_instr: [vst2]
@@ -5376,7 +5357,7 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst2, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
@@ -5384,7 +5365,7 @@ intrinsics:
       unsafe: [neon]
     types:
       - [f16, float16x4x2_t, '2', float16x4_t, '2']
-      - [f16, float16x8x2_t, '1', float16x8_t, '2']
+      - [f16, float16x8x2_t, '3', float16x8_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[2]}"]]
       - LLVMLink:
@@ -5413,17 +5394,7 @@ intrinsics:
     types:
       - [i64, int64x1x3_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'st3.{neon_type[1].nox}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst3{neon_type[1].nox}"
     doc: "Store multiple 3-element structures from three registers"
@@ -5458,18 +5429,7 @@ intrinsics:
     types:
       - [i64, int64x1x3_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'vst3.{neon_type[1]}'
-          arguments:
-            - 'ptr: *mut i8'
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'size: i32'
-          links:
-            - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}'
-              arch: arm
-      - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', '8']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst3{neon_type[1].nox}"
     doc: "Store multiple 3-element structures from three registers"
@@ -5558,27 +5518,16 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i8, int8x8x3_t, int8x8_t, '1']
-      - [i16, int16x4x3_t, int16x4_t, '2']
-      - [i32, int32x2x3_t, int32x2_t, '4']
-      - [i8, int8x16x3_t, int8x16_t, '1']
-      - [i16, int16x8x3_t, int16x8_t, '2']
-      - [i32, int32x4x3_t, int32x4_t, '4']
-      - [f32, float32x2x3_t, float32x2_t, '4']
-      - [f32, float32x4x3_t, float32x4_t, '4']
+      - [i8, int8x8x3_t, '8']
+      - [i16, int16x4x3_t, '4']
+      - [i32, int32x2x3_t, '2']
+      - [i8, int8x16x3_t, '16']
+      - [i16, int16x8x3_t, '8']
+      - [i32, int32x4x3_t, '4']
+      - [f32, float32x2x3_t, '2']
+      - [f32, float32x4x3_t, '4']
     compose:
-      - LLVMLink:
-          name: 'vst3.{neon_type[1]}'
-          arguments:
-            - 'ptr: *mut i8'
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'size: i32'
-          links:
-            - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}'
-              arch: arm
-      - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true]
 
 
   - name: "vst3{neon_type[1].nox}"
@@ -5587,7 +5536,7 @@ intrinsics:
     attr:
       - *target-is-arm
       - *neon-v7
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     assert_instr: [vst3]
@@ -5656,7 +5605,7 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst3, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
@@ -5690,34 +5639,23 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i8, int8x8x3_t, int8x8_t]
-      - [i16, int16x4x3_t, int16x4_t]
-      - [i32, int32x2x3_t, int32x2_t]
-      - [i8, int8x16x3_t, int8x16_t]
-      - [i16, int16x8x3_t, int16x8_t]
-      - [i32, int32x4x3_t, int32x4_t]
-      - [f32, float32x2x3_t, float32x2_t]
-      - [f32, float32x4x3_t, float32x4_t]
+      - [i8, int8x8x3_t, "8"]
+      - [i16, int16x4x3_t, "4"]
+      - [i32, int32x2x3_t, "2"]
+      - [i8, int8x16x3_t, "16"]
+      - [i16, int16x8x3_t, "8"]
+      - [i32, int32x4x3_t, "4"]
+      - [f32, float32x2x3_t, "2"]
+      - [f32, float32x4x3_t, "4"]
     compose:
-      - LLVMLink:
-          name: 'vst3.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']]
-
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true]
 
   - name: "vst3{neon_type[1].nox}"
     doc: "Store multiple 3-element structures from three registers"
     arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
     attr:
       - *target-not-arm
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     assert_instr: [st3]
@@ -5782,7 +5720,7 @@ intrinsics:
       - *target-not-arm
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st3, 'LANE = 0']]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
@@ -5840,19 +5778,7 @@ intrinsics:
     types:
       - [i64, int64x1x4_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'vst4.{neon_type[1]}'
-          arguments:
-            - 'ptr: *mut i8'
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'size: i32'
-          links:
-            - link: 'llvm.arm.neon.vst4.p0.v{neon_type[1].lane}{type[0]}'
-              arch: arm
-      - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', '8']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst4{neon_type[1].nox}"
     doc: "Store multiple 4-element structures from four registers"
@@ -5866,18 +5792,7 @@ intrinsics:
     types:
       - [i64, int64x1x4_t, int64x1_t]
     compose:
-      - LLVMLink:
-          name: 'vst4.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st4.{neon_type[2]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
+      - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]]
 
   - name: "vst4{neon_type[1].nox}"
     doc: "Store multiple 4-element structures from four registers"
@@ -5996,7 +5911,7 @@ intrinsics:
     attr:
       - *target-is-arm
       - *neon-v7
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     assert_instr: [vst4]
@@ -6066,7 +5981,7 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [vst4, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
@@ -6101,27 +6016,16 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - [i8, int8x8x4_t, int8x8_t]
-      - [i16, int16x4x4_t, int16x4_t]
-      - [i32, int32x2x4_t, int32x2_t]
-      - [i8, int8x16x4_t, int8x16_t]
-      - [i16, int16x8x4_t, int16x8_t]
-      - [i32, int32x4x4_t, int32x4_t]
-      - [f32, float32x2x4_t, float32x2_t]
-      - [f32, float32x4x4_t, float32x4_t]
+      - [i8, int8x8x4_t, "8"]
+      - [i16, int16x4x4_t, "4"]
+      - [i32, int32x2x4_t, "2"]
+      - [i8, int8x16x4_t, "16"]
+      - [i16, int16x8x4_t, "8"]
+      - [i32, int32x4x4_t, "4"]
+      - [f32, float32x2x4_t, "2"]
+      - [f32, float32x4x4_t, "4"]
     compose:
-      - LLVMLink:
-          name: 'vst4.{neon_type[1]}'
-          arguments:
-            - 'a: {type[2]}'
-            - 'b: {type[2]}'
-            - 'c: {type[2]}'
-            - 'd: {type[2]}'
-            - 'ptr: *mut i8'
-          links:
-            - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0'
-              arch: aarch64,arm64ec
-      - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']]
+      - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true]
 
 
   - name: "vst4{neon_type[1].nox}"
@@ -6129,7 +6033,7 @@ intrinsics:
     arguments: ["a: *mut {type[0]}", "b: {neon_type[1]}"]
     attr:
       - *target-not-arm
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     assert_instr: [st4]
@@ -6196,7 +6100,7 @@ intrinsics:
       - *target-not-arm
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [st4, 'LANE = 0']]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
@@ -6230,7 +6134,7 @@ intrinsics:
       - *neon-i8mm
       - *neon-v8
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot]]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [usdot]]}]]
       - *neon-unstable-i8mm
       - *neon-cfg-arm-unstable
     safety: safe
@@ -6246,10 +6150,10 @@ intrinsics:
             - link: "llvm.arm.neon.usdot.v{neon_type[0].lane}i32.v{neon_type[1].lane}i8"
               arch: arm
 
-  - name: "vusdot{type[0]}"
+  - name: "vusdot{neon_type[0].lane_nox}"
     doc: "Dot product index form with unsigned and signed integers"
-    arguments: ["a: {neon_type[1]}", "b: {neon_type[2]}", "c: int8x8_t"]
-    return_type: "{neon_type[1]}"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: int8x8_t"]
+    return_type: "{neon_type[0]}"
     attr:
       - *neon-i8mm
       - *neon-v8
@@ -6261,19 +6165,17 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]']
-      - ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x2_t, uint8x8_t, '']
+      - [int32x4_t, uint8x16_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
       - Let:
           - c
-          - int32x2_t
-          - FnCall: [transmute, [c]]
+          - FnCall: ['vreinterpret_s32_s8', [c]]
       - Let:
           - c
-          - "{type[1]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
-      - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]]
+          - FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]]
+      - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[2]}_s8_s32', [c]]}]]
 
   - name: "vsudot{neon_type[0].lane_nox}"
     doc: "Dot product index form with signed and unsigned integers"
@@ -6283,26 +6185,28 @@ intrinsics:
       - *neon-i8mm
       - *neon-v8
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 0']]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sudot, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-unstable-i8mm
       - *neon-cfg-arm-unstable
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t]
-      - [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t]
+      - [int32x2_t, int8x8_t, uint8x8_t, uint32x2_t, '']
+      - [int32x4_t, int8x16_t, uint8x8_t, uint32x4_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
       - Let:
           - c
-          - uint32x2_t
-          - FnCall: [transmute, [c]]
+          - FnCall: ['vreinterpret_u32_u8', [c]]
       - Let:
           - c
-          - "{type[4]}"
-          - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]
-      - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]]
+          - FnCall: ['vdup{neon_type[3].lane_nox}', [c], [LANE]]
+      - FnCall: 
+          - "vusdot{neon_type[0].no}"
+          - - a
+            - FnCall: ['vreinterpret{type[4]}_u8_u32', [c]]
+            - b
 
   - name: "vmul{neon_type[1].no}"
     doc: Multiply
@@ -6358,7 +6262,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vmul.{type[0]}"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -6382,20 +6287,20 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int16x4_t, int16x4_t, '2']
+      - [int16x8_t, int16x4_t, '2']
+      - [int32x2_t, int32x2_t, '1']
+      - [int32x4_t, int32x2_t, '1']
+      - [uint16x4_t, uint16x4_t, '2']
+      - [uint16x8_t, uint16x4_t, '2']
+      - [uint32x2_t, uint32x2_t, '1']
+      - [uint32x4_t, uint32x2_t, '1']
     compose:
       - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]]
+            - FnCall: ["vdup{neon_type[0].lane_nox}", [b], [LANE]]
 
 
   - name: "vmul{neon_type[0].lane_nox}"
@@ -6408,19 +6313,20 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmul, 'LANE = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['2']]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [float16x4_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [float16x8_t, float16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [float16x4_t, float16x4_t, '2']
+      - [float16x8_t, float16x4_t, '2']
     compose:
       - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: ["simd_shuffle!", [v, v, "{type[3]}"]]
+            - FnCall: ["vdup{neon_type[0].lane_nox}", [v], [LANE]]
 
 
   - name: "vmul{neon_type[0].laneq_nox}"
@@ -6437,20 +6343,20 @@ intrinsics:
     static_defs: ["const LANE: i32"]
     safety: safe
     types:
-      - [int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int16x4_t, int16x8_t, '3']
+      - [int16x8_t, int16x8_t, '3']
+      - [int32x2_t, int32x4_t, '2']
+      - [int32x4_t, int32x4_t, '2']
+      - [uint16x4_t, uint16x8_t, '3']
+      - [uint16x8_t, uint16x8_t, '3']
+      - [uint32x2_t, uint32x4_t, '2']
+      - [uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: ["static_assert_uimm_bits!", [LANE, "{type[2]}"]]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: ["simd_shuffle!", [b, b, "{type[3]}"]]
+            - FnCall: ["vdup{neon_type[0].laneq_nox}", [b], [LANE]]
 
   - name: "vmull{neon_type[1].no}"
     doc: Signed multiply long
@@ -6468,13 +6374,10 @@ intrinsics:
       - ["s16", int16x4_t, int32x4_t]
       - ["s32", int32x2_t, int64x2_t]
     compose:
-      - LLVMLink:
-          name: "smull.{neon_type[1]}"
-          links:
-            - link: "llvm.aarch64.neon.smull.{neon_type[2]}"
-              arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vmulls.{neon_type[2]}"
-              arch: arm
+      - FnCall:
+          - simd_mul
+          - - FnCall: ['simd_cast', [a]]
+            - FnCall: ['simd_cast', [b]]
 
   - name: "vmull{neon_type[1].no}"
     doc: "Unsigned multiply long"
@@ -6492,13 +6395,10 @@ intrinsics:
       - ["u16", uint16x4_t, uint32x4_t]
       - ["u32", uint32x2_t, uint64x2_t]
     compose:
-      - LLVMLink:
-          name: "smull.{neon_type[1]}"
-          links:
-            - link: "llvm.aarch64.neon.umull.{neon_type[2]}"
-              arch: aarch64,arm64ec
-            - link: "llvm.arm.neon.vmullu.{neon_type[2]}"
-              arch: arm
+      - FnCall:
+          - simd_mul
+          - - FnCall: ['simd_cast', [a]]
+            - FnCall: ['simd_cast', [b]]
 
   - name: "vmull{neon_type[1].no}"
     doc: "Polynomial multiply long"
@@ -6612,7 +6512,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vfma]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmla]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -6722,7 +6623,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vsub.{type[0]}"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fsub]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -6741,7 +6643,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vadd.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fadd]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -6884,23 +6787,22 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ["vsubhn"]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [subhn2]]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [subhn2]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [int8x8_t, int16x8_t, int8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, '[0, 1, 2, 3]']
-      - [uint8x8_t, uint16x8_t, uint8x16_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, '[0, 1, 2, 3]']
+      - [int8x8_t, int16x8_t, int8x16_t]
+      - [int16x4_t, int32x4_t, int16x8_t]
+      - [int32x2_t, int64x2_t, int32x4_t]
+      - [uint8x8_t, uint16x8_t, uint8x16_t]
+      - [uint16x4_t, uint32x4_t, uint16x8_t]
+      - [uint32x2_t, uint64x2_t, uint32x4_t]
     compose:
       - Let:
           - d
-          - "{neon_type[0]}"
           - FnCall: ["vsubhn{neon_type[1].noq}", [b, c]]
-      - FnCall: [simd_shuffle!, [a, d, "{type[3]}"]]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, d]]
 
   - name: "vhsub{neon_type[1].no}"
     doc: "Signed halving subtract"
@@ -7050,6 +6952,118 @@ intrinsics:
           - FnCall: [simd_cast, [b]]
       - FnCall: [simd_sub, [c, d]]
 
+  - name: "vusdot{neon_type[0].laneq_nox}"
+    doc: "Dot product index form with unsigned and signed integers"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v8
+      - *neon-i8mm
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vusdot, 'LANE = 3']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [usdot, 'LANE = 3']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
+    static_defs: ["const LANE: i32"]
+    safety: safe
+    types:
+      - [int32x2_t, uint8x8_t, int8x16_t, '']
+      - [int32x4_t, uint8x16_t, int8x16_t, 'q']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
+      - Let: [c, {FnCall: [vreinterpretq_s32_s8, [c]]}]
+      - Let: [c, {FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]}]
+      - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]}]] 
+
+  - name: "vsudot{neon_type[0].laneq_nox}"
+    doc: "Dot product index form with signed and unsigned integers"
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    attr:
+      - *neon-v8
+      - *neon-i8mm
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsudot, 'LANE = 1']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sudot, 'LANE = 3']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [unstable, ['feature = "stdarch_neon_i8mm"', 'issue = "117223"']]
+    static_defs: ["const LANE: i32"]
+    safety: safe
+    types:
+      - [int32x2_t, int8x8_t, uint8x16_t, uint32x2_t, '']
+      - [int32x4_t, int8x16_t, uint8x16_t, uint32x4_t, 'q']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, 2]]
+      - Let:
+          - c
+          - FnCall: [vreinterpretq_u32_u8, [c]]
+      - Let:
+          - c
+          - FnCall: ['vdup{neon_type[3].laneq_nox}', [c], [LANE]]
+      - FnCall: 
+          - "vusdot{neon_type[0].no}"
+          - - a
+            - FnCall: ['vreinterpret{type[4]}_u8_u32', [c]]
+            - b
+
+  - name: "vdot{neon_type[0].laneq_nox}"
+    doc: Dot product arithmetic (indexed)
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    static_defs: ["const LANE: i32"]
+    attr:
+      - *neon-v8
+      - FnCall: [target_feature, ['enable = "neon,dotprod"']]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vsdot, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sdot, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
+    safety: safe
+    types:
+      - [int32x2_t, int8x8_t, int8x16_t, '']
+      - [int32x4_t, int8x16_t, int8x16_t, 'q']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
+      - Let:
+          - c
+          - FnCall: [vreinterpretq_s32_s8, [c]]
+      - Let:
+          - c
+          - FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]
+      - FnCall:
+          - "vdot{neon_type[0].no}"
+          - - a
+            - b
+            - FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]
+
+  - name: "vdot{neon_type[0].laneq_nox}"
+    doc: Dot product arithmetic (indexed)
+    arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[2]}"]
+    return_type: "{neon_type[0]}"
+    static_defs: ["const LANE: i32"]
+    attr:
+      - *neon-v8
+      - FnCall: [target_feature, ['enable = "neon,dotprod"']]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vudot, 'LANE = 0']]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [udot, 'LANE = 0']]}]]
+      - FnCall: [rustc_legacy_const_generics, ['3']]
+      - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]
+    safety: safe
+    types:
+      - [uint32x2_t, uint8x8_t, uint8x16_t, '']
+      - [uint32x4_t, uint8x16_t, uint8x16_t, 'q']
+    compose:
+      - FnCall: [static_assert_uimm_bits!, [LANE, '2']]
+      - Let:
+          - c
+          - FnCall: ['vreinterpretq_u32_u8', [c]]
+      - Let:
+          - c
+          - FnCall: ['vdup{neon_type[0].laneq_nox}', [c], [LANE]]
+      - FnCall:
+          - "vdot{neon_type[0].no}"
+          - - a
+            - b
+            - FnCall: ['vreinterpret{type[3]}_u8_u32', [c]]
+
   - name: "vdot{neon_type[0].no}"
     doc: Dot product arithmetic (vector)
     arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
@@ -7113,23 +7127,21 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]']
-      - [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [int32x2_t, int8x8_t, int8x8_t, '']
+      - [int32x4_t, int8x16_t, int8x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: [transmute, [c]]
+          - FnCall: ['vreinterpret_s32_s8', [c]]
       - Let:
           - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
+          - FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]]
       - FnCall:
           - "vdot{neon_type[0].no}"
           - - a
             - b
-            - FnCall: [transmute, [c]]
+            - FnCall: ['vreinterpret{type[3]}_s8_s32', [c]]
 
   - name: "vdot{neon_type[0].lane_nox}"
     doc: Dot product arithmetic (indexed)
@@ -7146,23 +7158,21 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]']
-      - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [uint32x2_t, uint8x8_t, uint8x8_t, '']
+      - [uint32x4_t, uint8x16_t, uint8x8_t, 'q']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '1']]
       - Let:
           - c
-          - "{neon_type[3]}"
-          - FnCall: [transmute, [c]]
+          - FnCall: ['vreinterpret_u32_u8', [c]]
       - Let:
           - c
-          - "{neon_type[0]}"
-          - FnCall: [simd_shuffle!, [c, c, '{type[4]}']]
+          - FnCall: ['vdup{neon_type[0].lane_nox}', [c], [LANE]]
       - FnCall:
           - "vdot{neon_type[0].no}"
           - - a
             - b
-            - FnCall: [transmute, [c]]
+            - FnCall: ['vreinterpret{type[3]}_u8_u32', [c]]
 
   - name: "vmax{neon_type.no}"
     doc: Maximum (vector)
@@ -7241,7 +7251,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmax]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmax]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -7272,7 +7283,13 @@ intrinsics:
       - float32x2_t
       - float32x4_t
     compose:
-      - FnCall: [simd_fmax, [a, b]]
+      - LLVMLink:
+          name: "fmaxnm.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vmaxnm.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
+              arch: aarch64,arm64ec
 
 
   - name: "vmaxnm{neon_type.no}"
@@ -7284,14 +7301,21 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmaxnm]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmaxnm]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
       - float16x8_t
     compose:
-      - FnCall: [simd_fmax, [a, b]]
+      - LLVMLink:
+          name: "fmaxnm.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vmaxnm.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fmaxnm.{neon_type}"
+              arch: aarch64,arm64ec
 
 
   - name: "vminnm{neon_type.no}"
@@ -7303,14 +7327,21 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vminnm]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fminnm]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
       - float16x4_t
       - float16x8_t
     compose:
-      - FnCall: [simd_fmin, [a, b]]
+      - LLVMLink:
+          name: "fminnm.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vminnm.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fminnm.{neon_type}"
+              arch: aarch64,arm64ec
 
 
   - name: "vmin{neon_type.no}"
@@ -7390,7 +7421,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vmin]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmin]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -7422,7 +7454,13 @@ intrinsics:
       - float32x2_t
       - float32x4_t
     compose:
-      - FnCall: [simd_fmin, [a, b]]
+      - LLVMLink:
+          name: "fminnm.{neon_type}"
+          links:
+            - link: "llvm.arm.neon.vminnm.{neon_type}"
+              arch: arm
+            - link: "llvm.aarch64.neon.fminnm.{neon_type}"
+              arch: aarch64,arm64ec
 
   - name: "vpadd{neon_type.no}"
     doc: Floating-point add pairwise
@@ -7455,7 +7493,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [faddp]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -7524,10 +7563,10 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int16x4_t, int16x4_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]']
+      - [int16x4_t, int16x4_t, int32x4_t, '[N as u32; 4]']
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '2']]
-      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vdup{neon_type[0].lane_nox}', [b], [N]]}]
       - FnCall: [vqdmull_s16, [a, b]]
 
   - name: "vqdmull_lane_s32"
@@ -7544,10 +7583,10 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [int32x2_t, int32x2_t, int64x2_t, '[N as u32, N as u32]']
+      - [int32x2_t, int32x2_t, int64x2_t]
     compose:
       - FnCall: [static_assert_uimm_bits!, [N, '1']]
-      - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
+      - Let: [b, {FnCall: ['vdup{neon_type[0].lane_nox}', [b], [N]]}]
       - FnCall: [vqdmull_s32, [a, b]]
 
   - name: "vqdmlal{neon_type[1].noq}"
@@ -7591,7 +7630,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 2]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
@@ -7610,7 +7649,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlal, N = 1]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlal, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
@@ -7663,7 +7702,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 2]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 2']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
@@ -7682,7 +7721,7 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vqdmlsl, N = 1]]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [sqdmlsl, 'N = 1']]}]]
       - FnCall: [rustc_legacy_const_generics, ['3']]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
@@ -8169,9 +8208,9 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
-      - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
-      - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
+      - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16; 8]) }']
+      - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32; 4]) }']
+      - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64; 2]) }']
     compose:
       - FnCall: [static_assert!, ["{type[2]}"]]
       - LLVMLink:
@@ -8299,7 +8338,8 @@ intrinsics:
       - *neon-fp16
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsqrts]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frsqrts]]}]]
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -8348,7 +8388,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrecpe]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frecpe]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -8397,7 +8438,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrecps]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frecps]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -8426,60 +8468,18 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [poly64x1_t, int32x2_t]
-      - [poly64x1_t, uint32x2_t]
-      - [poly64x2_t, int32x4_t]
-      - [poly64x2_t, uint32x4_t]
       - [p128, int64x2_t]
       - [p128, uint64x2_t]
       - [p128, poly64x2_t]
-      - [poly8x16_t, p128]
       - [p128, int8x16_t]
       - [p128, uint8x16_t]
       - [p128, poly8x16_t]
-      - [int32x2_t, poly64x1_t]
-      - [uint32x2_t, poly64x1_t]
-      - [int32x4_t, poly64x2_t]
-      - [uint32x4_t, poly64x2_t]
-      - [int64x2_t, p128]
-      - [uint64x2_t, p128]
       - [poly64x2_t, p128]
-      - [poly64x1_t, int16x4_t]
-      - [poly64x1_t, uint16x4_t]
-      - [poly64x1_t, poly16x4_t]
-      - [poly64x2_t, int16x8_t]
-      - [poly64x2_t, uint16x8_t]
-      - [poly64x2_t, poly16x8_t]
       - [p128, int32x4_t]
       - [p128, uint32x4_t]
-      - [poly16x4_t, poly64x1_t]
-      - [int16x4_t, poly64x1_t]
-      - [uint16x4_t, poly64x1_t]
-      - [poly16x8_t, poly64x2_t]
-      - [int16x8_t, poly64x2_t]
-      - [uint16x8_t, poly64x2_t]
-      - [int32x4_t, p128]
-      - [uint32x4_t, p128]
-      - [poly64x1_t, int8x8_t]
-      - [poly64x1_t, uint8x8_t]
-      - [poly64x1_t, poly8x8_t]
-      - [poly64x2_t, int8x16_t]
-      - [poly64x2_t, uint8x16_t]
-      - [poly64x2_t, poly8x16_t]
       - [p128, int16x8_t]
       - [p128, uint16x8_t]
       - [p128, poly16x8_t]
-      - [poly8x8_t, poly64x1_t]
-      - [int8x8_t, poly64x1_t]
-      - [uint8x8_t, poly64x1_t]
-      - [poly8x16_t, poly64x2_t]
-      - [int8x16_t, poly64x2_t]
-      - [uint8x16_t, poly64x2_t]
-      - [int16x8_t, p128]
-      - [uint16x8_t, p128]
-      - [poly16x8_t, p128]
-      - [int8x16_t, p128]
-      - [uint8x16_t, p128]
     compose:
       - FnCall: [transmute, [a]]
 
@@ -8494,39 +8494,50 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
-      - [uint8x8_t, int8x8_t]
-      - [poly8x8_t, int8x8_t]
-      - [poly16x4_t, int16x4_t]
-      - [uint16x4_t, int16x4_t]
-      - [uint32x2_t, int32x2_t]
-      - [uint64x1_t, int64x1_t]
-      - [uint8x16_t, int8x16_t]
-      - [poly8x16_t, int8x16_t]
-      - [poly16x8_t, int16x8_t]
-      - [uint16x8_t, int16x8_t]
-      - [uint32x4_t, int32x4_t]
-      - [uint64x2_t, int64x2_t]
-      - [poly8x8_t, uint8x8_t]
-      - [int8x8_t, uint8x8_t]
-      - [poly16x4_t, uint16x4_t]
-      - [int16x4_t, uint16x4_t]
-      - [int32x2_t, uint32x2_t]
-      - [int64x1_t, uint64x1_t]
-      - [poly8x16_t, uint8x16_t]
-      - [int8x16_t, uint8x16_t]
-      - [poly16x8_t, uint16x8_t]
-      - [int16x8_t, uint16x8_t]
-      - [int32x4_t, uint32x4_t]
-      - [int64x2_t, uint64x2_t]
-      - [int8x8_t, poly8x8_t]
-      - [uint8x8_t, poly8x8_t]
-      - [int16x4_t, poly16x4_t]
-      - [uint16x4_t, poly16x4_t]
-      - [int8x16_t, poly8x16_t]
-      - [uint8x16_t, poly8x16_t]
-      - [int16x8_t, poly16x8_t]
-      - [uint16x8_t, poly16x8_t]
+      - [poly64x1_t, int32x2_t]
+      - [poly64x1_t, uint32x2_t]
+      - [poly8x16_t, p128]
+      - [int32x2_t, poly64x1_t]
+      - [uint32x2_t, poly64x1_t]
+      - [int32x4_t, poly64x2_t]
+      - [uint32x4_t, poly64x2_t]
+      - [int64x2_t, p128]
+      - [uint64x2_t, p128]
+      - [poly64x1_t, int16x4_t]
+      - [poly64x1_t, uint16x4_t]
+      - [poly64x1_t, poly16x4_t]
+      - [poly64x2_t, poly16x8_t]
+      - [poly16x4_t, poly64x1_t]
+      - [int16x4_t, poly64x1_t]
+      - [uint16x4_t, poly64x1_t]
+      - [poly16x8_t, poly64x2_t]
+      - [int16x8_t, poly64x2_t]
+      - [uint16x8_t, poly64x2_t]
+      - [int32x4_t, p128]
+      - [uint32x4_t, p128]
+      - [poly64x1_t, int8x8_t]
+      - [poly64x1_t, uint8x8_t]
+      - [poly64x1_t, poly8x8_t]
+      - [poly64x2_t, poly8x16_t]
+      - [poly8x8_t, poly64x1_t]
+      - [int8x8_t, poly64x1_t]
+      - [uint8x8_t, poly64x1_t]
+      - [poly8x16_t, poly64x2_t]
+      - [int8x16_t, poly64x2_t]
+      - [uint8x16_t, poly64x2_t]
+      - [int16x8_t, p128]
+      - [uint16x8_t, p128]
+      - [poly16x8_t, p128]
+      - [int8x16_t, p128]
+      - [uint8x16_t, p128]
+      - [poly64x2_t, int32x4_t]
+      - [poly64x2_t, uint32x4_t]
+      - [poly64x2_t, int16x8_t]
+      - [poly64x2_t, uint16x8_t]
+      - [poly64x2_t, int8x16_t]
+      - [poly64x2_t, uint8x16_t]
       - [int16x4_t, int8x8_t]
       - [uint16x4_t, int8x8_t]
       - [poly16x4_t, int8x8_t]
@@ -8677,19 +8688,15 @@ intrinsics:
       - [uint8x16_t, uint64x2_t]
       - [float32x2_t, int8x8_t]
       - [float32x2_t, int16x4_t]
-      - [float32x2_t, int32x2_t]
       - [float32x2_t, int64x1_t]
       - [float32x4_t, int8x16_t]
       - [float32x4_t, int16x8_t]
-      - [float32x4_t, int32x4_t]
       - [float32x4_t, int64x2_t]
       - [float32x2_t, uint8x8_t]
       - [float32x2_t, uint16x4_t]
-      - [float32x2_t, uint32x2_t]
       - [float32x2_t, uint64x1_t]
       - [float32x4_t, uint8x16_t]
       - [float32x4_t, uint16x8_t]
-      - [float32x4_t, uint32x4_t]
       - [float32x4_t, uint64x2_t]
       - [float32x2_t, poly8x8_t]
       - [float32x2_t, poly16x4_t]
@@ -8698,19 +8705,15 @@ intrinsics:
       - [float32x4_t, p128]
       - [int8x8_t, float32x2_t]
       - [int16x4_t, float32x2_t]
-      - [int32x2_t, float32x2_t]
       - [int64x1_t, float32x2_t]
       - [int8x16_t, float32x4_t]
       - [int16x8_t, float32x4_t]
-      - [int32x4_t, float32x4_t]
       - [int64x2_t, float32x4_t]
       - [uint8x8_t, float32x2_t]
       - [uint16x4_t, float32x2_t]
-      - [uint32x2_t, float32x2_t]
       - [uint64x1_t, float32x2_t]
       - [uint8x16_t, float32x4_t]
       - [uint16x8_t, float32x4_t]
-      - [uint32x4_t, float32x4_t]
       - [uint64x2_t, float32x4_t]
       - [poly8x8_t, float32x2_t]
       - [poly16x4_t, float32x2_t]
@@ -8720,6 +8723,60 @@ intrinsics:
     compose:
       - FnCall: [transmute, [a]]
 
+  - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
+    doc: Vector reinterpret cast operation
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
+      - *neon-not-arm-stable
+      - *neon-cfg-arm-unstable
+    safety: safe
+    types:
+      - [uint8x8_t, int8x8_t]
+      - [poly8x8_t, int8x8_t]
+      - [poly16x4_t, int16x4_t]
+      - [uint16x4_t, int16x4_t]
+      - [uint32x2_t, int32x2_t]
+      - [uint64x1_t, int64x1_t]
+      - [uint8x16_t, int8x16_t]
+      - [poly8x16_t, int8x16_t]
+      - [poly16x8_t, int16x8_t]
+      - [uint16x8_t, int16x8_t]
+      - [uint32x4_t, int32x4_t]
+      - [uint64x2_t, int64x2_t]
+      - [poly8x8_t, uint8x8_t]
+      - [int8x8_t, uint8x8_t]
+      - [poly16x4_t, uint16x4_t]
+      - [int16x4_t, uint16x4_t]
+      - [int32x2_t, uint32x2_t]
+      - [int64x1_t, uint64x1_t]
+      - [poly8x16_t, uint8x16_t]
+      - [int8x16_t, uint8x16_t]
+      - [poly16x8_t, uint16x8_t]
+      - [int16x8_t, uint16x8_t]
+      - [int32x4_t, uint32x4_t]
+      - [int64x2_t, uint64x2_t]
+      - [int8x8_t, poly8x8_t]
+      - [uint8x8_t, poly8x8_t]
+      - [int16x4_t, poly16x4_t]
+      - [uint16x4_t, poly16x4_t]
+      - [int8x16_t, poly8x16_t]
+      - [uint8x16_t, poly8x16_t]
+      - [int16x8_t, poly16x8_t]
+      - [uint16x8_t, poly16x8_t]
+      - [float32x2_t, int32x2_t]
+      - [float32x4_t, int32x4_t]
+      - [float32x2_t, uint32x2_t]
+      - [float32x4_t, uint32x4_t]
+      - [int32x2_t, float32x2_t]
+      - [int32x4_t, float32x4_t]
+      - [uint32x2_t, float32x2_t]
+      - [uint32x4_t, float32x4_t]
+    compose:
+      - FnCall: [transmute, [a]]
 
   - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
     doc: Vector reinterpret cast operation
@@ -8729,61 +8786,81 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       # non-q
       - [float32x2_t, float16x4_t]
-      - [poly16x4_t, float16x4_t]
       - [poly8x8_t, float16x4_t]
       - [int8x8_t, float16x4_t]
-      - [int16x4_t, float16x4_t]
       - [int32x2_t, float16x4_t]
       - [int64x1_t, float16x4_t]
       - [uint8x8_t, float16x4_t]
-      - [uint16x4_t, float16x4_t]
       - [uint32x2_t, float16x4_t]
       - [uint64x1_t, float16x4_t]
       - [float16x4_t, float32x2_t]
-      - [float16x4_t, poly16x4_t]
       - [float16x4_t, poly8x8_t]
       - [float16x4_t, int8x8_t]
-      - [float16x4_t, int16x4_t]
       - [float16x4_t, int32x2_t]
       - [float16x4_t, int64x1_t]
       - [float16x4_t, uint8x8_t]
-      - [float16x4_t, uint16x4_t]
       - [float16x4_t, uint32x2_t]
       - [float16x4_t, uint64x1_t]
       # q
       - [float32x4_t, float16x8_t]
-      - [poly16x8_t, float16x8_t]
       - [poly8x16_t, float16x8_t]
       - [int8x16_t, float16x8_t]
-      - [int16x8_t, float16x8_t]
       - [int32x4_t, float16x8_t]
       - [int64x2_t, float16x8_t]
       - [uint8x16_t, float16x8_t]
-      - [uint16x8_t, float16x8_t]
       - [uint32x4_t, float16x8_t]
       - [uint64x2_t, float16x8_t]
       - [float16x8_t, float32x4_t]
-      - [float16x8_t, poly16x8_t]
       - [float16x8_t, poly8x16_t]
       - [float16x8_t, int8x16_t]
-      - [float16x8_t, int16x8_t]
       - [float16x8_t, int32x4_t]
       - [float16x8_t, int64x2_t]
       - [float16x8_t, uint8x16_t]
-      - [float16x8_t, uint16x8_t]
       - [float16x8_t, uint32x4_t]
       - [float16x8_t, uint64x2_t]
     compose:
       - FnCall: [transmute, [a]]
 
 
+  - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
+    doc: Vector reinterpret cast operation
+    arguments: ["a: {type[0]}"]
+    return_type: "{type[1]}"
+    attr:
+      - *neon-v7
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
+      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
+      - *target-not-arm64ec
+    safety: safe
+    types:
+      # non-q
+      - [poly16x4_t, float16x4_t]
+      - [int16x4_t, float16x4_t]
+      - [uint16x4_t, float16x4_t]
+      - [float16x4_t, poly16x4_t]
+      - [float16x4_t, int16x4_t]
+      - [float16x4_t, uint16x4_t]
+      # q
+      - [poly16x8_t, float16x8_t]
+      - [int16x8_t, float16x8_t]
+      - [uint16x8_t, float16x8_t]
+      - [float16x8_t, poly16x8_t]
+      - [float16x8_t, int16x8_t]
+      - [float16x8_t, uint16x8_t]
+    compose:
+      - FnCall: [transmute, [a]]
+
+
   - name: "vreinterpret{neon_type[1].no}{neon_type[0].noq}"
     doc: Vector reinterpret cast operation
     arguments: ["a: {type[0]}"]
@@ -8792,10 +8869,11 @@ intrinsics:
       - *neon-v8
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       - [poly64x1_t, float16x4_t]
       - [float16x4_t, poly64x1_t]
@@ -8816,7 +8894,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrev64]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [rev64]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -9108,6 +9187,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - ["u64", int8x8_t]
       - ["u64", int16x4_t]
@@ -9131,10 +9211,12 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *arm-fp16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
+    big_endian_inverse: true
     types:
       - ["u64", float16x4_t]
     compose:
@@ -9152,6 +9234,7 @@ intrinsics:
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
+    big_endian_inverse: true
     types:
       - ["u64", poly64x1_t]
     compose:
@@ -9638,7 +9721,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn1]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [trn2]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -9794,7 +9878,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip1]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [zip2]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -9865,7 +9950,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp1]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [uzp2]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -10444,7 +10530,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -10463,7 +10550,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcge.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmge]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -10628,9 +10716,9 @@ intrinsics:
     static_defs: ['const N: i32']
     safety: safe
     types:
-      - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }']
-      - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }']
-      - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }']
+      - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16; 8]) }']
+      - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32; 4]) }']
+      - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64; 2]) }']
     compose:
       - FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']]
       - LLVMLink:
@@ -10703,7 +10791,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzu]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -10720,10 +10809,11 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - *neon-v7
-      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt.f16.f32]]}]]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtn]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *arm-fp16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -10739,8 +10829,9 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtl]]}]]
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *arm-fp16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -10825,21 +10916,21 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, '3']
+      - [_lane_u16, uint16x4_t, uint16x4_t, '2']
+      - [_laneq_u16, uint16x4_t, uint16x8_t, '3']
+      - [q_lane_u16, uint16x8_t, uint16x4_t, '2']
+      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmla{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmla{type[0]}"
     doc: "Vector multiply accumulate with scalar"
@@ -10855,21 +10946,21 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s32, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, '2']
+      - [_lane_u32, uint32x2_t, uint32x2_t, '1']
+      - [_laneq_u32, uint32x2_t, uint32x4_t, '2']
+      - [q_lane_u32, uint32x4_t, uint32x2_t, '1']
+      - [q_laneq_u32, uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmla{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmla{type[0]}"
     doc: "Vector multiply accumulate with scalar"
@@ -10885,17 +10976,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_f32, float32x2_t, float32x2_t, '1']
+      - [_laneq_f32, float32x2_t, float32x4_t, '2']
+      - [q_lane_f32, float32x4_t, float32x2_t, '1']
+      - [q_laneq_f32, float32x4_t, float32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmla{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmls{neon_type[0].N}"
     doc: "Vector multiply subtract with scalar"
@@ -10974,21 +11065,21 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_u16, uint16x4_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_u16, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, '3']
+      - [_lane_u16, uint16x4_t, uint16x4_t, '2']
+      - [_laneq_u16, uint16x4_t, uint16x8_t, '3']
+      - [q_lane_u16, uint16x8_t, uint16x4_t, '2']
+      - [q_laneq_u16, uint16x8_t, uint16x8_t, '3']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmls{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmls{type[0]}"
     doc: "Vector multiply subtract with scalar"
@@ -11004,21 +11095,21 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_u32, uint32x2_t, uint32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_u32, uint32x2_t, uint32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_u32, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_u32, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s32, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, '2']
+      - [_lane_u32, uint32x2_t, uint32x2_t, '1']
+      - [_laneq_u32, uint32x2_t, uint32x4_t, '2']
+      - [q_lane_u32, uint32x4_t, uint32x2_t, '1']
+      - [q_laneq_u32, uint32x4_t, uint32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmls{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmls{type[0]}"
     doc: "Vector multiply subtract with scalar"
@@ -11034,17 +11125,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_f32, float32x2_t, float32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_f32, float32x2_t, float32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_f32, float32x4_t, float32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_f32, float32x4_t, float32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_f32, float32x2_t, float32x2_t, '1']
+      - [_laneq_f32, float32x2_t, float32x4_t, '2']
+      - [q_lane_f32, float32x4_t, float32x2_t, '1']
+      - [q_laneq_f32, float32x4_t, float32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - "vmls{neon_type[1].no}"
           - - a
             - b
-            - FnCall: [simd_shuffle!, [c, c, "{type[4]}"]]
+            - FnCall: ['vdup{type[0]}', [c], [LANE]]
 
   - name: "vmul{neon_type[0].N}"
     doc: "Vector multiply by scalar"
@@ -11129,16 +11220,16 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [float32x2_t, float32x2_t, '_lane_f32', '1', '[LANE as u32, LANE as u32]']
-      - [float32x2_t, float32x4_t, '_laneq_f32', '2', '[LANE as u32, LANE as u32]']
-      - [float32x4_t, float32x2_t, 'q_lane_f32', '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [float32x4_t, float32x4_t, 'q_laneq_f32', '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [float32x2_t, float32x2_t, '_lane_f32', '1']
+      - [float32x2_t, float32x4_t, '_laneq_f32', '2']
+      - [float32x4_t, float32x2_t, 'q_lane_f32', '1']
+      - [float32x4_t, float32x4_t, 'q_laneq_f32', '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']]
       - FnCall:
           - simd_mul
           - - a
-            - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]]
+            - FnCall: ['vdup{type[2]}', [b], [LANE]]
 
   - name: "vqrdmulh{type[0]}"
     doc: "Vector rounding saturating doubling multiply high by scalar"
@@ -11154,17 +11245,17 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [_lane_s16, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_laneq_s16, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_lane_s16, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [_lane_s32, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [_laneq_s32, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]']
-      - [q_lane_s32, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [q_laneq_s32, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
+      - [_lane_s16, int16x4_t, int16x4_t, '2']
+      - [_laneq_s16, int16x4_t, int16x8_t, '3']
+      - [q_lane_s16, int16x8_t, int16x4_t, '2']
+      - [q_laneq_s16, int16x8_t, int16x8_t, '3']
+      - [_lane_s32, int32x2_t, int32x2_t, '1']
+      - [_laneq_s32, int32x2_t, int32x4_t, '2']
+      - [q_lane_s32, int32x4_t, int32x2_t, '1']
+      - [q_laneq_s32, int32x4_t, int32x4_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
-      - Let: [b, "{neon_type[1]}", {FnCall: [simd_shuffle!, [b, b, '{type[4]}']]}]
+      - Let: [b, {FnCall: ['vdup{type[0]}', [b], [LANE]]}]
       - FnCall: ["vqrdmulh{neon_type[1].no}", [a, b]]
 
   - name: "vqrdmulh{neon_type[0].N}"
@@ -11215,7 +11306,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vcgt.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmgt]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -11234,7 +11326,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vclt.f16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcmlt]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -11279,16 +11372,16 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [int16x4_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int16x4_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [int32x2_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [int32x2_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32]']
+      - [int16x4_t, int16x4_t, int32x4_t, '2']
+      - [int16x4_t, int16x8_t, int32x4_t, '3']
+      - [int32x2_t, int32x2_t, int64x2_t, '1']
+      - [int32x2_t, int32x4_t, int64x2_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmull_{neon_type[0]}"
           - - a
-            - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]]
+            - FnCall: ['vdup_lane{neon_type[1].nox}', [b], [LANE]]
 
   - name: "vmull_lane{neon_type[1].no}"
     doc: "Vector long multiply by scalar"
@@ -11304,16 +11397,16 @@ intrinsics:
     static_defs: ['const LANE: i32']
     safety: safe
     types:
-      - [uint16x4_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint16x4_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]']
-      - [uint32x2_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32]']
-      - [uint32x2_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32]']
+      - [uint16x4_t, uint16x4_t, uint32x4_t, '2']
+      - [uint16x4_t, uint16x8_t, uint32x4_t, '3']
+      - [uint32x2_t, uint32x2_t, uint64x2_t, '1']
+      - [uint32x2_t, uint32x4_t, uint64x2_t, '2']
     compose:
       - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]]
       - FnCall:
           - "vmull_{neon_type[0]}"
           - - a
-            - FnCall: [simd_shuffle!, [b, b, "{type[4]}"]]
+            - FnCall: ['vdup_lane{neon_type[1].nox}', [b], [LANE]]
 
   - name: "vfms{neon_type[0].N}"
     doc: "Floating-point fused Multiply-subtract to accumulator(vector)"
@@ -11347,7 +11440,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fmls]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -11382,7 +11476,7 @@ intrinsics:
           - - a
             - FnCall:
                 - "vdup{neon_type[0].N}"
-                - - FnCall: [simd_extract!, [b, 'LANE as u32']]
+                - - FnCall: ['vget{neon_type[1].lane_nox}', [b], [LANE]]
 
   - name: "vrecpe{neon_type.no}"
     doc: "Unsigned reciprocal estimate"
@@ -11463,7 +11557,8 @@ intrinsics:
       - *neon-fp16
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vrsqrte]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [frsqrte]]}]]
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -11577,7 +11672,8 @@ intrinsics:
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vcvt]]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [fcvtzs]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -11787,39 +11883,39 @@ intrinsics:
 
   - name: "vld1{type[0]}"
     visibility: private
-    doc: "Load multiple single-element structures to one, two, three, or four registers"
-    arguments: ["a: {type[1]}", "b: {type[2]}"]
-    return_type: "{neon_type[3]}"
+    arguments: ["a: {type[1]}"]
+    static_defs: ["const ALIGN: i32"]
+    return_type: "{neon_type[2]}"
     attr:
+      - FnCall: [rustc_legacy_const_generics, ['1']]
       - *target-is-arm
       - *enable-v7
-      # - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vld1.8"', "ALIGN = 0"]]}]]
       - *neon-arm-unstable
     safety:
       unsafe: [neon]
     types:
-      - ["_v8i8", "*const i8", "i32", "int8x8_t"]
-      - ["q_v16i8", "*const i8", "i32", "int8x16_t"]
-      - ["_v4i16", "*const i8", "i32", "int16x4_t"]
-      - ["q_v8i16", "*const i8", "i32", "int16x8_t"]
-      - ["_v2i32", "*const i8", "i32", "int32x2_t"]
-      - ["q_v4i32", "*const i8", "i32", "int32x4_t"]
-      - ["_v1i64", "*const i8", "i32", "int64x1_t"]
-      - ["q_v2i64", "*const i8", "i32", "int64x2_t"]
-      - ["_v2f32", "*const i8", "i32", "float32x2_t"]
-      - ["q_v4f32", "*const i8", "i32", "float32x4_t"]
+      - ["_v8i8",   "*const i8", "int8x8_t"   ]
+      - ["q_v16i8", "*const i8", "int8x16_t"  ]
+      - ["_v4i16",  "*const i8", "int16x4_t"  ]
+      - ["q_v8i16", "*const i8", "int16x8_t"  ]
+      - ["_v2i32",  "*const i8", "int32x2_t"  ]
+      - ["q_v4i32", "*const i8", "int32x4_t"  ]
+      - ["_v1i64",  "*const i8", "int64x1_t"  ]
+      - ["q_v2i64", "*const i8", "int64x2_t"  ]
+      - ["_v2f32",  "*const i8", "float32x2_t"]
+      - ["q_v4f32", "*const i8", "float32x4_t"]
     compose:
       - LLVMLink:
           name: "vld1.{type[0]}"
+          arguments: ["a: {type[1]}", "b: i32"]
           links:
-            - link: "llvm.arm.neon.vld1.{neon_type[3]}"
+            - link: "llvm.arm.neon.vld1.{neon_type[2]}"
               arch: arm
-      - FnCall: ["_vld1{type[0]}", [a, b]]
-
+      - FnCall: ["_vld1{type[0]}", [a, ALIGN]]
 
   - name: "vld1{type[0]}"
     visibility: private
-    doc: "Load multiple single-element structures to one, two, three, or four registers"
     arguments: ["a: {type[1]}", "b: {type[2]}"]
     return_type: "{neon_type[3]}"
     attr:
@@ -11827,7 +11923,7 @@ intrinsics:
       - *enable-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop]]}]]
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-arm-unstable
       - *target-not-arm64ec
     safety:
       unsafe: [neon]
@@ -11855,19 +11951,23 @@ intrinsics:
       - *neon-arm-unstable
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
     types:
-      - ['*const i8',  int8x8_t,  '"vld1.8"', 'crate::mem::align_of::<i8>() as i32', '_v8i8']
-      - ['*const i8',  int8x16_t, '"vld1.8"', 'crate::mem::align_of::<i8>() as i32', 'q_v16i8']
-      - ['*const i16', int16x4_t, '"vld1.16"', 'crate::mem::align_of::<i16>() as i32', '_v4i16']
-      - ['*const i16', int16x8_t, '"vld1.16"', 'crate::mem::align_of::<i16>() as i32', 'q_v8i16']
-      - ['*const i32', int32x2_t, 'vldr', 'crate::mem::align_of::<i32>() as i32', '_v2i32']
-      - ['*const i32', int32x4_t, '"vld1.32"', 'crate::mem::align_of::<i32>() as i32', 'q_v4i32']
-      - ['*const i64', int64x1_t, 'vldr', 'crate::mem::align_of::<i64>() as i32', '_v1i64']
-      - ['*const i64', int64x2_t, '"vld1.64"', 'crate::mem::align_of::<i64>() as i32', 'q_v2i64']
+      - ['*const i8',  int8x8_t,  '"vld1.8"',  'crate::mem::align_of::<i8>()',  '_v8i8'  ]
+      - ['*const i8',  int8x16_t, '"vld1.8"',  'crate::mem::align_of::<i8>()',  'q_v16i8']
+      - ['*const i16', int16x4_t, '"vld1.16"', 'crate::mem::align_of::<i16>()', '_v4i16' ]
+      - ['*const i16', int16x8_t, '"vld1.16"', 'crate::mem::align_of::<i16>()', 'q_v8i16']
+      - ['*const i32', int32x2_t, 'vldr',      'crate::mem::align_of::<i32>()', '_v2i32' ]
+      - ['*const i32', int32x4_t, '"vld1.32"', 'crate::mem::align_of::<i32>()', 'q_v4i32']
+      - ['*const i64', int64x1_t, 'vldr',      'crate::mem::align_of::<i64>()', '_v1i64' ]
+      - ['*const i64', int64x2_t, '"vld1.64"', 'crate::mem::align_of::<i64>()', 'q_v2i64']
     compose:
+      - Const:
+          - ALIGN
+          - "i32"
+          - "{type[3]} as i32"
       - FnCall:
           - "vld1{type[4]}"
-          - - 'ptr as *const i8'
-            - '{type[3]}'
+          - ['ptr as *const i8']
+          - ['ALIGN']
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers."
@@ -11881,28 +11981,32 @@ intrinsics:
       - *neon-arm-unstable
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]]
     types:
-      - ['*const u8',  uint8x8_t,   '"vld1.8"', 'neon,v7', 'crate::mem::align_of::<u8>() as i32', '_v8i8']
-      - ['*const u8',  uint8x16_t,  '"vld1.8"', 'neon,v7', 'crate::mem::align_of::<u8>() as i32', 'q_v16i8']
-      - ['*const u16', uint16x4_t,  '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<u16>() as i32', '_v4i16']
-      - ['*const u16', uint16x8_t,  '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<u16>() as i32', 'q_v8i16']
-      - ['*const u32', uint32x2_t,  'vldr', 'neon,v7', 'crate::mem::align_of::<u32>() as i32', '_v2i32']
-      - ['*const u32', uint32x4_t,  '"vld1.32"', 'neon,v7', 'crate::mem::align_of::<u32>() as i32', 'q_v4i32']
-      - ['*const u64', uint64x1_t,  'vldr', 'neon,v7', 'crate::mem::align_of::<u64>() as i32', '_v1i64']
-      - ['*const u64', uint64x2_t,  '"vld1.64"', 'neon,v7', 'crate::mem::align_of::<u64>() as i32', 'q_v2i64']
-      - ['*const p8',  poly8x8_t,   '"vld1.8"', 'neon,v7', 'crate::mem::align_of::<p8>() as i32', '_v8i8']
-      - ['*const p8',  poly8x16_t,  '"vld1.8"', 'neon,v7', 'crate::mem::align_of::<p8>() as i32', 'q_v16i8']
-      - ['*const p16', poly16x4_t,  '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<p16>() as i32', '_v4i16']
-      - ['*const p16', poly16x8_t,  '"vld1.16"', 'neon,v7', 'crate::mem::align_of::<p16>() as i32', 'q_v8i16']
-      - ['*const p64', poly64x2_t,  '"vld1.64"', 'neon,aes', 'crate::mem::align_of::<p64>() as i32', 'q_v2i64']
-      - ['*const f32', float32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::<f32>() as i32', '_v2f32']
-      - ['*const f32', float32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::<f32>() as i32', 'q_v4f32']
-    compose:
+      - ['*const u8',  uint8x8_t,   '"vld1.8"',  'neon,v7',  'crate::mem::align_of::<u8>()',  '_v8i8'  ]
+      - ['*const u8',  uint8x16_t,  '"vld1.8"',  'neon,v7',  'crate::mem::align_of::<u8>()',  'q_v16i8']
+      - ['*const u16', uint16x4_t,  '"vld1.16"', 'neon,v7',  'crate::mem::align_of::<u16>()', '_v4i16' ]
+      - ['*const u16', uint16x8_t,  '"vld1.16"', 'neon,v7',  'crate::mem::align_of::<u16>()', 'q_v8i16']
+      - ['*const u32', uint32x2_t,  'vldr',      'neon,v7',  'crate::mem::align_of::<u32>()', '_v2i32' ]
+      - ['*const u32', uint32x4_t,  '"vld1.32"', 'neon,v7',  'crate::mem::align_of::<u32>()', 'q_v4i32']
+      - ['*const u64', uint64x1_t,  'vldr',      'neon,v7',  'crate::mem::align_of::<u64>()', '_v1i64' ]
+      - ['*const u64', uint64x2_t,  '"vld1.64"', 'neon,v7',  'crate::mem::align_of::<u64>()', 'q_v2i64']
+      - ['*const p8',  poly8x8_t,   '"vld1.8"',  'neon,v7',  'crate::mem::align_of::<p8>()',  '_v8i8'  ]
+      - ['*const p8',  poly8x16_t,  '"vld1.8"',  'neon,v7',  'crate::mem::align_of::<p8>()',  'q_v16i8']
+      - ['*const p16', poly16x4_t,  '"vld1.16"', 'neon,v7',  'crate::mem::align_of::<p16>()', '_v4i16' ]
+      - ['*const p16', poly16x8_t,  '"vld1.16"', 'neon,v7',  'crate::mem::align_of::<p16>()', 'q_v8i16']
+      - ['*const p64', poly64x2_t,  '"vld1.64"', 'neon,aes', 'crate::mem::align_of::<p64>()', 'q_v2i64']
+      - ['*const f32', float32x2_t, 'vldr',      'neon,v7',  'crate::mem::align_of::<f32>()', '_v2f32' ]
+      - ['*const f32', float32x4_t, '"vld1.32"', 'neon,v7',  'crate::mem::align_of::<f32>()', 'q_v4f32']
+    compose:
+      - Const:
+        - ALIGN
+        - "i32"
+        - "{type[4]} as i32"
       - FnCall:
         - transmute
         - - FnCall:
               - "vld1{type[5]}"
-              - - 'ptr as *const i8'
-                - '{type[4]}'
+              - ['ptr as *const i8']
+              - ['ALIGN']
 
   - name: "vld1{neon_type[1].no}"
     doc: "Load multiple single-element structures to one, two, three, or four registers."
@@ -12232,19 +12336,10 @@ intrinsics:
     safety:
       unsafe: [neon]
     types:
-      - ["*const f16", float16x4x4_t, f16]
-      - ["*const f16", float16x8x4_t, f16]
+      - ["*const f16", float16x4x4_t, f16, "4"]
+      - ["*const f16", float16x8x4_t, f16, "8"]
     compose:
-      - LLVMLink:
-          name: "vld4.{neon_type[1]}"
-          arguments:
-            - "ptr: {type[0]}"
-          links:
-            - link: "llvm.aarch64.neon.ld4.v{neon_type[1].lane}{type[2]}.p0"
-              arch: aarch64,arm64ec
-      - FnCall:
-          - "_vld4{neon_type[1].nox}"
-          - - "a as _"
+      - FnCall: ["crate::core_arch::macros::deinterleaving_load!", [{ Type: "{type[2]}" }, "{type[3]}", "4", a], [], true]
 
   - name: "vld4{neon_type[1].dup_nox}"
     doc: Load single 4-element structure and replicate to all lanes of two registers
@@ -13140,14 +13235,14 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [raddhn2]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t, '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t, '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t, '[0, 1, 2, 3]']
+      - [uint8x8_t , uint16x8_t, uint8x16_t, 'vraddhn.i16', int16x8_t]
+      - [uint16x4_t, uint32x4_t, uint16x8_t, 'vraddhn.i32', int32x4_t]
+      - [uint32x2_t, uint64x2_t, uint32x4_t, 'vraddhn.i64', int64x2_t]
     compose:
       - Let:
           - x
@@ -13158,7 +13253,7 @@ intrinsics:
                     - "vraddhn{neon_type[4].noq}"
                     - - FnCall: [transmute, [b]]
                       - FnCall: [transmute, [c]]
-      - FnCall: ["simd_shuffle!", [a, x, '{type[5]}']]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, x]]
 
   - name: "vraddhn_high{neon_type[1].noq}"
     doc: "Rounding Add returning High Narrow (high half)."
@@ -13167,14 +13262,14 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"{type[3]}"']]}]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [raddhn2]]}]]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, {FnCall: [assert_instr, [raddhn2]]}]]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64', '[0, 1, 2, 3]']
+      - [int8x8_t , int16x8_t, int8x16_t, 'vraddhn.i16']
+      - [int16x4_t, int32x4_t, int16x8_t, 'vraddhn.i32']
+      - [int32x2_t, int64x2_t, int32x4_t, 'vraddhn.i64']
     compose:
       - Let:
           - x
@@ -13182,7 +13277,7 @@ intrinsics:
               - "vraddhn{neon_type[1].noq}"
               - - b
                 - c
-      - FnCall: ["simd_shuffle!", [a, x, '{type[4]}']]
+      - FnCall: ['vcombine_{neon_type[0]}', [a, x]]
 
   - name: "vpadd{neon_type.no}"
     doc: "Add pairwise."
@@ -13724,32 +13819,35 @@ intrinsics:
 
   - name: "vst1{type[0]}"
     visibility: private
-    doc: "Store multiple single-element structures from one, two, three, or four registers."
-    arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: {type[3]}"]
+    arguments: ["addr: {type[1]}", "val: {neon_type[2]}"]
+    static_defs: ["const ALIGN: i32"]
     safety:
       unsafe: [neon]
     attr:
       - *target-is-arm
       - *neon-v7
       - *neon-arm-unstable
-      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]]
+      - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[3]}"', "ALIGN = 0"]]}]]
+      - FnCall: ["rustc_legacy_const_generics", ['2']]
     types:
-      - ['_v8i8', '* const i8', int8x8_t, i32, '8']
-      - ['q_v16i8', '* const i8', int8x16_t, i32, '8']
-      - ['_v4i16', '* const i8', int16x4_t, i32, '16']
-      - ['q_v8i16', '* const i8', int16x8_t, i32, '16']
-      - ['_v2i32', '* const i8', int32x2_t, i32, '32']
-      - ['q_v4i32', '* const i8', int32x4_t, i32, '32']
-      - ['_v1i64', '* const i8', int64x1_t, i32, '64']
-      - ['q_v2i64', '* const i8', int64x2_t, i32, '64']
-      - ['_v2f32', '* const i8', float32x2_t, i32, '32']
-      - ['q_v4f32', '* const i8', float32x4_t, i32, '32']
+      - ['_v8i8',   '* const i8', int8x8_t,    '8' ]
+      - ['q_v16i8', '* const i8', int8x16_t,   '8' ]
+      - ['_v4i16',  '* const i8', int16x4_t,   '16']
+      - ['q_v8i16', '* const i8', int16x8_t,   '16']
+      - ['_v2i32',  '* const i8', int32x2_t,   '32']
+      - ['q_v4i32', '* const i8', int32x4_t,   '32']
+      - ['_v1i64',  '* const i8', int64x1_t,   '64']
+      - ['q_v2i64', '* const i8', int64x2_t,   '64']
+      - ['_v2f32',  '* const i8', float32x2_t, '32']
+      - ['q_v4f32', '* const i8', float32x4_t, '32']
     compose:
       - LLVMLink:
           name: "_vst1{type[0]}"
+          arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: i32"]
           links:
             - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0"
               arch: arm
+      - FnCall: ["_vst1{type[0]}",[addr, val, ALIGN]]
 
   - name: "vst1{type[0]}"
     visibility: private
@@ -13761,7 +13859,7 @@ intrinsics:
       - *target-is-arm
       - *neon-v7
       - *neon-fp16
-      - *neon-unstable-f16
+      - *neon-arm-unstable
       - *target-not-arm64ec
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]]
     types:
@@ -13785,37 +13883,39 @@ intrinsics:
       - *neon-arm-unstable
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[2]}"']]}]]
     types:
-      - ['*mut i8',  int8x8_t,   '8', 'a', 'crate::mem::align_of::<i8>() as i32', '_v8i8']
-      - ['*mut i8',  int8x16_t,  '8', 'a', 'crate::mem::align_of::<i8>() as i32', 'q_v16i8']
-      - ['*mut i16', int16x4_t, '16', 'a', 'crate::mem::align_of::<i16>() as i32', '_v4i16']
-      - ['*mut i16', int16x8_t, '16', 'a', 'crate::mem::align_of::<i16>() as i32', 'q_v8i16']
-      - ['*mut i32', int32x2_t, '32', 'a', 'crate::mem::align_of::<i32>() as i32', '_v2i32']
-      - ['*mut i32', int32x4_t, '32', 'a', 'crate::mem::align_of::<i32>() as i32', 'q_v4i32']
-      - ['*mut i64', int64x1_t, '64', 'a', 'crate::mem::align_of::<i64>() as i32', '_v1i64']
-      - ['*mut i64', int64x2_t, '64', 'a', 'crate::mem::align_of::<i64>() as i32', 'q_v2i64']
-      - ['*mut u8',  uint8x8_t,   '8', 'transmute(a)', 'crate::mem::align_of::<u8>() as i32', '_v8i8']
-      - ['*mut u8',  uint8x16_t,  '8', 'transmute(a)', 'crate::mem::align_of::<u8>() as i32', 'q_v16i8']
-      - ['*mut u16', uint16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::<u16>() as i32', '_v4i16']
-      - ['*mut u16', uint16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::<u16>() as i32', 'q_v8i16']
-      - ['*mut u32', uint32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::<u32>() as i32', '_v2i32']
-      - ['*mut u32', uint32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::<u32>() as i32', 'q_v4i32']
-      - ['*mut u64', uint64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::<u64>() as i32', '_v1i64']
-      - ['*mut u64', uint64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::<u64>() as i32', 'q_v2i64']
-      - ['*mut p8',  poly8x8_t,    '8', 'transmute(a)', 'crate::mem::align_of::<p8>() as i32', '_v8i8']
-      - ['*mut p8',  poly8x16_t,   '8', 'transmute(a)', 'crate::mem::align_of::<p8>() as i32', 'q_v16i8']
-      - ['*mut p16', poly16x4_t,  '16', 'transmute(a)', 'crate::mem::align_of::<p16>() as i32', '_v4i16']
-      - ['*mut p16', poly16x8_t,  '16', 'transmute(a)', 'crate::mem::align_of::<p16>() as i32', 'q_v8i16']
-      - ['*mut p64', poly64x1_t,  '64', 'transmute(a)', 'crate::mem::align_of::<p64>() as i32', '_v1i64']
-      - ['*mut p64', poly64x2_t,  '64', 'transmute(a)', 'crate::mem::align_of::<p64>() as i32', 'q_v2i64']
-      - ['*mut f32', float32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::<f32>() as i32', '_v2f32']
-      - ['*mut f32', float32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::<f32>() as i32', 'q_v4f32']
-    compose:
+      - ['*mut i8',  int8x8_t,    '8',  'a',            'crate::mem::align_of::<i8>()',  '_v8i8'  ]
+      - ['*mut i8',  int8x16_t,   '8',  'a',            'crate::mem::align_of::<i8>()',  'q_v16i8']
+      - ['*mut i16', int16x4_t,   '16', 'a',            'crate::mem::align_of::<i16>()', '_v4i16' ]
+      - ['*mut i16', int16x8_t,   '16', 'a',            'crate::mem::align_of::<i16>()', 'q_v8i16']
+      - ['*mut i32', int32x2_t,   '32', 'a',            'crate::mem::align_of::<i32>()', '_v2i32' ]
+      - ['*mut i32', int32x4_t,   '32', 'a',            'crate::mem::align_of::<i32>()', 'q_v4i32']
+      - ['*mut i64', int64x1_t,   '64', 'a',            'crate::mem::align_of::<i64>()', '_v1i64' ]
+      - ['*mut i64', int64x2_t,   '64', 'a',            'crate::mem::align_of::<i64>()', 'q_v2i64']
+      - ['*mut u8',  uint8x8_t,   '8',  'transmute(a)', 'crate::mem::align_of::<u8>()',  '_v8i8'  ]
+      - ['*mut u8',  uint8x16_t,  '8',  'transmute(a)', 'crate::mem::align_of::<u8>()',  'q_v16i8']
+      - ['*mut u16', uint16x4_t,  '16', 'transmute(a)', 'crate::mem::align_of::<u16>()', '_v4i16' ]
+      - ['*mut u16', uint16x8_t,  '16', 'transmute(a)', 'crate::mem::align_of::<u16>()', 'q_v8i16']
+      - ['*mut u32', uint32x2_t,  '32', 'transmute(a)', 'crate::mem::align_of::<u32>()', '_v2i32' ]
+      - ['*mut u32', uint32x4_t,  '32', 'transmute(a)', 'crate::mem::align_of::<u32>()', 'q_v4i32']
+      - ['*mut u64', uint64x1_t,  '64', 'transmute(a)', 'crate::mem::align_of::<u64>()', '_v1i64' ]
+      - ['*mut u64', uint64x2_t,  '64', 'transmute(a)', 'crate::mem::align_of::<u64>()', 'q_v2i64']
+      - ['*mut p8',  poly8x8_t,   '8',  'transmute(a)', 'crate::mem::align_of::<p8>()',  '_v8i8'  ]
+      - ['*mut p8',  poly8x16_t,  '8',  'transmute(a)', 'crate::mem::align_of::<p8>()',  'q_v16i8']
+      - ['*mut p16', poly16x4_t,  '16', 'transmute(a)', 'crate::mem::align_of::<p16>()', '_v4i16' ]
+      - ['*mut p16', poly16x8_t,  '16', 'transmute(a)', 'crate::mem::align_of::<p16>()', 'q_v8i16']
+      - ['*mut p64', poly64x1_t,  '64', 'transmute(a)', 'crate::mem::align_of::<p64>()', '_v1i64' ]
+      - ['*mut p64', poly64x2_t,  '64', 'transmute(a)', 'crate::mem::align_of::<p64>()', 'q_v2i64']
+      - ['*mut f32', float32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::<f32>()', '_v2f32' ]
+      - ['*mut f32', float32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::<f32>()', 'q_v4f32']
+    compose:
+      - Const:
+          - ALIGN
+          - "i32"
+          - "{type[4]} as i32"
       - FnCall:
           - "vst1{type[5]}"
-          - - 'ptr as *const i8'
-            - '{type[3]}'
-            - '{type[4]}'
-
+          - ['ptr as *const i8','{type[3]}']
+          - ['ALIGN']
 
   - name: "vst1{neon_type[1].no}"
     doc: "Store multiple single-element structures from one, two, three, or four registers."
@@ -13839,32 +13939,64 @@ intrinsics:
             - '{type[3]}'
             - '{type[4]}'
 
-
-  - name: "vshiftins{type[0]}"
+  - name: "vshiftlins{type[0]}"
     visibility: private
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
+    return_type: "{neon_type[1]}"
+    safety: safe
+    attr:
+      - *target-is-arm
+      - *neon-v7
+      - *neon-arm-unstable
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ['const N: i32']
+    types:
+      - ['_v8i8',  'int8x8_t',  '8',  'int8x8_t([N as i8; 8 ])' ] 
+      - ['_v16i8', 'int8x16_t', '8',  'int8x16_t([N as i8; 16])']
+      - ['_v4i16', 'int16x4_t', '16', 'int16x4_t([N as i16; 4])']
+      - ['_v8i16', 'int16x8_t', '16', 'int16x8_t([N as i16; 8])']
+      - ['_v2i32', 'int32x2_t', '32', 'int32x2_t([N; 2])'       ]
+      - ['_v4i32', 'int32x4_t', '32', 'int32x4_t([N; 4])'       ]
+      - ['_v1i64', 'int64x1_t', '64', 'int64x1_t([N as i64; 1])']
+      - ['_v2i64', 'int64x2_t', '64', 'int64x2_t([N as i64; 2])']
+    compose:
+      - LLVMLink:
+          name: "_vshiftins{type[0]}"
+          arguments: ["a: {type[1]}", "b: {type[1]}", "c: {type[1]}"]
+          links:
+            - link: "llvm.arm.neon.vshiftins.{neon_type[1]}"
+              arch: arm
+      - FnCall: ["_vshiftlins{type[0]}", [a,b, "const {{ {type[3]} }}"], [], true]
+
+  - name: "vshiftrins{type[0]}"
     doc: "Shift Right and Insert (immediate)"
-    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}"]
+    visibility: private
+    arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"]
     return_type: "{neon_type[1]}"
     safety: safe
     attr:
       - *target-is-arm
       - *neon-v7
       - *neon-arm-unstable
+      - FnCall: [rustc_legacy_const_generics, ['2']]
+    static_defs: ['const N: i32']
     types:
-      - ['_v8i8',  "int8x8_t", '8']
-      - ['_v16i8', 'int8x16_t', '8']
-      - ['_v4i16', 'int16x4_t', '16']
-      - ['_v8i16', 'int16x8_t', '16']
-      - ['_v2i32', 'int32x2_t', '32']
-      - ['_v4i32', 'int32x4_t', '32']
-      - ['_v1i64', 'int64x1_t', '64']
-      - ['_v2i64', 'int64x2_t', '64']
+      - ['_v8i8',  'int8x8_t',  '8',  'int8x8_t([-N as i8; 8 ])' ]
+      - ['_v16i8', 'int8x16_t', '8',  'int8x16_t([-N as i8; 16])']
+      - ['_v4i16', 'int16x4_t', '16', 'int16x4_t([-N as i16; 4])']
+      - ['_v8i16', 'int16x8_t', '16', 'int16x8_t([-N as i16; 8])']
+      - ['_v2i32', 'int32x2_t', '32', 'int32x2_t([-N; 2])'       ]
+      - ['_v4i32', 'int32x4_t', '32', 'int32x4_t([-N; 4])'       ]
+      - ['_v1i64', 'int64x1_t', '64', 'int64x1_t([-N as i64; 1])']
+      - ['_v2i64', 'int64x2_t', '64', 'int64x2_t([-N as i64; 2])']
     compose:
       - LLVMLink:
           name: "_vshiftins{type[0]}"
+          arguments: ["a: {type[1]}", "b: {type[1]}", "c: {type[1]}"]
           links:
             - link: "llvm.arm.neon.vshiftins.{neon_type[1]}"
               arch: arm
+      - FnCall: ["_vshiftrins{type[0]}", [a,b, "const {{ {type[3]} }}"], [], true]
 
   - name: "vsri{neon_type[0].N}"
     doc: "Shift Right and Insert (immediate)"
@@ -13879,18 +14011,18 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
     static_defs: ['const N: i32']
     types:
-      - [uint8x8_t,  "neon,v7",  '8', '1 <= N && N <= 8',   'v8i8', 'int8x8_t::splat',  '-N as i8']
-      - [uint8x16_t, "neon,v7",  '8', '1 <= N && N <= 8',  'v16i8', 'int8x16_t::splat', '-N as i8']
-      - [uint16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16']
-      - [uint16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16']
-      - [uint32x2_t, "neon,v7", '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N']
-      - [uint32x4_t, "neon,v7", '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N']
-      - [uint64x1_t, "neon,v7", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64']
-      - [uint64x2_t, "neon,v7", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64']
-      - [poly8x8_t,  "neon,v7",  '8', '1 <= N && N <= 8',   'v8i8', 'int8x8_t::splat',  '-N as i8']
-      - [poly8x16_t, "neon,v7",  '8', '1 <= N && N <= 8',  'v16i8', 'int8x16_t::splat', '-N as i8']
-      - [poly16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16']
-      - [poly16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16']
+      - [uint8x8_t,  "neon,v7", '8',  '1 <= N && N <= 8',  'v8i8' ]
+      - [uint8x16_t, "neon,v7", '8',  '1 <= N && N <= 8',  'v16i8']
+      - [uint16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16']
+      - [uint16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16']
+      - [uint32x2_t, "neon,v7", '32', '1 <= N && N <= 32', 'v2i32']
+      - [uint32x4_t, "neon,v7", '32', '1 <= N && N <= 32', 'v4i32']
+      - [uint64x1_t, "neon,v7", '64', '1 <= N && N <= 64', 'v1i64']
+      - [uint64x2_t, "neon,v7", '64', '1 <= N && N <= 64', 'v2i64']
+      - [poly8x8_t,  "neon,v7",  '8', '1 <= N && N <= 8',  'v8i8' ]
+      - [poly8x16_t, "neon,v7",  '8', '1 <= N && N <= 8',  'v16i8']
+      - [poly16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16']
+      - [poly16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16']
       ## These live in ./crates/core_arch/src/arm/neon.rs
       #- [poly64x1_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64']
       #- [poly64x2_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64']
@@ -13899,10 +14031,9 @@ intrinsics:
       - FnCall:
           - 'transmute'
           - - FnCall:
-                - "vshiftins_{type[4]}"
+                - "vshiftrins_{type[4]}::<N>"
                 - - FnCall: [transmute, [a]]
                   - FnCall: [transmute, [b]]
-                  - FnCall: ["{type[5]}", ["{type[6]}"]]
 
   - name: "vsri{neon_type[0].N}"
     doc: "Shift Right and Insert (immediate)"
@@ -13917,21 +14048,20 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
     safety: safe
     types:
-      - [int8x8_t,  '8',  '1 <= N && N <= 8',  'v8i8',  'int8x8_t::splat',  '-N as i8']
-      - [int8x16_t, '8',  '1 <= N && N <= 8',  'v16i8', 'int8x16_t::splat', '-N as i8']
-      - [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16']
-      - [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16']
-      - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N']
-      - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N']
-      - [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64']
-      - [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64']
+      - [int8x8_t,  '8',  '1 <= N && N <= 8',  'v8i8' ]
+      - [int8x16_t, '8',  '1 <= N && N <= 8',  'v16i8']
+      - [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16']
+      - [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16']
+      - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32']
+      - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32']
+      - [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64']
+      - [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64']
     compose:
       - FnCall: ["static_assert!", ['{type[2]}']]
       - FnCall:
-          - "vshiftins_{type[3]}"
+          - "vshiftrins_{type[3]}::<N>"
           - - a
             - b
-            - FnCall: ["{type[4]}", ["{type[5]}"]]
 
   - name: "vsli{neon_type[0].N}"
     doc: "Shift Left and Insert (immediate)"
@@ -13946,18 +14076,18 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
     static_defs: ['const N: i32']
     types:
-      - [uint8x8_t,  "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',    'v8i8',  'int8x8_t::splat',  'N as i8']
-      - [uint8x16_t, "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',    'v16i8', 'int8x16_t::splat', 'N as i8']
-      - [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v4i16', 'int16x4_t::splat', 'N as i16']
-      - [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v8i16', 'int16x8_t::splat', 'N as i16']
-      - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N']
-      - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N']
-      - [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64']
-      - [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64']
-      - [poly8x8_t,  "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',     'v8i8', 'int8x8_t::splat',  'N as i8']
-      - [poly8x16_t, "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',    'v16i8', 'int8x16_t::splat', 'N as i8']
-      - [poly16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v4i16', 'int16x4_t::splat', 'N as i16']
-      - [poly16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v8i16', 'int16x8_t::splat', 'N as i16']
+      - [uint8x8_t,  "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',     'v8i8']
+      - [uint8x16_t, "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',    'v16i8']
+      - [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v4i16']
+      - [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v8i16']
+      - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32']
+      - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32']
+      - [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64']
+      - [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64']
+      - [poly8x8_t,  "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',     'v8i8']
+      - [poly8x16_t, "neon,v7", '8',  'static_assert_uimm_bits!', 'N, 3',    'v16i8']
+      - [poly16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v4i16']
+      - [poly16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4',    'v8i16']
       ## These live in ./crates/core_arch/src/arm/neon.rs
       #- [poly64x1_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64']
       #- [poly64x2_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64']
@@ -13966,10 +14096,9 @@ intrinsics:
       - FnCall:
           - 'transmute'
           - - FnCall:
-                - "vshiftins_{type[5]}"
+                - "vshiftlins_{type[5]}::<N>"
                 - - FnCall: [transmute, [a]]
                   - FnCall: [transmute, [b]]
-                  - FnCall: ["{type[6]}", ["{type[7]}"]]
   
   - name: "vsli{neon_type[0].N}"
     doc: "Shift Left and Insert (immediate)"
@@ -13984,21 +14113,20 @@ intrinsics:
       - FnCall: [rustc_legacy_const_generics, ['2']]
     static_defs: ['const N: i32']
     types:
-      - [int8x8_t,  '8',  'static_assert_uimm_bits!', 'N, 3',     'v8i8', 'int8x8_t::splat',  'N as i8']
-      - [int8x16_t, '8',  'static_assert_uimm_bits!', 'N, 3',    'v16i8', 'int8x16_t::splat', 'N as i8']
-      - [int16x4_t, '16', 'static_assert_uimm_bits!', 'N, 4',    'v4i16', 'int16x4_t::splat', 'N as i16']
-      - [int16x8_t, '16', 'static_assert_uimm_bits!', 'N, 4',    'v8i16', 'int16x8_t::splat', 'N as i16']
-      - [int32x2_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N']
-      - [int32x4_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N']
-      - [int64x1_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64']
-      - [int64x2_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64']
+      - [int8x8_t,  '8',  'static_assert_uimm_bits!', 'N, 3',              'v8i8' ]
+      - [int8x16_t, '8',  'static_assert_uimm_bits!', 'N, 3',              'v16i8']
+      - [int16x4_t, '16', 'static_assert_uimm_bits!', 'N, 4',              'v4i16']
+      - [int16x8_t, '16', 'static_assert_uimm_bits!', 'N, 4',              'v8i16']
+      - [int32x2_t, '32', 'static_assert!',           'N >= 0 && N <= 31', 'v2i32']
+      - [int32x4_t, '32', 'static_assert!',           'N >= 0 && N <= 31', 'v4i32']
+      - [int64x1_t, '64', 'static_assert!',           'N >= 0 && N <= 63', 'v1i64']
+      - [int64x2_t, '64', 'static_assert!',           'N >= 0 && N <= 63', 'v2i64']
     compose:
       - FnCall: ["{type[2]}", ['{type[3]}']]
       - FnCall:
-          - "vshiftins_{type[4]}"
+          - "vshiftlins_{type[4]}::<N>"
           - - a
             - b
-            - FnCall: ["{type[5]}", ["{type[6]}"]]
 
   - name: "vcombine{neon_type[0].no}"
     doc: Join two smaller vectors into a single larger vector
@@ -14006,8 +14134,9 @@ intrinsics:
     return_type: "{neon_type[1]}"
     attr:
       - *neon-v7
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *arm-fp16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
@@ -14022,8 +14151,9 @@ intrinsics:
     return_type: "{neon_type[0]}"
     attr:
       - *neon-v7
-      - *neon-fp16
-      - *neon-unstable-f16
+      - *arm-fp16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     assert_instr: [nop]
     safety: safe
@@ -14039,10 +14169,10 @@ intrinsics:
     return_type: "{type[1]}"
     attr:
       - *neon-v7
-      - *neon-fp16
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]]
       - FnCall: [rustc_legacy_const_generics, ["1"]]
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     static_defs: ['const LANE: i32']
@@ -14062,7 +14192,7 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vdup.16"']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, [dup]]}]]
-      - *neon-fp16
+      - *arm-fp16
       - *neon-unstable-f16
       - *target-not-arm64ec
     safety: safe
@@ -14201,7 +14331,6 @@ intrinsics:
     doc: "Load one single-element structure and Replicate to all lanes (of one register)."
     arguments: ["ptr: {type[1]}"]
     return_type: "{neon_type[2]}"
-    big_endian_inverse: false
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['"{type[3]}"']] }  ]]
@@ -14357,26 +14486,26 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] }  ]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['{type[4]}']]}] ]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vaddl_high_s8', 'int8x16_t', 'int16x8_t', 'vaddl', 'saddl2',  'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddl_high_s16', 'int16x8_t', 'int32x4_t', 'vaddl', 'saddl2',  'int16x4_t', '[4, 5, 6, 7]']
-      - ['vaddl_high_s32', 'int32x4_t', 'int64x2_t', 'vaddl', 'saddl2',  'int32x2_t', '[2, 3]']
-      - ['vaddl_high_u8', 'uint8x16_t', 'uint16x8_t', 'vaddl', 'uaddl2',  'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddl_high_u16', 'uint16x8_t', 'uint32x4_t', 'vaddl', 'uaddl2',  'uint16x4_t', '[4, 5, 6, 7]']
-      - ['vaddl_high_u32', 'uint32x4_t', 'uint64x2_t', 'vaddl', 'uaddl2',  'uint32x2_t', '[2, 3]']
+      - ['vaddl_high_s8', 'int8x16_t', 'int16x8_t', 'vaddl', 'saddl2',  'int8x8_t']
+      - ['vaddl_high_s16', 'int16x8_t', 'int32x4_t', 'vaddl', 'saddl2',  'int16x4_t']
+      - ['vaddl_high_s32', 'int32x4_t', 'int64x2_t', 'vaddl', 'saddl2',  'int32x2_t']
+      - ['vaddl_high_u8', 'uint8x16_t', 'uint16x8_t', 'vaddl', 'uaddl2',  'uint8x8_t']
+      - ['vaddl_high_u16', 'uint16x8_t', 'uint32x4_t', 'vaddl', 'uaddl2',  'uint16x4_t']
+      - ['vaddl_high_u32', 'uint32x4_t', 'uint64x2_t', 'vaddl', 'uaddl2',  'uint32x2_t']
     compose:
       - Let:
           - a
           - '{neon_type[5]}'
-          - FnCall: ['simd_shuffle!', [a, a, '{type[6]}']]
+          - FnCall: ['vget_high_{neon_type[1]}', [a]]
       - Let:
           - b
           - '{neon_type[5]}'
-          - FnCall: ['simd_shuffle!', [b, b, '{type[6]}']]
+          - FnCall: ['vget_high_{neon_type[1]}', [b]]
       - Let: [a, '{neon_type[2]}', {FnCall: [simd_cast, [a]]}]
       - Let: [b, '{neon_type[2]}', {FnCall: [simd_cast, [b]]}]
       - FnCall: [simd_add, [a, b]]
@@ -14413,22 +14542,21 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['{type[3]}']] }  ]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['{type[4]}']]}] ]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['{type[4]}']]}] ]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vaddw_high_s8', 'int16x8_t', 'int8x16_t', 'vaddw', 'saddw2', 'int8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddw_high_s16', 'int32x4_t', 'int16x8_t', 'vaddw', 'saddw2', 'int16x4_t', '[4, 5, 6, 7]']
-      - ['vaddw_high_s32', 'int64x2_t', 'int32x4_t', 'vaddw', 'saddw2', 'int32x2_t', '[2, 3]']
-      - ['vaddw_high_u8', 'uint16x8_t', 'uint8x16_t', 'vaddw', 'uaddw2', 'uint8x8_t', '[8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddw_high_u16', 'uint32x4_t', 'uint16x8_t', 'vaddw', 'uaddw2', 'uint16x4_t', '[4, 5, 6, 7]']
-      - ['vaddw_high_u32', 'uint64x2_t', 'uint32x4_t', 'vaddw', 'uaddw2', 'uint32x2_t', '[2, 3]']
+      - ['vaddw_high_s8', 'int16x8_t', 'int8x16_t', 'vaddw', 'saddw2', 'int8x8_t']
+      - ['vaddw_high_s16', 'int32x4_t', 'int16x8_t', 'vaddw', 'saddw2', 'int16x4_t']
+      - ['vaddw_high_s32', 'int64x2_t', 'int32x4_t', 'vaddw', 'saddw2', 'int32x2_t']
+      - ['vaddw_high_u8', 'uint16x8_t', 'uint8x16_t', 'vaddw', 'uaddw2', 'uint8x8_t']
+      - ['vaddw_high_u16', 'uint32x4_t', 'uint16x8_t', 'vaddw', 'uaddw2', 'uint16x4_t']
+      - ['vaddw_high_u32', 'uint64x2_t', 'uint32x4_t', 'vaddw', 'uaddw2', 'uint32x2_t']
     compose:
       - Let:
           - b
-          - '{neon_type[5]}'
-          - FnCall: ['simd_shuffle!', [b, b, '{type[6]}']]
+          - FnCall: ['vget_high_{neon_type[2]}', [b]]
       - Let:
           - b
           - '{neon_type[1]}'
@@ -14471,17 +14599,17 @@ intrinsics:
     attr:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vaddhn']] }  ]]
-      - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, { FnCall: [assert_instr, ['addhn2']]}] ]
+      - FnCall: [cfg_attr, [*all-neon-target-aarch64-arm64ec-little-endian, { FnCall: [assert_instr, ['addhn2']]}] ]
       - *neon-not-arm-stable
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vaddhn_high_s16', 'int8x8_t', 'int16x8_t', 'int8x16_t', 'int16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddhn_high_s32', 'int16x4_t', 'int32x4_t', 'int16x8_t', 'int32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - ['vaddhn_high_s64', 'int32x2_t', 'int64x2_t', 'int32x4_t', 'int64x2_t::splat(32)', '[0, 1, 2, 3]']
-      - ['vaddhn_high_u16', 'uint8x8_t', 'uint16x8_t', 'uint8x16_t', 'uint16x8_t::splat(8)', '[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]']
-      - ['vaddhn_high_u32', 'uint16x4_t', 'uint32x4_t', 'uint16x8_t', 'uint32x4_t::splat(16)', '[0, 1, 2, 3, 4, 5, 6, 7]']
-      - ['vaddhn_high_u64', 'uint32x2_t', 'uint64x2_t', 'uint32x4_t', 'uint64x2_t::splat(32)', '[0, 1, 2, 3]']
+      - ['vaddhn_high_s16', 'int8x8_t', 'int16x8_t', 'int8x16_t', 'int16x8_t::splat(8)']
+      - ['vaddhn_high_s32', 'int16x4_t', 'int32x4_t', 'int16x8_t', 'int32x4_t::splat(16)']
+      - ['vaddhn_high_s64', 'int32x2_t', 'int64x2_t', 'int32x4_t', 'int64x2_t::splat(32)']
+      - ['vaddhn_high_u16', 'uint8x8_t', 'uint16x8_t', 'uint8x16_t', 'uint16x8_t::splat(8)']
+      - ['vaddhn_high_u32', 'uint16x4_t', 'uint32x4_t', 'uint16x8_t', 'uint32x4_t::splat(16)']
+      - ['vaddhn_high_u64', 'uint32x2_t', 'uint64x2_t', 'uint32x4_t', 'uint64x2_t::splat(32)']
     compose:
       - Let:
         - x
@@ -14494,7 +14622,7 @@ intrinsics:
                   - - a
                     - b
                 - '{type[4]}'
-      - FnCall: ['simd_shuffle!', [r, x, '{type[5]}']]
+      - FnCall: ['vcombine_{neon_type[1]}', [r, x]]
 
   - name: "{type[0]}"
     doc: "Vector narrow integer."
@@ -14678,7 +14806,8 @@ intrinsics:
       - *neon-v7
       - FnCall: [cfg_attr, [*test-is-arm, { FnCall: [assert_instr, ['vbsl']]}]]
       - FnCall: [cfg_attr, [*neon-target-aarch64-arm64ec, {FnCall: [assert_instr, ['bsl']]}]]
-      - *neon-unstable-f16
+      - *neon-not-arm-stable-fp16
+      - *neon-cfg-arm-unstable
       - *target-not-arm64ec
     safety: safe
     types:
@@ -14884,10 +15013,11 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vget_high_s64', 'int64x2_t', 'int64x1_t', 'vmov', 'ext', 'unsafe { int64x1_t([simd_extract!(a, 1)]) }']
-      - ['vget_high_u64', 'uint64x2_t', 'uint64x1_t', 'vmov', 'ext', 'unsafe { uint64x1_t([simd_extract!(a, 1)]) }']
+      - ['vget_high_s64', 'int64x2_t',   'int64x1_t',   'vmov', 'ext',  'int64x1_t([simd_extract!(a, 1)])']
+      - ['vget_high_u64', 'uint64x2_t',  'uint64x1_t',  'vmov', 'ext',  'uint64x1_t([simd_extract!(a, 1)])']
+      - ['vget_high_p64', 'poly64x2_t',  'poly64x1_t',  'vmov', 'ext',  'transmute(u64x1::new(simd_extract!(a, 1)))']
     compose:
-      - Identifier: ['{type[5]}', Symbol]
+      - Identifier: ['{type[5]}', UnsafeSymbol]
 
   - name: "{type[0]}"
     doc: "Duplicate vector element to vector or scalar"
@@ -14900,10 +15030,11 @@ intrinsics:
       - *neon-cfg-arm-unstable
     safety: safe
     types:
-      - ['vget_low_s64', 'int64x2_t', 'int64x1_t', 'unsafe { int64x1_t([simd_extract!(a, 0)]) }']
-      - ['vget_low_u64', 'uint64x2_t', 'uint64x1_t', 'unsafe { uint64x1_t([simd_extract!(a, 0)]) }']
+      - ['vget_low_s64', 'int64x2_t',   'int64x1_t',   'int64x1_t([simd_extract!(a, 0)])']
+      - ['vget_low_u64', 'uint64x2_t',  'uint64x1_t',  'uint64x1_t([simd_extract!(a, 0)])']
+      - ['vget_low_p64', 'poly64x2_t',  'poly64x1_t',  'transmute(u64x1::new(simd_extract!(a, 0)))']
     compose:
-      - Identifier: ['{type[3]}', Symbol]
+      - Identifier: ['{type[3]}', UnsafeSymbol]
 
   - name: "{type[0]}"
     doc: "Duplicate vector element to vector or scalar"
diff --git a/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml
new file mode 100644
index 0000000000..29dd3a095d
--- /dev/null
+++ b/crates/stdarch-gen-arm/spec/sve/aarch64.spec.yml
@@ -0,0 +1,5201 @@
+arch_cfgs:
+  - arch_name: aarch64
+    target_feature: [sve]
+    llvm_prefix: llvm.aarch64.sve
+
+uses_neon_types: true
+auto_llvm_sign_conversion: true
+generate_load_store_tests: true
+
+# `#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]`
+sve-unstable: &sve-unstable
+  FnCall: [unstable, ['feature = "stdarch_aarch64_sve"', 'issue= "145052"']]
+
+intrinsics:
+  - name: svacge[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Absolute compare greater than or equal to
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [f32, f64]
+    assert_instr: [facge]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "facge.{sve_type}" }
+
+  - name: svacgt[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Absolute compare greater than
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [f32, f64]
+    assert_instr: [facgt]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "facgt.{sve_type}" }
+
+  - name: svacle[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Absolute compare less than or equal to
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [f32, f64]
+    assert_instr: [facge]
+    n_variant_op: op2
+    compose:
+      - FnCall: ["svacge_{type}", [$pg, $op2, $op1]]
+
+  - name: svaclt[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Absolute compare less than
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [f32, f64]
+    assert_instr: [facgt]
+    n_variant_op: op2
+    compose:
+      - FnCall: ["svacgt_{type}", [$pg, $op2, $op1]]
+
+  - name: svcadd[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Complex add with rotate
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    static_defs: ["const IMM_ROTATION: i32"]
+    constraints: [{ variable: IMM_ROTATION, any_values: [90, 270] }]
+    assert_instr: [[fcadd, "IMM_ROTATION = 90"]]
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink:
+          name: fcadd.{sve_type}
+          arguments:
+            - "pg: {predicate}"
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "imm_rotation: i32"
+      - FnCall: ["{llvm_link}", [$pg, $op1, $op2, $IMM_ROTATION]]
+
+  - name: svcmla[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Complex multiply-add with rotate
+    arguments:
+      - "pg: {predicate}"
+      - "op1: {sve_type}"
+      - "op2: {sve_type}"
+      - "op3: {sve_type}"
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    static_defs: ["const IMM_ROTATION: i32"]
+    constraints: [{ variable: IMM_ROTATION, any_values: [0, 90, 180, 270] }]
+    assert_instr: [[fcmla, "IMM_ROTATION = 90"]]
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink:
+          name: fcmla.{sve_type}
+          arguments:
+            - "pg: {predicate}"
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "imm_rotation: i32"
+      - FnCall: ["{llvm_link}", [$pg, $op1, $op2, $op3, $IMM_ROTATION]]
+
+  - name: svcmla_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Complex multiply-add with rotate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32]
+    static_defs: ["const IMM_INDEX: i32", "const IMM_ROTATION: i32"]
+    constraints:
+      - variable: IMM_INDEX
+        range: { match_size: "{type}", default: [0, 1], halfword: [0, 3] }
+      - { variable: IMM_ROTATION, any_values: [0, 90, 180, 270] }
+    assert_instr: [[fcmla, "IMM_INDEX = 0, IMM_ROTATION = 90"]]
+    compose:
+      - LLVMLink:
+          name: fcmla.lane.x.{sve_type}
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "imm_index: i32"
+            - "imm_rotation: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX, $IMM_ROTATION]]
+
+  - name: svadd[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Add
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    assert_instr: ["{type_kind.f}add"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.f}add.{sve_type}" }
+
+  - name: svqsub[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating subtract
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.su}qsub"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}qsub.x.{sve_type}" }
+
+  - name: svcnt[_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Count nonzero bits
+    arguments:
+      ["inactive: {sve_type[1]}", "pg: {predicate[0]}", "op: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [f32, u32]
+      - [f64, u64]
+      - [i8, u8]
+      - [i16, u16]
+      - [i32, u32]
+      - [i64, u64]
+      - [u8, u8]
+      - [u16, u16]
+      - [u32, u32]
+      - [u64, u64]
+    zeroing_method: { drop: inactive }
+    assert_instr: [cnt]
+    compose:
+      - LLVMLink: { name: "cnt.{sve_type[0]}" }
+
+  - name: svcls[_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Count leading sign bits
+    arguments:
+      ["inactive: {sve_type[1]}", "pg: {predicate[0]}", "op: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types: [[i8, u8], [i16, u16], [i32, u32], [i64, u64]]
+    zeroing_method: { drop: inactive }
+    assert_instr: [cls]
+    compose:
+      - LLVMLink: { name: "cls.{sve_type[0]}" }
+
+  - name: svclz[_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Count leading zero bits
+    arguments:
+      ["inactive: {sve_type[1]}", "pg: {predicate[0]}", "op: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [i8, u8]
+      - [i16, u16]
+      - [i32, u32]
+      - [i64, u64]
+      - [u8, u8]
+      - [u16, u16]
+      - [u32, u32]
+      - [u64, u64]
+    zeroing_method: { drop: inactive }
+    assert_instr: [clz]
+    compose:
+      - LLVMLink: { name: "clz.{sve_type[0]}" }
+
+  - name: svext{size_literal[1]}[_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    substitutions:
+      sign_or_zero:
+        match_kind: "{type[0]}"
+        default: Sign
+        unsigned: Zero
+      kind_literal: { match_kind: "{type[0]}", default: s, unsigned: u }
+    doc: "{sign_or_zero}-extend the low {size[1]} bits"
+    arguments:
+      ["inactive: {sve_type[0]}", "pg: {predicate[0]}", "op: {sve_type[0]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], i8]
+      - [[i32, i64, u32, u64], i16]
+      - [[i64, u64], i32]
+    zeroing_method: { drop: inactive }
+    assert_instr: ["{type_kind[0].su}xt{size_literal[1]}"]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}xt{size_literal[1]}.{sve_type[0]}"
+
+  - name: svsqrt[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Square root
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    zeroing_method: { drop: inactive }
+    assert_instr: [fsqrt]
+    compose:
+      - LLVMLink: { name: "fsqrt.{sve_type}" }
+
+  - name: svcmpeq[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Compare equal to
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [{ float: fcmeq, default: cmpeq }]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.f}cmpeq.{sve_type}" }
+
+  - name: svcmpeq_wide[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Compare equal to
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{predicate[0]}"
+    types:
+      - [[i8, i16, i32], i64]
+    assert_instr: [cmpeq]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "cmpeq.wide.{sve_type[0]}" }
+
+  - name: svcmpge[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Compare greater than or equal to
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [{ float: fcmge, default: cmpge, unsigned: cmphs }]
+    n_variant_op: op2
+    compose:
+      - MatchKind:
+          - "{type}"
+          - default:
+              LLVMLink: { name: "{type_kind.f}cmpge.{sve_type}" }
+            unsigned:
+              LLVMLink: { name: "cmphs.{sve_type}" }
+
+  - name: svcmpge_wide[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Compare greater than or equal to
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{predicate[0]}"
+    n_variant_op: op2
+    types:
+      - [[i8, i16, i32], i64]
+      - [[u8, u16, u32], u64]
+    assert_instr: [{ default: cmpge, unsigned: cmphs }]
+    compose:
+      - MatchKind:
+          - "{type[0]}"
+          - default:
+              LLVMLink: { name: "cmpge.wide.{sve_type[0]}" }
+            unsigned:
+              LLVMLink: { name: "cmphs.wide.{sve_type[0]}" }
+
+  - name: svcmpgt[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Compare greater than
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [{ float: fcmgt, default: cmpgt, unsigned: cmphi }]
+    n_variant_op: op2
+    compose:
+      - MatchKind:
+          - "{type}"
+          - default:
+              LLVMLink: { name: "{type_kind.f}cmpgt.{sve_type}" }
+            unsigned:
+              LLVMLink: { name: "cmphi.{sve_type}" }
+
+  - name: svcmpgt_wide[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Compare greater than
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{predicate[0]}"
+    types:
+      - [[i8, i16, i32], i64]
+      - [[u8, u16, u32], u64]
+    assert_instr: [{ default: cmpgt, unsigned: cmphi }]
+    n_variant_op: op2
+    compose:
+      - MatchKind:
+          - "{type[0]}"
+          - default:
+              LLVMLink: { name: "cmpgt.wide.{sve_type[0]}" }
+            unsigned:
+              LLVMLink: { name: "cmphi.wide.{sve_type[0]}" }
+
+  - name: svcmple[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Compare less than or equal to
+    arguments: ["pg: svbool_t", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "svbool_t"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [{ float: fcmge, default: cmpge, unsigned: cmphs }]
+    n_variant_op: op2
+    compose:
+      - FnCall: ["svcmpge_{type}", [$pg, $op2, $op1]]
+
+  - name: svcmple_wide[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Compare less than or equal to
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{predicate[0]}"
+    types:
+      - [[i8, i16, i32], i64]
+      - [[u8, u16, u32], u64]
+    assert_instr: [{ default: cmple, unsigned: cmpls }]
+    n_variant_op: op2
+    compose:
+      - MatchKind:
+          - "{type[0]}"
+          - default:
+              LLVMLink: { name: "cmple.wide.{sve_type[0]}" }
+            unsigned:
+              LLVMLink: { name: "cmpls.wide.{sve_type[0]}" }
+
+  - name: svcmplt[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Compare less than
+    arguments: ["pg: svbool_t", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "svbool_t"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [{ float: fcmgt, default: cmpgt, unsigned: cmphi }]
+    n_variant_op: op2
+    compose:
+      - FnCall: ["svcmpgt_{type}", [$pg, $op2, $op1]]
+
+  - name: svcmplt_wide[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Compare less than
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{predicate[0]}"
+    types:
+      - [[i8, i16, i32], i64]
+      - [[u8, u16, u32], u64]
+    assert_instr: [{ default: cmplt, unsigned: cmplo }]
+    n_variant_op: op2
+    compose:
+      - MatchKind:
+          - "{type[0]}"
+          - default:
+              LLVMLink: { name: "cmplt.wide.{sve_type[0]}" }
+            unsigned:
+              LLVMLink: { name: "cmplo.wide.{sve_type[0]}" }
+
+  - name: svcmpne[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Compare not equal to
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [{ float: fcmne, default: cmpne }]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.f}cmpne.{sve_type}" }
+
+  - name: svcmpne_wide[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Compare not equal to
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{predicate[0]}"
+    types: [[[i8, i16, i32], i64]]
+    assert_instr: [cmpne]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "cmpne.wide.{sve_type[0]}" }
+
+  - name: svcmpuo[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Compare unordered with
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [f32, f64]
+    assert_instr: [fcmuo]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "fcmpuo.{sve_type}" }
+
+  - name: svcnt{size_literal}
+    attr: [*sve-unstable]
+    doc: Count the number of {size}-bit elements in a vector
+    arguments: []
+    return_type: u64
+    types: [i8, i16, i32, i64]
+    assert_instr:
+      - default: { byte: rdvl, halfword: cnth, default: cntw, doubleword: cntd }
+    compose:
+      - FnCall: ["svcnt{size_literal}_pat", [], ["{{ svpattern::SV_ALL }}"]]
+
+  - name: svcnt{size_literal}_pat
+    attr: [*sve-unstable]
+    doc: Count the number of {size}-bit elements in a vector
+    arguments: []
+    static_defs: ["const PATTERN: svpattern"]
+    return_type: u64
+    assert_instr:
+      - [rdvl, "PATTERN = {{ svpattern::SV_ALL }}"]
+      - ["cnt{size_literal}", "PATTERN = {{ svpattern::SV_MUL4 }}"]
+    types: [i8]
+    compose:
+      - LLVMLink:
+          name: cnt{size_literal}
+          arguments: ["pattern: svpattern"]
+      - FnCall: ["{llvm_link}", [$PATTERN]]
+
+  - name: svcnt{size_literal}_pat
+    attr: [*sve-unstable]
+    doc: Count the number of {size}-bit elements in a vector
+    arguments: []
+    static_defs: ["const PATTERN: svpattern"]
+    return_type: u64
+    assert_instr: [["cnt{size_literal}", "PATTERN = {{ svpattern::SV_ALL }}"]]
+    types: [i16, i32, i64]
+    compose:
+      - LLVMLink:
+          name: cnt{size_literal}
+          arguments: ["pattern: svpattern"]
+      - FnCall: ["{llvm_link}", [$PATTERN]]
+
+  - name: svlen[_{type}]
+    attr: [*sve-unstable]
+    doc: Count the number of elements in a full vector
+    arguments: ["_op: {sve_type}"]
+    return_type: "u64"
+    types: [i8, u8, i16, u16, i32, u32, f32, i64, u64, f64]
+    assert_instr: [{ default: { default: "cnt{size_literal}", byte: rdvl } }]
+    compose:
+      - FnCall: ["svcnt{size_literal}", []]
+
+  - name: svdup[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a scalar value
+    arguments: ["op: {type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [mov]
+    compose:
+      - LLVMLink: { name: "dup.x.{sve_type}" }
+
+  - name: svdup[_n]_{type}{_mxz}
+    attr: [*sve-unstable]
+    doc: Broadcast a scalar value
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { drop: inactive }
+    assert_instr: [mov]
+    compose:
+      - LLVMLink: { name: "dup.{sve_type}" }
+
+  - name: svdup[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a scalar value
+    arguments: ["op: bool"]
+    return_type: "{predicate}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [sbfx, whilelo]
+    compose:
+      - LLVMLink: { name: "dup.x.{sve_type}" }
+
+  - name: svdup_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Broadcast a scalar value
+    arguments: ["data: {sve_type[0]}", "index: {type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [f32, u32]
+      - [f64, u64]
+      - [i8, u8]
+      - [i16, u16]
+      - [i32, u32]
+      - [i64, u64]
+      - [u8, u8]
+      - [u16, u16]
+      - [u32, u32]
+      - [u64, u64]
+    assert_instr: [tbl]
+    compose:
+      - FnCall:
+          - svtbl_{type[0]}
+          - - $data
+            - FnCall: ["svdup_n_{type[1]}", [$index]]
+
+  - name: svdupq_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Broadcast a quadword of scalars
+    arguments: ["data: {sve_type}", "index: u64"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [tbl]
+    compose:
+      - LLVMLink: { name: "dupq.lane.{sve_type}" }
+
+  - name: svdupq[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a quadword of scalars
+    arguments:
+      - "x0: {type}"
+      - "x1: {type}"
+      - "x2: {type}"
+      - "x3: {type}"
+      - "x4: {type}"
+      - "x5: {type}"
+      - "x6: {type}"
+      - "x7: {type}"
+      - "x8: {type}"
+      - "x9: {type}"
+      - "x10: {type}"
+      - "x11: {type}"
+      - "x12: {type}"
+      - "x13: {type}"
+      - "x14: {type}"
+      - "x15: {type}"
+    return_type: "{sve_type}"
+    types: [i8, u8]
+    assert_instr: []
+    compose:
+      - LLVMLink:
+          name: llvm.vector.insert.{sve_type}.{neon_type}
+          arguments: ["op0: {sve_type}", "op1: {neon_type}", "idx: i64"]
+      - Let:
+          - op
+          - FnCall:
+              - "{llvm_link}"
+              - - FnCall: ["svundef_{type}", [], [], true]
+                - FnCall:
+                    - "crate::mem::transmute"
+                    - - - $x0
+                        - $x1
+                        - $x2
+                        - $x3
+                        - $x4
+                        - $x5
+                        - $x6
+                        - $x7
+                        - $x8
+                        - $x9
+                        - $x10
+                        - $x11
+                        - $x12
+                        - $x13
+                        - $x14
+                        - $x15
+                - 0
+      - FnCall: ["svdupq_lane_{type}", [$op, 0]]
+
+  - name: svdupq[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a quadword of scalars
+    types: [b8]
+    arguments:
+      - "x0: bool"
+      - "x1: bool"
+      - "x2: bool"
+      - "x3: bool"
+      - "x4: bool"
+      - "x5: bool"
+      - "x6: bool"
+      - "x7: bool"
+      - "x8: bool"
+      - "x9: bool"
+      - "x10: bool"
+      - "x11: bool"
+      - "x12: bool"
+      - "x13: bool"
+      - "x14: bool"
+      - "x15: bool"
+    return_type: "svbool_t"
+    assert_instr: []
+    compose:
+      - Let:
+          - op1
+          - FnCall:
+              - svdupq_n_s8
+              - - CastAs: [$x0, i8]
+                - CastAs: [$x1, i8]
+                - CastAs: [$x2, i8]
+                - CastAs: [$x3, i8]
+                - CastAs: [$x4, i8]
+                - CastAs: [$x5, i8]
+                - CastAs: [$x6, i8]
+                - CastAs: [$x7, i8]
+                - CastAs: [$x8, i8]
+                - CastAs: [$x9, i8]
+                - CastAs: [$x10, i8]
+                - CastAs: [$x11, i8]
+                - CastAs: [$x12, i8]
+                - CastAs: [$x13, i8]
+                - CastAs: [$x14, i8]
+                - CastAs: [$x15, i8]
+      - FnCall:
+          - svcmpne_wide_s8
+          - - FnCall: [svptrue_b8, []]
+            - $op1
+            - FnCall: [svdup_n_s64, [0]]
+
+  - name: svdupq[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a quadword of scalars
+    arguments:
+      - "x0: {type}"
+      - "x1: {type}"
+      - "x2: {type}"
+      - "x3: {type}"
+      - "x4: {type}"
+      - "x5: {type}"
+      - "x6: {type}"
+      - "x7: {type}"
+    return_type: "{sve_type}"
+    types: [i16, u16]
+    assert_instr: []
+    compose:
+      - LLVMLink:
+          name: llvm.vector.insert.{sve_type}.{neon_type}
+          arguments: ["op0: {sve_type}", "op1: {neon_type}", "idx: i64"]
+      - Let:
+          - op
+          - FnCall:
+              - "{llvm_link}"
+              - - FnCall: ["svundef_{type}", [], [], true]
+                - FnCall:
+                    - "crate::mem::transmute"
+                    - - [$x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7]
+                - 0
+      - FnCall: ["svdupq_lane_{type}", [$op, 0]]
+
+  - name: svdupq[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a quadword of scalars
+    types: [b16]
+    arguments:
+      - "x0: bool"
+      - "x1: bool"
+      - "x2: bool"
+      - "x3: bool"
+      - "x4: bool"
+      - "x5: bool"
+      - "x6: bool"
+      - "x7: bool"
+    return_type: svbool_t
+    assert_instr: []
+    compose:
+      - Let:
+          - op1
+          - FnCall:
+              - svdupq_n_s16
+              - - CastAs: [$x0, i16]
+                - CastAs: [$x1, i16]
+                - CastAs: [$x2, i16]
+                - CastAs: [$x3, i16]
+                - CastAs: [$x4, i16]
+                - CastAs: [$x5, i16]
+                - CastAs: [$x6, i16]
+                - CastAs: [$x7, i16]
+      - FnCall:
+          - svcmpne_wide_s16
+          - - FnCall: [svptrue_b16, []]
+            - $op1
+            - FnCall: [svdup_n_s64, [0]]
+
+  - name: svdupq[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a quadword of scalars
+    arguments: ["x0: {type}", "x1: {type}", "x2: {type}", "x3: {type}"]
+    return_type: "{sve_type}"
+    types: [f32, i32, u32]
+    assert_instr: []
+    compose:
+      - LLVMLink:
+          name: llvm.vector.insert.{sve_type}.{neon_type}
+          arguments: ["op0: {sve_type}", "op1: {neon_type}", "idx: i64"]
+      - Let:
+          - op
+          - FnCall:
+              - "{llvm_link}"
+              - - FnCall: ["svundef_{type}", [], [], true]
+                - FnCall: ["crate::mem::transmute", [[$x0, $x1, $x2, $x3]]]
+                - 0
+      - FnCall: ["svdupq_lane_{type}", [$op, 0]]
+
+  - name: svdupq[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a quadword of scalars
+    types: [b32]
+    arguments: ["x0: bool", "x1: bool", "x2: bool", "x3: bool"]
+    return_type: "svbool_t"
+    assert_instr: []
+    compose:
+      - Let:
+          - op1
+          - FnCall:
+              - svdupq_n_s32
+              - - CastAs: [$x0, i32]
+                - CastAs: [$x1, i32]
+                - CastAs: [$x2, i32]
+                - CastAs: [$x3, i32]
+      - FnCall:
+          - svcmpne_wide_s32
+          - - FnCall: [svptrue_b32, []]
+            - $op1
+            - FnCall: [svdup_n_s64, [0]]
+
+  - name: svdupq[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a quadword of scalars
+    arguments: ["x0: {type}", "x1: {type}"]
+    return_type: "{sve_type}"
+    types: [f64, i64, u64]
+    assert_instr: []
+    compose:
+      - LLVMLink:
+          name: llvm.vector.insert.{sve_type}.{neon_type}
+          arguments: ["op0: {sve_type}", "op1: {neon_type}", "idx: i64"]
+      - Let:
+          - op
+          - FnCall:
+              - "{llvm_link}"
+              - - FnCall: ["svundef_{type}", [], [], true]
+                - FnCall: ["crate::mem::transmute", [[$x0, $x1]]]
+                - 0
+      - FnCall: ["svdupq_lane_{type}", [$op, 0]]
+
+  - name: svdupq[_n]_{type}
+    attr: [*sve-unstable]
+    doc: Broadcast a quadword of scalars
+    types: [b64]
+    arguments: ["x0: bool", "x1: bool"]
+    return_type: "svbool_t"
+    assert_instr: []
+    compose:
+      - Let:
+          - op1
+          - FnCall: [svdupq_n_s64, [CastAs: [$x0, i64], CastAs: [$x1, i64]]]
+      - FnCall:
+          - svcmpne_s64
+          - - FnCall: [svptrue_b64, []]
+            - $op1
+            - FnCall: [svdup_n_s64, [0]]
+
+  - name: svcreate2[_{type}]
+    attr: [*sve-unstable]
+    doc: Create a tuple of two vectors
+    arguments: ["x0: {sve_type}", "x1: {sve_type}"]
+    return_type: "{sve_type_x2}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["crate::intrinsics::simd::scalable::sve_tuple_create2", [$x0, $x1], [], true]
+
+  - name: svcreate3[_{type}]
+    attr: [*sve-unstable]
+    doc: Create a tuple of three vectors
+    arguments: ["x0: {sve_type}", "x1: {sve_type}", "x2: {sve_type}"]
+    return_type: "{sve_type_x3}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["crate::intrinsics::simd::scalable::sve_tuple_create3", [$x0, $x1, $x2], [], true]
+
+  - name: svcreate4[_{type}]
+    attr: [*sve-unstable]
+    doc: Create a tuple of four vectors
+    arguments:
+      ["x0: {sve_type}", "x1: {sve_type}", "x2: {sve_type}", "x3: {sve_type}"]
+    return_type: "{sve_type_x4}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["crate::intrinsics::simd::scalable::sve_tuple_create4", [$x0, $x1, $x2, $x3], [], true]
+
+  - name: svundef_{type}
+    attr: [*sve-unstable]
+    safety:
+      unsafe: [uninitialized]
+    doc: Create an uninitialized vector
+    arguments: []
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["svdup_n_{type}", ["0"]]
+
+  - name: svundef_{type}
+    attr: [*sve-unstable]
+    safety:
+      unsafe: [uninitialized]
+    doc: Create an uninitialized vector
+    arguments: []
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: []
+    compose:
+      - FnCall: ["svdup_n_{type}", ["0{type}"]]
+
+  - name: svundef2_{type}
+    attr: [*sve-unstable]
+    safety:
+      unsafe: [uninitialized]
+    doc: Create an uninitialized tuple of two vectors
+    arguments: []
+    return_type: "{sve_type_x2}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall:
+        - "svcreate2_{type}"
+        - - FnCall: ["svdup_n_{type}", ["0"]]
+          - FnCall: ["svdup_n_{type}", ["0"]]
+
+  - name: svundef2_{type}
+    attr: [*sve-unstable]
+    safety:
+      unsafe: [uninitialized]
+    doc: Create an uninitialized tuple of two vectors
+    arguments: []
+    return_type: "{sve_type_x2}"
+    types: [f32, f64]
+    assert_instr: []
+    compose:
+      - FnCall:
+        - "svcreate2_{type}"
+        - - FnCall: ["svdup_n_{type}", ["0{type}"]]
+          - FnCall: ["svdup_n_{type}", ["0{type}"]]
+
+  - name: svundef3_{type}
+    attr: [*sve-unstable]
+    safety:
+      unsafe: [uninitialized]
+    doc: Create an uninitialized tuple of three vectors
+    arguments: []
+    return_type: "{sve_type_x3}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall:
+        - "svcreate3_{type}"
+        - - FnCall: ["svdup_n_{type}", ["0"]]
+          - FnCall: ["svdup_n_{type}", ["0"]]
+          - FnCall: ["svdup_n_{type}", ["0"]]
+
+  - name: svundef3_{type}
+    attr: [*sve-unstable]
+    safety:
+      unsafe: [uninitialized]
+    doc: Create an uninitialized tuple of three vectors
+    arguments: []
+    return_type: "{sve_type_x3}"
+    types: [f32, f64]
+    assert_instr: []
+    compose:
+      - FnCall:
+        - "svcreate3_{type}"
+        - - FnCall: ["svdup_n_{type}", ["0{type}"]]
+          - FnCall: ["svdup_n_{type}", ["0{type}"]]
+          - FnCall: ["svdup_n_{type}", ["0{type}"]]
+
+  - name: svundef4_{type}
+    attr: [*sve-unstable]
+    safety:
+      unsafe: [uninitialized]
+    doc: Create an uninitialized tuple of four vectors
+    arguments: []
+    return_type: "{sve_type_x4}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall:
+        - "svcreate4_{type}"
+        - - FnCall: ["svdup_n_{type}", ["0"]]
+          - FnCall: ["svdup_n_{type}", ["0"]]
+          - FnCall: ["svdup_n_{type}", ["0"]]
+          - FnCall: ["svdup_n_{type}", ["0"]]
+
+  - name: svundef4_{type}
+    attr: [*sve-unstable]
+    safety:
+      unsafe: [uninitialized]
+    doc: Create an uninitialized tuple of four vectors
+    arguments: []
+    return_type: "{sve_type_x4}"
+    types: [f32, f64]
+    assert_instr: []
+    compose:
+      - FnCall:
+        - "svcreate4_{type}"
+        - - FnCall: ["svdup_n_{type}", ["0{type}"]]
+          - FnCall: ["svdup_n_{type}", ["0{type}"]]
+          - FnCall: ["svdup_n_{type}", ["0{type}"]]
+          - FnCall: ["svdup_n_{type}", ["0{type}"]]
+
+  - name: svindex_{type}
+    attr: [*sve-unstable]
+    doc: Create linear series
+    arguments: ["base: {type}", "step: {type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [index]
+    compose:
+      - LLVMLink: { name: "index.{sve_type}" }
+
+  - name: svget2[_{type}]
+    attr: [*sve-unstable]
+    doc: Extract one vector from a tuple of two vectors
+    arguments: ["tuple: {sve_type_x2}"]
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, range: [0, 1] }]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["crate::intrinsics::simd::scalable::sve_tuple_get", [$tuple], ["_", "_", "{{IMM_INDEX}}"], true]
+
+  - name: svget3[_{type}]
+    attr: [*sve-unstable]
+    doc: Extract one vector from a tuple of three vectors
+    arguments: ["tuple: {sve_type_x3}"]
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, range: [0, 2] }]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["crate::intrinsics::simd::scalable::sve_tuple_get", [$tuple], ["_", "_", "{{IMM_INDEX}}"], true]
+
+  - name: svget4[_{type}]
+    attr: [*sve-unstable]
+    doc: Extract one vector from a tuple of four vectors
+    arguments: ["tuple: {sve_type_x4}"]
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, range: [0, 3] }]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["crate::intrinsics::simd::scalable::sve_tuple_get", [$tuple], ["_", "_", "{{IMM_INDEX}}"], true]
+
+  - name: svset2[_{type}]
+    attr: [*sve-unstable]
+    doc: Change one vector in a tuple of two vectors
+    arguments: ["tuple: {sve_type_x2}", "x: {sve_type}"]
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, range: [0, 1] }]
+    return_type: "{sve_type_x2}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["crate::intrinsics::simd::scalable::sve_tuple_set", [$tuple, $x], ["_", "_", "{{IMM_INDEX}}"], true]
+
+  - name: svset3[_{type}]
+    attr: [*sve-unstable]
+    doc: Change one vector in a tuple of three vectors
+    arguments: ["tuple: {sve_type_x3}", "x: {sve_type}"]
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, range: [0, 2] }]
+    return_type: "{sve_type_x3}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["crate::intrinsics::simd::scalable::sve_tuple_set", [$tuple, $x], ["_", "_", "{{IMM_INDEX}}"], true]
+
+  - name: svset4[_{type}]
+    attr: [*sve-unstable]
+    doc: Change one vector in a tuple of four vectors
+    arguments: ["tuple: {sve_type_x4}", "x: {sve_type}"]
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, range: [0, 3] }]
+    return_type: "{sve_type_x4}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: []
+    compose:
+      - FnCall: ["crate::intrinsics::simd::scalable::sve_tuple_set", [$tuple, $x], ["_", "_", "{{IMM_INDEX}}"], true]
+
+  - name: svzip1[_{type}]
+    attr: [*sve-unstable]
+    doc: Interleave elements from low halves of two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [zip1]
+    compose:
+      - LLVMLink: { name: "zip1.{sve_type}" }
+
+  - name: svzip1_{type}
+    attr: [*sve-unstable]
+    doc: Interleave elements from low halves of two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [zip1]
+    compose:
+      - LLVMLink: { name: "zip1.{sve_type}" }
+
+  - name: svzip1q[_{type}]
+    attr: [*sve-unstable]
+    doc: Interleave quadwords from low halves of two inputs
+    target_features: [f64mm]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [zip1]
+    compose:
+      - LLVMLink: { name: "zip1q.{sve_type}" }
+
+  - name: svzip2[_{type}]
+    attr: [*sve-unstable]
+    doc: Interleave elements from high halves of two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [zip2]
+    compose:
+      - LLVMLink: { name: "zip2.{sve_type}" }
+
+  - name: svzip2_{type}
+    attr: [*sve-unstable]
+    doc: Interleave elements from high halves of two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [zip2]
+    compose:
+      - LLVMLink: { name: "zip2.{sve_type}" }
+
+  - name: svzip2q[_{type}]
+    attr: [*sve-unstable]
+    doc: Interleave quadwords from high halves of two inputs
+    target_features: [f64mm]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [zip2]
+    compose:
+      - LLVMLink: { name: "zip2q.{sve_type}" }
+
+  - name: svuzp1[_{type}]
+    attr: [*sve-unstable]
+    doc: Concatenate even elements from two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [uzp1]
+    compose:
+      - LLVMLink: { name: "uzp1.{sve_type}" }
+
+  - name: svuzp1_{type}
+    attr: [*sve-unstable]
+    doc: Concatenate even elements from two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [uzp1]
+    compose:
+      - LLVMLink: { name: "uzp1.{sve_type}" }
+
+  - name: svuzp1q[_{type}]
+    attr: [*sve-unstable]
+    doc: Concatenate even quadwords from two inputs
+    target_features: [f64mm]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [uzp1]
+    compose:
+      - LLVMLink: { name: "uzp1q.{sve_type}" }
+
+  - name: svuzp2[_{type}]
+    attr: [*sve-unstable]
+    doc: Concatenate odd elements from two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [uzp2]
+    compose:
+      - LLVMLink: { name: "uzp2.{sve_type}" }
+
+  - name: svuzp2_{type}
+    attr: [*sve-unstable]
+    doc: Concatenate odd elements from two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [uzp2]
+    compose:
+      - LLVMLink: { name: "uzp2.{sve_type}" }
+
+  - name: svuzp2q[_{type}]
+    attr: [*sve-unstable]
+    doc: Concatenate odd quadwords from two inputs
+    target_features: [f64mm]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [uzp2]
+    compose:
+      - LLVMLink: { name: "uzp2q.{sve_type}" }
+
+  - name: svtrn1[_{type}]
+    attr: [*sve-unstable]
+    doc: Interleave even elements from two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [trn1]
+    compose:
+      - LLVMLink: { name: "trn1.{sve_type}" }
+
+  - name: svtrn1_{type}
+    attr: [*sve-unstable]
+    doc: Interleave even elements from two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [trn1]
+    compose:
+      - LLVMLink: { name: "trn1.{sve_type}" }
+
+  - name: svtrn1q[_{type}]
+    attr: [*sve-unstable]
+    doc: Interleave even quadwords from two inputs
+    target_features: [f64mm]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [trn1]
+    compose:
+      - LLVMLink: { name: "trn1q.{sve_type}" }
+
+  - name: svtrn2[_{type}]
+    attr: [*sve-unstable]
+    doc: Interleave odd elements from two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [trn2]
+    compose:
+      - LLVMLink: { name: "trn2.{sve_type}" }
+
+  - name: svtrn2_{type}
+    attr: [*sve-unstable]
+    doc: Interleave odd elements from two inputs
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [trn2]
+    compose:
+      - LLVMLink: { name: "trn2.{sve_type}" }
+
+  - name: svtrn2q[_{type}]
+    attr: [*sve-unstable]
+    doc: Interleave odd quadwords from two inputs
+    target_features: [f64mm]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [trn2]
+    compose:
+      - LLVMLink: { name: "trn2q.{sve_type}" }
+
+  - name: svrev[_{type}]
+    attr: [*sve-unstable]
+    doc: Reverse all elements
+    arguments: ["op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [rev]
+    compose:
+      - LLVMLink: { name: "llvm.vector.reverse.{sve_type}" }
+
+  - name: svrev_{type}
+    attr: [*sve-unstable]
+    doc: Reverse all elements
+    arguments: ["op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [rev]
+    compose:
+      - LLVMLink: { name: "llvm.vector.reverse.{sve_type}" }
+
+  - name: svrevb[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Reverse bytes within elements
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i16, i32, i64, u16, u32, u64]
+    zeroing_method: { drop: "inactive" }
+    assert_instr: [revb]
+    compose:
+      - LLVMLink: { name: "revb.{sve_type}" }
+
+  - name: svrevh[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Reverse halfwords within elements
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i32, i64, u32, u64]
+    zeroing_method: { drop: "inactive" }
+    assert_instr: [revh]
+    compose:
+      - LLVMLink: { name: "revh.{sve_type}" }
+
+  - name: svrevw[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Reverse words within elements
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i64, u64]
+    zeroing_method: { drop: "inactive" }
+    assert_instr: [revw]
+    compose:
+      - LLVMLink: { name: "revw.{sve_type}" }
+
+  - name: svrbit[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Reverse bits
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { drop: "inactive" }
+    assert_instr: [rbit]
+    compose:
+      - LLVMLink: { name: "rbit.{sve_type}" }
+
+  - name: svext[_{type}]
+    attr: [*sve-unstable]
+    doc: Extract vector from pair of vectors
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM3: i32"]
+    constraints: [{ variable: IMM3, sve_max_elems_type: "{type}" }]
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [[ext, "IMM3 = 1"]]
+    compose:
+      - LLVMLink:
+          name: ext.{sve_type}
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm3: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, $IMM3]]
+
+  - name: svsplice[_{type}]
+    attr: [*sve-unstable]
+    doc: Splice two vectors under predicate control
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [splice]
+    compose:
+      - LLVMLink: { name: "splice.{sve_type}" }
+
+  - name: svinsr[_n_{type}]
+    attr: [*sve-unstable]
+    doc: Insert scalar in shifted vector
+    arguments: ["op1: {sve_type}", "op2: {type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [insr]
+    compose:
+      - LLVMLink: { name: "insr.{sve_type}" }
+
+  - name: svld1[_{type}]
+    attr: [*sve-unstable]
+    doc: Unextended load
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *{type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld1{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink: { name: "ld1.{sve_type}" }
+
+  - name: svld1_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Unextended load
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *{type}", "vnum: i64"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld1{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - FnCall:
+          - "svld1_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svld1_gather_[{type[0]}]index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Unextended load
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[1]}", "indices: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [f32, i32, u32]]
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["ld1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ld1.gather.{type_kind[0].su}xtw.index.{sve_type[1]}"
+            doubleword:
+              LLVMLink:
+                name: "ld1.gather.index.{sve_type[1]}"
+
+  - name: svld1_gather_[{type[0]}]offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Unextended load
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[1]}", "offsets: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [f32, i32, u32]]
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["ld1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ld1.gather.{type_kind[0].su}xtw.{sve_type[1]}"
+            doubleword:
+              LLVMLink:
+                name: "ld1.gather.{sve_type[1]}"
+
+  - name: svld1_gather[_{type[0]}base]_offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Unextended load
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "offset: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["ld1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink:
+          name: "ld1.gather.scalar.offset.{sve_type[1]}.{sve_type[0]}"
+
+  - name: svld1_gather[_{type[0]}base]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Unextended load
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["ld1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svld1_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+
+  - name: svld1_gather[_{type[0]}base]_index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Unextended load
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "index: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["ld1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svld1_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[0]}"]]
+
+  - name: svld1s{size_literal[2]}_gather_[{type[0]}]index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "indices: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [i32, u32], i16]
+      - [[i64, u64], [i64, u64], [i16, i32]]
+    assert_instr: ["ld1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ld1.gather.{type_kind[0].su}xtw.index.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ld1.gather.index.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $base, $indices]]
+
+  - name: svld1u{size_literal[2]}_gather_[{type[0]}]index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "indices: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [u32, i32], u16]
+      - [[i64, u64], [u64, i64], [u16, u32]]
+    assert_instr: ["ld1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ld1.gather.{type_kind[0].su}xtw.index.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ld1.gather.index.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $base, $indices]]
+          - - Type: "{sve_type[1] as {type[2]}}"
+            - _
+
+  - name: svld1s{size_literal[2]}_gather_[{type[0]}]offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "offsets: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [i32, u32], [i8, i16]]
+      - [[i64, u64], [i64, u64], [i8, i16, i32]]
+    assert_instr: ["ld1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ld1.gather.{type_kind[0].su}xtw.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ld1.gather.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $base, $offsets]]
+
+  - name: svld1u{size_literal[2]}_gather_[{type[0]}]offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "offsets: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [u32, i32], [u8, u16]]
+      - [[i64, u64], [u64, i64], [u8, u16, u32]]
+    assert_instr: ["ld1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ld1.gather.{type_kind[0].su}xtw.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ld1.gather.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $base, $offsets]]
+          - - Type: "{sve_type[1] as {type[2]}}"
+            - _
+
+  - name: svld1s{size_literal[2]}_gather[_{type[0]}base]_offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "offset: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["ld1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - LLVMLink:
+          name: "ld1.gather.scalar.offset.{sve_type[1] as {type[2]}}.{sve_type[0]}"
+          return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $bases, $offset]]
+
+  - name: svld1u{size_literal[2]}_gather[_{type[0]}base]_offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "offset: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [u32, i32], [u8, u16]]
+      - [u64, [u64, i64], [u8, u16, u32]]
+    assert_instr: ["ld1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - LLVMLink:
+          name: "ld1.gather.scalar.offset.{sve_type[1] as {type[2]}}.{sve_type[0]}"
+          return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $bases, $offset]]
+          - - Type: "{sve_type[1] as {type[2]}}"
+            - _
+
+  - name: svld1s{size_literal[2]}_gather[_{type[0]}base]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["ld1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svld1s{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+
+  - name: svld1u{size_literal[2]}_gather[_{type[0]}base]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [u8, u16]]
+      - [u64, [i64, u64], [u8, u16, u32]]
+    assert_instr: ["ld1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svld1u{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+
+  - name: svld1s{size_literal[2]}_gather[_{type[0]}base]_index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "index: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], i16]
+      - [u64, [i64, u64], [i16, i32]]
+    assert_instr: ["ld1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svld1s{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[2]}"]]
+
+  - name: svld1u{size_literal[2]}_gather[_{type[0]}base]_index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "index: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], u16]
+      - [u64, [i64, u64], [u16, u32]]
+    assert_instr: ["ld1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svld1u{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[2]}"]]
+
+  - name: svldnt1[_{type}]
+    attr: [*sve-unstable]
+    doc: Unextended load, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments: ["pg: {predicate}", "base: *{type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ldnt1{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink: { name: "ldnt1.{sve_type}" }
+
+  - name: svldnt1_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Unextended load, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments: ["pg: {predicate}", "base: *{type}", "vnum: i64"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ldnt1{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - FnCall:
+          - "svldnt1_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svld1s{size_literal[1]}_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and sign-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], i8]
+      - [[i32, i64, u32, u64], i16]
+      - [[i64, u64], i32]
+    assert_instr: ["ld1s{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink:
+          name: "ld1.{sve_type[0] as {type[1]}}"
+          arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+          return_type: "{sve_type[0] as {type[1]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base]]
+
+  - name: svld1u{size_literal[1]}_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and zero-extend
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], u8]
+      - [[i32, i64, u32, u64], u16]
+      - [[i64, u64], u32]
+    assert_instr: ["ld1{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink:
+          name: "ld1.{sve_type[0] as {type[1]}}"
+          arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+          return_type: "{sve_type[0] as {type[1]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base]]
+          - [Type: "{sve_type[0] as {type[1]}}", _]
+
+  - name: svld1s{size_literal[1]}_vnum_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and sign-extend
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}", "vnum: i64"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], i8]
+      - [[i32, i64, u32, u64], i16]
+      - [[i64, u64], i32]
+    assert_instr: ["ld1s{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svld1s{size_literal[1]}_{type[0]}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs:
+                          [{ FnCall: ["svcnt{size_literal[0]}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svld1u{size_literal[1]}_vnum_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and zero-extend
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}", "vnum: i64"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], u8]
+      - [[i32, i64, u32, u64], u16]
+      - [[i64, u64], u32]
+    assert_instr: ["ld1{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svld1u{size_literal[1]}_{type[0]}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs:
+                          [{ FnCall: ["svcnt{size_literal[0]}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svld2[_{type}]
+    attr: [*sve-unstable]
+    doc: Load two-element tuples into two vectors
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *{type}"]
+    return_type: "{sve_type_x2}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld2{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink: { name: "ld2.sret.{sve_type}" }
+
+  - name: svld2_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Load two-element tuples into two vectors
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *{type}", "vnum: i64"]
+    return_type: "{sve_type_x2}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld2{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - FnCall:
+          - "svld2_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svld3[_{type}]
+    attr: [*sve-unstable]
+    doc: Load three-element tuples into three vectors
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *{type}"]
+    return_type: "{sve_type_x3}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld3{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink: { name: "ld3.sret.{sve_type}" }
+
+  - name: svld3_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Load three-element tuples into three vectors
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *{type}", "vnum: i64"]
+    return_type: "{sve_type_x3}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld3{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - FnCall:
+          - "svld3_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svld4[_{type}]
+    attr: [*sve-unstable]
+    doc: Load four-element tuples into four vectors
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *{type}"]
+    return_type: "{sve_type_x4}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld4{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink: { name: "ld4.sret.{sve_type}" }
+
+  - name: svld4_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Load four-element tuples into four vectors
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *{type}", "vnum: i64"]
+    return_type: "{sve_type_x4}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld4{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - FnCall:
+          - "svld4_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svld1rq[_{type}]
+    attr: [*sve-unstable]
+    doc: Load and replicate 128 bits of data
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *{type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld1rq{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink: { name: "ld1rq.{sve_type}" }
+
+  - name: svld1ro[_{type}]
+    attr: [*sve-unstable]
+    doc: Load and replicate 256 bits of data
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    target_features: [f64mm]
+    arguments: ["pg: {predicate}", "base: *{type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ld1ro{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink: { name: "ld1ro.{sve_type}" }
+
+  - name: svldnf1[_{type}]
+    attr: [*sve-unstable]
+    doc: Unextended load, non-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_non_faulting
+        - dereference: predicated_non_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate}", "base: *{type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ldnf1{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink: { name: "ldnf1.{sve_type}" }
+
+  - name: svldnf1_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Unextended load, non-faulting
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated_non_faulting
+        - dereference: predicated_non_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate}", "base: *{type}", "vnum: i64"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ldnf1{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - FnCall:
+          - "svldnf1_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svldnf1s{size_literal[1]}_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and sign-extend, non-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_non_faulting
+        - dereference: predicated_non_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], i8]
+      - [[i32, i64, u32, u64], i16]
+      - [[i64, u64], i32]
+    assert_instr: ["ldnf1s{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink:
+          name: "ldnf1.{sve_type[0] as {type[1]}}"
+          arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+          return_type: "{sve_type[0] as {type[1]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base]]
+
+  - name: svldnf1u{size_literal[1]}_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and zero-extend, non-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_non_faulting
+        - dereference: predicated_non_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], u8]
+      - [[i32, i64, u32, u64], u16]
+      - [[i64, u64], u32]
+    assert_instr: ["ldnf1{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink:
+          name: "ldnf1.{sve_type[0] as {type[1]}}"
+          arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+          return_type: "{sve_type[0] as {type[1]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base]]
+          - [Type: "{sve_type[0] as {type[1]}}", _]
+
+  - name: svldnf1s{size_literal[1]}_vnum_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and sign-extend, non-faulting
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated_non_faulting
+        - dereference: predicated_non_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}", "vnum: i64"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], i8]
+      - [[i32, i64, u32, u64], i16]
+      - [[i64, u64], i32]
+    assert_instr: ["ldnf1s{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svldnf1s{size_literal[1]}_{type[0]}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs:
+                          [{ FnCall: ["svcnt{size_literal[0]}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svldnf1u{size_literal[1]}_vnum_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and zero-extend, non-faulting
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated_non_faulting
+        - dereference: predicated_non_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}", "vnum: i64"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], u8]
+      - [[i32, i64, u32, u64], u16]
+      - [[i64, u64], u32]
+    assert_instr: ["ldnf1{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svldnf1u{size_literal[1]}_{type[0]}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs:
+                          [{ FnCall: ["svcnt{size_literal[0]}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svldff1[_{type}]
+    attr: [*sve-unstable]
+    doc: Unextended load, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate}", "base: *{type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ldff1{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink: { name: "ldff1.{sve_type}" }
+
+  - name: svldff1_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Unextended load, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate}", "base: *{type}", "vnum: i64"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["ldff1{size_literal}"]
+    test: { load: 0 }
+    compose:
+      - FnCall:
+          - "svldff1_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svldff1s{size_literal[1]}_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and sign-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], i8]
+      - [[i32, i64, u32, u64], i16]
+      - [[i64, u64], i32]
+    assert_instr: ["ldff1s{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink:
+          name: "ldff1.{sve_type[0] as {type[1]}}"
+          arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+          return_type: "{sve_type[0] as {type[1]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base]]
+
+  - name: svldff1u{size_literal[1]}_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and zero-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], u8]
+      - [[i32, i64, u32, u64], u16]
+      - [[i64, u64], u32]
+    assert_instr: ["ldff1{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink:
+          name: "ldff1.{sve_type[0] as {type[1]}}"
+          arguments: ["pg: {predicate[0]}", "base: *{type[1]}"]
+          return_type: "{sve_type[0] as {type[1]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base]]
+          - [Type: "{sve_type[0] as {type[1]}}", _]
+
+  - name: svldff1s{size_literal[1]}_vnum_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and sign-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}", "vnum: i64"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], i8]
+      - [[i32, i64, u32, u64], i16]
+      - [[i64, u64], i32]
+    assert_instr: ["ldff1s{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svldff1s{size_literal[1]}_{type[0]}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs:
+                          [{ FnCall: ["svcnt{size_literal[0]}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svldff1u{size_literal[1]}_vnum_{type[0]}
+    attr: [*sve-unstable]
+    doc: Load {size[1]}-bit data and zero-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments: ["pg: {predicate[0]}", "base: *{type[1]}", "vnum: i64"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i16, i32, i64, u16, u32, u64], u8]
+      - [[i32, i64, u32, u64], u16]
+      - [[i64, u64], u32]
+    assert_instr: ["ldff1{size_literal[1]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svldff1u{size_literal[1]}_{type[0]}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs:
+                          [{ FnCall: ["svcnt{size_literal[0]}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+
+  - name: svldff1_gather_[{type[0]}]index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Unextended load, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[1]}", "indices: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [f32, i32, u32]]
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["ldff1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ldff1.gather.{type_kind[0].su}xtw.index.{sve_type[1]}"
+            doubleword:
+              LLVMLink:
+                name: "ldff1.gather.index.{sve_type[1]}"
+
+  - name: svldff1_gather_[{type[0]}]offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Unextended load, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[1]}", "offsets: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [f32, i32, u32]]
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["ldff1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ldff1.gather.{type_kind[0].su}xtw.{sve_type[1]}"
+            doubleword:
+              LLVMLink:
+                name: "ldff1.gather.{sve_type[1]}"
+
+  - name: svldff1_gather[_{type[0]}base]_offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Unextended load, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "offset: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["ldff1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink:
+          name: "ldff1.gather.scalar.offset.{sve_type[1]}.{sve_type[0]}"
+
+  - name: svldff1_gather[_{type[0]}base]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Unextended load, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["ldff1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svldff1_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+
+  - name: svldff1_gather[_{type[0]}base]_index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Unextended load, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "index: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["ldff1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svldff1_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[0]}"]]
+
+  - name: svldff1s{size_literal[2]}_gather_[{type[0]}]index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "indices: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [i32, u32], i16]
+      - [[i64, u64], [i64, u64], [i16, i32]]
+    assert_instr: ["ldff1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ldff1.gather.{type_kind[0].su}xtw.index.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ldff1.gather.index.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $base, $indices]]
+
+  - name: svldff1u{size_literal[2]}_gather_[{type[0]}]index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "indices: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [u32, i32], u16]
+      - [[i64, u64], [u64, i64], [u16, u32]]
+    assert_instr: ["ldff1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ldff1.gather.{type_kind[0].su}xtw.index.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ldff1.gather.index.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $base, $indices]]
+          - - Type: "{sve_type[1] as {type[2]}}"
+            - _
+
+  - name: svldff1s{size_literal[2]}_gather_[{type[0]}]offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "offsets: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [i32, u32], [i8, i16]]
+      - [[i64, u64], [i64, u64], [i8, i16, i32]]
+    assert_instr: ["ldff1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ldff1.gather.{type_kind[0].su}xtw.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ldff1.gather.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $base, $offsets]]
+
+  - name: svldff1u{size_literal[2]}_gather_[{type[0]}]offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "offsets: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i32, u32], [u32, i32], [u8, u16]]
+      - [[i64, u64], [u64, i64], [u8, u16, u32]]
+    assert_instr: ["ldff1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ldff1.gather.{type_kind[0].su}xtw.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ldff1.gather.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $base, $offsets]]
+          - - Type: "{sve_type[1] as {type[2]}}"
+            - _
+
+  - name: svldff1s{size_literal[2]}_gather[_{type[0]}base]_offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "offset: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["ldff1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - LLVMLink:
+          name: "ldff1.gather.scalar.offset.{sve_type[1] as {type[2]}}.{sve_type[0]}"
+          return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $bases, $offset]]
+
+  - name: svldff1u{size_literal[2]}_gather[_{type[0]}base]_offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "offset: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [u32, i32], [u8, u16]]
+      - [u64, [u64, i64], [u8, u16, u32]]
+    assert_instr: ["ldff1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - LLVMLink:
+          name: "ldff1.gather.scalar.offset.{sve_type[1] as {type[2]}}.{sve_type[0]}"
+          return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - crate::intrinsics::simd::simd_cast
+          - - FnCall: ["{llvm_link}", [$pg, $bases, $offset]]
+          - - Type: "{sve_type[1] as {type[2]}}"
+            - _
+
+  - name: svldff1s{size_literal[2]}_gather[_{type[0]}base]_{type[1]}
+    attr: [*sve-unstable]
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+        - no_provenance: bases
+    doc: Load {size[2]}-bit data and sign-extend, first-faulting
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["ldff1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svldff1s{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+
+  - name: svldff1u{size_literal[2]}_gather[_{type[0]}base]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [u8, u16]]
+      - [u64, [i64, u64], [u8, u16, u32]]
+    assert_instr: ["ldff1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svldff1u{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+
+  - name: svldff1s{size_literal[2]}_gather[_{type[0]}base]_index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "index: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], i16]
+      - [u64, [i64, u64], [i16, i32]]
+    assert_instr: ["ldff1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svldff1s{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[2]}"]]
+
+  - name: svldff1u{size_literal[2]}_gather[_{type[0]}base]_index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, first-faulting
+    safety:
+      unsafe:
+        - pointer_offset: predicated_first_faulting
+        - dereference: predicated_first_faulting
+        - unpredictable_on_fault
+        - no_provenance: bases
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "index: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], u16]
+      - [u64, [i64, u64], [u16, u32]]
+    assert_instr: ["ldff1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svldff1u{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[2]}"]]
+
+  - name: svrdffr_z
+    attr: [*sve-unstable]
+    doc: Read FFR, returning predicate of succesfully loaded elements
+    arguments: ["pg: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [rdffr]
+    compose:
+      - LLVMLink: { name: "rdffr.z" }
+
+  - name: svrdffr
+    attr: [*sve-unstable]
+    doc: Read FFR, returning predicate of succesfully loaded elements
+    arguments: []
+    return_type: svbool_t
+    assert_instr: [rdffr]
+    compose:
+      - FnCall: [svrdffr_z, [FnCall: [svptrue_b8, []]]]
+
+  - name: svsetffr
+    attr: [*sve-unstable]
+    doc: Initialize the first-fault register to all-true
+    arguments: []
+    assert_instr: [setffr]
+    compose:
+      - LLVMLink: { name: "setffr" }
+
+  - name: svwrffr
+    attr: [*sve-unstable]
+    doc: Write to the first-fault register
+    arguments: ["op: svbool_t"]
+    assert_instr: [wrffr]
+    compose:
+      - LLVMLink: { name: "wrffr" }
+
+  - name: svqinc{size_literal[1]}[_n_{type[0]}]
+    attr: [*sve-unstable]
+    substitutions:
+      textual_size:
+        match_size: "{type[1]}"
+        default: word
+        byte: byte
+        halfword: halfword
+        doubleword: doubleword
+    doc: Saturating increment by number of {textual_size} elements
+    arguments: ["op: {type[0]}"]
+    static_defs: ["const IMM_FACTOR: i32"]
+    return_type: "{type[0]}"
+    types:
+      - [[i32, i64, u32, u64], [i8, i16, i32, i64]]
+    assert_instr:
+      - ["{type_kind[0].su}qinc{size_literal[1]}", "IMM_FACTOR = 1"]
+    compose:
+      - FnCall:
+          - "svqinc{size_literal[1]}_pat_n_{type[0]}"
+          - [$op]
+          - ["{{svpattern::SV_ALL}}", $IMM_FACTOR]
+
+  - name: svqinc{size_literal[1]}_pat[_n_{type[0]}]
+    attr: [*sve-unstable]
+    substitutions:
+      textual_size:
+        match_size: "{type[1]}"
+        default: word
+        byte: byte
+        halfword: halfword
+        doubleword: doubleword
+    doc: Saturating increment by number of {textual_size} elements
+    arguments: ["op: {type[0]}"]
+    static_defs: ["const PATTERN: svpattern", "const IMM_FACTOR: i32"]
+    constraints: [{ variable: IMM_FACTOR, range: [1, 16] }]
+    return_type: "{type[0]}"
+    types:
+      - [[i32, i64, u32, u64], [i8, i16, i32, i64]]
+    assert_instr:
+      - - "{type_kind[0].su}qinc{size_literal[1]}"
+        - "PATTERN = {{svpattern::SV_ALL}}, IMM_FACTOR = 1"
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}qinc{size_literal[1]}.n{size[0]}"
+          arguments: ["op: {type[0]}", "pattern: svpattern", "imm_factor: i32"]
+          return_type: "{type[0]}"
+      - FnCall: ["{llvm_link}", [$op, $PATTERN, $IMM_FACTOR]]
+
+  - name: svqinc{size_literal}[_{type}]
+    attr: [*sve-unstable]
+    substitutions:
+      textual_size:
+        match_size: "{type}"
+        default: word
+        halfword: halfword
+        doubleword: doubleword
+    doc: Saturating increment by number of {textual_size} elements
+    arguments: ["op: {sve_type}"]
+    static_defs: ["const IMM_FACTOR: i32"]
+    return_type: "{sve_type}"
+    types: [i16, u16, i32, u32, i64, u64]
+    assert_instr: [["{type_kind.su}qinc{size_literal}", "IMM_FACTOR = 1"]]
+    compose:
+      - FnCall:
+          - "svqinc{size_literal}_pat_{type}"
+          - [$op]
+          - ["{{svpattern::SV_ALL}}", $IMM_FACTOR]
+
+  - name: svqinc{size_literal}_pat[_{type}]
+    attr: [*sve-unstable]
+    substitutions:
+      textual_size:
+        match_size: "{type}"
+        default: word
+        halfword: halfword
+        doubleword: doubleword
+    doc: Saturating increment by number of {textual_size} elements
+    arguments: ["op: {sve_type}"]
+    static_defs: ["const PATTERN: svpattern", "const IMM_FACTOR: i32"]
+    constraints: [{ variable: IMM_FACTOR, range: [1, 16] }]
+    return_type: "{sve_type}"
+    types: [i16, u16, i32, u32, i64, u64]
+    assert_instr:
+      - - "{type_kind.su}qinc{size_literal}"
+        - "PATTERN = {{svpattern::SV_ALL}}, IMM_FACTOR = 1"
+    compose:
+      - LLVMLink:
+          name: "{type_kind.su}qinc{size_literal}.{sve_type}"
+          arguments: ["op: {sve_type}", "pattern: svpattern", "imm_factor: i32"]
+          return_type: "{sve_type}"
+      - FnCall: ["{llvm_link}", [$op, $PATTERN, $IMM_FACTOR]]
+
+  - name: svqdec{size_literal[1]}[_n_{type[0]}]
+    attr: [*sve-unstable]
+    substitutions:
+      textual_size:
+        match_size: "{type[1]}"
+        default: word
+        byte: byte
+        halfword: halfword
+        doubleword: doubleword
+    doc: Saturating decrement by number of {textual_size} elements
+    arguments: ["op: {type[0]}"]
+    static_defs: ["const IMM_FACTOR: i32"]
+    return_type: "{type[0]}"
+    types:
+      - [[i32, i64, u32, u64], [i8, i16, i32, i64]]
+    assert_instr:
+      - ["{type_kind[0].su}qdec{size_literal[1]}", "IMM_FACTOR = 1"]
+    compose:
+      - FnCall:
+          - "svqdec{size_literal[1]}_pat_n_{type[0]}"
+          - [$op]
+          - ["{{svpattern::SV_ALL}}", $IMM_FACTOR]
+
+  - name: svqdec{size_literal[1]}_pat[_n_{type[0]}]
+    attr: [*sve-unstable]
+    substitutions:
+      textual_size:
+        match_size: "{type[1]}"
+        default: word
+        byte: byte
+        halfword: halfword
+        doubleword: doubleword
+    doc: Saturating decrement by number of {textual_size} elements
+    arguments: ["op: {type[0]}"]
+    static_defs: ["const PATTERN: svpattern", "const IMM_FACTOR: i32"]
+    constraints: [{ variable: IMM_FACTOR, range: [1, 16] }]
+    return_type: "{type[0]}"
+    types:
+      - [[i32, i64, u32, u64], [i8, i16, i32, i64]]
+    assert_instr:
+      - - "{type_kind[0].su}qdec{size_literal[1]}"
+        - "PATTERN = {{svpattern::SV_ALL}}, IMM_FACTOR = 1"
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}qdec{size_literal[1]}.n{size[0]}"
+          arguments: ["op: {type[0]}", "pattern: svpattern", "imm_factor: i32"]
+          return_type: "{type[0]}"
+      - FnCall: ["{llvm_link}", [$op, $PATTERN, $IMM_FACTOR]]
+
+  - name: svqdec{size_literal}[_{type}]
+    attr: [*sve-unstable]
+    substitutions:
+      textual_size:
+        match_size: "{type}"
+        default: word
+        halfword: halfword
+        doubleword: doubleword
+    doc: Saturating decrement by number of {textual_size} elements
+    arguments: ["op: {sve_type}"]
+    static_defs: ["const IMM_FACTOR: i32"]
+    return_type: "{sve_type}"
+    types: [i16, u16, i32, u32, i64, u64]
+    assert_instr: [["{type_kind.su}qdec{size_literal}", "IMM_FACTOR = 1"]]
+    compose:
+      - FnCall:
+          - "svqdec{size_literal}_pat_{type}"
+          - [$op]
+          - ["{{svpattern::SV_ALL}}", $IMM_FACTOR]
+
+  - name: svqdec{size_literal}_pat[_{type}]
+    attr: [*sve-unstable]
+    substitutions:
+      textual_size:
+        match_size: "{type}"
+        default: word
+        halfword: halfword
+        doubleword: doubleword
+    doc: Saturating decrement by number of {textual_size} elements
+    arguments: ["op: {sve_type}"]
+    static_defs: ["const PATTERN: svpattern", "const IMM_FACTOR: i32"]
+    constraints: [{ variable: IMM_FACTOR, range: [1, 16] }]
+    return_type: "{sve_type}"
+    types: [i16, u16, i32, u32, i64, u64]
+    assert_instr:
+      - - "{type_kind.su}qdec{size_literal}"
+        - "PATTERN = {{svpattern::SV_ALL}}, IMM_FACTOR = 1"
+    compose:
+      - LLVMLink:
+          name: "{type_kind.su}qdec{size_literal}.{sve_type}"
+          arguments: ["op: {sve_type}", "pattern: svpattern", "imm_factor: i32"]
+          return_type: "{sve_type}"
+      - FnCall: ["{llvm_link}", [$op, $PATTERN, $IMM_FACTOR]]
+
+  - name: svst1[_{type}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *mut {type}", "data: {sve_type}"]
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["st1{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - LLVMLink:
+          name: "st1.{sve_type}"
+          arguments:
+            - "data: {sve_type}"
+            - "pg: {predicate}"
+            - "ptr: *mut {type}"
+      - FnCall: ["{llvm_link}", [$data, $pg, $base]]
+
+  - name: svst1_scatter_[{type[0]}]index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      - "pg: {predicate[0]}"
+      - "base: *mut {type[1]}"
+      - "indices: {sve_type[0]}"
+      - "data: {sve_type[1]}"
+    types:
+      - [[i32, u32], [f32, i32, u32]]
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["st1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "st1.scatter.{type_kind[0].su}xtw.index.{sve_type[1]}"
+                arguments:
+                  - "data: {sve_type[1]}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[1]}"
+                  - "indices: {sve_type[0]}"
+            doubleword:
+              LLVMLink:
+                name: "st1.scatter.index.{sve_type[1]}"
+                arguments:
+                  - "data: {sve_type[1]}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[1]}"
+                  - "indices: {sve_type[0]}"
+      - FnCall: ["{llvm_link}", [$data, $pg, $base, $indices]]
+
+  - name: svst1_scatter_[{type[0]}]offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      - "pg: {predicate[0]}"
+      - "base: *mut {type[1]}"
+      - "offsets: {sve_type[0]}"
+      - "data: {sve_type[1]}"
+    types:
+      - [[i32, u32], [f32, i32, u32]]
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["st1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "st1.scatter.{type_kind[0].su}xtw.{sve_type[1]}"
+                arguments:
+                  - "data: {sve_type[1]}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[1]}"
+                  - "offsets: {sve_type[0]}"
+            doubleword:
+              LLVMLink:
+                name: "st1.scatter.{sve_type[1]}"
+                arguments:
+                  - "data: {sve_type[1]}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[1]}"
+                  - "offsets: {sve_type[0]}"
+      - FnCall: ["{llvm_link}", [$data, $pg, $base, $offsets]]
+
+  - name: svst1_scatter[_{type[0]}base]_offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments:
+      - "pg: {predicate[0]}"
+      - "bases: {sve_type[0]}"
+      - "offset: i64"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["st1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - LLVMLink:
+          arguments:
+            - "data: {sve_type[1]}"
+            - "pg: {predicate[0]}"
+            - "bases: {sve_type[0]}"
+            - "offset: i64"
+          name: "st1.scatter.scalar.offset.{sve_type[1]}.{sve_type[0]}"
+      - FnCall: ["{llvm_link}", [$data, $pg, $bases, $offset]]
+
+  - name: svst1_scatter[_{type[0]}base_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments:
+      ["pg: {predicate[0]}", "bases: {sve_type[0]}", "data: {sve_type[1]}"]
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["st1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - FnCall:
+          - "svst1_scatter_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+            - $data
+
+  - name: svst1_scatter[_{type[0]}base]_index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments:
+      - "pg: {predicate[0]}"
+      - "bases: {sve_type[0]}"
+      - "index: i64"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["st1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - FnCall:
+          - "svst1_scatter_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[0]}"]]
+            - $data
+
+  - name: svst1{size_literal[2]}_scatter_[{type[0]}]index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      - "pg: {predicate[0]}"
+      - "base: *mut {type[2]}"
+      - "indices: {sve_type[0]}"
+      - "data: {sve_type[1]}"
+    types:
+      - [[i32, u32], i32, i16]
+      - [[i32, u32], u32, u16]
+      - [[i64, u64], i64, [i16, i32]]
+      - [[i64, u64], u64, [u16, u32]]
+    assert_instr: ["st1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "st1.scatter.{type_kind[0].su}xtw.index.{sve_type[1] as {type[2]}}"
+                arguments:
+                  - "data: {sve_type[1] as {type[2]}}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[2]}"
+                  - "indices: {sve_type[0]}"
+            doubleword:
+              LLVMLink:
+                name: "st1.scatter.index.{sve_type[1] as {type[2]}}"
+                arguments:
+                  - "data: {sve_type[1] as {type[2]}}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[2]}"
+                  - "indices: {sve_type[0]}"
+      - FnCall:
+          - "{llvm_link}"
+          - [FnCall: ["crate::intrinsics::simd::simd_cast", [$data]], $pg, $base, $indices]
+
+  - name: svst1{size_literal[2]}_scatter_[{type[0]}]offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      - "pg: {predicate[0]}"
+      - "base: *mut {type[2]}"
+      - "offsets: {sve_type[0]}"
+      - "data: {sve_type[1]}"
+    types:
+      - [[i32, u32], i32, [i8, i16]]
+      - [[i32, u32], u32, [u8, u16]]
+      - [[i64, u64], i64, [i8, i16, i32]]
+      - [[i64, u64], u64, [u8, u16, u32]]
+    assert_instr: ["st1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "st1.scatter.{type_kind[0].su}xtw.{sve_type[1] as {type[2]}}"
+                arguments:
+                  - "data: {sve_type[1] as {type[2]}}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[2]}"
+                  - "offsets: {sve_type[0]}"
+            doubleword:
+              LLVMLink:
+                name: "st1.scatter.{sve_type[1] as {type[2]}}"
+                arguments:
+                  - "data: {sve_type[1] as {type[2]}}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[2]}"
+                  - "offsets: {sve_type[0]}"
+      - FnCall:
+          - "{llvm_link}"
+          - [FnCall: ["crate::intrinsics::simd::simd_cast", [$data]], $pg, $base, $offsets]
+
+  - name: svst1{size_literal[2]}_scatter[_{type[0]}base]_offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments:
+      - "pg: {predicate[0]}"
+      - "bases: {sve_type[0]}"
+      - "offset: i64"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["st1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - LLVMLink:
+          name: "st1.scatter.scalar.offset.{sve_type[1] as {type[2]}}.{sve_type[0]}"
+          arguments:
+            - "data: {sve_type[1] as {type[2]}}"
+            - "pg: {predicate[0]}"
+            - "bases: {sve_type[0]}"
+            - "offset: i64"
+      - FnCall:
+          - "{llvm_link}"
+          - [FnCall: ["crate::intrinsics::simd::simd_cast", [$data]], $pg, $bases, $offset]
+
+  - name: svst1{size_literal[2]}_scatter[_{type[0]}base_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments:
+      ["pg: {predicate[0]}", "bases: {sve_type[0]}", "data: {sve_type[1]}"]
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["st1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - FnCall:
+          - "svst1{size_literal[2]}_scatter_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+            - $data
+
+  - name: svst1{size_literal[2]}_scatter[_{type[0]}base]_index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+    arguments:
+      - "pg: {predicate[0]}"
+      - "bases: {sve_type[0]}"
+      - "index: i64"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, [i32, u32], i16]
+      - [u64, [i64, u64], [i16, i32]]
+    assert_instr: ["st1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - FnCall:
+          - "svst1{size_literal[2]}_scatter_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[2]}"]]
+            - $data
+
+  - name: svstnt1[_{type}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments: ["pg: {predicate}", "base: *mut {type}", "data: {sve_type}"]
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["stnt1{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - LLVMLink:
+          name: "stnt1.{sve_type}"
+          arguments:
+            - "data: {sve_type}"
+            - "pg: {predicate}"
+            - "ptr: *mut {type}"
+      - FnCall: ["{llvm_link}", [$data, $pg, $base]]
+
+  - name: svstnt1_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      ["pg: {predicate}", "base: *mut {type}", "vnum: i64", "data: {sve_type}"]
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["stnt1{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - FnCall:
+          - "svstnt1_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+            - $data
+
+  - name: svst1{size_literal[1]}[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[1]} bits and store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      ["pg: {predicate[0]}", "base: *mut {type[1]}", "data: {sve_type[0]}"]
+    types:
+      - [[i16, i32, i64], i8]
+      - [[u16, u32, u64], u8]
+      - [[i32, i64], i16]
+      - [[u32, u64], u16]
+      - [i64, i32]
+      - [u64, u32]
+    assert_instr: ["st1{size_literal[1]}"]
+    test: { store: 1 }
+    compose:
+      - LLVMLink:
+          name: "st1.{sve_type[0] as {type[1]}}"
+          arguments:
+            - "data: {sve_type[0] as {type[1]}}"
+            - "pg: {predicate[0]}"
+            - "ptr: *mut {type[1]}"
+      - FnCall:
+          - "{llvm_link}"
+          - [FnCall: ["crate::intrinsics::simd::simd_cast", [$data]], $pg, $base]
+
+  - name: svst1{size_literal[1]}_vnum[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[1]} bits and store
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated
+        - dereference: predicated
+    arguments:
+      - "pg: {predicate[0]}"
+      - "base: *mut {type[1]}"
+      - "vnum: i64"
+      - "data: {sve_type[0]}"
+    types:
+      - [[i16, i32, i64], i8]
+      - [[u16, u32, u64], u8]
+      - [[i32, i64], i16]
+      - [[u32, u64], u16]
+      - [i64, i32]
+      - [u64, u32]
+    assert_instr: ["st1{size_literal[1]}"]
+    test: { store: 1 }
+    compose:
+      - FnCall:
+          - "svst1{size_literal[1]}_{type[0]}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs:
+                          [{ FnCall: ["svcnt{size_literal[0]}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+            - $data
+
+  - name: svst1_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      ["pg: {predicate}", "base: *mut {type}", "vnum: i64", "data: {sve_type}"]
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["st1{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - FnCall:
+          - "svst1_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+            - $data
+
+  - name: svst2[_{type}]
+    attr: [*sve-unstable]
+    doc: Store two vectors into two-element tuples
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *mut {type}", "data: {sve_type_x2}"]
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["st2{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - LLVMLink:
+          name: "st2.{sve_type}"
+          arguments:
+            - "data0: {sve_type}"
+            - "data1: {sve_type}"
+            - "pg: {predicate}"
+            - "ptr: *mut {type}"
+      - FnCall:
+          - "{llvm_link}"
+          - - FnCall: ["svget2_{type}", ["$data"], [0]]
+            - FnCall: ["svget2_{type}", ["$data"], [1]]
+            - "$pg"
+            - "$base"
+
+  - name: svst2_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Store two vectors into two-element tuples
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments:
+      - "pg: {predicate}"
+      - "base: *mut {type}"
+      - "vnum: i64"
+      - "data: {sve_type_x2}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["st2{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - FnCall:
+          - "svst2_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+            - $data
+
+  - name: svst3[_{type}]
+    attr: [*sve-unstable]
+    doc: Store three vectors into three-element tuples
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *mut {type}", "data: {sve_type_x3}"]
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["st3{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - LLVMLink:
+          name: "st3.{sve_type}"
+          arguments:
+            - "data0: {sve_type}"
+            - "data1: {sve_type}"
+            - "data2: {sve_type}"
+            - "pg: {predicate}"
+            - "ptr: *mut {type}"
+      - FnCall:
+          - "{llvm_link}"
+          - - FnCall: ["svget3_{type}", ["$data"], [0]]
+            - FnCall: ["svget3_{type}", ["$data"], [1]]
+            - FnCall: ["svget3_{type}", ["$data"], [2]]
+            - "$pg"
+            - "$base"
+
+  - name: svst3_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Store three vectors into three-element tuples
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated
+        - dereference: predicated
+    arguments:
+      - "pg: {predicate}"
+      - "base: *mut {type}"
+      - "vnum: i64"
+      - "data: {sve_type_x3}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["st3{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - FnCall:
+          - "svst3_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+            - $data
+
+  - name: svst4[_{type}]
+    attr: [*sve-unstable]
+    doc: Store four vectors into four-element tuples
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+    arguments: ["pg: {predicate}", "base: *mut {type}", "data: {sve_type_x4}"]
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["st4{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - LLVMLink:
+          name: "st4.{sve_type}"
+          arguments:
+            - "data0: {sve_type}"
+            - "data1: {sve_type}"
+            - "data2: {sve_type}"
+            - "data3: {sve_type}"
+            - "pg: {predicate}"
+            - "ptr: *mut {type}"
+      - FnCall:
+          - "{llvm_link}"
+          - - FnCall: ["svget4_{type}", ["$data"], [0]]
+            - FnCall: ["svget4_{type}", ["$data"], [1]]
+            - FnCall: ["svget4_{type}", ["$data"], [2]]
+            - FnCall: ["svget4_{type}", ["$data"], [3]]
+            - "$pg"
+            - "$base"
+
+  - name: svst4_vnum[_{type}]
+    attr: [*sve-unstable]
+    doc: Store four vectors into four-element tuples
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated
+        - dereference: predicated
+    arguments:
+      - "pg: {predicate}"
+      - "base: *mut {type}"
+      - "vnum: i64"
+      - "data: {sve_type_x4}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["st4{size_literal}"]
+    test: { store: 0 }
+    compose:
+      - FnCall:
+          - "svst4_{type}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+            - $data
+
+  - name: svtbl[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Table lookup in single-vector table
+    arguments: ["data: {sve_type[0]}", "indices: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    assert_instr: [tbl]
+    types:
+      - [f32, u32]
+      - [f64, u64]
+      - [i8, u8]
+      - [i16, u16]
+      - [i32, u32]
+      - [i64, u64]
+      - [u8, u8]
+      - [u16, u16]
+      - [u32, u32]
+      - [u64, u64]
+    compose:
+      - LLVMLink: { name: "tbl.{sve_type[0]}" }
+
+  - name: svwhilele_{type[1]}[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: While incrementing scalar is less than or equal to
+    arguments: ["op1: {type[0]}", "op2: {type[0]}"]
+    return_type: "{sve_type[1]}"
+    types: [[[i32, i64, u32, u64], [b8, b16, b32, b64]]]
+    assert_instr: [{ default: whilele, unsigned: whilels }]
+    compose:
+      - MatchKind:
+          - "{type[0]}"
+          - default: { LLVMLink: { name: "whilele.{sve_type[1]}.{type[0]}" } }
+            unsigned: { LLVMLink: { name: "whilels.{sve_type[1]}.{type[0]}" } }
+
+  - name: svwhilelt_{type[1]}[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: While incrementing scalar is less than
+    arguments: ["op1: {type[0]}", "op2: {type[0]}"]
+    return_type: "{sve_type[1]}"
+    types: [[[i32, i64, u32, u64], [b8, b16, b32, b64]]]
+    assert_instr: [{ default: whilelt, unsigned: whilelo }]
+    compose:
+      - MatchKind:
+          - "{type[0]}"
+          - default: { LLVMLink: { name: "whilelt.{sve_type[1]}.{type[0]}" } }
+            unsigned: { LLVMLink: { name: "whilelo.{sve_type[1]}.{type[0]}" } }
+
+  - name: svmax[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Maximum
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64, f32, f64]
+    zeroing_method: { select: op1 }
+    assert_instr: ["{type_kind}max"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.fsu}max.{sve_type}" }
+
+  - name: svmaxnm[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Maximum number
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    zeroing_method: { select: op1 }
+    assert_instr: [fmaxnm]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.f}maxnm.{sve_type}" }
+
+  - name: svpfalse[_b]
+    attr: [*sve-unstable]
+    doc: Set all predicate elements to false
+    arguments: []
+    return_type: "svbool_t"
+    # TODO: With current implementation, `pfalse` isn't generated, will need to add intrinsic to
+    # generate `zeroinitializer`
+    # assert_instr: [pfalse] 
+    compose:
+      - FnCall:
+          - "svdupq_n_b8"
+          - - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+            - false
+
+  - name: svptrue_pat_{type}
+    attr: [*sve-unstable]
+    doc: Set predicate elements to true
+    arguments: []
+    static_defs: ["const PATTERN: svpattern"]
+    return_type: "{predicate}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [[ptrue, "PATTERN = {{svpattern::SV_ALL}}"]]
+    compose:
+      - LLVMLink:
+          name: ptrue.{sve_type}
+          arguments: ["pattern: svpattern"]
+      - FnCall: ["{llvm_link}", [$PATTERN]]
+
+  - name: svptrue_{type}
+    attr: [*sve-unstable]
+    doc: Set predicate elements to true
+    arguments: []
+    return_type: "svbool_t"
+    types: [b8, b16, b32, b64]
+    assert_instr: [ptrue]
+    compose:
+      - FnCall: ["svptrue_pat_{type}", [], ["{{svpattern::SV_ALL}}"]]
+
+  - name: svptest_any
+    attr: [*sve-unstable]
+    doc: Test whether any active element is true
+    arguments: ["pg: svbool_t", "op: svbool_t"]
+    return_type: "bool"
+    assert_instr: [ptest]
+    compose:
+      - LLVMLink: { name: "ptest.any.nxv16i1" }
+
+  - name: svptest_first
+    attr: [*sve-unstable]
+    doc: Test whether first active element is true
+    arguments: ["pg: svbool_t", "op: svbool_t"]
+    return_type: "bool"
+    assert_instr: [ptest]
+    compose:
+      - LLVMLink: { name: "ptest.first.nxv16i1" }
+
+  - name: svptest_last
+    attr: [*sve-unstable]
+    doc: Test whether last active element is true
+    arguments: ["pg: svbool_t", "op: svbool_t"]
+    return_type: "bool"
+    assert_instr: [ptest]
+    compose:
+      - LLVMLink: { name: "ptest.last.nxv16i1" }
+
+  - name: svpfirst[_b]
+    attr: [*sve-unstable]
+    doc: Set the first active predicate element to true
+    arguments: ["pg: svbool_t", "op: svbool_t"]
+    return_type: "svbool_t"
+    assert_instr: [pfirst]
+    compose:
+      - LLVMLink: { name: "pfirst.nxv16i1" }
+
+  - name: svpnext_{type}
+    attr: [*sve-unstable]
+    doc: Find next active predicate
+    arguments: ["pg: {predicate}", "op: {predicate}"]
+    return_type: "{predicate}"
+    types: [b8, b16, b32, b64]
+    assert_instr: [pnext]
+    compose:
+      - LLVMLink: { name: "pnext.{sve_type}" }
+
+  - name: svbrkn[_b]_z
+    attr: [*sve-unstable]
+    doc: Propagate break to next partition
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: "svbool_t"
+    assert_instr: [brkn]
+    compose:
+      - LLVMLink: { name: "brkn.z.nxv16i1" }
+
+  - name: svbrkb[_b]_z
+    attr: [*sve-unstable]
+    doc: Break before first true condition
+    arguments: ["pg: svbool_t", "op: svbool_t"]
+    return_type: "svbool_t"
+    assert_instr: [brkb]
+    compose:
+      - LLVMLink: { name: "brkb.z.nxv16i1" }
+
+  - name: svbrkb[_b]_m
+    attr: [*sve-unstable]
+    doc: Break before first true condition
+    arguments: ["inactive: svbool_t", "pg: svbool_t", "op: svbool_t"]
+    return_type: "svbool_t"
+    assert_instr: [brkb]
+    compose:
+      - LLVMLink: { name: "brkb.nxv16i1" }
+
+  - name: svbrkpb[_b]_z
+    attr: [*sve-unstable]
+    doc: Break before first true condition, propagating from previous partition
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: "svbool_t"
+    assert_instr: [brkpb]
+    compose:
+      - LLVMLink: { name: "brkpb.z.nxv16i1" }
+
+  - name: svbrka[_b]_z
+    attr: [*sve-unstable]
+    doc: Break after first true condition
+    arguments: ["pg: svbool_t", "op: svbool_t"]
+    return_type: "svbool_t"
+    assert_instr: [brka]
+    compose:
+      - LLVMLink: { name: "brka.z.nxv16i1" }
+
+  - name: svbrka[_b]_m
+    attr: [*sve-unstable]
+    doc: Break after first true condition
+    arguments: ["inactive: svbool_t", "pg: svbool_t", "op: svbool_t"]
+    return_type: "svbool_t"
+    assert_instr: [brka]
+    compose:
+      - LLVMLink: { name: "brka.nxv16i1" }
+
+  - name: svbrkpa[_b]_z
+    attr: [*sve-unstable]
+    doc: Break after first true condition, propagating from previous partition
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: "svbool_t"
+    assert_instr: [brkpa]
+    compose:
+      - LLVMLink: { name: "brkpa.z.nxv16i1" }
+
+  - name: svsel[_b]
+    attr: [*sve-unstable]
+    doc: Conditionally select elements
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: "svbool_t"
+    assert_instr: [sel]
+    compose:
+      - FnCall: ["simd_select", [$pg, $op1, $op2]]
+
+  - name: svsel[_{type}]
+    attr: [*sve-unstable]
+    doc: Conditionally select elements
+    arguments: ["pg: svbool_t", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [sel]
+    compose:
+      - FnCall:
+          - "simd_select"
+          - - MatchSize:
+                - "{type}"
+                - { default: { MethodCall: [$pg, sve_into, []] }, byte: $pg }
+            - $op1
+            - $op2
+          - - MatchSize:
+                - "{type}"
+                - byte: svbool_t
+                  halfword: svbool8_t
+                  default: svbool4_t
+                  doubleword: svbool2_t
+            - _
+
+  - name: svsub[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Subtract
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64, f32, f64]
+    assert_instr: ["{type_kind.f}sub"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.f}sub.{sve_type}" }
+
+  - name: svsubr[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Subtract reversed
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64, f32, f64]
+    assert_instr: ["{type_kind.f}subr"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.f}subr.{sve_type}" }
+
+  - name: svcntp_{predicate}
+    attr: [*sve-unstable]
+    doc: Count set predicate bits
+    arguments: ["pg: {predicate}", "op: {predicate}"]
+    types: [b8, b16, b32, b64]
+    return_type: u64
+    assert_instr: [cntp]
+    compose:
+      - LLVMLink: { name: "cntp.{predicate}" }
+
+  - name: svcompact[_{type}]
+    attr: [*sve-unstable]
+    doc: Shuffle active elements of vector to the right and fill with zero
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i32, i64, u32, u64]
+    assert_instr: [compact]
+    compose:
+      - LLVMLink: { name: "compact.{sve_type}" }
+
+  - name: svlasta[_{type}]
+    attr: [*sve-unstable]
+    doc: Extract element after last
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [lasta]
+    compose:
+      - LLVMLink: { name: "lasta.{sve_type}" }
+
+  - name: svclasta[_{type}]
+    attr: [*sve-unstable]
+    doc: Conditionally extract element after last
+    arguments: ["pg: {predicate}", "fallback: {sve_type}", "data: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [clasta]
+    compose:
+      - LLVMLink: { name: "clasta.{sve_type}" }
+
+  - name: svclasta[_n_{type}]
+    attr: [*sve-unstable]
+    doc: Conditionally extract element after last
+    arguments: ["pg: {predicate}", "fallback: {type}", "data: {sve_type}"]
+    return_type: "{type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [clasta]
+    compose:
+      - LLVMLink: { name: "clasta.n.{sve_type}" }
+
+  - name: svlastb[_{type}]
+    attr: [*sve-unstable]
+    doc: Extract last element
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [lastb]
+    compose:
+      - LLVMLink: { name: "lastb.{sve_type}" }
+
+  - name: svclastb[_{type}]
+    attr: [*sve-unstable]
+    doc: Conditionally extract last element
+    arguments: ["pg: {predicate}", "fallback: {sve_type}", "data: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [clastb]
+    compose:
+      - LLVMLink: { name: "clastb.{sve_type}" }
+
+  - name: svclastb[_n_{type}]
+    attr: [*sve-unstable]
+    doc: Conditionally extract last element
+    arguments: ["pg: {predicate}", "fallback: {type}", "data: {sve_type}"]
+    return_type: "{type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [clastb]
+    compose:
+      - LLVMLink: { name: "clastb.n.{sve_type}" }
+
+  - name: svqdecp[_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating decrement by active element count
+    arguments: ["op: {sve_type}", "pg: {predicate}"]
+    return_type: "{sve_type}"
+    types: [i16, i32, i64, u16, u32, u64]
+    assert_instr: ["{type_kind.su}qdecp"]
+    compose:
+      - LLVMLink: { name: "{type_kind.su}qdecp.{sve_type}" }
+
+  - name: svqdecp[_n_{type[0]}]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Saturating decrement by active element count
+    arguments: ["op: {type[0]}", "pg: {sve_type[1]}"]
+    return_type: "{type[0]}"
+    types: [[[i32, i64, u32, u64], [b8, b16, b32, b64]]]
+    assert_instr: ["{type_kind[0].su}qdecp"]
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}qdecp.n{size[0]}.{sve_type[1]}" }
+
+  - name: svqincp[_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating increment by active element count
+    arguments: ["op: {sve_type}", "pg: {predicate}"]
+    return_type: "{sve_type}"
+    types: [i16, i32, i64, u16, u32, u64]
+    assert_instr: ["{type_kind.su}qincp"]
+    compose:
+      - LLVMLink: { name: "{type_kind.su}qincp.{sve_type}" }
+
+  - name: svqincp[_n_{type[0]}]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Saturating increment by active element count
+    arguments: ["op: {type[0]}", "pg: {sve_type[1]}"]
+    return_type: "{type[0]}"
+    types: [[[i32, i64, u32, u64], [b8, b16, b32, b64]]]
+    assert_instr: ["{type_kind[0].su}qincp"]
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}qincp.n{size[0]}.{sve_type[1]}" }
+
+  - name: svtmad[_{type}]
+    attr: [*sve-unstable]
+    doc: Trigonometric multiply-add coefficient
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    static_defs: ["const IMM3: i32"]
+    constraints: [{ variable: IMM3, range: [0, 7] }]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [[ftmad, "IMM3 = 0"]]
+    compose:
+      - LLVMLink:
+          name: "ftmad.x.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm3: i32"]
+      - FnCall:
+          - "{llvm_link}"
+          - [op1, op2, IMM3]
+
+  - name: svtsmul[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Trigonometric starting value
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [f32, u32]
+      - [f64, u64]
+    assert_instr: [ftsmul]
+    compose:
+      - LLVMLink:
+          name: "ftsmul.x.{sve_type[0]}"
+
+  - name: svtssel[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Trigonometric select coefficient
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [f32, u32]
+      - [f64, u64]
+    assert_instr: [ftssel]
+    compose:
+      - LLVMLink:
+          name: "ftssel.x.{sve_type[0]}"
+
+  - name: svprf{size_literal}
+    attr: [*sve-unstable]
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+    substitutions:
+      textual_size:
+        match_size: "{type}"
+        default: words
+        byte: bytes
+        halfword: halfwords
+        doubleword: doublewords
+    doc: Prefetch {textual_size}
+    arguments: ["pg: {predicate}", "base: *T"]
+    static_defs: ["const OP: svprfop", T]
+    types: [b8, b16, b32, b64]
+    assert_instr:
+      - ["prf{size_literal}", "OP = {{svprfop::SV_PLDL1KEEP}}, T = i64"]
+    test: { load: 0 }
+    compose:
+      - LLVMLink:
+          name: "prf.{sve_type}"
+          arguments:
+            ["pg: {predicate}", "base: *crate::ffi::c_void", "op: svprfop"]
+      - FnCall:
+          - "{llvm_link}"
+          - - $pg
+            - CastAs: [$base, "*const crate::ffi::c_void"]
+            - $OP
+
+  - name: svprf{size_literal}_vnum
+    attr: [*sve-unstable]
+    safety:
+      unsafe:
+        - pointer_offset_vnum: predicated
+    substitutions:
+      textual_size:
+        match_size: "{type}"
+        default: words
+        byte: bytes
+        halfword: halfwords
+        doubleword: doublewords
+    doc: Prefetch {textual_size}
+    arguments: ["pg: {predicate}", "base: *T", "vnum: i64"]
+    static_defs: ["const OP: svprfop", T]
+    types: [b8, b16, b32, b64]
+    assert_instr:
+      - ["prf{size_literal}", "OP = {{svprfop::SV_PLDL1KEEP}}, T = i64"]
+    test: { load: 0 }
+    compose:
+      - FnCall:
+          - "svprf{size_literal}"
+          - - $pg
+            - MethodCall:
+                - $base
+                - offset
+                - - Multiply:
+                      - CastAs: [{ FnCall: ["svcnt{size_literal}", []] }, isize]
+                      - CastAs: [$vnum, isize]
+          - - $OP
+            - _
+
+  - name: svprf{size_literal[1]}_gather_[{type[0]}]{index_or_offset}
+    attr: [*sve-unstable]
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+    substitutions:
+      index_or_offset:
+        { match_size: "{type[1]}", default: "index", byte: "offset" }
+      indices_or_offsets:
+        { match_size: "{type[1]}", default: "indices", byte: "offsets" }
+      textual_size:
+        match_size: "{type[1]}"
+        default: words
+        byte: bytes
+        halfword: halfwords
+        doubleword: doublewords
+    doc: Prefetch {textual_size}
+    types:
+      - [[i32, u32, i64, u64], [i8, i16, i32, i64]]
+    arguments:
+      ["pg: {predicate[0]}", "base: *T", "{indices_or_offsets}: {sve_type[0]}"]
+    static_defs: ["const OP: svprfop", T]
+    assert_instr:
+      [["prf{size_literal[1]}", "OP = {{svprfop::SV_PLDL1KEEP}}, T = i64"]]
+    test: { load: 0 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "prf{size_literal[1]}.gather.{type_kind[0].su}xtw.index.{sve_type[0]}"
+                arguments:
+                  - "pg: {predicate[0]}"
+                  - "base: *crate::ffi::c_void"
+                  - "{indices_or_offsets}: {sve_type[0]}"
+                  - "op: svprfop"
+            doubleword:
+              LLVMLink:
+                name: "prf{size_literal[1]}.gather.index.{sve_type[0]}"
+                arguments:
+                  - "pg: {predicate[0]}"
+                  - "base: *crate::ffi::c_void"
+                  - "{indices_or_offsets}: {sve_type[0]}"
+                  - "op: svprfop"
+      - FnCall:
+          - "{llvm_link}"
+          - - $pg
+            - CastAs: [$base, "*const crate::ffi::c_void"]
+            - "${indices_or_offsets}"
+            - $OP
+
+  - name: svprf{size_literal[1]}_gather[_{type[0]}base]
+    attr: [*sve-unstable]
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - no_provenance: bases
+    substitutions:
+      textual_size:
+        match_size: "{type[1]}"
+        default: words
+        byte: bytes
+        halfword: halfwords
+        doubleword: doublewords
+    doc: Prefetch {textual_size}
+    types:
+      - [[u32, u64], [i8, i16, i32, i64]]
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    static_defs: ["const OP: svprfop"]
+    assert_instr: [["prf{size_literal[1]}", "OP = {{svprfop::SV_PLDL1KEEP}}"]]
+    test: { load: 0 }
+    compose:
+      - LLVMLink:
+          name: "prf{size_literal[1]}.gather.scalar.offset.{sve_type[0]}"
+          arguments:
+            - "pg: {predicate[0]}"
+            - "bases: {sve_type[0]}"
+            - "index: i64"
+            - "op: svprfop"
+      - FnCall: ["{llvm_link}", [$pg, $bases, 0, $OP]]
+
+  - name: svprf{size_literal[1]}_gather[_{type[0]}base]_{index_or_offset}
+    attr: [*sve-unstable]
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - no_provenance: bases
+    substitutions:
+      index_or_offset:
+        { match_size: "{type[1]}", default: "index", byte: "offset" }
+      textual_size:
+        match_size: "{type[1]}"
+        default: words
+        byte: bytes
+        halfword: halfwords
+        doubleword: doublewords
+    doc: Prefetch {textual_size}
+    types:
+      - [[u32, u64], [i8, i16, i32, i64]]
+    arguments:
+      ["pg: {predicate[0]}", "bases: {sve_type[0]}", "{index_or_offset}: i64"]
+    static_defs: ["const OP: svprfop"]
+    assert_instr: [["prfb", "OP = {{svprfop::SV_PLDL1KEEP}}"]]
+    test: { load: 0 }
+    compose:
+      - LLVMLink:
+          name: "prf{size_literal[1]}.gather.scalar.offset.{sve_type[0]}"
+          arguments:
+            - "pg: {predicate[0]}"
+            - "bases: {sve_type[0]}"
+            - "{index_or_offset}: i64"
+            - "op: svprfop"
+      - FnCall:
+          - "{llvm_link}"
+          - - $pg
+            - $bases
+            - MatchSize:
+                - "{type[1]}"
+                - byte: $offset
+                  halfword: { MethodCall: [$index, unchecked_shl, [1]] }
+                  default: { MethodCall: [$index, unchecked_shl, [2]] }
+                  doubleword: { MethodCall: [$index, unchecked_shl, [3]] }
+            - $OP
+
+  - name: svcvt_{type[0]}[_{type[1]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Floating-point convert
+    arguments:
+      ["inactive: {sve_type[0]}", "pg: {max_predicate}", "op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[f32, f64], [i32, u32, i64, u64]]
+    zeroing_method: { drop: inactive }
+    substitutions:
+      convert_from: { match_kind: "{type[1]}", default: s, unsigned: u }
+    assert_instr: ["{convert_from}cvtf"]
+    compose:
+      - LLVMLink:
+          name: "{convert_from}cvtf.{type[0]}{type[1]}"
+
+  - name: svcvt_{type[0]}[_{type[1]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Floating-point convert
+    arguments:
+      ["inactive: {sve_type[0]}", "pg: {max_predicate}", "op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i32, u32, i64, u64], [f32, f64]]
+    zeroing_method: { drop: inactive }
+    substitutions:
+      convert_to: { match_kind: "{type[0]}", default: s, unsigned: u }
+    assert_instr: ["fcvtz{convert_to}"]
+    compose:
+      - LLVMLink: { name: "fcvtz{convert_to}.{type[0]}{type[1]}" }
+
+  - name: svcvt_{type[0]}[_{type[1]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Floating-point convert
+    arguments:
+      ["inactive: {sve_type[0]}", "pg: {max_predicate}", "op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[f32, f64], [f64, f32]]
+    zeroing_method: { drop: inactive }
+    assert_instr: [fcvt]
+    compose:
+      - LLVMLink: { name: "fcvt.{type[0]}{type[1]}" }
+
+  - name: svreinterpret_{type[0]}[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Reinterpret vector contents
+    arguments: ["op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    assert_instr: []
+    types:
+      - - [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+        - [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    compose:
+      - FnCall: ["crate::intrinsics::transmute_unchecked", [$op], [], true]
+
+  - name: svrinta[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Round to nearest, ties away from zero
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frinta]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "frinta.{sve_type}" }
+
+  - name: svrinti[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Round using current rounding mode (inexact)
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frinti]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "frinti.{sve_type}" }
+
+  - name: svrintm[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Round towards -∞
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frintm]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "frintm.{sve_type}" }
+
+  - name: svrintn[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Round to nearest, ties to even
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frintn]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "frintn.{sve_type}" }
+
+  - name: svrintp[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Round towards +∞
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frintp]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "frintp.{sve_type}" }
+
+  - name: svrintx[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Round using current rounding mode (exact)
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frintx]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "frintx.{sve_type}" }
+
+  - name: svrintz[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Round towards zero
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frintz]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "frintz.{sve_type}" }
+
+  - name: svabd[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Absolute difference
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f64, f32, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind}abd"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind}abd.{sve_type}" }
+
+  - name: svabs[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Absolute value
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64]
+    assert_instr: ["{type_kind.f}abs"]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "{type_kind.f}abs.{sve_type}" }
+
+  - name: svand[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Bitwise AND
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [and]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op2
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink: { name: "and.{sve_type}" }
+
+  - name: svandv[_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise AND reduction to scalar
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    assert_instr: [andv]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    compose:
+      - LLVMLink: { name: "andv.{sve_type}" }
+
+  - name: svand[_b]_z
+    attr: [*sve-unstable]
+    doc: Bitwise AND
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [and]
+    compose:
+      - LLVMLink: { name: "and.z.nvx16i1" }
+
+  - name: svmov[_b]_z
+    attr: [*sve-unstable]
+    doc: Move
+    arguments: ["pg: svbool_t", "op: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [mov]
+    compose:
+      - FnCall: ["svand_b_z", [$pg, $op, $op]]
+
+  - name: svbic[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Bitwise clear
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [bic]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op2
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink: { name: "bic.{sve_type}" }
+
+  - name: svbic[_b]_z
+    attr: [*sve-unstable]
+    doc: Bitwise clear
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [bic]
+    compose:
+      - LLVMLink: { name: "bic.z.nvx16i1" }
+
+  - name: sveor[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Bitwise exclusive OR
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [eor]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op2
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink: { name: "eor.{sve_type}" }
+
+  - name: sveorv[_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise exclusive OR reduction to scalar
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    assert_instr: [eorv]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    compose:
+      - LLVMLink: { name: "eorv.{sve_type}" }
+
+  - name: sveor[_b]_z
+    attr: [*sve-unstable]
+    doc: Bitwise exclusive OR
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [eor]
+    compose:
+      - LLVMLink: { name: "eor.z.nvx16i1" }
+
+  - name: svnot[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Bitwise invert
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [not]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "not.{sve_type}" }
+
+  - name: svnot[_b]_z
+    attr: [*sve-unstable]
+    doc: Bitwise invert
+    arguments: ["pg: svbool_t", "op: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [not]
+    compose:
+      - FnCall: ["sveor_b_z", [$pg, $op, $pg]]
+
+  - name: svcnot[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Logically invert boolean condition
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [cnot]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "cnot.{sve_type}" }
+
+  - name: svnand[_b]_z
+    attr: [*sve-unstable]
+    doc: Bitwise NAND
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [nand]
+    compose:
+      - LLVMLink: { name: "nand.z.nxv16i1" }
+
+  - name: svnor[_b]_z
+    attr: [*sve-unstable]
+    doc: Bitwise NOR
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [nor]
+    compose:
+      - LLVMLink: { name: "nor.z.nxv16i1" }
+
+  - name: svorr[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Bitwise inclusive OR
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [orr]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op2
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink: { name: "orr.{sve_type}" }
+
+  - name: svorv[_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise inclusive OR reduction to scalar
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    assert_instr: [orv]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    compose:
+      - LLVMLink: { name: "orv.{sve_type}" }
+
+  - name: svorr[_b]_z
+    attr: [*sve-unstable]
+    doc: Bitwise inclusive OR
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [orr]
+    compose:
+      - LLVMLink: { name: "orr.z.nvx16i1" }
+
+  - name: svorn[_b]_z
+    attr: [*sve-unstable]
+    doc: Bitwise inclusive OR, inverting second argument
+    arguments: ["pg: svbool_t", "op1: svbool_t", "op2: svbool_t"]
+    return_type: svbool_t
+    assert_instr: [orn]
+    compose:
+      - LLVMLink: { name: "orn.z.nvx16i1" }
+
+  - name: svlsl[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Logical shift left
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i8, u8], u8]
+      - [[i16, u16], u16]
+      - [[i32, u32], u32]
+      - [[i64, u64], u64]
+    assert_instr: [lsl]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "lsl.{sve_type[0]}" }
+
+  - name: svlsl_wide[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Logical shift left
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i8, i16, i32, u8, u16, u32], u64]
+    assert_instr: [lsl]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "lsl.wide.{sve_type[0]}" }
+
+  - name: svasr[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Arithmetic shift right
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i8, u8]
+      - [i16, u16]
+      - [i32, u32]
+      - [i64, u64]
+    assert_instr: [asr]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "asr.{sve_type[0]}" }
+
+  - name: svasr_wide[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Arithmetic shift right
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i8, i16, i32], u64]
+    assert_instr: [asr]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "asr.wide.{sve_type[0]}" }
+
+  - name: svasrd[_n_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Arithmetic shift right for divide by immediate
+    arguments: ["pg: {predicate}", "op1: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size}"] }]
+    types: [i8, i16, i32, i64]
+    assert_instr: [[asrd, "IMM2 = 1"]]
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink:
+          name: "asrd.{sve_type}"
+          arguments: ["pg: {predicate}", "op1: {sve_type}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$pg, $op1, $IMM2]]
+
+  - name: svlsr[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Logical shift right
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8, u16, u32, u64]
+    assert_instr: [lsr]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "lsr.{sve_type}" }
+
+  - name: svlsr_wide[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Logical shift right
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[u8, u16, u32], u64]
+    assert_instr: [lsr]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "lsr.wide.{sve_type[0]}" }
+
+  - name: svadda[_{type}]
+    attr: [*sve-unstable]
+    doc: Add reduction (strictly-ordered)
+    arguments: ["pg: {predicate}", "initial: {type}", "op: {sve_type}"]
+    return_type: "{type}"
+    assert_instr: [fadda]
+    types: [f32, f64]
+    compose:
+      - LLVMLink: { name: "fadda.{sve_type}" }
+
+  - name: svaddv[_{type}]
+    attr: [*sve-unstable]
+    doc: Add reduction
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    types: [f32, f64, i64, u64]
+    assert_instr: [{ float: faddv, default: uaddv }]
+    compose:
+      - LLVMLink: { name: "{type_kind.fsu}addv.{sve_type}" }
+
+  - name: svaddv[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Add reduction
+    arguments: ["pg: {predicate[0]}", "op: {sve_type[0]}"]
+    return_type: "{type[1]}"
+    types:
+      - [[i8, i16, i32], i64]
+      - [[u8, u16, u32], u64]
+    assert_instr: ["{type_kind[0].su}addv"]
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}addv.{sve_type[0]}" }
+
+  - name: svmaxv[_{type}]
+    attr: [*sve-unstable]
+    doc: Maximum reduction to scalar
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.fsu}maxv"]
+    compose:
+      - LLVMLink: { name: "{type_kind.fsu}maxv.{sve_type}" }
+
+  - name: svmaxnmv[_{type}]
+    attr: [*sve-unstable]
+    doc: Maximum number reduction to scalar
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    types: [f32, f64]
+    assert_instr: [fmaxnmv]
+    compose:
+      - LLVMLink: { name: "fmaxnmv.{sve_type}" }
+
+  - name: svminv[_{type}]
+    attr: [*sve-unstable]
+    doc: Minimum reduction to scalar
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.fsu}minv"]
+    compose:
+      - LLVMLink: { name: "{type_kind.fsu}minv.{sve_type}" }
+
+  - name: svminnmv[_{type}]
+    attr: [*sve-unstable]
+    doc: Minimum number reduction to scalar
+    arguments: ["pg: {predicate}", "op: {sve_type}"]
+    return_type: "{type}"
+    types: [f32, f64]
+    assert_instr: [fminnmv]
+    compose:
+      - LLVMLink: { name: "fminnmv.{sve_type}" }
+
+  - name: svmul[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Multiply
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: ["{type_kind.f}mul"]
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.f}mul.{sve_type}" }
+
+  - name: svmulh[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Multiply, returning high-half
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: ["{type_kind.su}mulh"]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}mulh.{sve_type}" }
+
+  - name: svmulx[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Multiply extended (∞×0=2)
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: ["fmulx"]
+    types: [f32, f64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "fmulx.{sve_type}" }
+
+  - name: svrecpe[_{type}]
+    attr: [*sve-unstable]
+    doc: Reciprocal estimate
+    arguments: ["op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frecpe]
+    compose:
+      - LLVMLink: { name: "frecpe.x.{sve_type}" }
+
+  - name: svrecps[_{type}]
+    attr: [*sve-unstable]
+    doc: Reciprocal step
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frecps]
+    compose:
+      - LLVMLink: { name: "frecps.x.{sve_type}" }
+
+  - name: svrecpx[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Reciprocal exponent
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frecpx]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "frecpx.x.{sve_type}" }
+
+  - name: svrsqrte[_{type}]
+    attr: [*sve-unstable]
+    doc: Reciprocal square root estimate
+    arguments: ["op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frsqrte]
+    compose:
+      - LLVMLink: { name: "frsqrte.x.{sve_type}" }
+
+  - name: svrsqrts[_{type}]
+    attr: [*sve-unstable]
+    doc: Reciprocal square root step
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: [frsqrts]
+    compose:
+      - LLVMLink: { name: "frsqrts.x.{sve_type}" }
+
+  - name: svmad[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Multiply-add, multiplicand first
+    arguments:
+      - "pg: {predicate}"
+      - "op1: {sve_type}"
+      - "op2: {sve_type}"
+      - "op3: {sve_type}"
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op3
+    assert_instr: ["{type_kind.f}mad"]
+    compose:
+      - LLVMLink: { name: "{type_kind.f}mad.{sve_type}" }
+
+  - name: svmla[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Multiply-add, addend first
+    arguments:
+      - "pg: {predicate}"
+      - "op1: {sve_type}"
+      - "op2: {sve_type}"
+      - "op3: {sve_type}"
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op3
+    assert_instr: ["{type_kind.f}mla"]
+    compose:
+      - LLVMLink: { name: "{type_kind.f}mla.{sve_type}" }
+
+  - name: svmla_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Multiply-add, addend first
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type}" }]
+    types: [f32, f64]
+    assert_instr: [[fmla, "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "fmla.lane.{sve_type}"
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svmls[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Multiply-subtract, minuend first
+    arguments:
+      - "pg: {predicate}"
+      - "op1: {sve_type}"
+      - "op2: {sve_type}"
+      - "op3: {sve_type}"
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op3
+    assert_instr: ["{type_kind.f}mls"]
+    compose:
+      - LLVMLink: { name: "{type_kind.f}mls.{sve_type}" }
+
+  - name: svmls_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Multiply-subtract, minuend first
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type}" }]
+    types: [f32, f64]
+    assert_instr: [[fmls, "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "fmls.lane.{sve_type}"
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svmsb[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Multiply-subtract, multiplicand first
+    arguments:
+      - "pg: {predicate}"
+      - "op1: {sve_type}"
+      - "op2: {sve_type}"
+      - "op3: {sve_type}"
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op3
+    assert_instr: ["{type_kind.f}msb"]
+    compose:
+      - LLVMLink: { name: "{type_kind.f}msb.{sve_type}" }
+
+  - name: svnmad[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Negated multiply-add, multiplicand first
+    arguments:
+      - "pg: {predicate}"
+      - "op1: {sve_type}"
+      - "op2: {sve_type}"
+      - "op3: {sve_type}"
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op3
+    assert_instr: [fnmad]
+    compose:
+      - LLVMLink: { name: "fnmad.{sve_type}" }
+
+  - name: svnmla[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Negated multiply-add, addend first
+    arguments:
+      - "pg: {predicate}"
+      - "op1: {sve_type}"
+      - "op2: {sve_type}"
+      - "op3: {sve_type}"
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op3
+    assert_instr: [fnmla]
+    compose:
+      - LLVMLink: { name: "fnmla.{sve_type}" }
+
+  - name: svnmls[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Negated multiply-subtract, minuend first
+    arguments:
+      - "pg: {predicate}"
+      - "op1: {sve_type}"
+      - "op2: {sve_type}"
+      - "op3: {sve_type}"
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op3
+    assert_instr: [fnmls]
+    compose:
+      - LLVMLink: { name: "fnmls.{sve_type}" }
+
+  - name: svnmsb[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Negated multiply-subtract, multiplicand first
+    arguments:
+      - "pg: {predicate}"
+      - "op1: {sve_type}"
+      - "op2: {sve_type}"
+      - "op3: {sve_type}"
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    zeroing_method: { select: op1 }
+    n_variant_op: op3
+    assert_instr: [fnmsb]
+    compose:
+      - LLVMLink: { name: "fnmsb.{sve_type}" }
+
+  - name: svneg[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Negate
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64]
+    assert_instr: ["{type_kind.f}neg"]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "{type_kind.f}neg.{sve_type}" }
+
+  - name: svqadd[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating add
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.su}qadd"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}qadd.x.{sve_type}" }
+
+  - name: svadr{size_literal[2]}[_{type[0]}base]_[{type[1]}]{index_or_offset}
+    attr: [*sve-unstable]
+    substitutions:
+      index_or_offset: { match_size: "{type[2]}", default: index, byte: offset }
+      indices_or_offsets:
+        { match_size: "{type[2]}", default: indices, byte: offsets }
+    doc: Compute vector addresses for {size[2]}-bit data
+    arguments: ["bases: {sve_type[0]}", "{indices_or_offsets}: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [u32, [i32, u32], [i8, i16, i32, i64]]
+      - [u64, [i64, u64], [i8, i16, i32, i64]]
+    assert_instr: [adr]
+    compose:
+      - LLVMLink: { name: "adr{size_literal[2]}.{sve_type[0]}" }
+
+  - name: svdot[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Dot product
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i32, i8]
+      - [i64, i16]
+      - [u32, u8]
+      - [u64, u16]
+    assert_instr: ["{type_kind[0].su}dot"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}dot.{sve_type[0]}" }
+
+  - name: svdot_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Dot product
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[0]}" }]
+    types:
+      - [i32, i8]
+      - [i64, i16]
+      - [u32, u8]
+      - [u64, u16]
+    assert_instr: [["{type_kind[0].su}dot", "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}dot.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "imm_index: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svusdot[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Dot product (unsigned × signed)
+    target_features: [i8mm]
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[2]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i32, u8, i8]]
+    assert_instr: [usdot]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "usdot.{sve_type[0]}" }
+
+  - name: svusdot_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Dot product (unsigned × signed)
+    target_features: [i8mm]
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[2]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[0]}" }]
+    types: [[i32, u8, i8]]
+    assert_instr: [[usdot, "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "usdot.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[2]}"
+            - "imm_index: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svsudot[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Dot product (signed × unsigned)
+    target_features: [i8mm]
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[2]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i32, i8, u8]]
+    assert_instr: [usdot]
+    n_variant_op: op3
+    compose:
+      - FnCall: ["svusdot_{type[0]}", [$op1, $op3, $op2]]
+
+  - name: svsudot_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Dot product (signed × unsigned)
+    target_features: [i8mm]
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[2]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[0]}" }]
+    types: [[i32, i8, u8]]
+    assert_instr: [[sudot, "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "sudot.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[2]}"
+            - "imm_index: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svdiv[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Divide
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i32, i64, u32, u64]
+    assert_instr: ["{type_kind.fsu}div"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.fsu}div.{sve_type}" }
+
+  - name: svdivr[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Divide reversed
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i32, i64, u32, u64]
+    assert_instr: ["{type_kind.fsu}divr"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.fsu}divr.{sve_type}" }
+
+  - name: svexpa[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Floating-point exponential accelerator
+    arguments: ["op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[f32, u32], [f64, u64]]
+    assert_instr: [fexpa]
+    compose:
+      - LLVMLink: { name: "fexpa.x.{sve_type[0]} " }
+
+  - name: svscale[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Adjust exponent
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[f32, i32], [f64, i64]]
+    assert_instr: [fscale]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "fscale.{sve_type[0]}" }
+
+  - name: svmmla[_{type}]
+    attr: [*sve-unstable]
+    doc: Matrix multiply-accumulate
+    target_features: [f32mm]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32]
+    assert_instr: [fmmla]
+    compose:
+      - LLVMLink: { name: "fmmla.{sve_type}" }
+
+  - name: svmmla[_{type}]
+    attr: [*sve-unstable]
+    doc: Matrix multiply-accumulate
+    target_features: [f64mm]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f64]
+    assert_instr: [fmmla]
+    compose:
+      - LLVMLink: { name: "fmmla.{sve_type}" }
+
+  - name: svmmla[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Matrix multiply-accumulate
+    target_features: [i8mm]
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i32, i8], [u32, u8]]
+    assert_instr: ["{type_kind[0].su}mmla"]
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}mmla.{sve_type[0]}" }
+
+  - name: svusmmla[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Matrix multiply-accumulate (unsigned × signed)
+    target_features: [i8mm]
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[2]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i32, u8, i8]]
+    assert_instr: [usmmla]
+    compose:
+      - LLVMLink: { name: "usmmla.{sve_type[0]}" }
+
+  - name: svmin[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Minimum
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    assert_instr: ["{type_kind.fsu}min"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.fsu}min.{sve_type}" }
+
+  - name: svminnm[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Minimum number
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    zeroing_method: { select: op1 }
+    assert_instr: [fminnm]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "fminnm.{sve_type}" }
diff --git a/crates/stdarch-gen-arm/spec/sve2/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/sve2/aarch64.spec.yml
new file mode 100644
index 0000000000..269d7ff0ea
--- /dev/null
+++ b/crates/stdarch-gen-arm/spec/sve2/aarch64.spec.yml
@@ -0,0 +1,3206 @@
+arch_cfgs:
+  - arch_name: aarch64
+    target_feature: [sve, sve2]
+    llvm_prefix: llvm.aarch64.sve
+
+auto_llvm_sign_conversion: true
+generate_load_store_tests: true
+
+# `#[unstable(feature = "stdarch_aarch64_sve", issue = "145052")]`
+sve-unstable: &sve-unstable
+  FnCall: [unstable, ['feature = "stdarch_aarch64_sve"', 'issue= "145052"']]
+
+# `#[cfg_attr(all(test, not(target_env = "msvc"))]`
+msvc-disabled: &msvc-disabled
+  FnCall: [all, [test, {FnCall: [not, ['target_env = "msvc"']]}]]
+
+intrinsics:
+  - name: svbext[{_n}_{type}]
+    attr: [*sve-unstable]
+    target_features: [sve2-bitperm]
+    doc: Gather lower bits from positions selected by bitmask
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8, u16, u32, u64]
+    assert_instr: [bext]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "bext.x.{sve_type}" }
+
+  - name: svbgrp[{_n}_{type}]
+    attr: [*sve-unstable]
+    target_features: [sve2-bitperm]
+    doc: Group bits to right or left as selected by bitmask
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8, u16, u32, u64]
+    assert_instr: [bgrp]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "bgrp.x.{sve_type}" }
+
+  - name: svbdep[{_n}_{type}]
+    attr: [*sve-unstable]
+    target_features: [sve2-bitperm]
+    doc: Scatter lower bits into positions selected by bitmask
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8, u16, u32, u64]
+    assert_instr: [bdep]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "bdep.x.{sve_type}" }
+
+  - name: svhistcnt[_{type[0]}]_z
+    attr: [*sve-unstable]
+    doc: Count matching elements
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types: [[i32, u32], [i64, u64], [u32, u32], [u64, u64]]
+    assert_instr: [histcnt]
+    compose:
+      - LLVMLink: { name: "histcnt.{sve_type[0]}" }
+
+  - name: svhistseg[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Count matching elements in 128-bit segments
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types: [[i8, u8], [u8, u8]]
+    assert_instr: [histseg]
+    compose:
+      - LLVMLink: { name: "histseg.{sve_type[0]}" }
+
+  - name: svmatch[_{type}]
+    attr: [*sve-unstable]
+    doc: Detect any matching elements
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [i8, i16, u8, u16]
+    assert_instr: [match]
+    compose:
+      - LLVMLink: { name: "match.{sve_type}" }
+
+  - name: svnmatch[_{type}]
+    attr: [*sve-unstable]
+    doc: Detect no matching elements
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{predicate}"
+    types: [i8, i16, u8, u16]
+    assert_instr: [nmatch]
+    compose:
+      - LLVMLink: { name: "nmatch.{sve_type}" }
+
+  - name: svhadd[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Halving add
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    assert_instr: ["{type_kind.su}hadd"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}hadd.{sve_type}" }
+
+  - name: svrhadd[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Rounding halving add
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    assert_instr: ["{type_kind.su}rhadd"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}rhadd.{sve_type}" }
+
+  - name: svaddhnb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Add narrow high part (bottom)
+    types:
+      [[i16, i8], [i32, i16], [i64, i32], [u16, u8], [u32, u16], [u64, u32]]
+    return_type: "{sve_type[1]}"
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    assert_instr: [addhnb]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "addhnb.{sve_type[0]}" }
+
+  - name: svaddhnt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Add narrow high part (top)
+    types:
+      [[i16, i8], [i32, i16], [i64, i32], [u16, u8], [u32, u16], [u64, u32]]
+    return_type: "{sve_type[1]}"
+    arguments:
+      ["even: {sve_type[1]}", "op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    assert_instr: [addhnt]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "addhnt.{sve_type[0]}" }
+
+  - name: svraddhnb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Rounding add narrow high part (bottom)
+    types:
+      [[i16, i8], [i32, i16], [i64, i32], [u16, u8], [u32, u16], [u64, u32]]
+    return_type: "{sve_type[1]}"
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    assert_instr: [raddhnb]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "raddhnb.{sve_type[0]}" }
+
+  - name: svraddhnt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Rounding add narrow high part (top)
+    types:
+      [[i16, i8], [i32, i16], [i64, i32], [u16, u8], [u32, u16], [u64, u32]]
+    return_type: "{sve_type[1]}"
+    arguments:
+      ["even: {sve_type[1]}", "op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    assert_instr: [raddhnt]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "raddhnt.{sve_type[0]}" }
+
+  - name: svcadd[_{type}]
+    attr: [*sve-unstable]
+    doc: Complex add with rotate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    static_defs: ["const IMM_ROTATION: i32"]
+    constraints: [{ variable: IMM_ROTATION, any_values: [90, 270] }]
+    assert_instr: [[cadd, "IMM_ROTATION = 90"]]
+    compose:
+      - LLVMLink:
+          name: cadd.x.{sve_type}
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm_rotation: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, $IMM_ROTATION]]
+
+  - name: svcdot[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Complex dot product
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i32, i8], [i64, i16]]
+    static_defs: ["const IMM_ROTATION: i32"]
+    constraints: [{ variable: IMM_ROTATION, any_values: [0, 90, 180, 270] }]
+    assert_instr: [[cdot, "IMM_ROTATION = 90"]]
+    compose:
+      - LLVMLink:
+          name: cdot.{sve_type[0]}
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "imm_rotation: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_ROTATION]]
+
+  - name: svcdot_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Complex dot product
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i32, i8], [i64, i16]]
+    static_defs: ["const IMM_INDEX: i32", "const IMM_ROTATION: i32"]
+    constraints:
+      - { variable: IMM_INDEX, vec_max_elems_type: "{type[0]}" }
+      - { variable: IMM_ROTATION, any_values: [0, 90, 180, 270] }
+    assert_instr: [[cdot, "IMM_INDEX = 0, IMM_ROTATION = 90"]]
+    compose:
+      - LLVMLink:
+          name: cdot.lane.{sve_type[0]}
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "imm_index: i32"
+            - "imm_rotation: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX, $IMM_ROTATION]]
+
+  - name: svcmla[_{type}]
+    attr: [*sve-unstable]
+    doc: Complex multiply-add with rotate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    static_defs: ["const IMM_ROTATION: i32"]
+    constraints: [{ variable: IMM_ROTATION, any_values: [0, 90, 180, 270] }]
+    assert_instr: [[cmla, "IMM_ROTATION = 90"]]
+    compose:
+      - LLVMLink:
+          name: cmla.x.{sve_type}
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "imm_rotation: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_ROTATION]]
+
+  - name: svcmla_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Complex multiply-add with rotate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i16, i32, u16, u32]
+    static_defs: ["const IMM_INDEX: i32", "const IMM_ROTATION: i32"]
+    constraints:
+      - variable: IMM_INDEX
+        range: { match_size: "{type}", default: [0, 1], halfword: [0, 3] }
+      - { variable: IMM_ROTATION, any_values: [0, 90, 180, 270] }
+    assert_instr: [[cmla, "IMM_INDEX = 0, IMM_ROTATION = 90"]]
+    compose:
+      - LLVMLink:
+          name: cmla.lane.x.{sve_type}
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "imm_index: i32"
+            - "imm_rotation: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX, $IMM_ROTATION]]
+
+  - name: svqrdcmlah[_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding doubling complex multiply-add high with rotate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64]
+    static_defs: ["const IMM_ROTATION: i32"]
+    constraints: [{ variable: IMM_ROTATION, any_values: [0, 90, 180, 270] }]
+    assert_instr: [[sqrdcmlah, "IMM_ROTATION = 90"]]
+    compose:
+      - LLVMLink:
+          name: sqrdcmlah.x.{sve_type}
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "imm_rotation: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_ROTATION]]
+
+  - name: svqrdcmlah_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding doubling complex multiply-add high with rotate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i16, i32]
+    static_defs: ["const IMM_INDEX: i32", "const IMM_ROTATION: i32"]
+    constraints:
+      - variable: IMM_INDEX
+        range: { match_size: "{type}", default: [0, 1], halfword: [0, 3] }
+      - { variable: IMM_ROTATION, any_values: [0, 90, 180, 270] }
+    assert_instr: [[sqrdcmlah, "IMM_INDEX = 0, IMM_ROTATION = 90"]]
+    compose:
+      - LLVMLink:
+          name: sqrdcmlah.lane.x.{sve_type}
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "imm_index: i32"
+            - "imm_rotation: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX, $IMM_ROTATION]]
+
+  - name: svqcadd[_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating complex add with rotate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64]
+    static_defs: ["const IMM_ROTATION: i32"]
+    constraints: [{ variable: "IMM_ROTATION", any_values: [90, 270] }]
+    assert_instr: [[sqcadd, "IMM_ROTATION = 90"]]
+    compose:
+      - LLVMLink:
+          name: "sqcadd.x.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm_rotation: i32"]
+      - FnCall: ["{llvm_link}", ["$op1", "$op2", "$IMM_ROTATION"]]
+
+  - name: svsublb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Subtract long (bottom)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}sublb"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}sublb.{sve_type[0]}"
+
+  - name: svsublbt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Subtract long (bottom - top)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+    assert_instr: [ssublbt]
+    n_variant_op: op2
+    compose:
+      - LLVMLink:
+          name: "ssublbt.{sve_type[0]}"
+
+  - name: svsublt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Subtract long (top)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}sublt"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}sublt.{sve_type[0]}" }
+
+  - name: svsubltb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Subtract long (top - bottom)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+    assert_instr: [ssubltb]
+    n_variant_op: op2
+    compose:
+      - LLVMLink:
+          name: "ssubltb.{sve_type[0]}"
+
+  - name: svsubwb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Subtract wide (bottom)
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}subwb"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}subwb.{sve_type[0]}" }
+
+  - name: svsubwt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Subtract wide (top)
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}subwt"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}subwt.{sve_type[0]}" }
+
+  - name: svrsubhnb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Rounding subtract narrow high part (bottom)
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [rsubhnb]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "rsubhnb.{sve_type[0]}" }
+
+  - name: svrsubhnt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Rounding subtract narrow high part (top)
+    arguments:
+      ["even: {sve_type[1]}", "op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [rsubhnt]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "rsubhnt.{sve_type[0]}" }
+
+  - name: svsubhnb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Subtract narrow high part (bottom)
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [subhnb]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "subhnb.{sve_type[0]}" }
+
+  - name: svsubhnt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Subtract narrow high part (top)
+    arguments:
+      ["even: {sve_type[1]}", "op1: {sve_type[0]}", "op2: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [subhnt]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "subhnt.{sve_type[0]}" }
+
+  - name: svsbclb[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Subtract with borrow long (bottom)
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u32, u64]
+    assert_instr: [sbclb]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sbclb.{sve_type}" }
+
+  - name: svsbclt[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Subtract with borrow long (top)
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u32, u64]
+    assert_instr: [sbclt]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sbclt.{sve_type}" }
+
+  - name: svqsub[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating subtract
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    assert_instr: ["{type_kind.su}qsub"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}qsub.{sve_type}" }
+
+  - name: svqsubr[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating subtract reversed
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    zeroing_method: { select: op1 }
+    assert_instr: ["{type_kind.su}qsubr"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}qsubr.{sve_type}" }
+
+  - name: svhsub[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Halving subtract
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.su}hsub"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}hsub.{sve_type}" }
+
+  - name: svhsubr[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Halving subtract reversed
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.su}hsub"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}hsubr.{sve_type}" }
+
+  - name: svwhilege_{sve_type[1]}[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: While decrementing scalar is greater than or equal to
+    arguments: ["op1: {type[0]}", "op2: {type[0]}"]
+    return_type: "{sve_type[1]}"
+    types: [[[i32, i64, u32, u64], [b8, b16, b32, b64]]]
+    assert_instr: [{ default: whilege, unsigned: whilehs }]
+    compose:
+      - MatchKind:
+          - "{type[0]}"
+          - default: { LLVMLink: { name: "whilege.{sve_type[1]}.{type[0]}" } }
+            unsigned: { LLVMLink: { name: "whilehs.{sve_type[1]}.{type[0]}" } }
+
+  - name: svwhilegt_{sve_type[1]}[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: While decrementing scalar is greater than
+    arguments: ["op1: {type[0]}", "op2: {type[0]}"]
+    return_type: "{sve_type[1]}"
+    types: [[[i32, i64, u32, u64], [b8, b16, b32, b64]]]
+    assert_instr: [{ default: whilegt, unsigned: whilehi }]
+    compose:
+      - MatchKind:
+          - "{type[0]}"
+          - default: { LLVMLink: { name: "whilegt.{sve_type[1]}.{type[0]}" } }
+            unsigned: { LLVMLink: { name: "whilehi.{sve_type[1]}.{type[0]}" } }
+
+  - name: svwhilerw_{size}ptr
+    attr: [*sve-unstable]
+    safety:
+      unsafe: []
+    visibility: private
+    static_defs: [T]
+    substitutions:
+      size_alt:
+        match_size: "{type}"
+        byte: b
+        halfword: h
+        default: s
+        doubleword: d
+    arguments: ["op1: *T", "op2: *T"]
+    return_type: "{predicate}"
+    types: [i8, i16, i32, i64]
+    assert_instr: []
+    compose:
+      - Let: [op1, CastAs: [$op1, "*const crate::ffi::c_void"]]
+      - Let: [op2, CastAs: [$op2, "*const crate::ffi::c_void"]]
+      - LLVMLink:
+          name: "whilerw.{size_alt}.{predicate}.p0"
+          arguments: ["op1: *crate::ffi::c_void", "op2: *crate::ffi::c_void"]
+
+  - name: svwhilerw[_{type}]
+    attr: [*sve-unstable]
+    doc: While free of read-after-write conflicts
+    # TODO: This might be safe even with unrelated pointers, but the LLVM builtin's guarantees don't
+    # seem to be documented, so we conservatively keep this unsafe for now.
+    safety:
+      unsafe:
+      - custom: "[`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints
+        must be met for at least the base pointers, `op1` and `op2`."
+    arguments: ["op1: *{type}", "op2: *{type}"]
+    return_type: "svbool_t"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [whilerw]
+    compose:
+      - FnCall:
+          - "svwhilerw_{size}ptr"
+          - - $op1
+            - $op2
+          - - Type: "{type}"
+
+  - name: svwhilewr_{size}ptr
+    attr: [*sve-unstable]
+    safety:
+      unsafe: []
+    visibility: private
+    static_defs: [T]
+    substitutions:
+      size_alt:
+        match_size: "{type}"
+        byte: b
+        halfword: h
+        default: s
+        doubleword: d
+    arguments: ["op1: *T", "op2: *T"]
+    return_type: "{predicate}"
+    types: [i8, i16, i32, i64]
+    assert_instr: []
+    compose:
+      - Let: [op1, CastAs: [$op1, "*const crate::ffi::c_void"]]
+      - Let: [op2, CastAs: [$op2, "*const crate::ffi::c_void"]]
+      - LLVMLink:
+          name: "whilewr.{size_alt}.{predicate}.p0"
+          arguments: ["op1: *crate::ffi::c_void", "op2: *crate::ffi::c_void"]
+
+  - name: svwhilewr[_{type}]
+    attr: [*sve-unstable]
+    doc: While free of write-after-read conflicts
+    # TODO: This might be safe even with unrelated pointers, but the LLVM builtin's guarantees don't
+    # seem to be documented, so we conservatively keep this unsafe for now.
+    safety:
+      unsafe:
+      - custom: "[`pointer::byte_offset_from`](pointer#method.byte_offset_from) safety constraints
+        must be met for at least the base pointers, `op1` and `op2`."
+    arguments: ["op1: *{type}", "op2: *{type}"]
+    return_type: "svbool_t"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [whilewr]
+    compose:
+      - FnCall:
+          - "svwhilewr_{size}ptr"
+          - - $op1
+            - $op2
+          - - Type: "{type}"
+
+  - name: svtbl2[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Table lookup in two-vector table
+    arguments: ["data: {sve_type_x2[0]}", "indices: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [f32, u32]
+      - [f64, u64]
+      - [i8, u8]
+      - [i16, u16]
+      - [i32, u32]
+      - [i64, u64]
+      - [u8, u8]
+      - [u16, u16]
+      - [u32, u32]
+      - [u64, u64]
+    assert_instr: [tbl]
+    compose:
+      - LLVMLink:
+          name: "tbl2.{sve_type[0]}"
+          arguments:
+            - "data0: {sve_type[0]}"
+            - "data1: {sve_type[0]}"
+            - "indices: {sve_type[1]}"
+      - FnCall:
+          - "{llvm_link}"
+          - - FnCall: ["svget2_{type[0]}", ["$data"], [0]]
+            - FnCall: ["svget2_{type[0]}", ["$data"], [1]]
+            - $indices
+
+  - name: svtbx[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Table lookup in single-vector table (merging)
+    arguments:
+      - "fallback: {sve_type[0]}"
+      - "data: {sve_type[0]}"
+      - "indices: {sve_type[1]}"
+    return_type: "{sve_type[0]}"
+    types:
+      - [f32, u32]
+      - [f64, u64]
+      - [i8, u8]
+      - [i16, u16]
+      - [i32, u32]
+      - [i64, u64]
+      - [u8, u8]
+      - [u16, u16]
+      - [u32, u32]
+      - [u64, u64]
+    assert_instr: [tbx]
+    compose:
+      - LLVMLink: { name: "tbx.{sve_type[0]}" }
+
+  - name: svcvtlt_{type[0]}[_{type[1]}]_m
+    attr: [*sve-unstable]
+    doc: Up convert long (top)
+    arguments:
+      ["inactive: {sve_type[0]}", "pg: {max_predicate}", "op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[f64, f32]]
+    assert_instr: [fcvtlt]
+    compose:
+      - LLVMLink: { name: "fcvtlt.{type[0]}{type[1]}" }
+
+  - name: svcvtlt_{type[0]}[_{type[1]}]_x
+    attr: [*sve-unstable]
+    doc: Up convert long (top)
+    arguments: ["pg: svbool_t", "op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[f64, f32]]
+    assert_instr: [fcvtlt]
+    compose:
+      - FnCall:
+        - "svcvtlt_{type[0]}_{type[1]}_m"
+        - - FnCall: ["crate::intrinsics::transmute_unchecked", [$op], [], true]
+          - $pg
+          - $op
+
+  - name: svcvtnt_{type[0]}[_{type[1]}]{_mx}
+    attr: [*sve-unstable]
+    doc: Down convert and narrow (top)
+    arguments:
+      ["even: {sve_type[0]}", "pg: {max_predicate}", "op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[f32, f64]]
+    assert_instr: [fcvtnt]
+    compose:
+      - LLVMLink: { name: "fcvtnt.{type[0]}{type[1]}" }
+
+  - name: svcvtx_{type[0]}[_{type[1]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Down convert, rounding to odd
+    arguments:
+      ["inactive: {sve_type[0]}", "pg: {max_predicate}", "op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[f32, f64]]
+    zeroing_method: { drop: inactive }
+    assert_instr: [fcvtx]
+    compose:
+      - LLVMLink: { name: "fcvtx.{type[0]}{type[1]}" }
+
+  - name: svcvtxnt_{type[0]}[_{type[1]}]{_mx}
+    attr: [*sve-unstable]
+    doc: Down convert, rounding to odd (top)
+    arguments:
+      ["even: {sve_type[0]}", "pg: {max_predicate}", "op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[f32, f64]]
+    assert_instr: [fcvtxnt]
+    compose:
+      - LLVMLink: { name: "fcvtxnt.{type[0]}{type[1]}" }
+
+  - name: svldnt1_gather_[{type[0]}]index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Unextended load, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[1]}", "indices: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["ldnt1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink: { name: "ldnt1.gather.index.{sve_type[1]}" }
+
+  - name: svldnt1_gather_[{type[0]}]offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Unextended load, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[1]}", "offsets: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["ldnt1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink: { name: "ldnt1.gather.uxtw.{sve_type[1]}" }
+            doubleword:
+              LLVMLink: { name: "ldnt1.gather.{sve_type[1]}" }
+
+  - name: svldnt1_gather[_{type[0]}base]_offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Unextended load, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "offset: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["ldnt1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - LLVMLink:
+          name: "ldnt1.gather.scalar.offset.{sve_type[1]}.{sve_type[0]}"
+
+  - name: svldnt1_gather[_{type[0]}base]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Unextended load, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["ldnt1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svldnt1_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+
+  - name: svldnt1_gather[_{type[0]}base]_index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Unextended load, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "index: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["ldnt1{size_literal[0]}"]
+    test: { load: 1 }
+    compose:
+      - FnCall:
+          - "svldnt1_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[0]}"]]
+
+  - name: svldnt1s{size_literal[2]}_gather_[{type[0]}]index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "indices: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i64, u64], [i64, u64], [i16, i32]]
+    assert_instr: ["ldnt1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - LLVMLink:
+          name: "ldnt1.gather.index.{sve_type[1] as {type[2]}}"
+          return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base, $indices]]
+
+  - name: svldnt1u{size_literal[2]}_gather_[{type[0]}]index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "indices: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [[i64, u64], [u64, i64], [u16, u32]]
+    assert_instr: ["ldnt1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - LLVMLink:
+          name: "ldnt1.gather.index.{sve_type[1] as {type[2]}}"
+          return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base, $indices]]
+          - - Type: "{sve_type[1] as {type[2]}}"
+            - _
+
+  - name: svldnt1s{size_literal[2]}_gather_[{type[0]}]offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "offsets: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [[i64, u64], [i64, u64], [i8, i16, i32]]
+    assert_instr: ["ldnt1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ldnt1.gather.uxtw.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ldnt1.gather.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base, $offsets]]
+
+  - name: svldnt1u{size_literal[2]}_gather_[{type[0]}]offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      ["pg: {predicate[0]}", "base: *{type[2]}", "offsets: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [u32, i32], [u8, u16]]
+      - [[i64, u64], [u64, i64], [u8, u16, u32]]
+    assert_instr: ["ldnt1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "ldnt1.gather.uxtw.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+            doubleword:
+              LLVMLink:
+                name: "ldnt1.gather.{sve_type[1] as {type[2]}}"
+                return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $base, $offsets]]
+          - - Type: "{sve_type[1] as {type[2]}}"
+            - _
+
+  - name: svldnt1s{size_literal[2]}_gather[_{type[0]}base]_offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "offset: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["ldnt1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - LLVMLink:
+          name: "ldnt1.gather.scalar.offset.{sve_type[1] as {type[2]}}.{sve_type[0]}"
+          return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $bases, $offset]]
+
+  - name: svldnt1u{size_literal[2]}_gather[_{type[0]}base]_offset_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "offset: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [u32, i32], [u8, u16]]
+      - [u64, [u64, i64], [u8, u16, u32]]
+    assert_instr: ["ldnt1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - LLVMLink:
+          name: "ldnt1.gather.scalar.offset.{sve_type[1] as {type[2]}}.{sve_type[0]}"
+          return_type: "{sve_type[1] as {type[2]}}"
+      - FnCall:
+          - "crate::intrinsics::simd::simd_cast"
+          - - FnCall: ["{llvm_link}", [$pg, $bases, $offset]]
+          - - Type: "{sve_type[1] as {type[2]}}"
+            - _
+
+  - name: svldnt1s{size_literal[2]}_gather[_{type[0]}base]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["ldnt1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svldnt1s{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+
+  - name: svldnt1u{size_literal[2]}_gather[_{type[0]}base]_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [u8, u16]]
+      - [u64, [i64, u64], [u8, u16, u32]]
+    assert_instr: ["ldnt1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svldnt1u{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+
+  - name: svldnt1s{size_literal[2]}_gather[_{type[0]}base]_index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and sign-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "index: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], u16]
+      - [u64, [i64, u64], [u16, u32]]
+    assert_instr: ["ldnt1s{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svldnt1s{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[2]}"]]
+
+  - name: svldnt1u{size_literal[2]}_gather[_{type[0]}base]_index_{type[1]}
+    attr: [*sve-unstable]
+    doc: Load {size[2]}-bit data and zero-extend, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments: ["pg: {predicate[0]}", "bases: {sve_type[0]}", "index: i64"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [u32, [i32, u32], u16]
+      - [u64, [i64, u64], [u16, u32]]
+    assert_instr: ["ldnt1{size_literal[2]}"]
+    test: { load: 2 }
+    compose:
+      - FnCall:
+          - "svldnt1u{size_literal[2]}_gather_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[2]}"]]
+
+  - name: svstnt1_scatter_[{type[0]}]index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      - "pg: {predicate[0]}"
+      - "base: *mut {type[1]}"
+      - "indices: {sve_type[0]}"
+      - "data: {sve_type[1]}"
+    types:
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["stnt1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - LLVMLink:
+          name: "stnt1.scatter.index.{sve_type[1]}"
+          arguments:
+            - "data: {sve_type[1]}"
+            - "pg: {predicate[0]}"
+            - "base: *mut {type[1]}"
+            - "indices: {sve_type[0]}"
+      - FnCall: ["{llvm_link}", [$data, $pg, $base, $indices]]
+
+  - name: svstnt1_scatter_[{type[0]}]offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      - "pg: {predicate[0]}"
+      - "base: *mut {type[1]}"
+      - "offsets: {sve_type[0]}"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [[i64, u64], [f64, i64, u64]]
+    assert_instr: ["stnt1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "stnt1.scatter.uxtw.{sve_type[1]}"
+                arguments:
+                  - "data: {sve_type[1]}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[1]}"
+                  - "offsets: {sve_type[0]}"
+            doubleword:
+              LLVMLink:
+                name: "stnt1.scatter.{sve_type[1]}"
+                arguments:
+                  - "data: {sve_type[1]}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[1]}"
+                  - "offsets: {sve_type[0]}"
+      - FnCall: ["{llvm_link}", [$data, $pg, $base, $offsets]]
+
+  - name: svstnt1_scatter[_{type[0]}base]_offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments:
+      - "pg: {predicate[0]}"
+      - "bases: {sve_type[0]}"
+      - "offset: i64"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["stnt1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - LLVMLink:
+          arguments:
+            - "data: {sve_type[1]}"
+            - "pg: {predicate[0]}"
+            - "bases: {sve_type[0]}"
+            - "offset: i64"
+          name: "stnt1.scatter.scalar.offset.{sve_type[1]}.{sve_type[0]}"
+      - FnCall: ["{llvm_link}", [$data, $pg, $bases, $offset]]
+
+  - name: svstnt1_scatter[_{type[0]}base_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments:
+      ["pg: {predicate[0]}", "bases: {sve_type[0]}", "data: {sve_type[1]}"]
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["stnt1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - FnCall:
+          - "svstnt1_scatter_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+            - $data
+
+  - name: svstnt1_scatter[_{type[0]}base]_index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Non-truncating store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments:
+      - "pg: {predicate[0]}"
+      - "bases: {sve_type[0]}"
+      - "index: i64"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, [f32, i32, u32]]
+      - [u64, [f64, i64, u64]]
+    assert_instr: ["stnt1{size_literal[0]}"]
+    test: { store: 1 }
+    compose:
+      - FnCall:
+          - "svstnt1_scatter_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[0]}"]]
+            - $data
+
+  - name: svstnt1{size_literal[2]}_scatter_[{type[0]}]index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      - "pg: {predicate[0]}"
+      - "base: *mut {type[2]}"
+      - "indices: {sve_type[0]}"
+      - "data: {sve_type[1]}"
+    types:
+      - [[i64, u64], i64, [i16, i32]]
+      - [[i64, u64], u64, [u16, u32]]
+    assert_instr: ["stnt1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - LLVMLink:
+          name: "stnt1.scatter.index.{sve_type[1] as {type[2]}}"
+          arguments:
+            - "data: {sve_type[1] as {type[2]}}"
+            - "pg: {predicate[0]}"
+            - "base: *mut {type[2]}"
+            - "indices: {sve_type[0]}"
+      - FnCall:
+          - "{llvm_link}"
+          - [FnCall: ["crate::intrinsics::simd::simd_cast", [$data]], $pg, $base, $indices]
+
+  - name: svstnt1{size_literal[2]}_scatter_[{type[0]}]offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - non_temporal
+    arguments:
+      - "pg: {predicate[0]}"
+      - "base: *mut {type[2]}"
+      - "offsets: {sve_type[0]}"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, i32, [i8, i16]]
+      - [u32, u32, [u8, u16]]
+      - [[i64, u64], i64, [i8, i16, i32]]
+      - [[i64, u64], u64, [u8, u16, u32]]
+    assert_instr: ["stnt1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - MatchSize:
+          - "{type[0]}"
+          - default:
+              LLVMLink:
+                name: "stnt1.scatter.uxtw.{sve_type[1] as {type[2]}}"
+                arguments:
+                  - "data: {sve_type[1] as {type[2]}}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[2]}"
+                  - "offsets: {sve_type[0]}"
+            doubleword:
+              LLVMLink:
+                name: "stnt1.scatter.{sve_type[1] as {type[2]}}"
+                arguments:
+                  - "data: {sve_type[1] as {type[2]}}"
+                  - "pg: {predicate[0]}"
+                  - "base: *mut {type[2]}"
+                  - "offsets: {sve_type[0]}"
+      - FnCall:
+          - "{llvm_link}"
+          - [FnCall: ["crate::intrinsics::simd::simd_cast", [$data]], $pg, $base, $offsets]
+
+  - name: svstnt1{size_literal[2]}_scatter[_{type[0]}base]_offset[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments:
+      - "pg: {predicate[0]}"
+      - "bases: {sve_type[0]}"
+      - "offset: i64"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["stnt1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - LLVMLink:
+          name: "stnt1.scatter.scalar.offset.{sve_type[1] as {type[2]}}.{sve_type[0]}"
+          arguments:
+            - "data: {sve_type[1] as {type[2]}}"
+            - "pg: {predicate[0]}"
+            - "bases: {sve_type[0]}"
+            - "offset: i64"
+      - FnCall:
+          - "{llvm_link}"
+          - [FnCall: ["crate::intrinsics::simd::simd_cast", [$data]], $pg, $bases, $offset]
+
+  - name: svstnt1{size_literal[2]}_scatter[_{type[0]}base_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments:
+      ["pg: {predicate[0]}", "bases: {sve_type[0]}", "data: {sve_type[1]}"]
+    types:
+      - [u32, [i32, u32], [i8, i16]]
+      - [u64, [i64, u64], [i8, i16, i32]]
+    assert_instr: ["stnt1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - FnCall:
+          - "svstnt1{size_literal[2]}_scatter_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - 0
+            - $data
+
+  - name: svstnt1{size_literal[2]}_scatter[_{type[0]}base]_index[_{type[1]}]
+    attr: [*sve-unstable]
+    doc: Truncate to {size[2]} bits and store, non-temporal
+    safety:
+      unsafe:
+        - pointer_offset: predicated
+        - dereference: predicated
+        - no_provenance: bases
+        - non_temporal
+    arguments:
+      - "pg: {predicate[0]}"
+      - "bases: {sve_type[0]}"
+      - "index: i64"
+      - "data: {sve_type[1]}"
+    types:
+      - [u32, [i32, u32], i16]
+      - [u64, [i64, u64], [i16, i32]]
+    assert_instr: ["stnt1{size_literal[2]}"]
+    test: { store: 2 }
+    compose:
+      - FnCall:
+          - "svstnt1{size_literal[2]}_scatter_{type[0]}base_offset_{type[1]}"
+          - - $pg
+            - $bases
+            - MethodCall: [$index, unchecked_shl, ["{size_in_bytes_log2[2]}"]]
+            - $data
+
+  - name: svaba[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Absolute difference and accumulate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind}aba"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "{type_kind}aba.{sve_type}" }
+
+  - name: svqabs[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating absolute value
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64]
+    assert_instr: [sqabs]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "sqabs.{sve_type}" }
+
+  - name: svabdlb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Absolute difference long (bottom)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}abdlb"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}abdlb.{sve_type[0]}" }
+
+  - name: svabdlt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Absolute difference long (top)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}abdlt"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}abdlt.{sve_type[0]}" }
+
+  - name: svabalb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Absolute difference long (bottom)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}abalb"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}abalb.{sve_type[0]}" }
+
+  - name: svabalt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Absolute difference long (top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}abalt"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}abalt.{sve_type[0]}" }
+
+  - name: svbcax[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise clear and exclusive OR
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [bcax]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "bcax.{sve_type}" }
+
+  - name: sveorbt[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Interleaving exclusive OR (bottom, top)
+    arguments: ["odd: {sve_type}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [eorbt]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "eorbt.{sve_type}" }
+
+  - name: sveortb[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Interleaving exclusive OR (top, bottom)
+    arguments: ["even: {sve_type}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [eortb]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "eortb.{sve_type}" }
+
+  - name: sveor3[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise exclusive OR of three vectors
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [eor3]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "eor3.{sve_type}" }
+
+  - name: svbsl[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise select
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [bsl]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "bsl.{sve_type}" }
+
+  - name: svbsl1n[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise select with first input inverted
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [bsl1n]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "bsl1n.{sve_type}" }
+
+  - name: svbsl2n[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise select with second input inverted
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [bsl2n]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "bsl2n.{sve_type}" }
+
+  - name: svnbsl[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise select
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [nbsl]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "nbsl.{sve_type}" }
+
+  - name: svxar[_n_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise exclusive OR and rotate right
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM3: i32"]
+    constraints: [{ variable: IMM3, range: ["1", "{size}"] }]
+    assert_instr: [[xar, "IMM3 = 1"]]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    compose:
+      - LLVMLink:
+          name: "xar.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm3: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, $IMM3]]
+
+  - name: svrax1[_{type}]
+    attr: [*sve-unstable]
+    doc: Bitwise rotate left by 1 and exclusive OR
+    target_features: [sve2-sha3]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    assert_instr: [rax1]
+    types: [i64, u64]
+    compose:
+      - LLVMLink: { name: "rax1" }
+
+  - name: svshllb[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Shift left long (bottom)
+    arguments: ["op1: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["0", "{size_minus_one[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [["{type_kind[0].su}shllb", "IMM2 = 0"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}shllb.{sve_type[0]}"
+          arguments: ["op1: {sve_type[1]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $IMM2]]
+
+  - name: svshllt[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Shift left long (top)
+    arguments: ["op1: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["0", "{size_minus_one[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [["{type_kind[0].su}shllt", "IMM2 = 0"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}shllt.{sve_type[0]}"
+          arguments: ["op1: {sve_type[1]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $IMM2]]
+
+  - name: svrshl[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Rounding shift left
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i8, u8], i8]
+      - [[i16, u16], i16]
+      - [[i32, u32], i32]
+      - [[i64, u64], i64]
+    assert_instr: ["{type_kind[0].su}rshl"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}rshl.{sve_type[0]}" }
+
+  - name: svqrshl[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating rounding shift left
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i8, u8], i8]
+      - [[i16, u16], i16]
+      - [[i32, u32], i32]
+      - [[i64, u64], i64]
+    assert_instr: ["{type_kind[0].su}qrshl"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}qrshl.{sve_type[0]}" }
+
+  - name: svqshl[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating shift left
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [[i8, u8], i8]
+      - [[i16, u16], i16]
+      - [[i32, u32], i32]
+      - [[i64, u64], i64]
+    assert_instr: ["{type_kind[0].su}qshl"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}qshl.{sve_type[0]}" }
+
+  - name: svqshlu[_n_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating shift left unsigned
+    arguments: ["pg: {predicate[0]}", "op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["0", "{size_minus_one[1]}"] }]
+    types:
+      - [i8, u8]
+      - [i16, u16]
+      - [i32, u32]
+      - [i64, u64]
+    assert_instr: [[sqshlu, "IMM2 = 0"]]
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink:
+          name: "sqshlu.{sve_type[0]}"
+          arguments: ["pg: {predicate[0]}", "op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$pg, $op1, $IMM2]]
+
+  - name: svsli[_n_{type}]
+    attr: [*sve-unstable]
+    doc: Shift left and insert
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM3: i32"]
+    constraints: [{ variable: IMM3, range: ["0", "{size_minus_one}"] }]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [[sli, "IMM3 = 0"]]
+    compose:
+      - LLVMLink:
+          name: "sli.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm3: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, $IMM3]]
+
+  - name: svrshr[_n_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Rounding shift right
+    arguments: ["pg: {predicate}", "op1: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size}"] }]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [["{type_kind.su}rshr", "IMM2 = 1"]]
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink:
+          name: "{type_kind.su}rshr.{sve_type}"
+          arguments: ["pg: {predicate}", "op1: {sve_type}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$pg, $op1, $IMM2]]
+
+  - name: svrsra[_n_{type}]
+    attr: [*sve-unstable]
+    doc: Rounding shift right and accumulate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM3: i32"]
+    constraints: [{ variable: IMM3, range: ["1", "{size}"] }]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [["{type_kind.su}rsra", "IMM3 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind.su}rsra.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm3: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, $IMM3]]
+
+  - name: svrshrnb[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Rounding shift right narrow (bottom)
+    arguments: ["op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [[rshrnb, "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "rshrnb.{sve_type[0]}"
+          arguments: ["op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $IMM2]]
+
+  - name: svrshrnt[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Rounding shift right narrow (top)
+    arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [[rshrnt, "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "rshrnt.{sve_type[0]}"
+          arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$even, $op1, $IMM2]]
+
+  - name: svqrshrnb[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding shift right narrow (bottom)
+    arguments: ["op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [["{type_kind[0].su}qrshrnb", "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}qrshrnb.{sve_type[0]}"
+          arguments: ["op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $IMM2]]
+
+  - name: svqrshrnt[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding shift right narrow (top)
+    arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [["{type_kind[0].su}qrshrnt", "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}qrshrnt.{sve_type[0]}"
+          arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$even, $op1, $IMM2]]
+
+  - name: svqrshrunb[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding shift right unsigned narrow (bottom)
+    arguments: ["op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, u8]
+      - [i32, u16]
+      - [i64, u32]
+    assert_instr: [[sqrshrunb, "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "sqrshrunb.{sve_type[0]}"
+          arguments: ["op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $IMM2]]
+
+  - name: svqrshrunt[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding shift right unsigned narrow (top)
+    arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, u8]
+      - [i32, u16]
+      - [i64, u32]
+    assert_instr: [[sqrshrunt, "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "sqrshrunt.{sve_type[0]}"
+          arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$even, $op1, $IMM2]]
+
+  - name: svqshrnb[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating shift right narrow (bottom)
+    arguments: ["op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [["{type_kind[0].su}qshrnb", "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}qshrnb.{sve_type[0]}"
+          arguments: ["op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $IMM2]]
+
+  - name: svqshrnt[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating shift right narrow (top)
+    arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [["{type_kind[0].su}qshrnt", "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}qshrnt.{sve_type[0]}"
+          arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$even, $op1, $IMM2]]
+
+  - name: svqshrunb[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating shift right unsigned narrow (bottom)
+    arguments: ["op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, u8]
+      - [i32, u16]
+      - [i64, u32]
+    assert_instr: [[sqshrunb, "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "sqshrunb.{sve_type[0]}"
+          arguments: ["op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $IMM2]]
+
+  - name: svqshrunt[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating shift right unsigned narrow (top)
+    arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, u8]
+      - [i32, u16]
+      - [i64, u32]
+    assert_instr: [[sqshrunt, "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "sqshrunt.{sve_type[0]}"
+          arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$even, $op1, $IMM2]]
+
+  - name: svsra[_n_{type}]
+    attr: [*sve-unstable]
+    doc: Shift right and accumulate
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM3: i32"]
+    constraints: [{ variable: IMM3, range: ["1", "{size}"] }]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [["{type_kind.su}sra", "IMM3 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind.su}sra.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm3: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, $IMM3]]
+
+  - name: svsri[_n_{type}]
+    attr: [*sve-unstable]
+    doc: Shift right and insert
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM3: i32"]
+    constraints: [{ variable: IMM3, range: ["1", "{size}"] }]
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: [[sri, "IMM3 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "sri.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm3: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, $IMM3]]
+
+  - name: svshrnb[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Shift right narrow (bottom)
+    arguments: ["op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [[shrnb, "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "shrnb.{sve_type[0]}"
+          arguments: ["op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $IMM2]]
+
+  - name: svshrnt[_n_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Shift right narrow (top)
+    arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    static_defs: ["const IMM2: i32"]
+    constraints: [{ variable: IMM2, range: ["1", "{size[1]}"] }]
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: [[shrnt, "IMM2 = 1"]]
+    compose:
+      - LLVMLink:
+          name: "shrnt.{sve_type[0]}"
+          arguments: ["even: {sve_type[1]}", "op1: {sve_type[0]}", "imm2: i32"]
+      - FnCall: ["{llvm_link}", [$even, $op1, $IMM2]]
+
+  - name: svqxtnb[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating extract narrow (bottom)
+    arguments: ["op: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}qxtnb"]
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}qxtnb.{sve_type[0]}" }
+
+  - name: svqxtnt[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating extract narrow (top)
+    arguments: ["even: {sve_type[1]}", "op: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}qxtnt"]
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}qxtnt.{sve_type[0]}" }
+
+  - name: svqxtunb[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating extract unsigned narrow (bottom)
+    arguments: ["op: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [i16, u8]
+      - [i32, u16]
+      - [i64, u32]
+    assert_instr: [sqxtunb]
+    compose:
+      - LLVMLink: { name: "sqxtunb.{sve_type[0]}" }
+
+  - name: svqxtunt[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating extract unsigned narrow (top)
+    arguments: ["even: {sve_type[1]}", "op: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types:
+      - [i16, u8]
+      - [i32, u16]
+      - [i64, u32]
+    assert_instr: [sqxtunt]
+    compose:
+      - LLVMLink: { name: "sqxtunt.{sve_type[0]}" }
+
+  - name: svmovlb[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Move long (bottom)
+    arguments: ["op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}shllb"]
+    compose:
+      - FnCall: ["svshllb_n_{type[0]}", [$op], [0]]
+
+  - name: svmovlt[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Move long (top)
+    arguments: ["op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}shllt"]
+    compose:
+      - FnCall: ["svshllt_n_{type[0]}", [$op], [0]]
+
+  - name: svunpkhi[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Unpack and extend high half
+    arguments: ["op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}unpkhi"]
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}unpkhi.{sve_type[0]}" }
+
+  - name: svunpkhi[_b]
+    attr: [*sve-unstable]
+    doc: Unpack and extend high half
+    arguments: ["op: svbool_t"]
+    return_type: "svbool8_t"
+    assert_instr: [punpkhi]
+    compose:
+      - LLVMLink: { name: "punpkhi.nxv16i1" }
+
+  - name: svunpklo[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Unpack and extend low half
+    arguments: ["op: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}unpklo"]
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}unpklo.{sve_type[0]}" }
+
+  - name: svunpklo[_b]
+    attr: [*sve-unstable]
+    doc: Unpack and extend low half
+    arguments: ["op: svbool_t"]
+    return_type: "svbool8_t"
+    assert_instr: [punpklo]
+    compose:
+      - LLVMLink: { name: "punpklo.nxv16i1" }
+
+  - name: svaddp[_{type}]{_mx}
+    attr: [*sve-unstable]
+    doc: Add pairwise
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.f}addp"]
+    compose:
+      - LLVMLink: { name: "{type_kind.f}addp.{sve_type}" }
+
+  - name: svadalp[_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Add and accumulate long pairwise
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}adalp"]
+    zeroing_method: { select: op1 }
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}adalp.{sve_type[0]}" }
+
+  - name: svmaxp[_{type}]{_mx}
+    attr: [*sve-unstable]
+    doc: Maximum pairwise
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.fsu}maxp"]
+    compose:
+      - LLVMLink: { name: "{type_kind.fsu}maxp.{sve_type}" }
+
+  - name: svmaxnmp[_{type}]{_mx}
+    attr: [*sve-unstable]
+    doc: Maximum number pairwise
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: ["fmaxnmp"]
+    compose:
+      - LLVMLink: { name: "fmaxnmp.{sve_type}" }
+
+  - name: svminp[_{type}]{_mx}
+    attr: [*sve-unstable]
+    doc: Minimum pairwise
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64, i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.fsu}minp"]
+    compose:
+      - LLVMLink: { name: "{type_kind.fsu}minp.{sve_type}" }
+
+  - name: svminnmp[_{type}]{_mx}
+    attr: [*sve-unstable]
+    doc: Minimum number pairwise
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [f32, f64]
+    assert_instr: ["fminnmp"]
+    compose:
+      - LLVMLink: { name: "fminnmp.{sve_type}" }
+
+  - name: svmul_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Multiply
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type}" }]
+    assert_instr: [["{type_kind.f}mul", "IMM_INDEX = 0"]]
+    types: [f32, f64, i16, i32, i64, u16, u32, u64]
+    compose:
+      - LLVMLink:
+          name: "{type_kind.f}mul.lane.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm_index: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, IMM_INDEX]]
+
+  - name: svqdmulh[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply high
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64]
+    assert_instr: [sqdmulh]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "sqdmulh.{sve_type}" }
+
+  - name: svqdmulh_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply high
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type}" }]
+    assert_instr: [["sqdmulh", "IMM_INDEX = 0"]]
+    types: [i16, i32, i64]
+    compose:
+      - LLVMLink:
+          name: "sqdmulh.lane.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm_index: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, IMM_INDEX]]
+
+  - name: svqrdmulh[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding doubling multiply high
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64]
+    assert_instr: [sqrdmulh]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "sqrdmulh.{sve_type}" }
+
+  - name: svqrdmulh_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding doubling multiply high
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type}" }]
+    assert_instr: [["sqrdmulh", "IMM_INDEX = 0"]]
+    types: [i16, i32, i64]
+    compose:
+      - LLVMLink:
+          name: "sqrdmulh.lane.{sve_type}"
+          arguments: ["op1: {sve_type}", "op2: {sve_type}", "imm_index: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, IMM_INDEX]]
+
+  - name: svqdmullb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply long (bottom)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i16, i8], [i32, i16], [i64, i32]]
+    assert_instr: [sqdmullb]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "sqdmullb.{sve_type[0]}" }
+
+  - name: svqdmullb_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply long (bottom)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    assert_instr: [["sqdmullb", "IMM_INDEX = 0"]]
+    types: [[i32, i16], [i64, i32]]
+    compose:
+      - LLVMLink:
+          name: "sqdmullb.lane.{sve_type[0]}"
+          arguments:
+            ["op1: {sve_type[1]}", "op2: {sve_type[1]}", "imm_index: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, IMM_INDEX]]
+
+  - name: svqdmullt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply long (top)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i16, i8], [i32, i16], [i64, i32]]
+    assert_instr: [sqdmullt]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "sqdmullt.{sve_type[0]}" }
+
+  - name: svqdmullt_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply long (top)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    assert_instr: [["sqdmullt", "IMM_INDEX = 0"]]
+    types: [[i32, i16], [i64, i32]]
+    compose:
+      - LLVMLink:
+          name: "sqdmullt.lane.{sve_type[0]}"
+          arguments:
+            ["op1: {sve_type[1]}", "op2: {sve_type[1]}", "imm_index: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, IMM_INDEX]]
+
+  - name: svmullb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Multiply long (bottom)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}mullb"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}mullb.{sve_type[0]}" }
+
+  - name: svmullb_lane[_{type[0]}]
+    attr:
+      - *sve-unstable
+      # FIXME(arm-maintainers): MSVC disassembly of `[su]mullb` fails
+      - FnCall: [cfg_attr, [*msvc-disabled, {FnCall: [assert_instr, ["{type_kind[0].su}mullb", "IMM_INDEX = 0"]]}]]
+    doc: Multiply long (bottom)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i32, i16]
+      - [i64, i32]
+      - [u32, u16]
+      - [u64, u32]
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}mullb.lane.{sve_type[0]}"
+          arguments:
+            ["op1: {sve_type[1]}", "op2: {sve_type[1]}", "imm_index: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, $IMM_INDEX]]
+
+  - name: svmullt[{_n}_{type[0]}]
+    attr:
+      - *sve-unstable
+      # FIXME(arm-maintainers): MSVC disassembly of `[su]mullt` fails
+      - FnCall: [cfg_attr, [*msvc-disabled, {FnCall: [assert_instr, ["{type_kind[0].su}mullt"]]}]]
+    doc: Multiply long (top)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}mullt.{sve_type[0]}" }
+
+  - name: svmullt_lane[_{type[0]}]
+    attr:
+      - *sve-unstable
+      # FIXME(arm-maintainers): MSVC disassembly of `[su]mullt` fails
+      - FnCall: [cfg_attr, [*msvc-disabled, {FnCall: [assert_instr, ["{type_kind[0].su}mullt", "IMM_INDEX = 0"]]}]]
+    doc: Multiply long (top)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i32, i16]
+      - [i64, i32]
+      - [u32, u16]
+      - [u64, u32]
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}mullt.lane.{sve_type[0]}"
+          arguments:
+            ["op1: {sve_type[1]}", "op2: {sve_type[1]}", "imm_index: i32"]
+      - FnCall: ["{llvm_link}", [$op1, $op2, $IMM_INDEX]]
+
+  - name: svrecpe[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Reciprocal estimate
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u32]
+    assert_instr: [urecpe]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "urecpe.{sve_type}" }
+
+  - name: svrsqrte[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Reciprocal square root estimate
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u32]
+    assert_instr: [ursqrte]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "ursqrte.{sve_type}" }
+
+  - name: svmla_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Multiply-add, addend first
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type}" }]
+    types: [i16, i32, i64, u16, u32, u64]
+    assert_instr: [[mla, "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "mla.lane.{sve_type}"
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svmls_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Multiply-subtract, minuend first
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type}" }]
+    types: [i16, i32, i64, u16, u32, u64]
+    assert_instr: [[mls, "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "mls.lane.{sve_type}"
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svmlalb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Multiply-add long (bottom)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      [[i16, i8], [i32, i16], [i64, i32], [u16, u8], [u32, u16], [u64, u32]]
+    assert_instr: ["{type_kind[0].su}mlalb"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}mlalb.{sve_type[0]}" }
+
+  - name: svmlalb_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Multiply-add long (bottom)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    types: [[i32, i16], [i64, i32], [u32, u16], [u64, u32]]
+    assert_instr: [["{type_kind[0].su}mlalb", "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}mlalb.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svmlalt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Multiply-add long (top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      [[i16, i8], [i32, i16], [i64, i32], [u16, u8], [u32, u16], [u64, u32]]
+    assert_instr: ["{type_kind[0].su}mlalt"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}mlalt.{sve_type[0]}" }
+
+  - name: svmlalt_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Multiply-add long (top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    types: [[i32, i16], [i64, i32], [u32, u16], [u64, u32]]
+    assert_instr: [["{type_kind[0].su}mlalt", "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}mlalt.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svmlslb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Multiply-subtract long (bottom)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      [[i16, i8], [i32, i16], [i64, i32], [u16, u8], [u32, u16], [u64, u32]]
+    assert_instr: ["{type_kind[0].su}mlslb"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}mlslb.{sve_type[0]}" }
+
+  - name: svmlslb_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Multiply-subtract long (bottom)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    types: [[i32, i16], [i64, i32], [u32, u16], [u64, u32]]
+    assert_instr: [["{type_kind[0].su}mlslb", "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}mlslb.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svmlslt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Multiply-subtract long (top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      [[i16, i8], [i32, i16], [i64, i32], [u16, u8], [u32, u16], [u64, u32]]
+    assert_instr: ["{type_kind[0].su}mlslt"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}mlslt.{sve_type[0]}" }
+
+  - name: svmlslt_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Multiply-subtract long (top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    types: [[i32, i16], [i64, i32], [u32, u16], [u64, u32]]
+    assert_instr: [["{type_kind[0].su}mlslt", "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "{type_kind[0].su}mlslt.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svqrdmlah[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding doubling multiply-add high
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64]
+    assert_instr: [sqrdmlah]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sqrdmlah.{sve_type}" }
+
+  - name: svqrdmlah_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding doubling multiply-add high
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type}" }]
+    types: [i16, i32, i64]
+    assert_instr: [[sqrdmlah, "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "sqrdmlah.lane.{sve_type}"
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svqrdmlsh[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding doubling multiply-subtract high
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64]
+    assert_instr: [sqrdmlsh]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sqrdmlsh.{sve_type}" }
+
+  - name: svqrdmlsh_lane[_{type}]
+    attr: [*sve-unstable]
+    doc: Saturating rounding doubling multiply-subtract high
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type}" }]
+    types: [i16, i32, i64]
+    assert_instr: [[sqrdmlsh, "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "sqrdmlsh.lane.{sve_type}"
+          arguments:
+            - "op1: {sve_type}"
+            - "op2: {sve_type}"
+            - "op3: {sve_type}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svqdmlalb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-add long (bottom)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i16, i8], [i32, i16], [i64, i32]]
+    assert_instr: ["sqdmlalb"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sqdmlalb.{sve_type[0]}" }
+
+  - name: svqdmlalb_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-add long (bottom)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    types: [[i32, i16], [i64, i32]]
+    assert_instr: [["sqdmlalb", "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "sqdmlalb.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svqdmlalbt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-add long (bottom × top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i16, i8], [i32, i16], [i64, i32]]
+    assert_instr: ["sqdmlalbt"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sqdmlalbt.{sve_type[0]}" }
+
+  - name: svqdmlalt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-add long (top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i16, i8], [i32, i16], [i64, i32]]
+    assert_instr: ["sqdmlalt"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sqdmlalt.{sve_type[0]}" }
+
+  - name: svqdmlalt_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-add long (top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    types: [[i32, i16], [i64, i32]]
+    assert_instr: [["sqdmlalt", "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "sqdmlalt.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svqdmlslb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-subtract long (bottom)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i16, i8], [i32, i16], [i64, i32]]
+    assert_instr: ["sqdmlslb"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sqdmlslb.{sve_type[0]}" }
+
+  - name: svqdmlslb_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-subtract long (bottom)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    types: [[i32, i16], [i64, i32]]
+    assert_instr: [["sqdmlslb", "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "sqdmlslb.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svqdmlslbt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-subtract long (bottom × top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i16, i8], [i32, i16], [i64, i32]]
+    assert_instr: ["sqdmlslbt"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sqdmlslbt.{sve_type[0]}" }
+
+  - name: svqdmlslt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-subtract long (top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[i16, i8], [i32, i16], [i64, i32]]
+    assert_instr: ["sqdmlslt"]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "sqdmlslt.{sve_type[0]}" }
+
+  - name: svqdmlslt_lane[_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Saturating doubling multiply-subtract long (top)
+    arguments:
+      ["op1: {sve_type[0]}", "op2: {sve_type[1]}", "op3: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    static_defs: ["const IMM_INDEX: i32"]
+    constraints: [{ variable: IMM_INDEX, vec_max_elems_type: "{type[1]}" }]
+    types: [[i32, i16], [i64, i32]]
+    assert_instr: [["sqdmlslt", "IMM_INDEX = 0"]]
+    compose:
+      - LLVMLink:
+          name: "sqdmlslt.lane.{sve_type[0]}"
+          arguments:
+            - "op1: {sve_type[0]}"
+            - "op2: {sve_type[1]}"
+            - "op3: {sve_type[1]}"
+            - "IMM_INDEX: i32"
+      - FnCall: ["{llvm_link}", [$op1, $op2, $op3, $IMM_INDEX]]
+
+  - name: svqneg[_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating negate
+    arguments: ["inactive: {sve_type}", "pg: {predicate}", "op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64]
+    assert_instr: [sqneg]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "sqneg.{sve_type}" }
+
+  - name: svadclb[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Add with carry long (bottom)
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u32, u64]
+    assert_instr: [adclb]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "adclb.{sve_type}" }
+
+  - name: svadclt[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Add with carry long (top)
+    arguments: ["op1: {sve_type}", "op2: {sve_type}", "op3: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u32, u64]
+    assert_instr: [adclt]
+    n_variant_op: op3
+    compose:
+      - LLVMLink: { name: "adclt.{sve_type}" }
+
+  - name: svqadd[{_n}_{type}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating add
+    arguments: ["pg: {predicate}", "op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [i8, i16, i32, i64, u8, u16, u32, u64]
+    assert_instr: ["{type_kind.su}qadd"]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind.su}qadd.{sve_type}" }
+
+  - name: svsqadd[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating add with signed addend
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [u8, i8]
+      - [u16, i16]
+      - [u32, i32]
+      - [u64, i64]
+    assert_instr: [usqadd]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "usqadd.{sve_type[0]}" }
+
+  - name: svuqadd[{_n}_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Saturating add with unsigned addend
+    arguments:
+      ["pg: {predicate[0]}", "op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i8, u8]
+      - [i16, u16]
+      - [i32, u32]
+      - [i64, u64]
+    assert_instr: [suqadd]
+    zeroing_method: { select: op1 }
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "suqadd.{sve_type[0]}" }
+
+  - name: svaddlb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Add long (bottom)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}addlb"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}addlb.{sve_type[0]}" }
+
+  - name: svaddlbt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Add long (bottom + top)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+    assert_instr: ["{type_kind[0].su}addlbt"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}addlbt.{sve_type[0]}" }
+
+  - name: svaddlt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Add long (top)
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}addlt"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}addlt.{sve_type[0]}" }
+
+  - name: svaddwb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Add wide (bottom)
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}addwb"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}addwb.{sve_type[0]}" }
+
+  - name: svaddwt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Add wide (top)
+    arguments: ["op1: {sve_type[0]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types:
+      - [i16, i8]
+      - [i32, i16]
+      - [i64, i32]
+      - [u16, u8]
+      - [u32, u16]
+      - [u64, u32]
+    assert_instr: ["{type_kind[0].su}addwt"]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "{type_kind[0].su}addwt.{sve_type[0]}" }
+
+  - name: svlogb[_{type[0]}]{_mxz}
+    attr: [*sve-unstable]
+    doc: Base 2 logarithm as integer
+    arguments:
+      ["inactive: {sve_type[1]}", "pg: {predicate[0]}", "op: {sve_type[0]}"]
+    return_type: "{sve_type[1]}"
+    types: [[f32, i32], [f64, i64]]
+    assert_instr: [flogb]
+    zeroing_method: { drop: inactive }
+    compose:
+      - LLVMLink: { name: "flogb.{sve_type[0]}" }
+
+  - name: svpmul[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Polynomial multiply
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8]
+    assert_instr: [pmul]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "pmul.{sve_type}" }
+
+  - name: svpmullb_pair[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Polynomial multiply long (bottom)
+    target_features: [sve2-aes]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8, u32, u64]
+    assert_instr: [pmullb]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "pmullb.pair.{sve_type}" }
+
+  - name: svpmullb[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Polynomial multiply long (bottom)
+    target_features: [sve2-aes]
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[u16, u8], [u64, u32]]
+    assert_instr: [pmullb]
+    n_variant_op: op2
+    compose:
+      - FnCall:
+          - "crate::intrinsics::transmute_unchecked"
+          - [FnCall: ["svpmullb_pair_{type[1]}", [$op1, $op2]]]
+          - []
+          - true
+
+  - name: svpmullt_pair[{_n}_{type}]
+    attr: [*sve-unstable]
+    doc: Polynomial multiply long (top)
+    target_features: [sve2-aes]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8, u32, u64]
+    assert_instr: [pmullt]
+    n_variant_op: op2
+    compose:
+      - LLVMLink: { name: "pmullt.pair.{sve_type}" }
+
+  - name: svpmullt[{_n}_{type[0]}]
+    attr: [*sve-unstable]
+    doc: Polynomial multiply long (top)
+    target_features: [sve2-aes]
+    arguments: ["op1: {sve_type[1]}", "op2: {sve_type[1]}"]
+    return_type: "{sve_type[0]}"
+    types: [[u16, u8], [u64, u32]]
+    assert_instr: [pmullt]
+    n_variant_op: op2
+    compose:
+      - FnCall:
+          - "crate::intrinsics::transmute_unchecked"
+          - [FnCall: ["svpmullt_pair_{type[1]}", [$op1, $op2]]]
+          - []
+          - true
+
+  - name: svaesd[_{type}]
+    attr: [*sve-unstable]
+    doc: AES single round decryption
+    target_features: [sve2-aes]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8]
+    assert_instr: [aesd]
+    compose:
+      - LLVMLink: { name: "aesd" }
+
+  - name: svaese[_{type}]
+    attr: [*sve-unstable]
+    doc: AES single round encryption
+    target_features: [sve2-aes]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8]
+    assert_instr: [aese]
+    compose:
+      - LLVMLink: { name: "aese" }
+
+  - name: svaesmc[_{type}]
+    attr: [*sve-unstable]
+    doc: AES mix columns
+    target_features: [sve2-aes]
+    arguments: ["op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8]
+    assert_instr: [aesmc]
+    compose:
+      - LLVMLink: { name: "aesmc" }
+
+  - name: svaesimc[_{type}]
+    attr: [*sve-unstable]
+    doc: AES inverse mix columns
+    target_features: [sve2-aes]
+    arguments: ["op: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u8]
+    assert_instr: [aesimc]
+    compose:
+      - LLVMLink: { name: "aesimc" }
+
+  - name: svsm4e[_{type}]
+    attr: [*sve-unstable]
+    doc: SM4 encryption and decryption
+    target_features: [sve2-sm4]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u32]
+    assert_instr: [sm4e]
+    compose:
+      - LLVMLink: { name: "sm4e" }
+
+  - name: svsm4ekey[_{type}]
+    attr: [*sve-unstable]
+    doc: SM4 key updates
+    target_features: [sve2-sm4]
+    arguments: ["op1: {sve_type}", "op2: {sve_type}"]
+    return_type: "{sve_type}"
+    types: [u32]
+    assert_instr: [sm4ekey]
+    compose:
+      - LLVMLink: { name: "sm4ekey" }
diff --git a/crates/stdarch-gen-arm/src/big_endian.rs b/crates/stdarch-gen-arm/src/big_endian.rs
index b982ff53ec..f024ca074e 100644
--- a/crates/stdarch-gen-arm/src/big_endian.rs
+++ b/crates/stdarch-gen-arm/src/big_endian.rs
@@ -15,9 +15,9 @@ fn create_single_wild_string(name: &str) -> WildString {
 /// Creates an Identifier with name `name` with no wildcards. This, for example,
 /// can be used to create variables, function names or arbitrary input. Is is
 /// extremely flexible.
-pub fn create_symbol_identifier(arbitrary_string: &str) -> Expression {
+pub fn create_symbol_identifier(arbitrary_string: &str, kind: IdentifierType) -> Expression {
     let identifier_name = create_single_wild_string(arbitrary_string);
-    Expression::Identifier(identifier_name, IdentifierType::Symbol)
+    Expression::Identifier(identifier_name, kind)
 }
 
 /// To compose the simd_shuffle! call we need:
@@ -101,7 +101,6 @@ pub fn make_variable_mutable(variable_name: &str, type_kind: &TypeKind) -> Expre
 fn create_shuffle_internal(
     variable_name: &String,
     type_kind: &TypeKind,
-    fmt_tuple: fn(variable_name: &String, idx: u32, array_lanes: &String) -> String,
     fmt: fn(variable_name: &String, type_kind: &TypeKind, array_lanes: &String) -> String,
 ) -> Option<Expression> {
     let TypeKind::Vector(vector_type) = type_kind else {
@@ -120,14 +119,21 @@ fn create_shuffle_internal(
 
         /* <var_name>.idx = simd_shuffle!(<var_name>.idx, <var_name>.idx, [<indexes>]) */
         for idx in 0..tuple_count {
-            let formatted = fmt_tuple(variable_name, idx, &array_lanes);
+            let formatted =
+                create_assigned_tuple_shuffle_call_fmt(variable_name, idx, &array_lanes);
             string_builder += formatted.as_str();
         }
-        Some(create_symbol_identifier(&string_builder))
+        Some(create_symbol_identifier(
+            &string_builder,
+            IdentifierType::UnsafeSymbol,
+        ))
     } else {
         /* Generate a list of shuffles for each tuple */
         let expression = fmt(variable_name, type_kind, &array_lanes);
-        Some(create_symbol_identifier(&expression))
+        Some(create_symbol_identifier(
+            &expression,
+            IdentifierType::UnsafeSymbol,
+        ))
     }
 }
 
@@ -137,7 +143,7 @@ fn create_assigned_tuple_shuffle_call_fmt(
     array_lanes: &String,
 ) -> String {
     format!(
-        "{variable_name}.{idx} = unsafe {{ simd_shuffle!({variable_name}.{idx}, {variable_name}.{idx}, {array_lanes}) }};\n"
+        "{variable_name}.{idx} = simd_shuffle!({variable_name}.{idx}, {variable_name}.{idx}, {array_lanes});\n"
     )
 }
 
@@ -147,7 +153,7 @@ fn create_assigned_shuffle_call_fmt(
     array_lanes: &String,
 ) -> String {
     format!(
-        "let {variable_name}: {type_kind} = unsafe {{ simd_shuffle!({variable_name}, {variable_name}, {array_lanes}) }}"
+        "let {variable_name}: {type_kind} = simd_shuffle!({variable_name}, {variable_name}, {array_lanes})"
     )
 }
 
@@ -165,20 +171,10 @@ pub fn create_assigned_shuffle_call(
     variable_name: &String,
     type_kind: &TypeKind,
 ) -> Option<Expression> {
-    create_shuffle_internal(
-        variable_name,
-        type_kind,
-        create_assigned_tuple_shuffle_call_fmt,
-        create_assigned_shuffle_call_fmt,
-    )
+    create_shuffle_internal(variable_name, type_kind, create_assigned_shuffle_call_fmt)
 }
 
 /// Create a `simd_shuffle!(<...>, [...])` call
 pub fn create_shuffle_call(variable_name: &String, type_kind: &TypeKind) -> Option<Expression> {
-    create_shuffle_internal(
-        variable_name,
-        type_kind,
-        create_assigned_tuple_shuffle_call_fmt,
-        create_shuffle_call_fmt,
-    )
+    create_shuffle_internal(variable_name, type_kind, create_shuffle_call_fmt)
 }
diff --git a/crates/stdarch-gen-arm/src/context.rs b/crates/stdarch-gen-arm/src/context.rs
index 9b8eb8e8b9..85342a1804 100644
--- a/crates/stdarch-gen-arm/src/context.rs
+++ b/crates/stdarch-gen-arm/src/context.rs
@@ -43,6 +43,10 @@ pub struct GlobalContext {
     /// Should all LLVM wrappers convert their arguments to a signed type
     #[serde(default)]
     pub auto_llvm_sign_conversion: bool,
+
+    /// Should SVE load/store tests be generated?
+    #[serde(default)]
+    pub generate_load_store_tests: bool,
 }
 
 /// Context of an intrinsic group
@@ -218,7 +222,7 @@ impl LocalContext {
             } => Ok(Expression::MacroCall(
                 "static_assert_range".to_string(),
                 format!(
-                    "{variable}, {min}, {max}",
+                    "{variable}, {min}..={max}",
                     min = range.start(),
                     max = range.end()
                 ),
@@ -246,7 +250,7 @@ impl LocalContext {
                             |bitsize| Ok(higher_limit / bitsize - 1))?;
                     Ok(Expression::MacroCall(
                         "static_assert_range".to_string(),
-                        format!("{variable}, 0, {max}"),
+                        format!("{variable}, 0..={max}"),
                     ))
                 } else {
                     Err(format!(
diff --git a/crates/stdarch-gen-arm/src/expression.rs b/crates/stdarch-gen-arm/src/expression.rs
index d5644ef27d..daaf7ee689 100644
--- a/crates/stdarch-gen-arm/src/expression.rs
+++ b/crates/stdarch-gen-arm/src/expression.rs
@@ -23,6 +23,7 @@ use crate::{
 pub enum IdentifierType {
     Variable,
     Symbol,
+    UnsafeSymbol,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -65,7 +66,11 @@ impl FnCall {
     }
 
     pub fn is_expected_call(&self, fn_call_name: &str) -> bool {
-        if let Expression::Identifier(fn_name, IdentifierType::Symbol) = self.0.as_ref() {
+        if let Expression::Identifier(
+            fn_name,
+            IdentifierType::Symbol | IdentifierType::UnsafeSymbol,
+        ) = self.0.as_ref()
+        {
             fn_name.to_string() == fn_call_name
         } else {
             false
@@ -108,6 +113,8 @@ impl ToTokens for FnCall {
 pub enum Expression {
     /// (Re)Defines a variable
     Let(LetVariant),
+    /// Defines a const
+    Const(WildString, TypeKind, Box<Expression>),
     /// Performs a variable assignment operation
     Assign(String, Box<Expression>),
     /// Performs a macro call
@@ -141,8 +148,6 @@ pub enum Expression {
     LLVMLink(LLVMLink),
     /// Casts the given expression to the specified (unchecked) type
     CastAs(Box<Expression>, String),
-    /// Returns the LLVM `undef` symbol
-    SvUndef,
     /// Multiplication
     Multiply(Box<Expression>, Box<Expression>),
     /// Xor
@@ -169,6 +174,7 @@ impl Expression {
                 | LetVariant::WithType(_, _, ex)
                 | LetVariant::MutWithType(_, _, ex),
             ) => ex.pre_build(ctx),
+            Self::Const(_, _, ex) => ex.pre_build(ctx),
             Self::CastAs(ex, _) => ex.pre_build(ctx),
             Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => {
                 lhs.pre_build(ctx)?;
@@ -245,6 +251,15 @@ impl Expression {
                 );
                 ex.build(intrinsic, ctx)
             }
+            Self::Const(var_name, ty, ex) => {
+                var_name.build_acle(ctx.local)?;
+                ctx.local.variables.insert(
+                    var_name.to_string(),
+                    (ty.to_owned(), VariableType::Internal),
+                );
+                ex.build(intrinsic, ctx)
+            }
+
             Self::CastAs(ex, _) => ex.build(intrinsic, ctx),
             Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => {
                 lhs.build(intrinsic, ctx)?;
@@ -283,18 +298,20 @@ impl Expression {
     ///  - An unnecessary `unsafe` is a warning, made into an error by the CI's `-D warnings`.
     ///
     /// This **panics** if it encounters an expression that shouldn't appear in a safe function at
-    /// all (such as `SvUndef`).
+    /// all.
     pub fn requires_unsafe_wrapper(&self, ctx_fn: &str) -> bool {
         match self {
             // The call will need to be unsafe, but the declaration does not.
             Self::LLVMLink(..) => false,
-            // Identifiers, literals and type names are never unsafe.
-            Self::Identifier(..) => false,
+            // literals and type names are never unsafe.
             Self::IntConstant(..) => false,
             Self::FloatConstant(..) => false,
             Self::BoolConstant(..) => false,
             Self::Type(..) => false,
             Self::ConvertConst(..) => false,
+            // Only unsafe `Symbol` identifiers are unsafe
+            Self::Identifier(_, IdentifierType::UnsafeSymbol) => true,
+            Self::Identifier(..) => false,
             // Nested structures that aren't inherently unsafe, but could contain other expressions
             // that might be.
             Self::Assign(_var, exp) => exp.requires_unsafe_wrapper(ctx_fn),
@@ -303,6 +320,7 @@ impl Expression {
                 | LetVariant::WithType(_, _, exp)
                 | LetVariant::MutWithType(_, _, exp),
             ) => exp.requires_unsafe_wrapper(ctx_fn),
+            Self::Const(_, _, exp) => exp.requires_unsafe_wrapper(ctx_fn),
             Self::Array(exps) => exps.iter().any(|exp| exp.requires_unsafe_wrapper(ctx_fn)),
             Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => {
                 lhs.requires_unsafe_wrapper(ctx_fn) || rhs.requires_unsafe_wrapper(ctx_fn)
@@ -334,9 +352,6 @@ impl Expression {
             },
             // We only use macros to check const generics (using static assertions).
             Self::MacroCall(_name, _args) => false,
-            // Materialising uninitialised values is always unsafe, and we avoid it in safe
-            // functions.
-            Self::SvUndef => panic!("Refusing to wrap unsafe SvUndef in safe function '{ctx_fn}'."),
             // Variants that aren't tokenised. We shouldn't encounter these here.
             Self::MatchKind(..) => {
                 unimplemented!("The unsafety of {self:?} cannot be determined in '{ctx_fn}'.")
@@ -377,9 +392,7 @@ impl FromStr for Expression {
         static MACRO_RE: LazyLock<Regex> =
             LazyLock::new(|| Regex::new(r"^(?P<name>[\w\d_]+)!\((?P<ex>.*?)\);?$").unwrap());
 
-        if s == "SvUndef" {
-            Ok(Expression::SvUndef)
-        } else if MACRO_RE.is_match(s) {
+        if MACRO_RE.is_match(s) {
             let c = MACRO_RE.captures(s).unwrap();
             let ex = c["ex"].to_string();
             let _: TokenStream = ex
@@ -462,6 +475,10 @@ impl ToTokens for Expression {
                 let var_ident = format_ident!("{}", var_name.to_string());
                 tokens.append_all(quote! { let mut #var_ident: #ty = #exp })
             }
+            Self::Const(var_name, ty, exp) => {
+                let var_ident = format_ident!("{}", var_name.to_string());
+                tokens.append_all(quote! { const #var_ident: #ty = #exp })
+            }
             Self::Assign(var_name, exp) => {
                 /* If we are dereferencing a variable to assign a value \
                  * the 'format_ident!' macro does not like the asterix */
@@ -516,7 +533,6 @@ impl ToTokens for Expression {
                 let ty: TokenStream = ty.parse().expect("invalid syntax");
                 tokens.append_all(quote! { #ex as #ty })
             }
-            Self::SvUndef => tokens.append_all(quote! { simd_reinterpret(()) }),
             Self::Multiply(lhs, rhs) => tokens.append_all(quote! { #lhs * #rhs }),
             Self::Xor(lhs, rhs) => tokens.append_all(quote! { #lhs ^ #rhs }),
             Self::Type(ty) => ty.to_tokens(tokens),
diff --git a/crates/stdarch-gen-arm/src/fn_suffix.rs b/crates/stdarch-gen-arm/src/fn_suffix.rs
index 26c156ae17..6fba3dc744 100644
--- a/crates/stdarch-gen-arm/src/fn_suffix.rs
+++ b/crates/stdarch-gen-arm/src/fn_suffix.rs
@@ -188,7 +188,7 @@ impl FromStr for SuffixKind {
             "rot90_lane" => Ok(SuffixKind::Rot90Lane),
             "rot90_laneq" => Ok(SuffixKind::Rot90LaneQ),
             "rot180" => Ok(SuffixKind::Rot180),
-            "rot180_lane" => Ok(SuffixKind::Rot180LaneQ),
+            "rot180_lane" => Ok(SuffixKind::Rot180Lane),
             "rot180_laneq" => Ok(SuffixKind::Rot180LaneQ),
             "u" => Ok(SuffixKind::Unsigned),
             "nox" => Ok(SuffixKind::NoX),
diff --git a/crates/stdarch-gen-arm/src/intrinsic.rs b/crates/stdarch-gen-arm/src/intrinsic.rs
index efaa9e1418..72fb97fee1 100644
--- a/crates/stdarch-gen-arm/src/intrinsic.rs
+++ b/crates/stdarch-gen-arm/src/intrinsic.rs
@@ -550,7 +550,7 @@ impl LLVMLink {
 
     /// Alters all the unsigned types from the signature. This is required where
     /// a signed and unsigned variant require the same binding to an exposed
-    /// LLVM instrinsic.
+    /// LLVM intrinsic.
     pub fn sanitise_uints(&mut self) {
         let transform = |tk: &mut TypeKind| {
             if let Some(BaseType::Sized(BaseTypeKind::UInt, size)) = tk.base_type() {
@@ -630,7 +630,7 @@ impl LLVMLink {
 
                     match (scope, kind.base_type()) {
                         (Argument, Some(Sized(Bool, bitsize))) if *bitsize != 8 => {
-                            Ok(convert("into", arg))
+                            Ok(convert("sve_into", arg))
                         }
                         (Argument, Some(Sized(UInt, _) | Unsized(UInt))) => {
                             if ctx.global.auto_llvm_sign_conversion {
@@ -647,27 +647,26 @@ impl LLVMLink {
             })
             .try_collect()?;
 
-        let return_type_conversion = if !ctx.global.auto_llvm_sign_conversion {
-            None
-        } else {
-            self.signature
-                .as_ref()
-                .and_then(|sig| sig.return_type.as_ref())
-                .and_then(|ty| {
-                    if let Some(Sized(Bool, bitsize)) = ty.base_type() {
-                        (*bitsize != 8).then_some(Bool)
-                    } else if let Some(Sized(UInt, _) | Unsized(UInt)) = ty.base_type() {
-                        Some(UInt)
-                    } else {
-                        None
-                    }
-                })
-        };
+        let return_type_conversion = self
+            .signature
+            .as_ref()
+            .and_then(|sig| sig.return_type.as_ref())
+            .and_then(|ty| {
+                if let Some(Sized(Bool, bitsize)) = ty.base_type() {
+                    (*bitsize != 8).then_some(Bool)
+                } else if let Some(Sized(UInt, _) | Unsized(UInt)) = ty.base_type() {
+                    Some(UInt)
+                } else {
+                    None
+                }
+            });
 
         let fn_call = Expression::FnCall(fn_call);
         match return_type_conversion {
-            Some(Bool) => Ok(convert("into", fn_call)),
-            Some(UInt) => Ok(convert("as_unsigned", fn_call)),
+            Some(Bool) => Ok(convert("sve_into", fn_call)),
+            Some(UInt) if ctx.global.auto_llvm_sign_conversion => {
+                Ok(convert("as_unsigned", fn_call))
+            }
             _ => Ok(fn_call),
         }
     }
@@ -807,6 +806,7 @@ pub enum UnsafetyComment {
     NonTemporal,
     Neon,
     NoProvenance(String),
+    PointerWrite(String),
 }
 
 #[derive(Debug, Clone, Default, Serialize, Deserialize)]
@@ -840,7 +840,7 @@ impl fmt::Display for UnsafetyComment {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
             Self::Custom(s) => s.fmt(f),
-            Self::Neon => write!(f, "Neon instrinsic unsafe"),
+            Self::Neon => write!(f, "Neon intrinsic unsafe"),
             Self::Uninitialized => write!(
                 f,
                 "This creates an uninitialized value, and may be unsound (like \
@@ -872,8 +872,12 @@ impl fmt::Display for UnsafetyComment {
             Self::NoProvenance(arg) => write!(
                 f,
                 "Addresses passed in `{arg}` lack provenance, so this is similar to using a \
-                `usize as ptr` cast (or [`core::ptr::from_exposed_addr`]) on each lane before \
-                using it."
+                `usize as ptr` cast (or [`core::ptr::with_exposed_provenance`]) on each lane \
+                before  using it."
+            ),
+            Self::PointerWrite(arg) => write!(
+                f,
+                "The pointer in `{arg}` must satisfy the requirements of [`core::ptr::write`]."
             ),
             Self::UnpredictableOnFault => write!(
                 f,
@@ -1055,23 +1059,8 @@ impl Intrinsic {
 
     /// Add a big endian implementation
     fn generate_big_endian(&self, variant: &mut Intrinsic) {
-        /* We can't always blindly reverse the bits only in certain conditions
-         * do we need a different order - thus this allows us to have the
-         * ability to do so without having to play codegolf with the yaml AST */
-        let should_reverse = {
-            if let Some(should_reverse) = variant.big_endian_inverse {
-                should_reverse
-            } else if variant.compose.len() == 1 {
-                match &variant.compose[0] {
-                    Expression::FnCall(fn_call) => fn_call.0.to_string() == "transmute",
-                    _ => false,
-                }
-            } else {
-                false
-            }
-        };
-
-        if !should_reverse {
+        // We only reverse if it was specifically requested
+        if !variant.big_endian_inverse.unwrap_or(false) {
             return;
         }
 
@@ -1139,7 +1128,7 @@ impl Intrinsic {
             } else {
                 /* If we do not need to reorder anything then immediately add
                  * the expressions from the big_endian_expressions and
-                 * concatinate the compose vector */
+                 * concatenate the compose vector */
                 variant.big_endian_compose.extend(big_endian_expressions);
                 variant
                     .big_endian_compose
@@ -1157,11 +1146,11 @@ impl Intrinsic {
 
             /* If we do not create a shuffle call we do not need modify the
              * return value and append to the big endian ast array. A bit confusing
-             * as in code we are making the final call before caputuring the return
+             * as in code we are making the final call before capturing the return
              * value of the intrinsic that has been called.*/
             let ret_val_name = "ret_val".to_string();
             if let Some(simd_shuffle_call) = create_shuffle_call(&ret_val_name, return_type) {
-                /* There is a possibility that the funcion arguments did not
+                /* There is a possibility that the function arguments did not
                  * require big endian treatment, thus we need to now add the
                  * original function body before appending the return value.*/
                 if variant.big_endian_compose.is_empty() {
@@ -1187,9 +1176,10 @@ impl Intrinsic {
                      * re-assigning each tuple however those generated calls do
                      * not make the parent function return. So we add the return
                      * value here */
-                    variant
-                        .big_endian_compose
-                        .push(create_symbol_identifier(&ret_val_name));
+                    variant.big_endian_compose.push(create_symbol_identifier(
+                        &ret_val_name,
+                        IdentifierType::Symbol,
+                    ));
                 }
             }
         }
@@ -1695,8 +1685,8 @@ enum Endianness {
     NA,
 }
 
-/// Based on the endianess will create the appropriate intrinsic, or simply
-/// create the desired intrinsic without any endianess
+/// Based on the endianness will create the appropriate intrinsic, or simply
+/// create the desired intrinsic without any endianness
 fn create_tokens(intrinsic: &Intrinsic, endianness: Endianness, tokens: &mut TokenStream) {
     let signature = &intrinsic.signature;
     let fn_name = signature.fn_name().to_string();
@@ -1730,7 +1720,8 @@ fn create_tokens(intrinsic: &Intrinsic, endianness: Endianness, tokens: &mut Tok
             "{fn_name} needs to be private, or to have documentation."
         );
         assert!(
-            !safety.has_doc_comments(),
+            !safety.has_doc_comments()
+                || matches!(intrinsic.visibility, FunctionVisibility::Private),
             "{fn_name} needs a documentation section for its safety comments."
         );
     }
diff --git a/crates/stdarch-gen-arm/src/load_store_tests.rs b/crates/stdarch-gen-arm/src/load_store_tests.rs
index 0f4de83dac..cbd5df50de 100644
--- a/crates/stdarch-gen-arm/src/load_store_tests.rs
+++ b/crates/stdarch-gen-arm/src/load_store_tests.rs
@@ -141,13 +141,6 @@ fn generate_single_test(
         }
     }
 
-    if fn_name.starts_with("svldff1") && fn_name.contains("gather") {
-        // TODO: We can remove this check when first-faulting gathers are fixed in CI's QEMU
-        // https://gitlab.com/qemu-project/qemu/-/issues/1612
-        println!("Skipping test for {fn_name}");
-        return Ok(quote!());
-    }
-
     let fn_ident = format_ident!("{fn_name}");
     let test_name = format_ident!(
         "test_{fn_name}{}",
diff --git a/crates/stdarch-gen-arm/src/main.rs b/crates/stdarch-gen-arm/src/main.rs
index 9bf7d0981d..b7e2aa416f 100644
--- a/crates/stdarch-gen-arm/src/main.rs
+++ b/crates/stdarch-gen-arm/src/main.rs
@@ -54,7 +54,7 @@ fn main() -> Result<(), String> {
                     vv.into_iter().flatten().collect_vec()
                 })?;
 
-            if filepath.ends_with("sve.spec.yml") || filepath.ends_with("sve2.spec.yml") {
+            if input.ctx.generate_load_store_tests {
                 let loads = intrinsics.iter()
                     .filter_map(|i| {
                         if matches!(i.test, Test::Load(..)) {
@@ -139,6 +139,7 @@ fn parse_args() -> Vec<(PathBuf, Option<PathBuf>)> {
         .into_iter()
         .filter_map(Result::ok)
         .filter(|f| f.file_type().is_file())
+        .filter(|f| f.file_name().to_string_lossy().ends_with(".yml"))
         .map(|f| (f.into_path(), out_dir.clone()))
         .collect()
 }
diff --git a/crates/stdarch-gen-arm/src/typekinds.rs b/crates/stdarch-gen-arm/src/typekinds.rs
index bd47ff2bd1..c3aa22294d 100644
--- a/crates/stdarch-gen-arm/src/typekinds.rs
+++ b/crates/stdarch-gen-arm/src/typekinds.rs
@@ -289,9 +289,9 @@ impl TypeKind {
                     (
                         BaseType::Sized(Float | Int | UInt, _),
                         BaseType::Sized(Float | Int | UInt, _),
-                    ) => Some(FnCall::new_expression(
+                    ) => Some(FnCall::new_unsafe_expression(
                         // Conversions between float and (u)int, or where the lane size changes.
-                        "simd_reinterpret".parse().unwrap(),
+                        "transmute_unchecked".parse().unwrap(),
                         vec![expr.into()],
                     )),
                     _ => None,
diff --git a/crates/stdarch-gen-hexagon-scalar/Cargo.toml b/crates/stdarch-gen-hexagon-scalar/Cargo.toml
new file mode 100644
index 0000000000..04bee944f4
--- /dev/null
+++ b/crates/stdarch-gen-hexagon-scalar/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "stdarch-gen-hexagon-scalar"
+version = "0.1.0"
+authors = ["The Rust Project Developers"]
+license = "MIT OR Apache-2.0"
+edition = "2021"
+
+[dependencies]
+regex = "1.10"
diff --git a/crates/stdarch-gen-hexagon-scalar/hexagon_protos.h b/crates/stdarch-gen-hexagon-scalar/hexagon_protos.h
new file mode 100644
index 0000000000..2642f3c842
--- /dev/null
+++ b/crates/stdarch-gen-hexagon-scalar/hexagon_protos.h
@@ -0,0 +1,8439 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//===----------------------------------------------------------------------===//
+
+
+
+#ifndef __HEXAGON_PROTOS_H_
+#define __HEXAGON_PROTOS_H_ 1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=abs(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_abs_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_abs_R __builtin_HEXAGON_A2_abs
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=abs(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_abs_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_abs_P __builtin_HEXAGON_A2_absp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=abs(Rs32):sat
+   C Intrinsic Prototype: Word32 Q6_R_abs_R_sat(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_abs_R_sat __builtin_HEXAGON_A2_abssat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_add_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_add_RR __builtin_HEXAGON_A2_add
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.h,Rs32.h):<<16
+   C Intrinsic Prototype: Word32 Q6_R_add_RhRh_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RhRh_s16 __builtin_HEXAGON_A2_addh_h16_hh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.h,Rs32.l):<<16
+   C Intrinsic Prototype: Word32 Q6_R_add_RhRl_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RhRl_s16 __builtin_HEXAGON_A2_addh_h16_hl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.l,Rs32.h):<<16
+   C Intrinsic Prototype: Word32 Q6_R_add_RlRh_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RlRh_s16 __builtin_HEXAGON_A2_addh_h16_lh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.l,Rs32.l):<<16
+   C Intrinsic Prototype: Word32 Q6_R_add_RlRl_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RlRl_s16 __builtin_HEXAGON_A2_addh_h16_ll
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.h,Rs32.h):sat:<<16
+   C Intrinsic Prototype: Word32 Q6_R_add_RhRh_sat_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RhRh_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_hh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.h,Rs32.l):sat:<<16
+   C Intrinsic Prototype: Word32 Q6_R_add_RhRl_sat_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RhRl_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_hl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.l,Rs32.h):sat:<<16
+   C Intrinsic Prototype: Word32 Q6_R_add_RlRh_sat_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RlRh_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_lh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.l,Rs32.l):sat:<<16
+   C Intrinsic Prototype: Word32 Q6_R_add_RlRl_sat_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RlRl_sat_s16 __builtin_HEXAGON_A2_addh_h16_sat_ll
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.l,Rs32.h)
+   C Intrinsic Prototype: Word32 Q6_R_add_RlRh(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RlRh __builtin_HEXAGON_A2_addh_l16_hl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.l,Rs32.l)
+   C Intrinsic Prototype: Word32 Q6_R_add_RlRl(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RlRl __builtin_HEXAGON_A2_addh_l16_ll
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.l,Rs32.h):sat
+   C Intrinsic Prototype: Word32 Q6_R_add_RlRh_sat(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RlRh_sat __builtin_HEXAGON_A2_addh_l16_sat_hl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rt32.l,Rs32.l):sat
+   C Intrinsic Prototype: Word32 Q6_R_add_RlRl_sat(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_RlRl_sat __builtin_HEXAGON_A2_addh_l16_sat_ll
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rs32,#s16)
+   C Intrinsic Prototype: Word32 Q6_R_add_RI(Word32 Rs, Word32 Is16)
+   Instruction Type:      ALU32_ADDI
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_add_RI __builtin_HEXAGON_A2_addi
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=add(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_add_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_add_PP __builtin_HEXAGON_A2_addp
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=add(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_add_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_add_PP_sat __builtin_HEXAGON_A2_addpsat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word32 Q6_R_add_RR_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_add_RR_sat __builtin_HEXAGON_A2_addsat
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=add(Rs32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_add_RP(Word32 Rs, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_add_RP __builtin_HEXAGON_A2_addsp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=and(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_and_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_and_RR __builtin_HEXAGON_A2_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=and(Rs32,#s10)
+   C Intrinsic Prototype: Word32 Q6_R_and_RI(Word32 Rs, Word32 Is10)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_and_RI __builtin_HEXAGON_A2_andir
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=and(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_and_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_and_PP __builtin_HEXAGON_A2_andp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=aslh(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_aslh_R(Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_aslh_R __builtin_HEXAGON_A2_aslh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asrh(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_asrh_R(Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_asrh_R __builtin_HEXAGON_A2_asrh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=combine(Rt32.h,Rs32.h)
+   C Intrinsic Prototype: Word32 Q6_R_combine_RhRh(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_combine_RhRh __builtin_HEXAGON_A2_combine_hh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=combine(Rt32.h,Rs32.l)
+   C Intrinsic Prototype: Word32 Q6_R_combine_RhRl(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_combine_RhRl __builtin_HEXAGON_A2_combine_hl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=combine(Rt32.l,Rs32.h)
+   C Intrinsic Prototype: Word32 Q6_R_combine_RlRh(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_combine_RlRh __builtin_HEXAGON_A2_combine_lh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=combine(Rt32.l,Rs32.l)
+   C Intrinsic Prototype: Word32 Q6_R_combine_RlRl(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_combine_RlRl __builtin_HEXAGON_A2_combine_ll
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=combine(#s8,#S8)
+   C Intrinsic Prototype: Word64 Q6_P_combine_II(Word32 Is8, Word32 IS8)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_combine_II __builtin_HEXAGON_A2_combineii
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=combine(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_combine_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_combine_RR __builtin_HEXAGON_A2_combinew
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=max(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_max_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_max_RR __builtin_HEXAGON_A2_max
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=max(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_max_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_max_PP __builtin_HEXAGON_A2_maxp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=maxu(Rs32,Rt32)
+   C Intrinsic Prototype: UWord32 Q6_R_maxu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_maxu_RR __builtin_HEXAGON_A2_maxu
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=maxu(Rss32,Rtt32)
+   C Intrinsic Prototype: UWord64 Q6_P_maxu_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_maxu_PP __builtin_HEXAGON_A2_maxup
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=min(Rt32,Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_min_RR(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_min_RR __builtin_HEXAGON_A2_min
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=min(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_min_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_min_PP __builtin_HEXAGON_A2_minp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=minu(Rt32,Rs32)
+   C Intrinsic Prototype: UWord32 Q6_R_minu_RR(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_minu_RR __builtin_HEXAGON_A2_minu
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=minu(Rtt32,Rss32)
+   C Intrinsic Prototype: UWord64 Q6_P_minu_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_minu_PP __builtin_HEXAGON_A2_minup
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=neg(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_neg_R(Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_neg_R __builtin_HEXAGON_A2_neg
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=neg(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_neg_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_neg_P __builtin_HEXAGON_A2_negp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=neg(Rs32):sat
+   C Intrinsic Prototype: Word32 Q6_R_neg_R_sat(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_neg_R_sat __builtin_HEXAGON_A2_negsat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=not(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_not_R(Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_not_R __builtin_HEXAGON_A2_not
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=not(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_not_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_not_P __builtin_HEXAGON_A2_notp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=or(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_or_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_or_RR __builtin_HEXAGON_A2_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=or(Rs32,#s10)
+   C Intrinsic Prototype: Word32 Q6_R_or_RI(Word32 Rs, Word32 Is10)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_or_RI __builtin_HEXAGON_A2_orir
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=or(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_or_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_or_PP __builtin_HEXAGON_A2_orp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=round(Rss32):sat
+   C Intrinsic Prototype: Word32 Q6_R_round_P_sat(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_round_P_sat __builtin_HEXAGON_A2_roundsat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sat(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_sat_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sat_P __builtin_HEXAGON_A2_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=satb(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_satb_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_satb_R __builtin_HEXAGON_A2_satb
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sath(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_sath_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sath_R __builtin_HEXAGON_A2_sath
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=satub(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_satub_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_satub_R __builtin_HEXAGON_A2_satub
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=satuh(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_satuh_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_satuh_R __builtin_HEXAGON_A2_satuh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32,Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_sub_RR(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_sub_RR __builtin_HEXAGON_A2_sub
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.h,Rs32.h):<<16
+   C Intrinsic Prototype: Word32 Q6_R_sub_RhRh_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RhRh_s16 __builtin_HEXAGON_A2_subh_h16_hh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.h,Rs32.l):<<16
+   C Intrinsic Prototype: Word32 Q6_R_sub_RhRl_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RhRl_s16 __builtin_HEXAGON_A2_subh_h16_hl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.l,Rs32.h):<<16
+   C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RlRh_s16 __builtin_HEXAGON_A2_subh_h16_lh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.l,Rs32.l):<<16
+   C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RlRl_s16 __builtin_HEXAGON_A2_subh_h16_ll
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.h,Rs32.h):sat:<<16
+   C Intrinsic Prototype: Word32 Q6_R_sub_RhRh_sat_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RhRh_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_hh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.h,Rs32.l):sat:<<16
+   C Intrinsic Prototype: Word32 Q6_R_sub_RhRl_sat_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RhRl_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_hl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.l,Rs32.h):sat:<<16
+   C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_sat_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RlRh_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_lh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.l,Rs32.l):sat:<<16
+   C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_sat_s16(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RlRl_sat_s16 __builtin_HEXAGON_A2_subh_h16_sat_ll
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.l,Rs32.h)
+   C Intrinsic Prototype: Word32 Q6_R_sub_RlRh(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RlRh __builtin_HEXAGON_A2_subh_l16_hl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.l,Rs32.l)
+   C Intrinsic Prototype: Word32 Q6_R_sub_RlRl(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RlRl __builtin_HEXAGON_A2_subh_l16_ll
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.l,Rs32.h):sat
+   C Intrinsic Prototype: Word32 Q6_R_sub_RlRh_sat(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RlRh_sat __builtin_HEXAGON_A2_subh_l16_sat_hl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32.l,Rs32.l):sat
+   C Intrinsic Prototype: Word32 Q6_R_sub_RlRl_sat(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_RlRl_sat __builtin_HEXAGON_A2_subh_l16_sat_ll
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=sub(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_sub_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_sub_PP __builtin_HEXAGON_A2_subp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(#s10,Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_sub_IR(Word32 Is10, Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_sub_IR __builtin_HEXAGON_A2_subri
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sub(Rt32,Rs32):sat
+   C Intrinsic Prototype: Word32 Q6_R_sub_RR_sat(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_sub_RR_sat __builtin_HEXAGON_A2_subsat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vaddh(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_vaddh_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vaddh_RR __builtin_HEXAGON_A2_svaddh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vaddh(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word32 Q6_R_vaddh_RR_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vaddh_RR_sat __builtin_HEXAGON_A2_svaddhs
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vadduh(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word32 Q6_R_vadduh_RR_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vadduh_RR_sat __builtin_HEXAGON_A2_svadduhs
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vavgh(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_vavgh_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vavgh_RR __builtin_HEXAGON_A2_svavgh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vavgh(Rs32,Rt32):rnd
+   C Intrinsic Prototype: Word32 Q6_R_vavgh_RR_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vavgh_RR_rnd __builtin_HEXAGON_A2_svavghs
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vnavgh(Rt32,Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_vnavgh_RR(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vnavgh_RR __builtin_HEXAGON_A2_svnavgh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsubh(Rt32,Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_vsubh_RR(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vsubh_RR __builtin_HEXAGON_A2_svsubh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsubh(Rt32,Rs32):sat
+   C Intrinsic Prototype: Word32 Q6_R_vsubh_RR_sat(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vsubh_RR_sat __builtin_HEXAGON_A2_svsubhs
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsubuh(Rt32,Rs32):sat
+   C Intrinsic Prototype: Word32 Q6_R_vsubuh_RR_sat(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vsubuh_RR_sat __builtin_HEXAGON_A2_svsubuhs
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=swiz(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_swiz_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_swiz_R __builtin_HEXAGON_A2_swiz
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sxtb(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_sxtb_R(Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_sxtb_R __builtin_HEXAGON_A2_sxtb
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sxth(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_sxth_R(Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_sxth_R __builtin_HEXAGON_A2_sxth
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=sxtw(Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_sxtw_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_sxtw_R __builtin_HEXAGON_A2_sxtw
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=Rs32
+   C Intrinsic Prototype: Word32 Q6_R_equals_R(Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_equals_R __builtin_HEXAGON_A2_tfr
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32.h=#u16
+   C Intrinsic Prototype: Word32 Q6_Rh_equals_I(Word32 Rx, Word32 Iu16)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Rh_equals_I __builtin_HEXAGON_A2_tfrih
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32.l=#u16
+   C Intrinsic Prototype: Word32 Q6_Rl_equals_I(Word32 Rx, Word32 Iu16)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Rl_equals_I __builtin_HEXAGON_A2_tfril
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=Rss32
+   C Intrinsic Prototype: Word64 Q6_P_equals_P(Word64 Rss)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_equals_P __builtin_HEXAGON_A2_tfrp
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=#s8
+   C Intrinsic Prototype: Word64 Q6_P_equals_I(Word32 Is8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_equals_I __builtin_HEXAGON_A2_tfrpi
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=#s16
+   C Intrinsic Prototype: Word32 Q6_R_equals_I(Word32 Is16)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_equals_I __builtin_HEXAGON_A2_tfrsi
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vabsh(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vabsh_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vabsh_P __builtin_HEXAGON_A2_vabsh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vabsh(Rss32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vabsh_P_sat(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vabsh_P_sat __builtin_HEXAGON_A2_vabshsat
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vabsw(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vabsw_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vabsw_P __builtin_HEXAGON_A2_vabsw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vabsw(Rss32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vabsw_P_sat(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vabsw_P_sat __builtin_HEXAGON_A2_vabswsat
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaddb(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vaddb_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      MAPPING
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_vaddb_PP __builtin_HEXAGON_A2_vaddb_map
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaddh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vaddh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaddh_PP __builtin_HEXAGON_A2_vaddh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaddh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vaddh_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaddh_PP_sat __builtin_HEXAGON_A2_vaddhs
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaddub(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vaddub_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaddub_PP __builtin_HEXAGON_A2_vaddub
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaddub(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vaddub_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaddub_PP_sat __builtin_HEXAGON_A2_vaddubs
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vadduh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vadduh_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vadduh_PP_sat __builtin_HEXAGON_A2_vadduhs
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaddw(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vaddw_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaddw_PP __builtin_HEXAGON_A2_vaddw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaddw(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vaddw_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaddw_PP_sat __builtin_HEXAGON_A2_vaddws
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavgh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vavgh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavgh_PP __builtin_HEXAGON_A2_vavgh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavgh(Rss32,Rtt32):crnd
+   C Intrinsic Prototype: Word64 Q6_P_vavgh_PP_crnd(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavgh_PP_crnd __builtin_HEXAGON_A2_vavghcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavgh(Rss32,Rtt32):rnd
+   C Intrinsic Prototype: Word64 Q6_P_vavgh_PP_rnd(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavgh_PP_rnd __builtin_HEXAGON_A2_vavghr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavgub(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vavgub_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavgub_PP __builtin_HEXAGON_A2_vavgub
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavgub(Rss32,Rtt32):rnd
+   C Intrinsic Prototype: Word64 Q6_P_vavgub_PP_rnd(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavgub_PP_rnd __builtin_HEXAGON_A2_vavgubr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavguh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vavguh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavguh_PP __builtin_HEXAGON_A2_vavguh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavguh(Rss32,Rtt32):rnd
+   C Intrinsic Prototype: Word64 Q6_P_vavguh_PP_rnd(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavguh_PP_rnd __builtin_HEXAGON_A2_vavguhr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavguw(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vavguw_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavguw_PP __builtin_HEXAGON_A2_vavguw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavguw(Rss32,Rtt32):rnd
+   C Intrinsic Prototype: Word64 Q6_P_vavguw_PP_rnd(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavguw_PP_rnd __builtin_HEXAGON_A2_vavguwr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavgw(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vavgw_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavgw_PP __builtin_HEXAGON_A2_vavgw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavgw(Rss32,Rtt32):crnd
+   C Intrinsic Prototype: Word64 Q6_P_vavgw_PP_crnd(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavgw_PP_crnd __builtin_HEXAGON_A2_vavgwcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vavgw(Rss32,Rtt32):rnd
+   C Intrinsic Prototype: Word64 Q6_P_vavgw_PP_rnd(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vavgw_PP_rnd __builtin_HEXAGON_A2_vavgwr
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpb.eq(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_vcmpb_eq_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpb_eq_PP __builtin_HEXAGON_A2_vcmpbeq
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpb.gtu(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_vcmpb_gtu_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpb_gtu_PP __builtin_HEXAGON_A2_vcmpbgtu
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmph.eq(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_vcmph_eq_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmph_eq_PP __builtin_HEXAGON_A2_vcmpheq
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmph.gt(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_vcmph_gt_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmph_gt_PP __builtin_HEXAGON_A2_vcmphgt
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmph.gtu(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_vcmph_gtu_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmph_gtu_PP __builtin_HEXAGON_A2_vcmphgtu
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpw.eq(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_vcmpw_eq_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpw_eq_PP __builtin_HEXAGON_A2_vcmpweq
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpw.gt(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_vcmpw_gt_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpw_gt_PP __builtin_HEXAGON_A2_vcmpwgt
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpw.gtu(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_vcmpw_gtu_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpw_gtu_PP __builtin_HEXAGON_A2_vcmpwgtu
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vconj(Rss32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vconj_P_sat(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vconj_P_sat __builtin_HEXAGON_A2_vconj
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmaxb(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vmaxb_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmaxb_PP __builtin_HEXAGON_A2_vmaxb
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmaxh(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vmaxh_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmaxh_PP __builtin_HEXAGON_A2_vmaxh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmaxub(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vmaxub_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmaxub_PP __builtin_HEXAGON_A2_vmaxub
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmaxuh(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vmaxuh_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmaxuh_PP __builtin_HEXAGON_A2_vmaxuh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmaxuw(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vmaxuw_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmaxuw_PP __builtin_HEXAGON_A2_vmaxuw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmaxw(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vmaxw_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmaxw_PP __builtin_HEXAGON_A2_vmaxw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vminb(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vminb_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vminb_PP __builtin_HEXAGON_A2_vminb
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vminh(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vminh_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vminh_PP __builtin_HEXAGON_A2_vminh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vminub(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vminub_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vminub_PP __builtin_HEXAGON_A2_vminub
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vminuh(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vminuh_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vminuh_PP __builtin_HEXAGON_A2_vminuh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vminuw(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vminuw_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vminuw_PP __builtin_HEXAGON_A2_vminuw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vminw(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vminw_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vminw_PP __builtin_HEXAGON_A2_vminw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vnavgh(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vnavgh_PP __builtin_HEXAGON_A2_vnavgh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vnavgh(Rtt32,Rss32):crnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP_crnd_sat(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vnavgh_PP_crnd_sat __builtin_HEXAGON_A2_vnavghcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vnavgh(Rtt32,Rss32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vnavgh_PP_rnd_sat(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vnavgh_PP_rnd_sat __builtin_HEXAGON_A2_vnavghr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vnavgw(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vnavgw_PP __builtin_HEXAGON_A2_vnavgw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vnavgw(Rtt32,Rss32):crnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP_crnd_sat(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vnavgw_PP_crnd_sat __builtin_HEXAGON_A2_vnavgwcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vnavgw(Rtt32,Rss32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vnavgw_PP_rnd_sat(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vnavgw_PP_rnd_sat __builtin_HEXAGON_A2_vnavgwr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vraddub(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vraddub_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vraddub_PP __builtin_HEXAGON_A2_vraddub
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vraddub(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vraddubacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vraddubacc_PP __builtin_HEXAGON_A2_vraddub_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrsadub(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrsadub_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrsadub_PP __builtin_HEXAGON_A2_vrsadub
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrsadub(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrsadubacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrsadubacc_PP __builtin_HEXAGON_A2_vrsadub_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsubb(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vsubb_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      MAPPING
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_vsubb_PP __builtin_HEXAGON_A2_vsubb_map
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsubh(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vsubh_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsubh_PP __builtin_HEXAGON_A2_vsubh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsubh(Rtt32,Rss32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vsubh_PP_sat(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsubh_PP_sat __builtin_HEXAGON_A2_vsubhs
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsubub(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vsubub_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsubub_PP __builtin_HEXAGON_A2_vsubub
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsubub(Rtt32,Rss32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vsubub_PP_sat(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsubub_PP_sat __builtin_HEXAGON_A2_vsububs
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsubuh(Rtt32,Rss32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vsubuh_PP_sat(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsubuh_PP_sat __builtin_HEXAGON_A2_vsubuhs
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsubw(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vsubw_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsubw_PP __builtin_HEXAGON_A2_vsubw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsubw(Rtt32,Rss32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vsubw_PP_sat(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsubw_PP_sat __builtin_HEXAGON_A2_vsubws
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=xor(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_xor_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_xor_RR __builtin_HEXAGON_A2_xor
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=xor(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_xor_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_xor_PP __builtin_HEXAGON_A2_xorp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=zxtb(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_zxtb_R(Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_zxtb_R __builtin_HEXAGON_A2_zxtb
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=zxth(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_zxth_R(Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_zxth_R __builtin_HEXAGON_A2_zxth
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=and(Rt32,~Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_and_RnR(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_and_RnR __builtin_HEXAGON_A4_andn
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=and(Rtt32,~Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_and_PnP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_and_PnP __builtin_HEXAGON_A4_andnp
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=bitsplit(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_bitsplit_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_bitsplit_RR __builtin_HEXAGON_A4_bitsplit
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=bitsplit(Rs32,#u5)
+   C Intrinsic Prototype: Word64 Q6_P_bitsplit_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_bitsplit_RI __builtin_HEXAGON_A4_bitspliti
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=boundscheck(Rs32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_boundscheck_RP(Word32 Rs, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_boundscheck_RP __builtin_HEXAGON_A4_boundscheck
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmpb.eq(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmpb_eq_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmpb_eq_RR __builtin_HEXAGON_A4_cmpbeq
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmpb.eq(Rs32,#u8)
+   C Intrinsic Prototype: Byte Q6_p_cmpb_eq_RI(Word32 Rs, Word32 Iu8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmpb_eq_RI __builtin_HEXAGON_A4_cmpbeqi
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmpb.gt(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmpb_gt_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmpb_gt_RR __builtin_HEXAGON_A4_cmpbgt
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmpb.gt(Rs32,#s8)
+   C Intrinsic Prototype: Byte Q6_p_cmpb_gt_RI(Word32 Rs, Word32 Is8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmpb_gt_RI __builtin_HEXAGON_A4_cmpbgti
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmpb.gtu(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmpb_gtu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmpb_gtu_RR __builtin_HEXAGON_A4_cmpbgtu
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmpb.gtu(Rs32,#u7)
+   C Intrinsic Prototype: Byte Q6_p_cmpb_gtu_RI(Word32 Rs, Word32 Iu7)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmpb_gtu_RI __builtin_HEXAGON_A4_cmpbgtui
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmph.eq(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmph_eq_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmph_eq_RR __builtin_HEXAGON_A4_cmpheq
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmph.eq(Rs32,#s8)
+   C Intrinsic Prototype: Byte Q6_p_cmph_eq_RI(Word32 Rs, Word32 Is8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmph_eq_RI __builtin_HEXAGON_A4_cmpheqi
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmph.gt(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmph_gt_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmph_gt_RR __builtin_HEXAGON_A4_cmphgt
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmph.gt(Rs32,#s8)
+   C Intrinsic Prototype: Byte Q6_p_cmph_gt_RI(Word32 Rs, Word32 Is8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmph_gt_RI __builtin_HEXAGON_A4_cmphgti
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmph.gtu(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmph_gtu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmph_gtu_RR __builtin_HEXAGON_A4_cmphgtu
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmph.gtu(Rs32,#u7)
+   C Intrinsic Prototype: Byte Q6_p_cmph_gtu_RI(Word32 Rs, Word32 Iu7)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmph_gtu_RI __builtin_HEXAGON_A4_cmphgtui
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=combine(#s8,Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_combine_IR(Word32 Is8, Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_combine_IR __builtin_HEXAGON_A4_combineir
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=combine(Rs32,#s8)
+   C Intrinsic Prototype: Word64 Q6_P_combine_RI(Word32 Rs, Word32 Is8)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_combine_RI __builtin_HEXAGON_A4_combineri
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cround(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_cround_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cround_RI __builtin_HEXAGON_A4_cround_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cround(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_cround_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cround_RR __builtin_HEXAGON_A4_cround_rr
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=modwrap(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_modwrap_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_modwrap_RR __builtin_HEXAGON_A4_modwrapu
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=or(Rt32,~Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_or_RnR(Word32 Rt, Word32 Rs)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_or_RnR __builtin_HEXAGON_A4_orn
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=or(Rtt32,~Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_or_PnP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_or_PnP __builtin_HEXAGON_A4_ornp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmp.eq(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_cmp_eq_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_cmp_eq_RR __builtin_HEXAGON_A4_rcmpeq
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmp.eq(Rs32,#s8)
+   C Intrinsic Prototype: Word32 Q6_R_cmp_eq_RI(Word32 Rs, Word32 Is8)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_cmp_eq_RI __builtin_HEXAGON_A4_rcmpeqi
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=!cmp.eq(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_not_cmp_eq_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_not_cmp_eq_RR __builtin_HEXAGON_A4_rcmpneq
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=!cmp.eq(Rs32,#s8)
+   C Intrinsic Prototype: Word32 Q6_R_not_cmp_eq_RI(Word32 Rs, Word32 Is8)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_not_cmp_eq_RI __builtin_HEXAGON_A4_rcmpneqi
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=round(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_round_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_round_RI __builtin_HEXAGON_A4_round_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=round(Rs32,#u5):sat
+   C Intrinsic Prototype: Word32 Q6_R_round_RI_sat(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_round_RI_sat __builtin_HEXAGON_A4_round_ri_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=round(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_round_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_round_RR __builtin_HEXAGON_A4_round_rr
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=round(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word32 Q6_R_round_RR_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_round_RR_sat __builtin_HEXAGON_A4_round_rr_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=tlbmatch(Rss32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_tlbmatch_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_tlbmatch_PR __builtin_HEXAGON_A4_tlbmatch
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=any8(vcmpb.eq(Rss32,Rtt32))
+   C Intrinsic Prototype: Byte Q6_p_any8_vcmpb_eq_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_any8_vcmpb_eq_PP __builtin_HEXAGON_A4_vcmpbeq_any
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpb.eq(Rss32,#u8)
+   C Intrinsic Prototype: Byte Q6_p_vcmpb_eq_PI(Word64 Rss, Word32 Iu8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpb_eq_PI __builtin_HEXAGON_A4_vcmpbeqi
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpb.gt(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_vcmpb_gt_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpb_gt_PP __builtin_HEXAGON_A4_vcmpbgt
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpb.gt(Rss32,#s8)
+   C Intrinsic Prototype: Byte Q6_p_vcmpb_gt_PI(Word64 Rss, Word32 Is8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpb_gt_PI __builtin_HEXAGON_A4_vcmpbgti
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpb.gtu(Rss32,#u7)
+   C Intrinsic Prototype: Byte Q6_p_vcmpb_gtu_PI(Word64 Rss, Word32 Iu7)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpb_gtu_PI __builtin_HEXAGON_A4_vcmpbgtui
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmph.eq(Rss32,#s8)
+   C Intrinsic Prototype: Byte Q6_p_vcmph_eq_PI(Word64 Rss, Word32 Is8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmph_eq_PI __builtin_HEXAGON_A4_vcmpheqi
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmph.gt(Rss32,#s8)
+   C Intrinsic Prototype: Byte Q6_p_vcmph_gt_PI(Word64 Rss, Word32 Is8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmph_gt_PI __builtin_HEXAGON_A4_vcmphgti
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmph.gtu(Rss32,#u7)
+   C Intrinsic Prototype: Byte Q6_p_vcmph_gtu_PI(Word64 Rss, Word32 Iu7)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmph_gtu_PI __builtin_HEXAGON_A4_vcmphgtui
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpw.eq(Rss32,#s8)
+   C Intrinsic Prototype: Byte Q6_p_vcmpw_eq_PI(Word64 Rss, Word32 Is8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpw_eq_PI __builtin_HEXAGON_A4_vcmpweqi
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpw.gt(Rss32,#s8)
+   C Intrinsic Prototype: Byte Q6_p_vcmpw_gt_PI(Word64 Rss, Word32 Is8)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpw_gt_PI __builtin_HEXAGON_A4_vcmpwgti
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=vcmpw.gtu(Rss32,#u7)
+   C Intrinsic Prototype: Byte Q6_p_vcmpw_gtu_PI(Word64 Rss, Word32 Iu7)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_vcmpw_gtu_PI __builtin_HEXAGON_A4_vcmpwgtui
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=vrmaxh(Rss32,Ru32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmaxh_PR(Word64 Rxx, Word64 Rss, Word32 Ru)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmaxh_PR __builtin_HEXAGON_A4_vrmaxh
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=vrmaxuh(Rss32,Ru32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmaxuh_PR(Word64 Rxx, Word64 Rss, Word32 Ru)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmaxuh_PR __builtin_HEXAGON_A4_vrmaxuh
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=vrmaxuw(Rss32,Ru32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmaxuw_PR(Word64 Rxx, Word64 Rss, Word32 Ru)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmaxuw_PR __builtin_HEXAGON_A4_vrmaxuw
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=vrmaxw(Rss32,Ru32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmaxw_PR(Word64 Rxx, Word64 Rss, Word32 Ru)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmaxw_PR __builtin_HEXAGON_A4_vrmaxw
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=vrminh(Rss32,Ru32)
+   C Intrinsic Prototype: Word64 Q6_P_vrminh_PR(Word64 Rxx, Word64 Rss, Word32 Ru)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrminh_PR __builtin_HEXAGON_A4_vrminh
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=vrminuh(Rss32,Ru32)
+   C Intrinsic Prototype: Word64 Q6_P_vrminuh_PR(Word64 Rxx, Word64 Rss, Word32 Ru)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrminuh_PR __builtin_HEXAGON_A4_vrminuh
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=vrminuw(Rss32,Ru32)
+   C Intrinsic Prototype: Word64 Q6_P_vrminuw_PR(Word64 Rxx, Word64 Rss, Word32 Ru)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrminuw_PR __builtin_HEXAGON_A4_vrminuw
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=vrminw(Rss32,Ru32)
+   C Intrinsic Prototype: Word64 Q6_P_vrminw_PR(Word64 Rxx, Word64 Rss, Word32 Ru)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrminw_PR __builtin_HEXAGON_A4_vrminw
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vaddhub(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word32 Q6_R_vaddhub_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vaddhub_PP_sat __builtin_HEXAGON_A5_vaddhubs
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=all8(Ps4)
+   C Intrinsic Prototype: Byte Q6_p_all8_p(Byte Ps)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_all8_p __builtin_HEXAGON_C2_all8
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=and(Pt4,Ps4)
+   C Intrinsic Prototype: Byte Q6_p_and_pp(Byte Pt, Byte Ps)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_and_pp __builtin_HEXAGON_C2_and
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=and(Pt4,!Ps4)
+   C Intrinsic Prototype: Byte Q6_p_and_pnp(Byte Pt, Byte Ps)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_and_pnp __builtin_HEXAGON_C2_andn
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=any8(Ps4)
+   C Intrinsic Prototype: Byte Q6_p_any8_p(Byte Ps)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_any8_p __builtin_HEXAGON_C2_any8
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=bitsclr(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_bitsclr_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_bitsclr_RR __builtin_HEXAGON_C2_bitsclr
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=bitsclr(Rs32,#u6)
+   C Intrinsic Prototype: Byte Q6_p_bitsclr_RI(Word32 Rs, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_bitsclr_RI __builtin_HEXAGON_C2_bitsclri
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=bitsset(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_bitsset_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_bitsset_RR __builtin_HEXAGON_C2_bitsset
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.eq(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmp_eq_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_eq_RR __builtin_HEXAGON_C2_cmpeq
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.eq(Rs32,#s10)
+   C Intrinsic Prototype: Byte Q6_p_cmp_eq_RI(Word32 Rs, Word32 Is10)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_eq_RI __builtin_HEXAGON_C2_cmpeqi
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.eq(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_cmp_eq_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmp_eq_PP __builtin_HEXAGON_C2_cmpeqp
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.ge(Rs32,#s8)
+   C Intrinsic Prototype: Byte Q6_p_cmp_ge_RI(Word32 Rs, Word32 Is8)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_ge_RI __builtin_HEXAGON_C2_cmpgei
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.geu(Rs32,#u8)
+   C Intrinsic Prototype: Byte Q6_p_cmp_geu_RI(Word32 Rs, Word32 Iu8)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_geu_RI __builtin_HEXAGON_C2_cmpgeui
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.gt(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmp_gt_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_gt_RR __builtin_HEXAGON_C2_cmpgt
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.gt(Rs32,#s10)
+   C Intrinsic Prototype: Byte Q6_p_cmp_gt_RI(Word32 Rs, Word32 Is10)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_gt_RI __builtin_HEXAGON_C2_cmpgti
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.gt(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_cmp_gt_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmp_gt_PP __builtin_HEXAGON_C2_cmpgtp
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.gtu(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmp_gtu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_gtu_RR __builtin_HEXAGON_C2_cmpgtu
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.gtu(Rs32,#u9)
+   C Intrinsic Prototype: Byte Q6_p_cmp_gtu_RI(Word32 Rs, Word32 Iu9)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_gtu_RI __builtin_HEXAGON_C2_cmpgtui
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.gtu(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_cmp_gtu_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_cmp_gtu_PP __builtin_HEXAGON_C2_cmpgtup
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.lt(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmp_lt_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_lt_RR __builtin_HEXAGON_C2_cmplt
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=cmp.ltu(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_cmp_ltu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_cmp_ltu_RR __builtin_HEXAGON_C2_cmpltu
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mask(Pt4)
+   C Intrinsic Prototype: Word64 Q6_P_mask_p(Byte Pt)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mask_p __builtin_HEXAGON_C2_mask
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mux(Pu4,Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_mux_pRR(Byte Pu, Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_mux_pRR __builtin_HEXAGON_C2_mux
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mux(Pu4,#s8,#S8)
+   C Intrinsic Prototype: Word32 Q6_R_mux_pII(Byte Pu, Word32 Is8, Word32 IS8)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_mux_pII __builtin_HEXAGON_C2_muxii
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mux(Pu4,Rs32,#s8)
+   C Intrinsic Prototype: Word32 Q6_R_mux_pRI(Byte Pu, Word32 Rs, Word32 Is8)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_mux_pRI __builtin_HEXAGON_C2_muxir
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mux(Pu4,#s8,Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_mux_pIR(Byte Pu, Word32 Is8, Word32 Rs)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_mux_pIR __builtin_HEXAGON_C2_muxri
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=not(Ps4)
+   C Intrinsic Prototype: Byte Q6_p_not_p(Byte Ps)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_not_p __builtin_HEXAGON_C2_not
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=or(Pt4,Ps4)
+   C Intrinsic Prototype: Byte Q6_p_or_pp(Byte Pt, Byte Ps)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_or_pp __builtin_HEXAGON_C2_or
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=or(Pt4,!Ps4)
+   C Intrinsic Prototype: Byte Q6_p_or_pnp(Byte Pt, Byte Ps)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_or_pnp __builtin_HEXAGON_C2_orn
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=Ps4
+   C Intrinsic Prototype: Byte Q6_p_equals_p(Byte Ps)
+   Instruction Type:      MAPPING
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_equals_p __builtin_HEXAGON_C2_pxfer_map
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=Ps4
+   C Intrinsic Prototype: Word32 Q6_R_equals_p(Byte Ps)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_equals_p __builtin_HEXAGON_C2_tfrpr
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=Rs32
+   C Intrinsic Prototype: Byte Q6_p_equals_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_equals_R __builtin_HEXAGON_C2_tfrrp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vitpack(Ps4,Pt4)
+   C Intrinsic Prototype: Word32 Q6_R_vitpack_pp(Byte Ps, Byte Pt)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vitpack_pp __builtin_HEXAGON_C2_vitpack
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmux(Pu4,Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vmux_pPP(Byte Pu, Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmux_pPP __builtin_HEXAGON_C2_vmux
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=xor(Ps4,Pt4)
+   C Intrinsic Prototype: Byte Q6_p_xor_pp(Byte Ps, Byte Pt)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_xor_pp __builtin_HEXAGON_C2_xor
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=and(Ps4,and(Pt4,Pu4))
+   C Intrinsic Prototype: Byte Q6_p_and_and_ppp(Byte Ps, Byte Pt, Byte Pu)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_and_and_ppp __builtin_HEXAGON_C4_and_and
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=and(Ps4,and(Pt4,!Pu4))
+   C Intrinsic Prototype: Byte Q6_p_and_and_ppnp(Byte Ps, Byte Pt, Byte Pu)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_and_and_ppnp __builtin_HEXAGON_C4_and_andn
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=and(Ps4,or(Pt4,Pu4))
+   C Intrinsic Prototype: Byte Q6_p_and_or_ppp(Byte Ps, Byte Pt, Byte Pu)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_and_or_ppp __builtin_HEXAGON_C4_and_or
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=and(Ps4,or(Pt4,!Pu4))
+   C Intrinsic Prototype: Byte Q6_p_and_or_ppnp(Byte Ps, Byte Pt, Byte Pu)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_and_or_ppnp __builtin_HEXAGON_C4_and_orn
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!cmp.gt(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_not_cmp_gt_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_not_cmp_gt_RR __builtin_HEXAGON_C4_cmplte
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!cmp.gt(Rs32,#s10)
+   C Intrinsic Prototype: Byte Q6_p_not_cmp_gt_RI(Word32 Rs, Word32 Is10)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_not_cmp_gt_RI __builtin_HEXAGON_C4_cmpltei
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!cmp.gtu(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_not_cmp_gtu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_not_cmp_gtu_RR __builtin_HEXAGON_C4_cmplteu
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!cmp.gtu(Rs32,#u9)
+   C Intrinsic Prototype: Byte Q6_p_not_cmp_gtu_RI(Word32 Rs, Word32 Iu9)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_not_cmp_gtu_RI __builtin_HEXAGON_C4_cmplteui
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!cmp.eq(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_not_cmp_eq_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_not_cmp_eq_RR __builtin_HEXAGON_C4_cmpneq
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!cmp.eq(Rs32,#s10)
+   C Intrinsic Prototype: Byte Q6_p_not_cmp_eq_RI(Word32 Rs, Word32 Is10)
+   Instruction Type:      ALU32_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_p_not_cmp_eq_RI __builtin_HEXAGON_C4_cmpneqi
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=fastcorner9(Ps4,Pt4)
+   C Intrinsic Prototype: Byte Q6_p_fastcorner9_pp(Byte Ps, Byte Pt)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_fastcorner9_pp __builtin_HEXAGON_C4_fastcorner9
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!fastcorner9(Ps4,Pt4)
+   C Intrinsic Prototype: Byte Q6_p_not_fastcorner9_pp(Byte Ps, Byte Pt)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_not_fastcorner9_pp __builtin_HEXAGON_C4_fastcorner9_not
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!bitsclr(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_not_bitsclr_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_not_bitsclr_RR __builtin_HEXAGON_C4_nbitsclr
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!bitsclr(Rs32,#u6)
+   C Intrinsic Prototype: Byte Q6_p_not_bitsclr_RI(Word32 Rs, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_not_bitsclr_RI __builtin_HEXAGON_C4_nbitsclri
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!bitsset(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_not_bitsset_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_not_bitsset_RR __builtin_HEXAGON_C4_nbitsset
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=or(Ps4,and(Pt4,Pu4))
+   C Intrinsic Prototype: Byte Q6_p_or_and_ppp(Byte Ps, Byte Pt, Byte Pu)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_or_and_ppp __builtin_HEXAGON_C4_or_and
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=or(Ps4,and(Pt4,!Pu4))
+   C Intrinsic Prototype: Byte Q6_p_or_and_ppnp(Byte Ps, Byte Pt, Byte Pu)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_or_and_ppnp __builtin_HEXAGON_C4_or_andn
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=or(Ps4,or(Pt4,Pu4))
+   C Intrinsic Prototype: Byte Q6_p_or_or_ppp(Byte Ps, Byte Pt, Byte Pu)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_or_or_ppp __builtin_HEXAGON_C4_or_or
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=or(Ps4,or(Pt4,!Pu4))
+   C Intrinsic Prototype: Byte Q6_p_or_or_ppnp(Byte Ps, Byte Pt, Byte Pu)
+   Instruction Type:      CR
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_or_or_ppnp __builtin_HEXAGON_C4_or_orn
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_d2df(Rss32)
+   C Intrinsic Prototype: Float64 Q6_P_convert_d2df_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_d2df_P __builtin_HEXAGON_F2_conv_d2df
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_d2sf(Rss32)
+   C Intrinsic Prototype: Float32 Q6_R_convert_d2sf_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_d2sf_P __builtin_HEXAGON_F2_conv_d2sf
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_df2d(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_convert_df2d_P(Float64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_df2d_P __builtin_HEXAGON_F2_conv_df2d
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_df2d(Rss32):chop
+   C Intrinsic Prototype: Word64 Q6_P_convert_df2d_P_chop(Float64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_df2d_P_chop __builtin_HEXAGON_F2_conv_df2d_chop
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_df2sf(Rss32)
+   C Intrinsic Prototype: Float32 Q6_R_convert_df2sf_P(Float64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_df2sf_P __builtin_HEXAGON_F2_conv_df2sf
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_df2ud(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_convert_df2ud_P(Float64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_df2ud_P __builtin_HEXAGON_F2_conv_df2ud
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_df2ud(Rss32):chop
+   C Intrinsic Prototype: Word64 Q6_P_convert_df2ud_P_chop(Float64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_df2ud_P_chop __builtin_HEXAGON_F2_conv_df2ud_chop
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_df2uw(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_convert_df2uw_P(Float64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_df2uw_P __builtin_HEXAGON_F2_conv_df2uw
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_df2uw(Rss32):chop
+   C Intrinsic Prototype: Word32 Q6_R_convert_df2uw_P_chop(Float64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_df2uw_P_chop __builtin_HEXAGON_F2_conv_df2uw_chop
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_df2w(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_convert_df2w_P(Float64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_df2w_P __builtin_HEXAGON_F2_conv_df2w
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_df2w(Rss32):chop
+   C Intrinsic Prototype: Word32 Q6_R_convert_df2w_P_chop(Float64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_df2w_P_chop __builtin_HEXAGON_F2_conv_df2w_chop
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_sf2d(Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_convert_sf2d_R(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_sf2d_R __builtin_HEXAGON_F2_conv_sf2d
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_sf2d(Rs32):chop
+   C Intrinsic Prototype: Word64 Q6_P_convert_sf2d_R_chop(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_sf2d_R_chop __builtin_HEXAGON_F2_conv_sf2d_chop
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_sf2df(Rs32)
+   C Intrinsic Prototype: Float64 Q6_P_convert_sf2df_R(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_sf2df_R __builtin_HEXAGON_F2_conv_sf2df
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_sf2ud(Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_convert_sf2ud_R(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_sf2ud_R __builtin_HEXAGON_F2_conv_sf2ud
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_sf2ud(Rs32):chop
+   C Intrinsic Prototype: Word64 Q6_P_convert_sf2ud_R_chop(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_sf2ud_R_chop __builtin_HEXAGON_F2_conv_sf2ud_chop
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_sf2uw(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_convert_sf2uw_R(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_sf2uw_R __builtin_HEXAGON_F2_conv_sf2uw
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_sf2uw(Rs32):chop
+   C Intrinsic Prototype: Word32 Q6_R_convert_sf2uw_R_chop(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_sf2uw_R_chop __builtin_HEXAGON_F2_conv_sf2uw_chop
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_sf2w(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_convert_sf2w_R(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_sf2w_R __builtin_HEXAGON_F2_conv_sf2w
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_sf2w(Rs32):chop
+   C Intrinsic Prototype: Word32 Q6_R_convert_sf2w_R_chop(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_sf2w_R_chop __builtin_HEXAGON_F2_conv_sf2w_chop
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_ud2df(Rss32)
+   C Intrinsic Prototype: Float64 Q6_P_convert_ud2df_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_ud2df_P __builtin_HEXAGON_F2_conv_ud2df
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_ud2sf(Rss32)
+   C Intrinsic Prototype: Float32 Q6_R_convert_ud2sf_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_ud2sf_P __builtin_HEXAGON_F2_conv_ud2sf
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_uw2df(Rs32)
+   C Intrinsic Prototype: Float64 Q6_P_convert_uw2df_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_uw2df_R __builtin_HEXAGON_F2_conv_uw2df
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_uw2sf(Rs32)
+   C Intrinsic Prototype: Float32 Q6_R_convert_uw2sf_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_uw2sf_R __builtin_HEXAGON_F2_conv_uw2sf
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=convert_w2df(Rs32)
+   C Intrinsic Prototype: Float64 Q6_P_convert_w2df_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_convert_w2df_R __builtin_HEXAGON_F2_conv_w2df
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=convert_w2sf(Rs32)
+   C Intrinsic Prototype: Float32 Q6_R_convert_w2sf_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_convert_w2sf_R __builtin_HEXAGON_F2_conv_w2sf
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=dfclass(Rss32,#u5)
+   C Intrinsic Prototype: Byte Q6_p_dfclass_PI(Float64 Rss, Word32 Iu5)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_dfclass_PI __builtin_HEXAGON_F2_dfclass
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=dfcmp.eq(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_dfcmp_eq_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_dfcmp_eq_PP __builtin_HEXAGON_F2_dfcmpeq
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=dfcmp.ge(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_dfcmp_ge_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_dfcmp_ge_PP __builtin_HEXAGON_F2_dfcmpge
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=dfcmp.gt(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_dfcmp_gt_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_dfcmp_gt_PP __builtin_HEXAGON_F2_dfcmpgt
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=dfcmp.uo(Rss32,Rtt32)
+   C Intrinsic Prototype: Byte Q6_p_dfcmp_uo_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_dfcmp_uo_PP __builtin_HEXAGON_F2_dfcmpuo
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=dfmake(#u10):neg
+   C Intrinsic Prototype: Float64 Q6_P_dfmake_I_neg(Word32 Iu10)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfmake_I_neg __builtin_HEXAGON_F2_dfimm_n
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=dfmake(#u10):pos
+   C Intrinsic Prototype: Float64 Q6_P_dfmake_I_pos(Word32 Iu10)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfmake_I_pos __builtin_HEXAGON_F2_dfimm_p
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sfadd(Rs32,Rt32)
+   C Intrinsic Prototype: Float32 Q6_R_sfadd_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfadd_RR __builtin_HEXAGON_F2_sfadd
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=sfclass(Rs32,#u5)
+   C Intrinsic Prototype: Byte Q6_p_sfclass_RI(Float32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_sfclass_RI __builtin_HEXAGON_F2_sfclass
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=sfcmp.eq(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_sfcmp_eq_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_sfcmp_eq_RR __builtin_HEXAGON_F2_sfcmpeq
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=sfcmp.ge(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_sfcmp_ge_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_sfcmp_ge_RR __builtin_HEXAGON_F2_sfcmpge
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=sfcmp.gt(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_sfcmp_gt_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_sfcmp_gt_RR __builtin_HEXAGON_F2_sfcmpgt
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=sfcmp.uo(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_sfcmp_uo_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_sfcmp_uo_RR __builtin_HEXAGON_F2_sfcmpuo
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sffixupd(Rs32,Rt32)
+   C Intrinsic Prototype: Float32 Q6_R_sffixupd_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sffixupd_RR __builtin_HEXAGON_F2_sffixupd
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sffixupn(Rs32,Rt32)
+   C Intrinsic Prototype: Float32 Q6_R_sffixupn_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sffixupn_RR __builtin_HEXAGON_F2_sffixupn
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sffixupr(Rs32)
+   C Intrinsic Prototype: Float32 Q6_R_sffixupr_R(Float32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sffixupr_R __builtin_HEXAGON_F2_sffixupr
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=sfmpy(Rs32,Rt32)
+   C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RR(Float32 Rx, Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmpyacc_RR __builtin_HEXAGON_F2_sffma
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=sfmpy(Rs32,Rt32):lib
+   C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RR_lib(Float32 Rx, Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmpyacc_RR_lib __builtin_HEXAGON_F2_sffma_lib
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=sfmpy(Rs32,Rt32,Pu4):scale
+   C Intrinsic Prototype: Float32 Q6_R_sfmpyacc_RRp_scale(Float32 Rx, Float32 Rs, Float32 Rt, Byte Pu)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmpyacc_RRp_scale __builtin_HEXAGON_F2_sffma_sc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=sfmpy(Rs32,Rt32)
+   C Intrinsic Prototype: Float32 Q6_R_sfmpynac_RR(Float32 Rx, Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmpynac_RR __builtin_HEXAGON_F2_sffms
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=sfmpy(Rs32,Rt32):lib
+   C Intrinsic Prototype: Float32 Q6_R_sfmpynac_RR_lib(Float32 Rx, Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmpynac_RR_lib __builtin_HEXAGON_F2_sffms_lib
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sfmake(#u10):neg
+   C Intrinsic Prototype: Float32 Q6_R_sfmake_I_neg(Word32 Iu10)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmake_I_neg __builtin_HEXAGON_F2_sfimm_n
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sfmake(#u10):pos
+   C Intrinsic Prototype: Float32 Q6_R_sfmake_I_pos(Word32 Iu10)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmake_I_pos __builtin_HEXAGON_F2_sfimm_p
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sfmax(Rs32,Rt32)
+   C Intrinsic Prototype: Float32 Q6_R_sfmax_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmax_RR __builtin_HEXAGON_F2_sfmax
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sfmin(Rs32,Rt32)
+   C Intrinsic Prototype: Float32 Q6_R_sfmin_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmin_RR __builtin_HEXAGON_F2_sfmin
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sfmpy(Rs32,Rt32)
+   C Intrinsic Prototype: Float32 Q6_R_sfmpy_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfmpy_RR __builtin_HEXAGON_F2_sfmpy
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=sfsub(Rs32,Rt32)
+   C Intrinsic Prototype: Float32 Q6_R_sfsub_RR(Float32 Rs, Float32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sfsub_RR __builtin_HEXAGON_F2_sfsub
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memb(Rx32++#s4:0:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memb_IM_circ(void** Rx, Word32 Is4_0, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memb_IM_circ __builtin_HEXAGON_L2_loadrb_pci
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memb(Rx32++I:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memb_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memb_M_circ __builtin_HEXAGON_L2_loadrb_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=memd(Rx32++#s4:3:circ(Mu2))
+   C Intrinsic Prototype: Word64 Q6_P_memd_IM_circ(void** Rx, Word32 Is4_3, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_P_memd_IM_circ __builtin_HEXAGON_L2_loadrd_pci
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=memd(Rx32++I:circ(Mu2))
+   C Intrinsic Prototype: Word64 Q6_P_memd_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_P_memd_M_circ __builtin_HEXAGON_L2_loadrd_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memh(Rx32++#s4:1:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memh_IM_circ(void** Rx, Word32 Is4_1, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memh_IM_circ __builtin_HEXAGON_L2_loadrh_pci
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memh(Rx32++I:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memh_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memh_M_circ __builtin_HEXAGON_L2_loadrh_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memw(Rx32++#s4:2:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memw_IM_circ(void** Rx, Word32 Is4_2, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memw_IM_circ __builtin_HEXAGON_L2_loadri_pci
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memw(Rx32++I:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memw_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memw_M_circ __builtin_HEXAGON_L2_loadri_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memub(Rx32++#s4:0:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memub_IM_circ(void** Rx, Word32 Is4_0, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memub_IM_circ __builtin_HEXAGON_L2_loadrub_pci
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memub(Rx32++I:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memub_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memub_M_circ __builtin_HEXAGON_L2_loadrub_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memuh(Rx32++#s4:1:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memuh_IM_circ(void** Rx, Word32 Is4_1, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memuh_IM_circ __builtin_HEXAGON_L2_loadruh_pci
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=memuh(Rx32++I:circ(Mu2))
+   C Intrinsic Prototype: Word32 Q6_R_memuh_M_circ(void** Rx, Word32 Mu, void* BaseAddress)
+   Instruction Type:      LD
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_R_memuh_M_circ __builtin_HEXAGON_L2_loadruh_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=add(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_addacc_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_addacc_RR __builtin_HEXAGON_M2_acci
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=add(Rs32,#s8)
+   C Intrinsic Prototype: Word32 Q6_R_addacc_RI(Word32 Rx, Word32 Rs, Word32 Is8)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_addacc_RI __builtin_HEXAGON_M2_accii
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpyi(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyiacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpyiacc_RR __builtin_HEXAGON_M2_cmaci_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpyr(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyracc_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpyracc_RR __builtin_HEXAGON_M2_cmacr_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpy(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpyacc_RR_sat __builtin_HEXAGON_M2_cmacs_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpy(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpyacc_RR_s1_sat __builtin_HEXAGON_M2_cmacs_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpy(Rs32,Rt32*):sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_conj_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpyacc_RR_conj_sat __builtin_HEXAGON_M2_cmacsc_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpy(Rs32,Rt32*):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpyacc_RR_conj_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpyacc_RR_conj_s1_sat __builtin_HEXAGON_M2_cmacsc_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpyi(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyi_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpyi_RR __builtin_HEXAGON_M2_cmpyi_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpyr(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyr_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpyr_RR __builtin_HEXAGON_M2_cmpyr_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpy(Rs32,Rt32):rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cmpy_RR_rnd_sat __builtin_HEXAGON_M2_cmpyrs_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpy(Rs32,Rt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_s1_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cmpy_RR_s1_rnd_sat __builtin_HEXAGON_M2_cmpyrs_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpy(Rs32,Rt32*):rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_conj_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cmpy_RR_conj_rnd_sat __builtin_HEXAGON_M2_cmpyrsc_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpy(Rs32,Rt32*):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpy_RR_conj_s1_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cmpy_RR_conj_s1_rnd_sat __builtin_HEXAGON_M2_cmpyrsc_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpy(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpy_RR_sat __builtin_HEXAGON_M2_cmpys_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpy(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpy_RR_s1_sat __builtin_HEXAGON_M2_cmpys_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpy(Rs32,Rt32*):sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_conj_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpy_RR_conj_sat __builtin_HEXAGON_M2_cmpysc_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpy(Rs32,Rt32*):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpy_RR_conj_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpy_RR_conj_s1_sat __builtin_HEXAGON_M2_cmpysc_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=cmpy(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpynac_RR_sat __builtin_HEXAGON_M2_cnacs_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=cmpy(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpynac_RR_s1_sat __builtin_HEXAGON_M2_cnacs_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=cmpy(Rs32,Rt32*):sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_conj_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpynac_RR_conj_sat __builtin_HEXAGON_M2_cnacsc_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=cmpy(Rs32,Rt32*):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_cmpynac_RR_conj_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cmpynac_RR_conj_s1_sat __builtin_HEXAGON_M2_cnacsc_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpy(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_mpyacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyacc_RR __builtin_HEXAGON_M2_dpmpyss_acc_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpy(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_mpynac_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpynac_RR __builtin_HEXAGON_M2_dpmpyss_nac_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32,Rt32):rnd
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RR_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RR_rnd __builtin_HEXAGON_M2_dpmpyss_rnd_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RR __builtin_HEXAGON_M2_dpmpyss_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpyu(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyuacc_RR __builtin_HEXAGON_M2_dpmpyuu_acc_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpyu(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_mpyunac_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyunac_RR __builtin_HEXAGON_M2_dpmpyuu_nac_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpyu(Rs32,Rt32)
+   C Intrinsic Prototype: UWord64 Q6_P_mpyu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyu_RR __builtin_HEXAGON_M2_dpmpyuu_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32,Rt32.h):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RRh_s1_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RRh_s1_rnd_sat __builtin_HEXAGON_M2_hmmpyh_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32,Rt32.h):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RRh_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RRh_s1_sat __builtin_HEXAGON_M2_hmmpyh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32,Rt32.l):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RRl_s1_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RRl_s1_rnd_sat __builtin_HEXAGON_M2_hmmpyl_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32,Rt32.l):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RRl_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RRl_s1_sat __builtin_HEXAGON_M2_hmmpyl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyi(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_mpyiacc_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyiacc_RR __builtin_HEXAGON_M2_maci
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyi(Rs32,#u8)
+   C Intrinsic Prototype: Word32 Q6_R_mpyinac_RI(Word32 Rx, Word32 Rs, Word32 Iu8)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyinac_RI __builtin_HEXAGON_M2_macsin
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyi(Rs32,#u8)
+   C Intrinsic Prototype: Word32 Q6_R_mpyiacc_RI(Word32 Rx, Word32 Rs, Word32 Iu8)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyiacc_RI __builtin_HEXAGON_M2_macsip
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpywoh(Rss32,Rtt32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywohacc_PP_rnd_sat __builtin_HEXAGON_M2_mmachs_rs0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpywoh(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywohacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmachs_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpywoh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywohacc_PP_sat __builtin_HEXAGON_M2_mmachs_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpywoh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywohacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywohacc_PP_s1_sat __builtin_HEXAGON_M2_mmachs_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyweh(Rss32,Rtt32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywehacc_PP_rnd_sat __builtin_HEXAGON_M2_mmacls_rs0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyweh(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywehacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmacls_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyweh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywehacc_PP_sat __builtin_HEXAGON_M2_mmacls_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyweh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywehacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywehacc_PP_s1_sat __builtin_HEXAGON_M2_mmacls_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpywouh(Rss32,Rtt32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywouhacc_PP_rnd_sat __builtin_HEXAGON_M2_mmacuhs_rs0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpywouh(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywouhacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmacuhs_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpywouh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywouhacc_PP_sat __builtin_HEXAGON_M2_mmacuhs_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpywouh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywouhacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywouhacc_PP_s1_sat __builtin_HEXAGON_M2_mmacuhs_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyweuh(Rss32,Rtt32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweuhacc_PP_rnd_sat __builtin_HEXAGON_M2_mmaculs_rs0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_s1_rnd_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweuhacc_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmaculs_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyweuh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweuhacc_PP_sat __builtin_HEXAGON_M2_mmaculs_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyweuh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweuhacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweuhacc_PP_s1_sat __builtin_HEXAGON_M2_mmaculs_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpywoh(Rss32,Rtt32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywoh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyh_rs0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpywoh(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywoh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyh_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpywoh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywoh_PP_sat __builtin_HEXAGON_M2_mmpyh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpywoh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywoh_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywoh_PP_s1_sat __builtin_HEXAGON_M2_mmpyh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyweh(Rss32,Rtt32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyl_rs0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyweh(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyl_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyweh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweh_PP_sat __builtin_HEXAGON_M2_mmpyl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyweh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweh_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweh_PP_s1_sat __builtin_HEXAGON_M2_mmpyl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpywouh(Rss32,Rtt32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywouh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyuh_rs0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpywouh(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywouh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyuh_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpywouh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywouh_PP_sat __builtin_HEXAGON_M2_mmpyuh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpywouh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpywouh_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpywouh_PP_s1_sat __builtin_HEXAGON_M2_mmpyuh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyweuh(Rss32,Rtt32):rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweuh_PP_rnd_sat __builtin_HEXAGON_M2_mmpyul_rs0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyweuh(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweuh_PP_s1_rnd_sat __builtin_HEXAGON_M2_mmpyul_rs1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyweuh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweuh_PP_sat __builtin_HEXAGON_M2_mmpyul_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyweuh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyweuh_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyweuh_PP_s1_sat __builtin_HEXAGON_M2_mmpyul_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RhRh __builtin_HEXAGON_M2_mpy_acc_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RhRh_s1 __builtin_HEXAGON_M2_mpy_acc_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RhRl __builtin_HEXAGON_M2_mpy_acc_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RhRl_s1 __builtin_HEXAGON_M2_mpy_acc_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RlRh __builtin_HEXAGON_M2_mpy_acc_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RlRh_s1 __builtin_HEXAGON_M2_mpy_acc_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RlRl __builtin_HEXAGON_M2_mpy_acc_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RlRl_s1 __builtin_HEXAGON_M2_mpy_acc_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.h):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RhRh_sat __builtin_HEXAGON_M2_mpy_acc_sat_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.h):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.l):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RhRl_sat __builtin_HEXAGON_M2_mpy_acc_sat_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.h,Rt32.l):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RhRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.h):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RlRh_sat __builtin_HEXAGON_M2_mpy_acc_sat_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.h):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.l):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RlRl_sat __builtin_HEXAGON_M2_mpy_acc_sat_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32.l,Rt32.l):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RlRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_acc_sat_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRh __builtin_HEXAGON_M2_mpy_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRh_s1 __builtin_HEXAGON_M2_mpy_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRl __builtin_HEXAGON_M2_mpy_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRl_s1 __builtin_HEXAGON_M2_mpy_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRh __builtin_HEXAGON_M2_mpy_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRh_s1 __builtin_HEXAGON_M2_mpy_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRl __builtin_HEXAGON_M2_mpy_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRl_s1 __builtin_HEXAGON_M2_mpy_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RhRh __builtin_HEXAGON_M2_mpy_nac_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RhRh_s1 __builtin_HEXAGON_M2_mpy_nac_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RhRl __builtin_HEXAGON_M2_mpy_nac_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RhRl_s1 __builtin_HEXAGON_M2_mpy_nac_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RlRh __builtin_HEXAGON_M2_mpy_nac_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RlRh_s1 __builtin_HEXAGON_M2_mpy_nac_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RlRl __builtin_HEXAGON_M2_mpy_nac_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RlRl_s1 __builtin_HEXAGON_M2_mpy_nac_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.h,Rt32.h):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RhRh_sat __builtin_HEXAGON_M2_mpy_nac_sat_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.h,Rt32.h):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.h,Rt32.l):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RhRl_sat __builtin_HEXAGON_M2_mpy_nac_sat_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.h,Rt32.l):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RhRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.l,Rt32.h):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RlRh_sat __builtin_HEXAGON_M2_mpy_nac_sat_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.l,Rt32.h):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRh_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.l,Rt32.l):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RlRl_sat __builtin_HEXAGON_M2_mpy_nac_sat_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32.l,Rt32.l):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RlRl_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_nac_sat_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.h):rnd
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRh_rnd __builtin_HEXAGON_M2_mpy_rnd_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRh_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.l):rnd
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRl_rnd __builtin_HEXAGON_M2_mpy_rnd_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRl_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.h):rnd
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRh_rnd __builtin_HEXAGON_M2_mpy_rnd_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRh_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.l):rnd
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRl_rnd __builtin_HEXAGON_M2_mpy_rnd_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRl_s1_rnd __builtin_HEXAGON_M2_mpy_rnd_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.h):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRh_sat __builtin_HEXAGON_M2_mpy_sat_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.h):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRh_s1_sat __builtin_HEXAGON_M2_mpy_sat_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.l):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRl_sat __builtin_HEXAGON_M2_mpy_sat_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.l):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRl_s1_sat __builtin_HEXAGON_M2_mpy_sat_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.h):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRh_sat __builtin_HEXAGON_M2_mpy_sat_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.h):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRh_s1_sat __builtin_HEXAGON_M2_mpy_sat_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.l):sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRl_sat __builtin_HEXAGON_M2_mpy_sat_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.l):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRl_s1_sat __builtin_HEXAGON_M2_mpy_sat_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.h):rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRh_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.h):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRh_s1_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRh_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.l):rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRl_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.h,Rt32.l):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RhRl_s1_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RhRl_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.h):rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRh_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.h):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRh_s1_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRh_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.l):rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRl_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32.l,Rt32.l):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RlRl_s1_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RlRl_s1_rnd_sat __builtin_HEXAGON_M2_mpy_sat_rnd_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RR __builtin_HEXAGON_M2_mpy_up
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32,Rt32):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RR_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RR_s1 __builtin_HEXAGON_M2_mpy_up_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpy(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpy_RR_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpy_RR_s1_sat __builtin_HEXAGON_M2_mpy_up_s1_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpy(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyacc_RhRh __builtin_HEXAGON_M2_mpyd_acc_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpy(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyacc_RhRh_s1 __builtin_HEXAGON_M2_mpyd_acc_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpy(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyacc_RhRl __builtin_HEXAGON_M2_mpyd_acc_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpy(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyacc_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyacc_RhRl_s1 __builtin_HEXAGON_M2_mpyd_acc_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpy(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyacc_RlRh __builtin_HEXAGON_M2_mpyd_acc_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpy(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyacc_RlRh_s1 __builtin_HEXAGON_M2_mpyd_acc_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpy(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyacc_RlRl __builtin_HEXAGON_M2_mpyd_acc_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpy(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyacc_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyacc_RlRl_s1 __builtin_HEXAGON_M2_mpyd_acc_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RhRh __builtin_HEXAGON_M2_mpyd_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RhRh_s1 __builtin_HEXAGON_M2_mpyd_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RhRl __builtin_HEXAGON_M2_mpyd_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RhRl_s1 __builtin_HEXAGON_M2_mpyd_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RlRh __builtin_HEXAGON_M2_mpyd_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RlRh_s1 __builtin_HEXAGON_M2_mpyd_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RlRl __builtin_HEXAGON_M2_mpyd_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RlRl_s1 __builtin_HEXAGON_M2_mpyd_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpy(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpynac_RhRh __builtin_HEXAGON_M2_mpyd_nac_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpy(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpynac_RhRh_s1 __builtin_HEXAGON_M2_mpyd_nac_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpy(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpynac_RhRl __builtin_HEXAGON_M2_mpyd_nac_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpy(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpynac_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpynac_RhRl_s1 __builtin_HEXAGON_M2_mpyd_nac_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpy(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpynac_RlRh __builtin_HEXAGON_M2_mpyd_nac_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpy(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpynac_RlRh_s1 __builtin_HEXAGON_M2_mpyd_nac_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpy(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpynac_RlRl __builtin_HEXAGON_M2_mpyd_nac_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpy(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpynac_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpynac_RlRl_s1 __builtin_HEXAGON_M2_mpyd_nac_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.h,Rt32.h):rnd
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RhRh_rnd __builtin_HEXAGON_M2_mpyd_rnd_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.h,Rt32.h):<<1:rnd
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RhRh_s1_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RhRh_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.h,Rt32.l):rnd
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RhRl_rnd __builtin_HEXAGON_M2_mpyd_rnd_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.h,Rt32.l):<<1:rnd
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RhRl_s1_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RhRl_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.l,Rt32.h):rnd
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RlRh_rnd __builtin_HEXAGON_M2_mpyd_rnd_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.l,Rt32.h):<<1:rnd
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RlRh_s1_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RlRh_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.l,Rt32.l):rnd
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RlRl_rnd __builtin_HEXAGON_M2_mpyd_rnd_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpy(Rs32.l,Rt32.l):<<1:rnd
+   C Intrinsic Prototype: Word64 Q6_P_mpy_RlRl_s1_rnd(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpy_RlRl_s1_rnd __builtin_HEXAGON_M2_mpyd_rnd_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyi(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_mpyi_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyi_RR __builtin_HEXAGON_M2_mpyi
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyi(Rs32,#m9)
+   C Intrinsic Prototype: Word32 Q6_R_mpyi_RI(Word32 Rs, Word32 Im9)
+   Instruction Type:      M
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_mpyi_RI __builtin_HEXAGON_M2_mpysmi
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpysu(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_mpysu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpysu_RR __builtin_HEXAGON_M2_mpysu_up
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyu(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRh(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyuacc_RhRh __builtin_HEXAGON_M2_mpyu_acc_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyu(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyuacc_RhRh_s1 __builtin_HEXAGON_M2_mpyu_acc_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyu(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRl(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyuacc_RhRl __builtin_HEXAGON_M2_mpyu_acc_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyu(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyuacc_RhRl_s1 __builtin_HEXAGON_M2_mpyu_acc_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyu(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRh(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyuacc_RlRh __builtin_HEXAGON_M2_mpyu_acc_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyu(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyuacc_RlRh_s1 __builtin_HEXAGON_M2_mpyu_acc_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyu(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRl(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyuacc_RlRl __builtin_HEXAGON_M2_mpyu_acc_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpyu(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyuacc_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyuacc_RlRl_s1 __builtin_HEXAGON_M2_mpyu_acc_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyu(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRh(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyu_RhRh __builtin_HEXAGON_M2_mpyu_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyu(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRh_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyu_RhRh_s1 __builtin_HEXAGON_M2_mpyu_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyu(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRl(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyu_RhRl __builtin_HEXAGON_M2_mpyu_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyu(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: UWord32 Q6_R_mpyu_RhRl_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyu_RhRl_s1 __builtin_HEXAGON_M2_mpyu_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyu(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRh(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyu_RlRh __builtin_HEXAGON_M2_mpyu_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyu(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRh_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyu_RlRh_s1 __builtin_HEXAGON_M2_mpyu_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyu(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRl(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyu_RlRl __builtin_HEXAGON_M2_mpyu_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyu(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: UWord32 Q6_R_mpyu_RlRl_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyu_RlRl_s1 __builtin_HEXAGON_M2_mpyu_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyu(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRh(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyunac_RhRh __builtin_HEXAGON_M2_mpyu_nac_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyu(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyunac_RhRh_s1 __builtin_HEXAGON_M2_mpyu_nac_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyu(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRl(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyunac_RhRl __builtin_HEXAGON_M2_mpyu_nac_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyu(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyunac_RhRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyunac_RhRl_s1 __builtin_HEXAGON_M2_mpyu_nac_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyu(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRh(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyunac_RlRh __builtin_HEXAGON_M2_mpyu_nac_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyu(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRh_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyunac_RlRh_s1 __builtin_HEXAGON_M2_mpyu_nac_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyu(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRl(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyunac_RlRl __builtin_HEXAGON_M2_mpyu_nac_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyu(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word32 Q6_R_mpyunac_RlRl_s1(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyunac_RlRl_s1 __builtin_HEXAGON_M2_mpyu_nac_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyu(Rs32,Rt32)
+   C Intrinsic Prototype: UWord32 Q6_R_mpyu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyu_RR __builtin_HEXAGON_M2_mpyu_up
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpyu(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyuacc_RhRh __builtin_HEXAGON_M2_mpyud_acc_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpyu(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyuacc_RhRh_s1 __builtin_HEXAGON_M2_mpyud_acc_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpyu(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyuacc_RhRl __builtin_HEXAGON_M2_mpyud_acc_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpyu(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyuacc_RhRl_s1 __builtin_HEXAGON_M2_mpyud_acc_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpyu(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyuacc_RlRh __builtin_HEXAGON_M2_mpyud_acc_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpyu(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyuacc_RlRh_s1 __builtin_HEXAGON_M2_mpyud_acc_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpyu(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyuacc_RlRl __builtin_HEXAGON_M2_mpyud_acc_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=mpyu(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyuacc_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyuacc_RlRl_s1 __builtin_HEXAGON_M2_mpyud_acc_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpyu(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRh(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyu_RhRh __builtin_HEXAGON_M2_mpyud_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpyu(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRh_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyu_RhRh_s1 __builtin_HEXAGON_M2_mpyud_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpyu(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRl(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyu_RhRl __builtin_HEXAGON_M2_mpyud_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpyu(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: UWord64 Q6_P_mpyu_RhRl_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyu_RhRl_s1 __builtin_HEXAGON_M2_mpyud_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpyu(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRh(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyu_RlRh __builtin_HEXAGON_M2_mpyud_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpyu(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRh_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyu_RlRh_s1 __builtin_HEXAGON_M2_mpyud_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpyu(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRl(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyu_RlRl __builtin_HEXAGON_M2_mpyud_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=mpyu(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: UWord64 Q6_P_mpyu_RlRl_s1(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyu_RlRl_s1 __builtin_HEXAGON_M2_mpyud_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpyu(Rs32.h,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRh(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyunac_RhRh __builtin_HEXAGON_M2_mpyud_nac_hh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpyu(Rs32.h,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyunac_RhRh_s1 __builtin_HEXAGON_M2_mpyud_nac_hh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpyu(Rs32.h,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRl(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyunac_RhRl __builtin_HEXAGON_M2_mpyud_nac_hl_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpyu(Rs32.h,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyunac_RhRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyunac_RhRl_s1 __builtin_HEXAGON_M2_mpyud_nac_hl_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpyu(Rs32.l,Rt32.h)
+   C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRh(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyunac_RlRh __builtin_HEXAGON_M2_mpyud_nac_lh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpyu(Rs32.l,Rt32.h):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRh_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyunac_RlRh_s1 __builtin_HEXAGON_M2_mpyud_nac_lh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpyu(Rs32.l,Rt32.l)
+   C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRl(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyunac_RlRl __builtin_HEXAGON_M2_mpyud_nac_ll_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=mpyu(Rs32.l,Rt32.l):<<1
+   C Intrinsic Prototype: Word64 Q6_P_mpyunac_RlRl_s1(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_mpyunac_RlRl_s1 __builtin_HEXAGON_M2_mpyud_nac_ll_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mpyui(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_mpyui_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_mpyui_RR __builtin_HEXAGON_M2_mpyui
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=add(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_addnac_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_addnac_RR __builtin_HEXAGON_M2_nacci
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=add(Rs32,#s8)
+   C Intrinsic Prototype: Word32 Q6_R_addnac_RI(Word32 Rx, Word32 Rs, Word32 Is8)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_addnac_RI __builtin_HEXAGON_M2_naccii
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=sub(Rt32,Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_subacc_RR(Word32 Rx, Word32 Rt, Word32 Rs)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_subacc_RR __builtin_HEXAGON_M2_subacc
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vabsdiffh(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vabsdiffh_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vabsdiffh_PP __builtin_HEXAGON_M2_vabsdiffh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vabsdiffw(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vabsdiffw_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vabsdiffw_PP __builtin_HEXAGON_M2_vabsdiffw
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vcmpyi(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vcmpyiacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vcmpyiacc_PP_sat __builtin_HEXAGON_M2_vcmac_s0_sat_i
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vcmpyr(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vcmpyracc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vcmpyracc_PP_sat __builtin_HEXAGON_M2_vcmac_s0_sat_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vcmpyi(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vcmpyi_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vcmpyi_PP_sat __builtin_HEXAGON_M2_vcmpy_s0_sat_i
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vcmpyr(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vcmpyr_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vcmpyr_PP_sat __builtin_HEXAGON_M2_vcmpy_s0_sat_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vcmpyi(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vcmpyi_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vcmpyi_PP_s1_sat __builtin_HEXAGON_M2_vcmpy_s1_sat_i
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vcmpyr(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vcmpyr_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vcmpyr_PP_s1_sat __builtin_HEXAGON_M2_vcmpy_s1_sat_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vdmpy(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vdmpyacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vdmpyacc_PP_sat __builtin_HEXAGON_M2_vdmacs_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vdmpy(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vdmpyacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vdmpyacc_PP_s1_sat __builtin_HEXAGON_M2_vdmacs_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vdmpy(Rss32,Rtt32):rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_vdmpy_PP_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vdmpy_PP_rnd_sat __builtin_HEXAGON_M2_vdmpyrs_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vdmpy(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_vdmpy_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vdmpy_PP_s1_rnd_sat __builtin_HEXAGON_M2_vdmpyrs_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vdmpy(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vdmpy_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vdmpy_PP_sat __builtin_HEXAGON_M2_vdmpys_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vdmpy(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vdmpy_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vdmpy_PP_s1_sat __builtin_HEXAGON_M2_vdmpys_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyh(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyhacc_RR __builtin_HEXAGON_M2_vmac2
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyeh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyehacc_PP __builtin_HEXAGON_M2_vmac2es
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyeh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyehacc_PP_sat __builtin_HEXAGON_M2_vmac2es_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyeh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyehacc_PP_s1_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyehacc_PP_s1_sat __builtin_HEXAGON_M2_vmac2es_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyh(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyhacc_RR_sat __builtin_HEXAGON_M2_vmac2s_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyh(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyhacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyhacc_RR_s1_sat __builtin_HEXAGON_M2_vmac2s_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyhsu(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyhsuacc_RR_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyhsuacc_RR_sat __builtin_HEXAGON_M2_vmac2su_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpyhsu(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyhsuacc_RR_s1_sat(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyhsuacc_RR_s1_sat __builtin_HEXAGON_M2_vmac2su_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyeh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyeh_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyeh_PP_sat __builtin_HEXAGON_M2_vmpy2es_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyeh(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyeh_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyeh_PP_s1_sat __builtin_HEXAGON_M2_vmpy2es_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyh(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyh_RR_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyh_RR_sat __builtin_HEXAGON_M2_vmpy2s_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vmpyh(Rs32,Rt32):rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_vmpyh_RR_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vmpyh_RR_rnd_sat __builtin_HEXAGON_M2_vmpy2s_s0pack
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyh(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyh_RR_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyh_RR_s1_sat __builtin_HEXAGON_M2_vmpy2s_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vmpyh(Rs32,Rt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_vmpyh_RR_s1_rnd_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vmpyh_RR_s1_rnd_sat __builtin_HEXAGON_M2_vmpy2s_s1pack
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyhsu(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyhsu_RR_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyhsu_RR_sat __builtin_HEXAGON_M2_vmpy2su_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpyhsu(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vmpyhsu_RR_s1_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpyhsu_RR_s1_sat __builtin_HEXAGON_M2_vmpy2su_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vraddh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word32 Q6_R_vraddh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vraddh_PP __builtin_HEXAGON_M2_vraddh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vradduh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word32 Q6_R_vradduh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vradduh_PP __builtin_HEXAGON_M2_vradduh
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrcmpyi(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpyiacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcmpyiacc_PP __builtin_HEXAGON_M2_vrcmaci_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrcmpyi(Rss32,Rtt32*)
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpyiacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcmpyiacc_PP_conj __builtin_HEXAGON_M2_vrcmaci_s0c
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrcmpyr(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpyracc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcmpyracc_PP __builtin_HEXAGON_M2_vrcmacr_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrcmpyr(Rss32,Rtt32*)
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpyracc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcmpyracc_PP_conj __builtin_HEXAGON_M2_vrcmacr_s0c
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrcmpyi(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpyi_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcmpyi_PP __builtin_HEXAGON_M2_vrcmpyi_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrcmpyi(Rss32,Rtt32*)
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpyi_PP_conj(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcmpyi_PP_conj __builtin_HEXAGON_M2_vrcmpyi_s0c
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrcmpyr(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpyr_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcmpyr_PP __builtin_HEXAGON_M2_vrcmpyr_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrcmpyr(Rss32,Rtt32*)
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpyr_PP_conj(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcmpyr_PP_conj __builtin_HEXAGON_M2_vrcmpyr_s0c
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrcmpys(Rss32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpysacc_PR_s1_sat(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_vrcmpysacc_PR_s1_sat __builtin_HEXAGON_M2_vrcmpys_acc_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrcmpys(Rss32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vrcmpys_PR_s1_sat(Word64 Rss, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_vrcmpys_PR_s1_sat __builtin_HEXAGON_M2_vrcmpys_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vrcmpys(Rss32,Rt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_vrcmpys_PR_s1_rnd_sat(Word64 Rss, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vrcmpys_PR_s1_rnd_sat __builtin_HEXAGON_M2_vrcmpys_s1rp
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrmpyh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpyhacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpyhacc_PP __builtin_HEXAGON_M2_vrmac_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrmpyh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpyh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpyh_PP __builtin_HEXAGON_M2_vrmpy_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32^=xor(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_xorxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_xorxacc_RR __builtin_HEXAGON_M2_xor_xacc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=and(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_andand_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_andand_RR __builtin_HEXAGON_M4_and_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=and(Rs32,~Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_andand_RnR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_andand_RnR __builtin_HEXAGON_M4_and_andn
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=or(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_orand_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_orand_RR __builtin_HEXAGON_M4_and_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=xor(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_xorand_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_xorand_RR __builtin_HEXAGON_M4_and_xor
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyiwh(Rss32,Rt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyiwh_PR_s1_rnd_sat(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cmpyiwh_PR_s1_rnd_sat __builtin_HEXAGON_M4_cmpyi_wh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyiwh(Rss32,Rt32*):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyiwh_PR_conj_s1_rnd_sat(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cmpyiwh_PR_conj_s1_rnd_sat __builtin_HEXAGON_M4_cmpyi_whc
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyrwh(Rss32,Rt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyrwh_PR_s1_rnd_sat(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cmpyrwh_PR_s1_rnd_sat __builtin_HEXAGON_M4_cmpyr_wh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyrwh(Rss32,Rt32*):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyrwh_PR_conj_s1_rnd_sat(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cmpyrwh_PR_conj_s1_rnd_sat __builtin_HEXAGON_M4_cmpyr_whc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=mpy(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpyacc_RR_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyacc_RR_s1_sat __builtin_HEXAGON_M4_mac_up_s1_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(#u6,mpyi(Rs32,#U6))
+   C Intrinsic Prototype: Word32 Q6_R_add_mpyi_IRI(Word32 Iu6, Word32 Rs, Word32 IU6)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_mpyi_IRI __builtin_HEXAGON_M4_mpyri_addi
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Ru32,mpyi(Rs32,#u6))
+   C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RRI(Word32 Ru, Word32 Rs, Word32 Iu6)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_mpyi_RRI __builtin_HEXAGON_M4_mpyri_addr
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Ru32,mpyi(#u6:2,Rs32))
+   C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RIR(Word32 Ru, Word32 Iu6_2, Word32 Rs)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_mpyi_RIR __builtin_HEXAGON_M4_mpyri_addr_u2
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(#u6,mpyi(Rs32,Rt32))
+   C Intrinsic Prototype: Word32 Q6_R_add_mpyi_IRR(Word32 Iu6, Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_mpyi_IRR __builtin_HEXAGON_M4_mpyrr_addi
+
+/* ==========================================================================
+   Assembly Syntax:       Ry32=add(Ru32,mpyi(Ry32,Rs32))
+   C Intrinsic Prototype: Word32 Q6_R_add_mpyi_RRR(Word32 Ru, Word32 Ry, Word32 Rs)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_mpyi_RRR __builtin_HEXAGON_M4_mpyrr_addr
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpy(Rs32,Rt32):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_mpynac_RR_s1_sat(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpynac_RR_s1_sat __builtin_HEXAGON_M4_nac_up_s1_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=and(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_andor_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_andor_RR __builtin_HEXAGON_M4_or_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=and(Rs32,~Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_andor_RnR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_andor_RnR __builtin_HEXAGON_M4_or_andn
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=or(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_oror_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_oror_RR __builtin_HEXAGON_M4_or_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=xor(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_xoror_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_xoror_RR __builtin_HEXAGON_M4_or_xor
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=pmpyw(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_pmpyw_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_pmpyw_RR __builtin_HEXAGON_M4_pmpyw
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=pmpyw(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_pmpywxacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_pmpywxacc_RR __builtin_HEXAGON_M4_pmpyw_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vpmpyh(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vpmpyh_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vpmpyh_RR __builtin_HEXAGON_M4_vpmpyh
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=vpmpyh(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vpmpyhxacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vpmpyhxacc_RR __builtin_HEXAGON_M4_vpmpyh_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrmpyweh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpywehacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpywehacc_PP __builtin_HEXAGON_M4_vrmpyeh_acc_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrmpyweh(Rss32,Rtt32):<<1
+   C Intrinsic Prototype: Word64 Q6_P_vrmpywehacc_PP_s1(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpywehacc_PP_s1 __builtin_HEXAGON_M4_vrmpyeh_acc_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrmpyweh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpyweh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpyweh_PP __builtin_HEXAGON_M4_vrmpyeh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrmpyweh(Rss32,Rtt32):<<1
+   C Intrinsic Prototype: Word64 Q6_P_vrmpyweh_PP_s1(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpyweh_PP_s1 __builtin_HEXAGON_M4_vrmpyeh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrmpywoh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpywohacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpywohacc_PP __builtin_HEXAGON_M4_vrmpyoh_acc_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrmpywoh(Rss32,Rtt32):<<1
+   C Intrinsic Prototype: Word64 Q6_P_vrmpywohacc_PP_s1(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpywohacc_PP_s1 __builtin_HEXAGON_M4_vrmpyoh_acc_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrmpywoh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpywoh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpywoh_PP __builtin_HEXAGON_M4_vrmpyoh_s0
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrmpywoh(Rss32,Rtt32):<<1
+   C Intrinsic Prototype: Word64 Q6_P_vrmpywoh_PP_s1(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpywoh_PP_s1 __builtin_HEXAGON_M4_vrmpyoh_s1
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32^=and(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_andxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_andxacc_RR __builtin_HEXAGON_M4_xor_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32^=and(Rs32,~Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_andxacc_RnR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_andxacc_RnR __builtin_HEXAGON_M4_xor_andn
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32^=or(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_orxacc_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_orxacc_RR __builtin_HEXAGON_M4_xor_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=xor(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_xorxacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_xorxacc_PP __builtin_HEXAGON_M4_xor_xacc
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vdmpybsu(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vdmpybsuacc_PP_sat(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vdmpybsuacc_PP_sat __builtin_HEXAGON_M5_vdmacbsu
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vdmpybsu(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vdmpybsu_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vdmpybsu_PP_sat __builtin_HEXAGON_M5_vdmpybsu
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpybsu(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vmpybsuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpybsuacc_RR __builtin_HEXAGON_M5_vmacbsu
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vmpybu(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vmpybuacc_RR(Word64 Rxx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpybuacc_RR __builtin_HEXAGON_M5_vmacbuu
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpybsu(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vmpybsu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpybsu_RR __builtin_HEXAGON_M5_vmpybsu
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vmpybu(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vmpybu_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vmpybu_RR __builtin_HEXAGON_M5_vmpybuu
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrmpybsu(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpybsuacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpybsuacc_PP __builtin_HEXAGON_M5_vrmacbsu
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrmpybu(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpybuacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpybuacc_PP __builtin_HEXAGON_M5_vrmacbuu
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrmpybsu(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpybsu_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpybsu_PP __builtin_HEXAGON_M5_vrmpybsu
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrmpybu(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrmpybu_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrmpybu_PP __builtin_HEXAGON_M5_vrmpybuu
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=addasl(Rt32,Rs32,#u3)
+   C Intrinsic Prototype: Word32 Q6_R_addasl_RRI(Word32 Rt, Word32 Rs, Word32 Iu3)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_addasl_RRI __builtin_HEXAGON_S2_addasl_rrri
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=asl(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_asl_PI(Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asl_PI __builtin_HEXAGON_S2_asl_i_p
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=asl(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_aslacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_aslacc_PI __builtin_HEXAGON_S2_asl_i_p_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32&=asl(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_asland_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asland_PI __builtin_HEXAGON_S2_asl_i_p_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=asl(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_aslnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_aslnac_PI __builtin_HEXAGON_S2_asl_i_p_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32|=asl(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_aslor_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_aslor_PI __builtin_HEXAGON_S2_asl_i_p_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=asl(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_aslxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_aslxacc_PI __builtin_HEXAGON_S2_asl_i_p_xacc
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asl(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_asl_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asl_RI __builtin_HEXAGON_S2_asl_i_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=asl(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_aslacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_aslacc_RI __builtin_HEXAGON_S2_asl_i_r_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=asl(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_asland_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asland_RI __builtin_HEXAGON_S2_asl_i_r_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=asl(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_aslnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_aslnac_RI __builtin_HEXAGON_S2_asl_i_r_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=asl(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_aslor_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_aslor_RI __builtin_HEXAGON_S2_asl_i_r_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asl(Rs32,#u5):sat
+   C Intrinsic Prototype: Word32 Q6_R_asl_RI_sat(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asl_RI_sat __builtin_HEXAGON_S2_asl_i_r_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32^=asl(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_aslxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_aslxacc_RI __builtin_HEXAGON_S2_asl_i_r_xacc
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaslh(Rss32,#u4)
+   C Intrinsic Prototype: Word64 Q6_P_vaslh_PI(Word64 Rss, Word32 Iu4)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaslh_PI __builtin_HEXAGON_S2_asl_i_vh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaslw(Rss32,#u5)
+   C Intrinsic Prototype: Word64 Q6_P_vaslw_PI(Word64 Rss, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaslw_PI __builtin_HEXAGON_S2_asl_i_vw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=asl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_asl_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asl_PR __builtin_HEXAGON_S2_asl_r_p
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=asl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_aslacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_aslacc_PR __builtin_HEXAGON_S2_asl_r_p_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32&=asl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_asland_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asland_PR __builtin_HEXAGON_S2_asl_r_p_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=asl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_aslnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_aslnac_PR __builtin_HEXAGON_S2_asl_r_p_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32|=asl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_aslor_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_aslor_PR __builtin_HEXAGON_S2_asl_r_p_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=asl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_aslxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_aslxacc_PR __builtin_HEXAGON_S2_asl_r_p_xor
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_asl_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asl_RR __builtin_HEXAGON_S2_asl_r_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=asl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_aslacc_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_aslacc_RR __builtin_HEXAGON_S2_asl_r_r_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=asl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_asland_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asland_RR __builtin_HEXAGON_S2_asl_r_r_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=asl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_aslnac_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_aslnac_RR __builtin_HEXAGON_S2_asl_r_r_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=asl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_aslor_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_aslor_RR __builtin_HEXAGON_S2_asl_r_r_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asl(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word32 Q6_R_asl_RR_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asl_RR_sat __builtin_HEXAGON_S2_asl_r_r_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaslh(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vaslh_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaslh_PR __builtin_HEXAGON_S2_asl_r_vh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vaslw(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vaslw_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vaslw_PR __builtin_HEXAGON_S2_asl_r_vw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=asr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_asr_PI(Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asr_PI __builtin_HEXAGON_S2_asr_i_p
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=asr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_asracc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asracc_PI __builtin_HEXAGON_S2_asr_i_p_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32&=asr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_asrand_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asrand_PI __builtin_HEXAGON_S2_asr_i_p_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=asr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_asrnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asrnac_PI __builtin_HEXAGON_S2_asr_i_p_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32|=asr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_asror_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asror_PI __builtin_HEXAGON_S2_asr_i_p_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=asr(Rss32,#u6):rnd
+   C Intrinsic Prototype: Word64 Q6_P_asr_PI_rnd(Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asr_PI_rnd __builtin_HEXAGON_S2_asr_i_p_rnd
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=asrrnd(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_asrrnd_PI(Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_asrrnd_PI __builtin_HEXAGON_S2_asr_i_p_rnd_goodsyntax
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_asr_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asr_RI __builtin_HEXAGON_S2_asr_i_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=asr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_asracc_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asracc_RI __builtin_HEXAGON_S2_asr_i_r_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=asr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_asrand_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asrand_RI __builtin_HEXAGON_S2_asr_i_r_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=asr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_asrnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asrnac_RI __builtin_HEXAGON_S2_asr_i_r_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=asr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_asror_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asror_RI __builtin_HEXAGON_S2_asr_i_r_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asr(Rs32,#u5):rnd
+   C Intrinsic Prototype: Word32 Q6_R_asr_RI_rnd(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asr_RI_rnd __builtin_HEXAGON_S2_asr_i_r_rnd
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asrrnd(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_asrrnd_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_asrrnd_RI __builtin_HEXAGON_S2_asr_i_r_rnd_goodsyntax
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vasrw(Rss32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_vasrw_PI(Word64 Rss, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vasrw_PI __builtin_HEXAGON_S2_asr_i_svw_trun
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vasrh(Rss32,#u4)
+   C Intrinsic Prototype: Word64 Q6_P_vasrh_PI(Word64 Rss, Word32 Iu4)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vasrh_PI __builtin_HEXAGON_S2_asr_i_vh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vasrw(Rss32,#u5)
+   C Intrinsic Prototype: Word64 Q6_P_vasrw_PI(Word64 Rss, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vasrw_PI __builtin_HEXAGON_S2_asr_i_vw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=asr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_asr_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asr_PR __builtin_HEXAGON_S2_asr_r_p
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=asr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_asracc_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asracc_PR __builtin_HEXAGON_S2_asr_r_p_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32&=asr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_asrand_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asrand_PR __builtin_HEXAGON_S2_asr_r_p_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=asr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_asrnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asrnac_PR __builtin_HEXAGON_S2_asr_r_p_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32|=asr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_asror_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asror_PR __builtin_HEXAGON_S2_asr_r_p_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=asr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_asrxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_asrxacc_PR __builtin_HEXAGON_S2_asr_r_p_xor
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_asr_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asr_RR __builtin_HEXAGON_S2_asr_r_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=asr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_asracc_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asracc_RR __builtin_HEXAGON_S2_asr_r_r_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=asr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_asrand_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asrand_RR __builtin_HEXAGON_S2_asr_r_r_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=asr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_asrnac_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asrnac_RR __builtin_HEXAGON_S2_asr_r_r_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=asr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_asror_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asror_RR __builtin_HEXAGON_S2_asr_r_r_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=asr(Rs32,Rt32):sat
+   C Intrinsic Prototype: Word32 Q6_R_asr_RR_sat(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_asr_RR_sat __builtin_HEXAGON_S2_asr_r_r_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vasrw(Rss32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_vasrw_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vasrw_PR __builtin_HEXAGON_S2_asr_r_svw_trun
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vasrh(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vasrh_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vasrh_PR __builtin_HEXAGON_S2_asr_r_vh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vasrw(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vasrw_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vasrw_PR __builtin_HEXAGON_S2_asr_r_vw
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=brev(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_brev_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_brev_R __builtin_HEXAGON_S2_brev
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=brev(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_brev_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_brev_P __builtin_HEXAGON_S2_brevp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cl0(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_cl0_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cl0_R __builtin_HEXAGON_S2_cl0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cl0(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_cl0_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cl0_P __builtin_HEXAGON_S2_cl0p
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cl1(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_cl1_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cl1_R __builtin_HEXAGON_S2_cl1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cl1(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_cl1_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_cl1_P __builtin_HEXAGON_S2_cl1p
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=clb(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_clb_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_clb_R __builtin_HEXAGON_S2_clb
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=normamt(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_normamt_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_normamt_R __builtin_HEXAGON_S2_clbnorm
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=clb(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_clb_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_clb_P __builtin_HEXAGON_S2_clbp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=clrbit(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_clrbit_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_clrbit_RI __builtin_HEXAGON_S2_clrbit_i
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=clrbit(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_clrbit_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_clrbit_RR __builtin_HEXAGON_S2_clrbit_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=ct0(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_ct0_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_ct0_R __builtin_HEXAGON_S2_ct0
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=ct0(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_ct0_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_ct0_P __builtin_HEXAGON_S2_ct0p
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=ct1(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_ct1_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_ct1_R __builtin_HEXAGON_S2_ct1
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=ct1(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_ct1_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_ct1_P __builtin_HEXAGON_S2_ct1p
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=deinterleave(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_deinterleave_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_deinterleave_P __builtin_HEXAGON_S2_deinterleave
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=extractu(Rs32,#u5,#U5)
+   C Intrinsic Prototype: Word32 Q6_R_extractu_RII(Word32 Rs, Word32 Iu5, Word32 IU5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_extractu_RII __builtin_HEXAGON_S2_extractu
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=extractu(Rs32,Rtt32)
+   C Intrinsic Prototype: Word32 Q6_R_extractu_RP(Word32 Rs, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_extractu_RP __builtin_HEXAGON_S2_extractu_rp
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=extractu(Rss32,#u6,#U6)
+   C Intrinsic Prototype: Word64 Q6_P_extractu_PII(Word64 Rss, Word32 Iu6, Word32 IU6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_extractu_PII __builtin_HEXAGON_S2_extractup
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=extractu(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_extractu_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_extractu_PP __builtin_HEXAGON_S2_extractup_rp
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=insert(Rs32,#u5,#U5)
+   C Intrinsic Prototype: Word32 Q6_R_insert_RII(Word32 Rx, Word32 Rs, Word32 Iu5, Word32 IU5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_insert_RII __builtin_HEXAGON_S2_insert
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=insert(Rs32,Rtt32)
+   C Intrinsic Prototype: Word32 Q6_R_insert_RP(Word32 Rx, Word32 Rs, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_insert_RP __builtin_HEXAGON_S2_insert_rp
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=insert(Rss32,#u6,#U6)
+   C Intrinsic Prototype: Word64 Q6_P_insert_PII(Word64 Rxx, Word64 Rss, Word32 Iu6, Word32 IU6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_insert_PII __builtin_HEXAGON_S2_insertp
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32=insert(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_insert_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_insert_PP __builtin_HEXAGON_S2_insertp_rp
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=interleave(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_interleave_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_interleave_P __builtin_HEXAGON_S2_interleave
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=lfs(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_lfs_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lfs_PP __builtin_HEXAGON_S2_lfsp
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=lsl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lsl_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsl_PR __builtin_HEXAGON_S2_lsl_r_p
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=lsl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lslacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lslacc_PR __builtin_HEXAGON_S2_lsl_r_p_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32&=lsl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lsland_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsland_PR __builtin_HEXAGON_S2_lsl_r_p_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=lsl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lslnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lslnac_PR __builtin_HEXAGON_S2_lsl_r_p_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32|=lsl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lslor_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lslor_PR __builtin_HEXAGON_S2_lsl_r_p_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=lsl(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lslxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lslxacc_PR __builtin_HEXAGON_S2_lsl_r_p_xor
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=lsl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lsl_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsl_RR __builtin_HEXAGON_S2_lsl_r_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=lsl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lslacc_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lslacc_RR __builtin_HEXAGON_S2_lsl_r_r_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=lsl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lsland_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsland_RR __builtin_HEXAGON_S2_lsl_r_r_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=lsl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lslnac_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lslnac_RR __builtin_HEXAGON_S2_lsl_r_r_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=lsl(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lslor_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lslor_RR __builtin_HEXAGON_S2_lsl_r_r_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vlslh(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vlslh_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vlslh_PR __builtin_HEXAGON_S2_lsl_r_vh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vlslw(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vlslw_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vlslw_PR __builtin_HEXAGON_S2_lsl_r_vw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=lsr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_lsr_PI(Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsr_PI __builtin_HEXAGON_S2_lsr_i_p
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=lsr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_lsracc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsracc_PI __builtin_HEXAGON_S2_lsr_i_p_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32&=lsr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_lsrand_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsrand_PI __builtin_HEXAGON_S2_lsr_i_p_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=lsr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_lsrnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsrnac_PI __builtin_HEXAGON_S2_lsr_i_p_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32|=lsr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_lsror_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsror_PI __builtin_HEXAGON_S2_lsr_i_p_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=lsr(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_lsrxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsrxacc_PI __builtin_HEXAGON_S2_lsr_i_p_xacc
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=lsr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_lsr_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsr_RI __builtin_HEXAGON_S2_lsr_i_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=lsr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_lsracc_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsracc_RI __builtin_HEXAGON_S2_lsr_i_r_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=lsr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_lsrand_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsrand_RI __builtin_HEXAGON_S2_lsr_i_r_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=lsr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_lsrnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsrnac_RI __builtin_HEXAGON_S2_lsr_i_r_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=lsr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_lsror_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsror_RI __builtin_HEXAGON_S2_lsr_i_r_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32^=lsr(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_lsrxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsrxacc_RI __builtin_HEXAGON_S2_lsr_i_r_xacc
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vlsrh(Rss32,#u4)
+   C Intrinsic Prototype: Word64 Q6_P_vlsrh_PI(Word64 Rss, Word32 Iu4)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vlsrh_PI __builtin_HEXAGON_S2_lsr_i_vh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vlsrw(Rss32,#u5)
+   C Intrinsic Prototype: Word64 Q6_P_vlsrw_PI(Word64 Rss, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vlsrw_PI __builtin_HEXAGON_S2_lsr_i_vw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=lsr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lsr_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsr_PR __builtin_HEXAGON_S2_lsr_r_p
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=lsr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lsracc_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsracc_PR __builtin_HEXAGON_S2_lsr_r_p_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32&=lsr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lsrand_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsrand_PR __builtin_HEXAGON_S2_lsr_r_p_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=lsr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lsrnac_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsrnac_PR __builtin_HEXAGON_S2_lsr_r_p_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32|=lsr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lsror_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsror_PR __builtin_HEXAGON_S2_lsr_r_p_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=lsr(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_lsrxacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_lsrxacc_PR __builtin_HEXAGON_S2_lsr_r_p_xor
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=lsr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lsr_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsr_RR __builtin_HEXAGON_S2_lsr_r_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=lsr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lsracc_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsracc_RR __builtin_HEXAGON_S2_lsr_r_r_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=lsr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lsrand_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsrand_RR __builtin_HEXAGON_S2_lsr_r_r_and
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=lsr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lsrnac_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsrnac_RR __builtin_HEXAGON_S2_lsr_r_r_nac
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=lsr(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lsror_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsror_RR __builtin_HEXAGON_S2_lsr_r_r_or
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vlsrh(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vlsrh_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vlsrh_PR __builtin_HEXAGON_S2_lsr_r_vh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vlsrw(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vlsrw_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vlsrw_PR __builtin_HEXAGON_S2_lsr_r_vw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=packhl(Rs32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_packhl_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU32_3op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_packhl_RR __builtin_HEXAGON_S2_packhl
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=parity(Rss32,Rtt32)
+   C Intrinsic Prototype: Word32 Q6_R_parity_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_parity_PP __builtin_HEXAGON_S2_parityp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=setbit(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_setbit_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_setbit_RI __builtin_HEXAGON_S2_setbit_i
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=setbit(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_setbit_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_setbit_RR __builtin_HEXAGON_S2_setbit_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=shuffeb(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_shuffeb_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_shuffeb_PP __builtin_HEXAGON_S2_shuffeb
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=shuffeh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_shuffeh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_shuffeh_PP __builtin_HEXAGON_S2_shuffeh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=shuffob(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_shuffob_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_shuffob_PP __builtin_HEXAGON_S2_shuffob
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=shuffoh(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_shuffoh_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_shuffoh_PP __builtin_HEXAGON_S2_shuffoh
+
+/* ==========================================================================
+   Assembly Syntax:       memb(Rx32++#s4:0:circ(Mu2))=Rt32
+   C Intrinsic Prototype: void Q6_memb_IMR_circ(void** Rx, Word32 Is4_0, Word32 Mu, Word32 Rt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memb_IMR_circ __builtin_HEXAGON_S2_storerb_pci
+
+/* ==========================================================================
+   Assembly Syntax:       memb(Rx32++I:circ(Mu2))=Rt32
+   C Intrinsic Prototype: void Q6_memb_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memb_MR_circ __builtin_HEXAGON_S2_storerb_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       memd(Rx32++#s4:3:circ(Mu2))=Rtt32
+   C Intrinsic Prototype: void Q6_memd_IMP_circ(void** Rx, Word32 Is4_3, Word32 Mu, Word64 Rtt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memd_IMP_circ __builtin_HEXAGON_S2_storerd_pci
+
+/* ==========================================================================
+   Assembly Syntax:       memd(Rx32++I:circ(Mu2))=Rtt32
+   C Intrinsic Prototype: void Q6_memd_MP_circ(void** Rx, Word32 Mu, Word64 Rtt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memd_MP_circ __builtin_HEXAGON_S2_storerd_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       memh(Rx32++#s4:1:circ(Mu2))=Rt32.h
+   C Intrinsic Prototype: void Q6_memh_IMRh_circ(void** Rx, Word32 Is4_1, Word32 Mu, Word32 Rt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memh_IMRh_circ __builtin_HEXAGON_S2_storerf_pci
+
+/* ==========================================================================
+   Assembly Syntax:       memh(Rx32++I:circ(Mu2))=Rt32.h
+   C Intrinsic Prototype: void Q6_memh_MRh_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memh_MRh_circ __builtin_HEXAGON_S2_storerf_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       memh(Rx32++#s4:1:circ(Mu2))=Rt32
+   C Intrinsic Prototype: void Q6_memh_IMR_circ(void** Rx, Word32 Is4_1, Word32 Mu, Word32 Rt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memh_IMR_circ __builtin_HEXAGON_S2_storerh_pci
+
+/* ==========================================================================
+   Assembly Syntax:       memh(Rx32++I:circ(Mu2))=Rt32
+   C Intrinsic Prototype: void Q6_memh_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memh_MR_circ __builtin_HEXAGON_S2_storerh_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       memw(Rx32++#s4:2:circ(Mu2))=Rt32
+   C Intrinsic Prototype: void Q6_memw_IMR_circ(void** Rx, Word32 Is4_2, Word32 Mu, Word32 Rt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memw_IMR_circ __builtin_HEXAGON_S2_storeri_pci
+
+/* ==========================================================================
+   Assembly Syntax:       memw(Rx32++I:circ(Mu2))=Rt32
+   C Intrinsic Prototype: void Q6_memw_MR_circ(void** Rx, Word32 Mu, Word32 Rt, void* BaseAddress)
+   Instruction Type:      ST
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_memw_MR_circ __builtin_HEXAGON_S2_storeri_pcr
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsathb(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_vsathb_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vsathb_R __builtin_HEXAGON_S2_svsathb
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsathub(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_vsathub_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vsathub_R __builtin_HEXAGON_S2_svsathub
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=tableidxb(Rs32,#u4,#U5)
+   C Intrinsic Prototype: Word32 Q6_R_tableidxb_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_tableidxb_RII __builtin_HEXAGON_S2_tableidxb_goodsyntax
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=tableidxd(Rs32,#u4,#U5)
+   C Intrinsic Prototype: Word32 Q6_R_tableidxd_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_tableidxd_RII __builtin_HEXAGON_S2_tableidxd_goodsyntax
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=tableidxh(Rs32,#u4,#U5)
+   C Intrinsic Prototype: Word32 Q6_R_tableidxh_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_tableidxh_RII __builtin_HEXAGON_S2_tableidxh_goodsyntax
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=tableidxw(Rs32,#u4,#U5)
+   C Intrinsic Prototype: Word32 Q6_R_tableidxw_RII(Word32 Rx, Word32 Rs, Word32 Iu4, Word32 IU5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_tableidxw_RII __builtin_HEXAGON_S2_tableidxw_goodsyntax
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=togglebit(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_togglebit_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_togglebit_RI __builtin_HEXAGON_S2_togglebit_i
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=togglebit(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_togglebit_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_togglebit_RR __builtin_HEXAGON_S2_togglebit_r
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=tstbit(Rs32,#u5)
+   C Intrinsic Prototype: Byte Q6_p_tstbit_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_tstbit_RI __builtin_HEXAGON_S2_tstbit_i
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=tstbit(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_tstbit_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_tstbit_RR __builtin_HEXAGON_S2_tstbit_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=valignb(Rtt32,Rss32,#u3)
+   C Intrinsic Prototype: Word64 Q6_P_valignb_PPI(Word64 Rtt, Word64 Rss, Word32 Iu3)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_valignb_PPI __builtin_HEXAGON_S2_valignib
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=valignb(Rtt32,Rss32,Pu4)
+   C Intrinsic Prototype: Word64 Q6_P_valignb_PPp(Word64 Rtt, Word64 Rss, Byte Pu)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_valignb_PPp __builtin_HEXAGON_S2_valignrb
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vcnegh(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vcnegh_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vcnegh_PR __builtin_HEXAGON_S2_vcnegh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vcrotate(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vcrotate_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vcrotate_PR __builtin_HEXAGON_S2_vcrotate
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrcnegh(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_vrcneghacc_PR(Word64 Rxx, Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcneghacc_PR __builtin_HEXAGON_S2_vrcnegh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vrndwh(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_vrndwh_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vrndwh_P __builtin_HEXAGON_S2_vrndpackwh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vrndwh(Rss32):sat
+   C Intrinsic Prototype: Word32 Q6_R_vrndwh_P_sat(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vrndwh_P_sat __builtin_HEXAGON_S2_vrndpackwhs
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsathb(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_vsathb_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vsathb_P __builtin_HEXAGON_S2_vsathb
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsathb(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vsathb_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsathb_P __builtin_HEXAGON_S2_vsathb_nopack
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsathub(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_vsathub_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vsathub_P __builtin_HEXAGON_S2_vsathub
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsathub(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vsathub_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsathub_P __builtin_HEXAGON_S2_vsathub_nopack
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsatwh(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_vsatwh_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vsatwh_P __builtin_HEXAGON_S2_vsatwh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsatwh(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vsatwh_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsatwh_P __builtin_HEXAGON_S2_vsatwh_nopack
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsatwuh(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_vsatwuh_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vsatwuh_P __builtin_HEXAGON_S2_vsatwuh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsatwuh(Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vsatwuh_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsatwuh_P __builtin_HEXAGON_S2_vsatwuh_nopack
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vsplatb(Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_vsplatb_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vsplatb_R __builtin_HEXAGON_S2_vsplatrb
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsplath(Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_vsplath_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsplath_R __builtin_HEXAGON_S2_vsplatrh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vspliceb(Rss32,Rtt32,#u3)
+   C Intrinsic Prototype: Word64 Q6_P_vspliceb_PPI(Word64 Rss, Word64 Rtt, Word32 Iu3)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vspliceb_PPI __builtin_HEXAGON_S2_vspliceib
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vspliceb(Rss32,Rtt32,Pu4)
+   C Intrinsic Prototype: Word64 Q6_P_vspliceb_PPp(Word64 Rss, Word64 Rtt, Byte Pu)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vspliceb_PPp __builtin_HEXAGON_S2_vsplicerb
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsxtbh(Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_vsxtbh_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsxtbh_R __builtin_HEXAGON_S2_vsxtbh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsxthw(Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_vsxthw_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsxthw_R __builtin_HEXAGON_S2_vsxthw
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vtrunehb(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_vtrunehb_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vtrunehb_P __builtin_HEXAGON_S2_vtrunehb
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vtrunewh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vtrunewh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vtrunewh_PP __builtin_HEXAGON_S2_vtrunewh
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vtrunohb(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_vtrunohb_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vtrunohb_P __builtin_HEXAGON_S2_vtrunohb
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vtrunowh(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vtrunowh_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vtrunowh_PP __builtin_HEXAGON_S2_vtrunowh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vzxtbh(Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_vzxtbh_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vzxtbh_R __builtin_HEXAGON_S2_vzxtbh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vzxthw(Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_vzxthw_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vzxthw_R __builtin_HEXAGON_S2_vzxthw
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rs32,add(Ru32,#s6))
+   C Intrinsic Prototype: Word32 Q6_R_add_add_RRI(Word32 Rs, Word32 Ru, Word32 Is6)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_add_RRI __builtin_HEXAGON_S4_addaddi
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=add(#u8,asl(Rx32,#U5))
+   C Intrinsic Prototype: Word32 Q6_R_add_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_asl_IRI __builtin_HEXAGON_S4_addi_asl_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=add(#u8,lsr(Rx32,#U5))
+   C Intrinsic Prototype: Word32 Q6_R_add_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_lsr_IRI __builtin_HEXAGON_S4_addi_lsr_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=and(#u8,asl(Rx32,#U5))
+   C Intrinsic Prototype: Word32 Q6_R_and_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_and_asl_IRI __builtin_HEXAGON_S4_andi_asl_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=and(#u8,lsr(Rx32,#U5))
+   C Intrinsic Prototype: Word32 Q6_R_and_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_and_lsr_IRI __builtin_HEXAGON_S4_andi_lsr_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(clb(Rs32),#s6)
+   C Intrinsic Prototype: Word32 Q6_R_add_clb_RI(Word32 Rs, Word32 Is6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_clb_RI __builtin_HEXAGON_S4_clbaddi
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(clb(Rss32),#s6)
+   C Intrinsic Prototype: Word32 Q6_R_add_clb_PI(Word64 Rss, Word32 Is6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_clb_PI __builtin_HEXAGON_S4_clbpaddi
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=normamt(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_normamt_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_normamt_P __builtin_HEXAGON_S4_clbpnorm
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=extract(Rs32,#u5,#U5)
+   C Intrinsic Prototype: Word32 Q6_R_extract_RII(Word32 Rs, Word32 Iu5, Word32 IU5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_extract_RII __builtin_HEXAGON_S4_extract
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=extract(Rs32,Rtt32)
+   C Intrinsic Prototype: Word32 Q6_R_extract_RP(Word32 Rs, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_extract_RP __builtin_HEXAGON_S4_extract_rp
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=extract(Rss32,#u6,#U6)
+   C Intrinsic Prototype: Word64 Q6_P_extract_PII(Word64 Rss, Word32 Iu6, Word32 IU6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_extract_PII __builtin_HEXAGON_S4_extractp
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=extract(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_extract_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_extract_PP __builtin_HEXAGON_S4_extractp_rp
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=lsl(#s6,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_lsl_IR(Word32 Is6, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_lsl_IR __builtin_HEXAGON_S4_lsli
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!tstbit(Rs32,#u5)
+   C Intrinsic Prototype: Byte Q6_p_not_tstbit_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_not_tstbit_RI __builtin_HEXAGON_S4_ntstbit_i
+
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!tstbit(Rs32,Rt32)
+   C Intrinsic Prototype: Byte Q6_p_not_tstbit_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_not_tstbit_RR __builtin_HEXAGON_S4_ntstbit_r
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=and(Rs32,#s10)
+   C Intrinsic Prototype: Word32 Q6_R_andor_RI(Word32 Rx, Word32 Rs, Word32 Is10)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_andor_RI __builtin_HEXAGON_S4_or_andi
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=or(Ru32,and(Rx32,#s10))
+   C Intrinsic Prototype: Word32 Q6_R_or_and_RRI(Word32 Ru, Word32 Rx, Word32 Is10)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_or_and_RRI __builtin_HEXAGON_S4_or_andix
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=or(Rs32,#s10)
+   C Intrinsic Prototype: Word32 Q6_R_oror_RI(Word32 Rx, Word32 Rs, Word32 Is10)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_oror_RI __builtin_HEXAGON_S4_or_ori
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=or(#u8,asl(Rx32,#U5))
+   C Intrinsic Prototype: Word32 Q6_R_or_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_or_asl_IRI __builtin_HEXAGON_S4_ori_asl_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=or(#u8,lsr(Rx32,#U5))
+   C Intrinsic Prototype: Word32 Q6_R_or_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_or_lsr_IRI __builtin_HEXAGON_S4_ori_lsr_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=parity(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_parity_RR(Word32 Rs, Word32 Rt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_parity_RR __builtin_HEXAGON_S4_parity
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=add(Rs32,sub(#s6,Ru32))
+   C Intrinsic Prototype: Word32 Q6_R_add_sub_RIR(Word32 Rs, Word32 Is6, Word32 Ru)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_add_sub_RIR __builtin_HEXAGON_S4_subaddi
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=sub(#u8,asl(Rx32,#U5))
+   C Intrinsic Prototype: Word32 Q6_R_sub_asl_IRI(Word32 Iu8, Word32 Rx, Word32 IU5)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_asl_IRI __builtin_HEXAGON_S4_subi_asl_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rx32=sub(#u8,lsr(Rx32,#U5))
+   C Intrinsic Prototype: Word32 Q6_R_sub_lsr_IRI(Word32 Iu8, Word32 Rx, Word32 IU5)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_sub_lsr_IRI __builtin_HEXAGON_S4_subi_lsr_ri
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vrcrotate(Rss32,Rt32,#u2)
+   C Intrinsic Prototype: Word64 Q6_P_vrcrotate_PRI(Word64 Rss, Word32 Rt, Word32 Iu2)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcrotate_PRI __builtin_HEXAGON_S4_vrcrotate
+
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vrcrotate(Rss32,Rt32,#u2)
+   C Intrinsic Prototype: Word64 Q6_P_vrcrotateacc_PRI(Word64 Rxx, Word64 Rss, Word32 Rt, Word32 Iu2)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vrcrotateacc_PRI __builtin_HEXAGON_S4_vrcrotate_acc
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vxaddsubh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vxaddsubh_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vxaddsubh_PP_sat __builtin_HEXAGON_S4_vxaddsubh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vxaddsubh(Rss32,Rtt32):rnd:>>1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vxaddsubh_PP_rnd_rs1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vxaddsubh_PP_rnd_rs1_sat __builtin_HEXAGON_S4_vxaddsubhr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vxaddsubw(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vxaddsubw_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vxaddsubw_PP_sat __builtin_HEXAGON_S4_vxaddsubw
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vxsubaddh(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vxsubaddh_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vxsubaddh_PP_sat __builtin_HEXAGON_S4_vxsubaddh
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vxsubaddh(Rss32,Rtt32):rnd:>>1:sat
+   C Intrinsic Prototype: Word64 Q6_P_vxsubaddh_PP_rnd_rs1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vxsubaddh_PP_rnd_rs1_sat __builtin_HEXAGON_S4_vxsubaddhr
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vxsubaddw(Rss32,Rtt32):sat
+   C Intrinsic Prototype: Word64 Q6_P_vxsubaddw_PP_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vxsubaddw_PP_sat __builtin_HEXAGON_S4_vxsubaddw
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vasrhub(Rss32,#u4):rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_vasrhub_PI_rnd_sat(Word64 Rss, Word32 Iu4)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_R_vasrhub_PI_rnd_sat __builtin_HEXAGON_S5_asrhub_rnd_sat_goodsyntax
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vasrhub(Rss32,#u4):sat
+   C Intrinsic Prototype: Word32 Q6_R_vasrhub_PI_sat(Word64 Rss, Word32 Iu4)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_vasrhub_PI_sat __builtin_HEXAGON_S5_asrhub_sat
+
+/* ==========================================================================
+   Assembly Syntax:       Rd32=popcount(Rss32)
+   C Intrinsic Prototype: Word32 Q6_R_popcount_P(Word64 Rss)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_popcount_P __builtin_HEXAGON_S5_popcountp
+
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vasrh(Rss32,#u4):rnd
+   C Intrinsic Prototype: Word64 Q6_P_vasrh_PI_rnd(Word64 Rss, Word32 Iu4)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_P_vasrh_PI_rnd __builtin_HEXAGON_S5_vasrhrnd_goodsyntax
+
+/* ==========================================================================
+   Assembly Syntax:       dccleana(Rs32)
+   C Intrinsic Prototype: void Q6_dccleana_A(Address Rs)
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_dccleana_A __builtin_HEXAGON_Y2_dccleana
+
+/* ==========================================================================
+   Assembly Syntax:       dccleaninva(Rs32)
+   C Intrinsic Prototype: void Q6_dccleaninva_A(Address Rs)
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_dccleaninva_A __builtin_HEXAGON_Y2_dccleaninva
+
+/* ==========================================================================
+   Assembly Syntax:       dcfetch(Rs32)
+   C Intrinsic Prototype: void Q6_dcfetch_A(Address Rs)
+   Instruction Type:      MAPPING
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_dcfetch_A __builtin_HEXAGON_Y2_dcfetch
+
+/* ==========================================================================
+   Assembly Syntax:       dcinva(Rs32)
+   C Intrinsic Prototype: void Q6_dcinva_A(Address Rs)
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_dcinva_A __builtin_HEXAGON_Y2_dcinva
+
+/* ==========================================================================
+   Assembly Syntax:       dczeroa(Rs32)
+   C Intrinsic Prototype: void Q6_dczeroa_A(Address Rs)
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_dczeroa_A __builtin_HEXAGON_Y2_dczeroa
+
+/* ==========================================================================
+   Assembly Syntax:       l2fetch(Rs32,Rt32)
+   C Intrinsic Prototype: void Q6_l2fetch_AR(Address Rs, Word32 Rt)
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_l2fetch_AR __builtin_HEXAGON_Y4_l2fetch
+
+/* ==========================================================================
+   Assembly Syntax:       l2fetch(Rs32,Rtt32)
+   C Intrinsic Prototype: void Q6_l2fetch_AP(Address Rs, Word64 Rtt)
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_l2fetch_AP __builtin_HEXAGON_Y5_l2fetch
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=rol(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_rol_PI(Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_rol_PI __builtin_HEXAGON_S6_rol_i_p
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=rol(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_rolacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_rolacc_PI __builtin_HEXAGON_S6_rol_i_p_acc
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rxx32&=rol(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_roland_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_roland_PI __builtin_HEXAGON_S6_rol_i_p_and
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rxx32-=rol(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_rolnac_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_rolnac_PI __builtin_HEXAGON_S6_rol_i_p_nac
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rxx32|=rol(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_rolor_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_rolor_PI __builtin_HEXAGON_S6_rol_i_p_or
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rxx32^=rol(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_rolxacc_PI(Word64 Rxx, Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_rolxacc_PI __builtin_HEXAGON_S6_rol_i_p_xacc
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rd32=rol(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_rol_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_rol_RI __builtin_HEXAGON_S6_rol_i_r
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rx32+=rol(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_rolacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_rolacc_RI __builtin_HEXAGON_S6_rol_i_r_acc
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rx32&=rol(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_roland_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_roland_RI __builtin_HEXAGON_S6_rol_i_r_and
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=rol(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_rolnac_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_rolnac_RI __builtin_HEXAGON_S6_rol_i_r_nac
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rx32|=rol(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_rolor_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_rolor_RI __builtin_HEXAGON_S6_rol_i_r_or
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rx32^=rol(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_rolxacc_RI(Word32 Rx, Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_rolxacc_RI __builtin_HEXAGON_S6_rol_i_r_xacc
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HEXAGON_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vabsdiffb(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vabsdiffb_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vabsdiffb_PP __builtin_HEXAGON_M6_vabsdiffb
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HEXAGON_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vabsdiffub(Rtt32,Rss32)
+   C Intrinsic Prototype: Word64 Q6_P_vabsdiffub_PP(Word64 Rtt, Word64 Rss)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vabsdiffub_PP __builtin_HEXAGON_M6_vabsdiffub
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HEXAGON_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vsplatb(Rs32)
+   C Intrinsic Prototype: Word64 Q6_P_vsplatb_R(Word32 Rs)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vsplatb_R __builtin_HEXAGON_S6_vsplatrbp
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HEXAGON_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vtrunehb(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vtrunehb_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vtrunehb_PP __builtin_HEXAGON_S6_vtrunehb_ppp
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HEXAGON_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vtrunohb(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vtrunohb_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vtrunohb_PP __builtin_HEXAGON_S6_vtrunohb_ppp
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HEXAGON_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Pd4=!any8(vcmpb.eq(Rss32,Rtt32))
+   C Intrinsic Prototype: Byte Q6_p_not_any8_vcmpb_eq_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      ALU64
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_p_not_any8_vcmpb_eq_PP __builtin_HEXAGON_A6_vcmpbeq_notany
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HEXAGON_ARCH__ >= 66
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=dfadd(Rss32,Rtt32)
+   C Intrinsic Prototype: Float64 Q6_P_dfadd_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfadd_PP __builtin_HEXAGON_F2_dfadd
+#endif /* __HEXAGON_ARCH___ >= 66 */
+
+#if __HEXAGON_ARCH__ >= 66
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=dfsub(Rss32,Rtt32)
+   C Intrinsic Prototype: Float64 Q6_P_dfsub_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfsub_PP __builtin_HEXAGON_F2_dfsub
+#endif /* __HEXAGON_ARCH___ >= 66 */
+
+#if __HEXAGON_ARCH__ >= 66
+/* ==========================================================================
+   Assembly Syntax:       Rx32-=mpyi(Rs32,Rt32)
+   C Intrinsic Prototype: Word32 Q6_R_mpyinac_RR(Word32 Rx, Word32 Rs, Word32 Rt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mpyinac_RR __builtin_HEXAGON_M2_mnaci
+#endif /* __HEXAGON_ARCH___ >= 66 */
+
+#if __HEXAGON_ARCH__ >= 66
+/* ==========================================================================
+   Assembly Syntax:       Rd32=mask(#u5,#U5)
+   C Intrinsic Prototype: Word32 Q6_R_mask_II(Word32 Iu5, Word32 IU5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_mask_II __builtin_HEXAGON_S2_mask
+#endif /* __HEXAGON_ARCH___ >= 66 */
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rd32=clip(Rs32,#u5)
+   C Intrinsic Prototype: Word32 Q6_R_clip_RI(Word32 Rs, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_R_clip_RI __builtin_HEXAGON_A7_clip
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cround(Rss32,#u6)
+   C Intrinsic Prototype: Word64 Q6_P_cround_PI(Word64 Rss, Word32 Iu6)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cround_PI __builtin_HEXAGON_A7_croundd_ri
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cround(Rss32,Rt32)
+   C Intrinsic Prototype: Word64 Q6_P_cround_PR(Word64 Rss, Word32 Rt)
+   Instruction Type:      S_3op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_cround_PR __builtin_HEXAGON_A7_croundd_rr
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vclip(Rss32,#u5)
+   C Intrinsic Prototype: Word64 Q6_P_vclip_PI(Word64 Rss, Word32 Iu5)
+   Instruction Type:      S_2op
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_vclip_PI __builtin_HEXAGON_A7_vclip
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=dfmax(Rss32,Rtt32)
+   C Intrinsic Prototype: Float64 Q6_P_dfmax_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfmax_PP __builtin_HEXAGON_F2_dfmax
+#endif /* __HEXAGON_ARCH___ >= 67 */
+
+#if __HEXAGON_ARCH__ >= 67
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=dfmin(Rss32,Rtt32)
+   C Intrinsic Prototype: Float64 Q6_P_dfmin_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfmin_PP __builtin_HEXAGON_F2_dfmin
+#endif /* __HEXAGON_ARCH___ >= 67 */
+
+#if __HEXAGON_ARCH__ >= 67
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=dfmpyfix(Rss32,Rtt32)
+   C Intrinsic Prototype: Float64 Q6_P_dfmpyfix_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfmpyfix_PP __builtin_HEXAGON_F2_dfmpyfix
+#endif /* __HEXAGON_ARCH___ >= 67 */
+
+#if __HEXAGON_ARCH__ >= 67
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=dfmpyhh(Rss32,Rtt32)
+   C Intrinsic Prototype: Float64 Q6_P_dfmpyhhacc_PP(Float64 Rxx, Float64 Rss, Float64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfmpyhhacc_PP __builtin_HEXAGON_F2_dfmpyhh
+#endif /* __HEXAGON_ARCH___ >= 67 */
+
+#if __HEXAGON_ARCH__ >= 67
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=dfmpylh(Rss32,Rtt32)
+   C Intrinsic Prototype: Float64 Q6_P_dfmpylhacc_PP(Float64 Rxx, Float64 Rss, Float64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfmpylhacc_PP __builtin_HEXAGON_F2_dfmpylh
+#endif /* __HEXAGON_ARCH___ >= 67 */
+
+#if __HEXAGON_ARCH__ >= 67
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=dfmpyll(Rss32,Rtt32)
+   C Intrinsic Prototype: Float64 Q6_P_dfmpyll_PP(Float64 Rss, Float64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_P_dfmpyll_PP __builtin_HEXAGON_F2_dfmpyll
+#endif /* __HEXAGON_ARCH___ >= 67 */
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpyiw(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyiw_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_cmpyiw_PP __builtin_HEXAGON_M7_dcmpyiw
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpyiw(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyiwacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_cmpyiwacc_PP __builtin_HEXAGON_M7_dcmpyiw_acc
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpyiw(Rss32,Rtt32*)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyiw_PP_conj(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_cmpyiw_PP_conj __builtin_HEXAGON_M7_dcmpyiwc
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpyiw(Rss32,Rtt32*)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyiwacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_cmpyiwacc_PP_conj __builtin_HEXAGON_M7_dcmpyiwc_acc
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpyrw(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyrw_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_cmpyrw_PP __builtin_HEXAGON_M7_dcmpyrw
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpyrw(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyrwacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_cmpyrwacc_PP __builtin_HEXAGON_M7_dcmpyrw_acc
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=cmpyrw(Rss32,Rtt32*)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyrw_PP_conj(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_cmpyrw_PP_conj __builtin_HEXAGON_M7_dcmpyrwc
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=cmpyrw(Rss32,Rtt32*)
+   C Intrinsic Prototype: Word64 Q6_P_cmpyrwacc_PP_conj(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_cmpyrwacc_PP_conj __builtin_HEXAGON_M7_dcmpyrwc_acc
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rdd32=vdmpyw(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vdmpyw_PP(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_vdmpyw_PP __builtin_HEXAGON_M7_vdmpy
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rxx32+=vdmpyw(Rss32,Rtt32)
+   C Intrinsic Prototype: Word64 Q6_P_vdmpywacc_PP(Word64 Rxx, Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_P_vdmpywacc_PP __builtin_HEXAGON_M7_vdmpy_acc
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyiw(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_R_cmpyiw_PP_s1_sat __builtin_HEXAGON_M7_wcmpyiw
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyiw(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_R_cmpyiw_PP_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyiw_rnd
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyiw(Rss32,Rtt32*):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_conj_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_R_cmpyiw_PP_conj_s1_sat __builtin_HEXAGON_M7_wcmpyiwc
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyiw(Rss32,Rtt32*):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyiw_PP_conj_s1_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_R_cmpyiw_PP_conj_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyiwc_rnd
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyrw(Rss32,Rtt32):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_R_cmpyrw_PP_s1_sat __builtin_HEXAGON_M7_wcmpyrw
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyrw(Rss32,Rtt32):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_s1_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_R_cmpyrw_PP_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyrw_rnd
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyrw(Rss32,Rtt32*):<<1:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_conj_s1_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_R_cmpyrw_PP_conj_s1_sat __builtin_HEXAGON_M7_wcmpyrwc
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 67 && defined __HEXAGON_AUDIO__
+/* ==========================================================================
+   Assembly Syntax:       Rd32=cmpyrw(Rss32,Rtt32*):<<1:rnd:sat
+   C Intrinsic Prototype: Word32 Q6_R_cmpyrw_PP_conj_s1_rnd_sat(Word64 Rss, Word64 Rtt)
+   Instruction Type:      M
+   Execution Slots:       SLOT3
+   ========================================================================== */
+
+#define Q6_R_cmpyrw_PP_conj_s1_rnd_sat __builtin_HEXAGON_M7_wcmpyrwc_rnd
+#endif /* __HEXAGON_ARCH___ >= 67  && defined __HEXAGON_AUDIO__*/
+
+#if __HEXAGON_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       dmlink(Rs32,Rt32)
+   C Intrinsic Prototype: void Q6_dmlink_AA(Address Rs, Address Rt)
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_dmlink_AA __builtin_HEXAGON_Y6_dmlink
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HEXAGON_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Rd32=dmpause
+   C Intrinsic Prototype: Word32 Q6_R_dmpause()
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_R_dmpause __builtin_HEXAGON_Y6_dmpause
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HEXAGON_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Rd32=dmpoll
+   C Intrinsic Prototype: Word32 Q6_R_dmpoll()
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_R_dmpoll __builtin_HEXAGON_Y6_dmpoll
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HEXAGON_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       dmresume(Rs32)
+   C Intrinsic Prototype: void Q6_dmresume_A(Address Rs)
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_dmresume_A __builtin_HEXAGON_Y6_dmresume
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HEXAGON_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       dmstart(Rs32)
+   C Intrinsic Prototype: void Q6_dmstart_A(Address Rs)
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_dmstart_A __builtin_HEXAGON_Y6_dmstart
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HEXAGON_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Rd32=dmwait
+   C Intrinsic Prototype: Word32 Q6_R_dmwait()
+   Instruction Type:      ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_R_dmwait __builtin_HEXAGON_Y6_dmwait
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#include <hexagon_circ_brev_intrinsics.h>
+#ifdef __HVX__
+#include <hvx_hexagon_protos.h>
+#endif /* __HVX__ */
+#endif
diff --git a/crates/stdarch-gen-hexagon-scalar/src/main.rs b/crates/stdarch-gen-hexagon-scalar/src/main.rs
new file mode 100644
index 0000000000..bbe28174ff
--- /dev/null
+++ b/crates/stdarch-gen-hexagon-scalar/src/main.rs
@@ -0,0 +1,672 @@
+//! Hexagon Scalar Code Generator
+//!
+//! This generator creates scalar.rs from the LLVM `hexagon_protos.h` header file.
+//! It parses the C intrinsic prototypes and generates Rust wrapper functions
+//! with appropriate attributes for all scalar (non-HVX) Hexagon intrinsics.
+//!
+//! The generated module provides ~901 scalar intrinsic wrappers covering:
+//! - Arithmetic, multiply, shift, saturate operations
+//! - Compare, floating-point, and other scalar operations
+//!
+//! Intrinsics with `void*`/`void**` parameters (circular-addressing) are skipped
+//! because they have no corresponding LLVM intrinsic.
+//!
+//! Usage:
+//!     cd crates/stdarch-gen-hexagon-scalar
+//!     cargo run
+//!     # Output is written to ../core_arch/src/hexagon/scalar.rs
+
+use regex::Regex;
+use std::collections::HashMap;
+use std::fs::File;
+use std::io::Write;
+use std::path::Path;
+
+/// Extract the instruction mnemonic from the assembly syntax string.
+///
+/// Examples:
+/// - `Rd32=abs(Rs32)` → Some("abs")
+/// - `Rd32=add(Rs32,Rt32):sat` → Some("add")
+/// - `Rx32+=mpy(Rs32,Rt32)` → Some("mpy")
+/// - `Rd32=dmpause` → Some("dmpause")
+/// - `dmlink(Rs32,Rt32)` → Some("dmlink")
+/// - `Rd32=Rs32` → None (simple transfer)
+/// - `Rx32.h=#u16` → None (immediate load)
+/// - `Rdd32=#s8` → None (immediate load)
+fn extract_instr_name(asm_syntax: &str) -> Option<String> {
+    // Find the operator: +=, -=, or =
+    let after_op = if let Some(pos) = asm_syntax.find("+=") {
+        &asm_syntax[pos + 2..]
+    } else if let Some(pos) = asm_syntax.find("-=") {
+        &asm_syntax[pos + 2..]
+    } else if let Some(pos) = asm_syntax.find('=') {
+        &asm_syntax[pos + 1..]
+    } else {
+        // No assignment operator: try function-call-style syntax like "dmlink(Rs32,Rt32)".
+        // The mnemonic is the leading lowercase identifier.
+        return extract_leading_mnemonic(asm_syntax);
+    };
+
+    // After the operator, we expect a lowercase letter starting the mnemonic.
+    // Skip if it starts with uppercase (register name like Rs32) or # (immediate).
+    extract_leading_mnemonic(after_op)
+}
+
+/// Extract a leading lowercase mnemonic from the given string.
+///
+/// Returns `Some(mnemonic)` if the string starts with a lowercase ASCII letter,
+/// collecting all subsequent alphanumeric/underscore characters. Returns `None`
+/// if the string is empty or starts with an uppercase letter, `#`, etc.
+fn extract_leading_mnemonic(s: &str) -> Option<String> {
+    let first_char = s.chars().next()?;
+    if !first_char.is_ascii_lowercase() {
+        return None;
+    }
+    let mnemonic: String = s
+        .chars()
+        .take_while(|c| c.is_ascii_alphanumeric() || *c == '_')
+        .collect();
+    if mnemonic.is_empty() {
+        None
+    } else {
+        Some(mnemonic)
+    }
+}
+
+/// The tracking issue number for the stdarch_hexagon feature
+const TRACKING_ISSUE: &str = "151523";
+
+/// LLVM version the header file is from (for reference)
+const LLVM_VERSION: &str = "22.1.0";
+
+/// Local header file path (checked into the repository)
+const HEADER_FILE: &str = "hexagon_protos.h";
+
+/// Rust type representation for scalar intrinsics
+#[derive(Debug, Clone, PartialEq)]
+enum RustType {
+    I32,
+    I64,
+    F32,
+    F64,
+    Unit,
+}
+
+impl RustType {
+    fn from_c_type(c_type: &str) -> Option<Self> {
+        match c_type.trim() {
+            "Word32" | "UWord32" | "Byte" | "Address" => Some(RustType::I32),
+            "Word64" | "UWord64" => Some(RustType::I64),
+            "Float32" => Some(RustType::F32),
+            "Float64" => Some(RustType::F64),
+            "void" => Some(RustType::Unit),
+            _ => None,
+        }
+    }
+
+    fn to_rust_str(&self) -> &'static str {
+        match self {
+            RustType::I32 => "i32",
+            RustType::I64 => "i64",
+            RustType::F32 => "f32",
+            RustType::F64 => "f64",
+            RustType::Unit => "()",
+        }
+    }
+}
+
+/// Information about an immediate operand parameter.
+///
+/// Detected from C prototype parameter names like `Is16` (signed 16-bit),
+/// `Iu5` (unsigned 5-bit), `IU5` (unsigned 5-bit secondary), `Iu6_2`
+/// (unsigned 6-bit with 2-bit alignment).
+#[derive(Debug, Clone)]
+struct ImmediateInfo {
+    /// Whether this is a signed immediate
+    signed: bool,
+    /// Number of bits in the immediate field
+    bits: u32,
+}
+
+/// Arch guard for an intrinsic
+#[derive(Debug, Clone, PartialEq)]
+enum ArchGuard {
+    /// No guard (base v5/v55 intrinsics)
+    None,
+    /// `#if __HEXAGON_ARCH__ >= N`
+    Arch(u32),
+    /// `#if __HEXAGON_ARCH__ >= N && defined __HEXAGON_AUDIO__`
+    ArchAudio(u32),
+}
+
+impl ArchGuard {
+    /// Returns a doc comment describing the required architecture version,
+    /// or None if no specific version is needed.
+    fn requires_doc(&self) -> Option<String> {
+        match self {
+            ArchGuard::None => Option::None,
+            ArchGuard::Arch(ver) => Some(format!("Requires: V{}", ver)),
+            ArchGuard::ArchAudio(ver) => Some(format!("Requires: V{}, Audio", ver)),
+        }
+    }
+
+    /// Returns a `#[cfg_attr(target_arch = "hexagon", target_feature(enable = "..."))]`
+    /// attribute string, or None for base intrinsics that have no user-facing feature gate.
+    fn target_feature_attr(&self) -> Option<String> {
+        match self {
+            ArchGuard::None => None,
+            ArchGuard::Arch(ver) => Some(format!(
+                "#[cfg_attr(target_arch = \"hexagon\", target_feature(enable = \"v{}\"))]",
+                ver
+            )),
+            ArchGuard::ArchAudio(ver) => Some(format!(
+                "#[cfg_attr(target_arch = \"hexagon\", target_feature(enable = \"v{},audio\"))]",
+                ver
+            )),
+        }
+    }
+}
+
+/// Parsed scalar intrinsic information
+#[derive(Debug, Clone)]
+struct ScalarIntrinsic {
+    /// Q6 name (e.g., "Q6_R_add_RR")
+    q6_name: String,
+    /// Builtin suffix (e.g., "A2_add") - from __builtin_HEXAGON_A2_add
+    builtin_name: String,
+    /// Assembly syntax
+    asm_syntax: String,
+    /// Instruction type
+    instr_type: String,
+    /// Execution slots
+    exec_slots: String,
+    /// Return type
+    return_type: RustType,
+    /// Parameters (name, type, optional immediate info)
+    params: Vec<(String, RustType, Option<ImmediateInfo>)>,
+    /// Architecture guard
+    arch_guard: ArchGuard,
+}
+
+impl ScalarIntrinsic {
+    /// Generate the LLVM link name: A2_add -> llvm.hexagon.A2.add
+    fn llvm_link_name(&self) -> String {
+        format!("llvm.hexagon.{}", self.builtin_name.replace('_', "."))
+    }
+
+    /// Generate the Rust function name: Q6_R_add_RR -> Q6_R_add_RR
+    ///
+    /// We preserve the original case because the Q6 naming convention uses
+    /// case to distinguish register types:
+    /// - `P` (uppercase) = 64-bit register pair (Word64)
+    /// - `p` (lowercase) = predicate register (Byte)
+    fn rust_fn_name(&self) -> String {
+        self.q6_name.clone()
+    }
+
+    /// Generate the extern function name: A2_add -> hexagon_A2_add
+    fn extern_fn_name(&self) -> String {
+        format!("hexagon_{}", self.builtin_name)
+    }
+}
+
+/// Read the local header file
+fn read_header(crate_dir: &Path) -> Result<String, String> {
+    let header_path = crate_dir.join(HEADER_FILE);
+    println!("Reading scalar header from: {}", header_path.display());
+    println!("  (LLVM version: {})", LLVM_VERSION);
+
+    std::fs::read_to_string(&header_path).map_err(|e| {
+        format!(
+            "Failed to read header file {}: {}",
+            header_path.display(),
+            e
+        )
+    })
+}
+
+/// Detect whether a C parameter name represents an immediate operand.
+///
+/// C prototype parameter names follow the pattern `I[usUS]\d+` for immediates:
+/// - `Is16` → signed 16-bit
+/// - `Iu5` → unsigned 5-bit
+/// - `IS8` → signed 8-bit (secondary)
+/// - `IU5` → unsigned 5-bit (secondary)
+/// - `Iu6_2` → unsigned 6-bit (with alignment suffix)
+fn detect_immediate(original_name: &str, imm_re: &Regex) -> Option<ImmediateInfo> {
+    imm_re.captures(original_name).map(|caps| {
+        let sign_char = &caps[1];
+        let bits: u32 = caps[2].parse().unwrap();
+        ImmediateInfo {
+            signed: sign_char == "s" || sign_char == "S",
+            bits,
+        }
+    })
+}
+
+/// Parse a C function prototype to extract return type and parameters
+fn parse_prototype(
+    prototype: &str,
+    proto_re: &Regex,
+    param_re: &Regex,
+    imm_re: &Regex,
+) -> Option<(RustType, Vec<(String, RustType, Option<ImmediateInfo>)>)> {
+    if let Some(caps) = proto_re.captures(prototype) {
+        let return_type_str = caps[1].trim();
+        let params_str = &caps[2];
+
+        // Skip if return type is unknown
+        let return_type = RustType::from_c_type(return_type_str)?;
+
+        let mut params = Vec::new();
+        if !params_str.trim().is_empty() {
+            let mut name_counts: HashMap<String, u32> = HashMap::new();
+            for param in params_str.split(',') {
+                let param = param.trim();
+                if let Some(pcaps) = param_re.captures(param) {
+                    let ptype_str = &pcaps[1];
+                    let original_name = &pcaps[2];
+                    let base_name = original_name.to_lowercase();
+
+                    // Skip intrinsics with void* or void** params
+                    if ptype_str.contains("void") {
+                        return None;
+                    }
+
+                    if let Some(ptype) = RustType::from_c_type(ptype_str) {
+                        // Detect immediate operands from the original C name
+                        let imm_info = detect_immediate(original_name, imm_re);
+
+                        // De-duplicate parameter names by appending a suffix
+                        let count = name_counts.entry(base_name.clone()).or_insert(0);
+                        *count += 1;
+                        let pname = if *count > 1 {
+                            format!("{}_{}", base_name, count)
+                        } else {
+                            base_name
+                        };
+                        params.push((pname, ptype, imm_info));
+                    } else {
+                        return None; // Unknown type
+                    }
+                }
+            }
+        }
+
+        Some((return_type, params))
+    } else {
+        None
+    }
+}
+
+/// Parse the header file to extract all scalar intrinsics
+fn parse_header(content: &str) -> Vec<ScalarIntrinsic> {
+    let mut intrinsics = Vec::new();
+
+    // Pre-compile all regexes once
+    let arch_guard_re = Regex::new(r"#if __HEXAGON_ARCH__ >= (\d+)(.*)").unwrap();
+    let q6_define_re = Regex::new(r"#define\s+(Q6_\w+)\s+__builtin_HEXAGON_(\w+)").unwrap();
+    let proto_re = Regex::new(r"(\w+)\s+Q6_\w+\(([^)]*)\)").unwrap();
+    let param_re = Regex::new(r"(\w+\*{0,2})\s+(\w+)").unwrap();
+    let imm_re = Regex::new(r"^I([uUsS])(\d+)").unwrap();
+
+    let lines: Vec<&str> = content.lines().collect();
+    let mut current_guard = ArchGuard::None;
+    let mut i = 0;
+
+    while i < lines.len() {
+        let line = lines[i].trim();
+
+        // Track #if guards
+        if let Some(caps) = arch_guard_re.captures(line) {
+            let arch_ver: u32 = caps[1].parse().unwrap_or(0);
+            let rest = &caps[2];
+            if rest.contains("__HEXAGON_AUDIO__") {
+                current_guard = ArchGuard::ArchAudio(arch_ver);
+            } else {
+                current_guard = ArchGuard::Arch(arch_ver);
+            }
+            i += 1;
+            continue;
+        }
+
+        // Track #endif to reset guard
+        if line.starts_with("#endif")
+            && !line.contains("__HEXAGON_PROTOS_H_")
+            && !line.contains("__HVX__")
+        {
+            current_guard = ArchGuard::None;
+            i += 1;
+            continue;
+        }
+
+        // Look for comment blocks with Assembly Syntax
+        if line.contains("Assembly Syntax:") {
+            let mut asm_syntax = String::new();
+            let mut prototype = String::new();
+            let mut instr_type = String::new();
+            let mut exec_slots = String::new();
+
+            // Parse the comment block
+            let mut j = i;
+            while j < lines.len() && !lines[j].trim().starts_with("#define") {
+                let cline = lines[j];
+                if cline.contains("Assembly Syntax:") {
+                    if let Some(pos) = cline.find("Assembly Syntax:") {
+                        asm_syntax = cline[pos + 16..].trim().to_string();
+                    }
+                } else if cline.contains("C Intrinsic Prototype:") {
+                    if let Some(pos) = cline.find("C Intrinsic Prototype:") {
+                        prototype = cline[pos + 22..].trim().to_string();
+                    }
+                } else if cline.contains("Instruction Type:") {
+                    if let Some(pos) = cline.find("Instruction Type:") {
+                        instr_type = cline[pos + 17..].trim().to_string();
+                    }
+                } else if cline.contains("Execution Slots:") {
+                    if let Some(pos) = cline.find("Execution Slots:") {
+                        exec_slots = cline[pos + 16..].trim().to_string();
+                    }
+                }
+                j += 1;
+            }
+
+            // Find the #define line
+            while j < lines.len() && !lines[j].trim().starts_with("#define") {
+                j += 1;
+            }
+
+            if j < lines.len() {
+                let define_line = lines[j];
+
+                if let Some(caps) = q6_define_re.captures(define_line) {
+                    let q6_name = caps[1].to_string();
+                    let builtin_name = caps[2].to_string();
+
+                    // Parse the C prototype
+                    if let Some((return_type, params)) =
+                        parse_prototype(&prototype, &proto_re, &param_re, &imm_re)
+                    {
+                        intrinsics.push(ScalarIntrinsic {
+                            q6_name,
+                            builtin_name,
+                            asm_syntax,
+                            instr_type,
+                            exec_slots,
+                            return_type,
+                            params,
+                            arch_guard: current_guard.clone(),
+                        });
+                    }
+                }
+            }
+            i = j + 1;
+            continue;
+        }
+
+        i += 1;
+    }
+
+    intrinsics
+}
+
+/// Generate the module documentation
+fn generate_module_doc() -> String {
+    r#"//! Hexagon scalar intrinsics
+//!
+//! This module provides intrinsics for scalar (non-HVX) Hexagon DSP operations,
+//! including arithmetic, multiply, shift, saturate, compare, and floating-point
+//! operations.
+//!
+//! [Hexagon V68 Programmer's Reference Manual](https://docs.qualcomm.com/doc/80-N2040-45)
+//!
+//! ## Naming Convention
+//!
+//! Function names preserve the original Q6 naming case because the convention
+//! uses case to distinguish register types:
+//! - `P` (uppercase) = 64-bit register pair (`Word64`)
+//! - `p` (lowercase) = predicate register (`Byte`)
+//!
+//! For example, `Q6_P_and_PP` operates on 64-bit pairs while `Q6_p_and_pp`
+//! operates on predicate registers.
+//!
+//! ## Architecture Versions
+//!
+//! Most scalar intrinsics are available on all Hexagon architectures.
+//! Some intrinsics require specific architecture versions (v60, v62, v65,
+//! v66, v67, v68, or v67+audio) and carry
+//! `#[target_feature(enable = "v68")]` (or the appropriate version).
+//! Enable these with `-C target-feature=+v68` or by setting the target CPU
+//! via `-C target-cpu=hexagonv68`.
+//!
+//! Each version includes all features from previous versions.
+
+#![allow(non_snake_case)]
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+"#
+    .to_string()
+}
+
+/// Generate the extern block with LLVM intrinsic declarations
+fn generate_extern_block(intrinsics: &[ScalarIntrinsic]) -> String {
+    let mut output = String::new();
+
+    output.push_str("// LLVM intrinsic declarations for Hexagon scalar operations\n");
+    output.push_str("#[allow(improper_ctypes)]\n");
+    output.push_str("unsafe extern \"unadjusted\" {\n");
+
+    for info in intrinsics {
+        let link_name = info.llvm_link_name();
+        let fn_name = info.extern_fn_name();
+
+        let params_str = if info.params.is_empty() {
+            String::new()
+        } else {
+            info.params
+                .iter()
+                .map(|(_, t, _)| format!("_: {}", t.to_rust_str()))
+                .collect::<Vec<_>>()
+                .join(", ")
+        };
+
+        let return_str = if info.return_type == RustType::Unit {
+            String::new()
+        } else {
+            format!(" -> {}", info.return_type.to_rust_str())
+        };
+
+        output.push_str(&format!(
+            "    #[link_name = \"{}\"]\n    fn {}({}){return_str};\n",
+            link_name, fn_name, params_str
+        ));
+    }
+
+    output.push_str("}\n");
+    output
+}
+
+/// Generate wrapper functions for all intrinsics
+fn generate_functions(intrinsics: &[ScalarIntrinsic]) -> String {
+    let mut output = String::new();
+
+    for info in intrinsics {
+        let rust_name = info.rust_fn_name();
+        let extern_name = info.extern_fn_name();
+
+        // Collect immediate parameter info: (original_index, const_name, ImmediateInfo)
+        let imm_params: Vec<(usize, String, &ImmediateInfo)> = info
+            .params
+            .iter()
+            .enumerate()
+            .filter_map(|(i, (name, _, imm))| imm.as_ref().map(|im| (i, name.to_uppercase(), im)))
+            .collect();
+
+        // Doc comment
+        output.push_str(&format!("/// `{}`\n", info.asm_syntax));
+        output.push_str("///\n");
+        output.push_str(&format!("/// Instruction Type: {}\n", info.instr_type));
+        output.push_str(&format!("/// Execution Slots: {}\n", info.exec_slots));
+        if let Some(req) = info.arch_guard.requires_doc() {
+            output.push_str(&format!("/// {}\n", req));
+        }
+
+        // Attributes
+        output.push_str("#[inline(always)]\n");
+        if let Some(tf_attr) = info.arch_guard.target_feature_attr() {
+            output.push_str(&format!("{}\n", tf_attr));
+        }
+
+        // Immediate parameters become const generics but are passed as positional
+        // arguments at the call site: Q6_R_add_RI(rs, 42) rather than Q6_R_add_RI::<42>(rs).
+        // This matches the assembly syntax where the immediate is an operand.
+        if !imm_params.is_empty() {
+            let indices: Vec<String> = imm_params.iter().map(|(i, _, _)| i.to_string()).collect();
+            output.push_str(&format!(
+                "#[rustc_legacy_const_generics({})]\n",
+                indices.join(", ")
+            ));
+        }
+
+        if let Some(instr) = extract_instr_name(&info.asm_syntax) {
+            if imm_params.is_empty() {
+                output.push_str(&format!("#[cfg_attr(test, assert_instr({}))]\n", instr));
+            } else {
+                // Provide default values for const generics in assert_instr
+                let defaults: Vec<String> = imm_params
+                    .iter()
+                    .map(|(_, name, _)| format!("{} = 0", name))
+                    .collect();
+                output.push_str(&format!(
+                    "#[cfg_attr(test, assert_instr({}, {}))]\n",
+                    instr,
+                    defaults.join(", ")
+                ));
+            }
+        }
+
+        output.push_str(&format!(
+            "#[unstable(feature = \"stdarch_hexagon\", issue = \"{}\")]\n",
+            TRACKING_ISSUE
+        ));
+
+        // Function signature: regular params exclude immediates, const generics added
+        let regular_params_str = info
+            .params
+            .iter()
+            .filter(|(_, _, imm)| imm.is_none())
+            .map(|(name, ty, _)| format!("{}: {}", name, ty.to_rust_str()))
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        let return_str = if info.return_type == RustType::Unit {
+            String::new()
+        } else {
+            format!(" -> {}", info.return_type.to_rust_str())
+        };
+
+        if imm_params.is_empty() {
+            output.push_str(&format!(
+                "pub unsafe fn {}({}){} {{\n",
+                rust_name, regular_params_str, return_str
+            ));
+        } else {
+            let const_generics: Vec<String> = imm_params
+                .iter()
+                .map(|(_, name, imm)| {
+                    let ty = if imm.signed { "i32" } else { "u32" };
+                    format!("const {}: {}", name, ty)
+                })
+                .collect();
+            output.push_str(&format!(
+                "pub unsafe fn {}<{}>({}){} {{\n",
+                rust_name,
+                const_generics.join(", "),
+                regular_params_str,
+                return_str
+            ));
+        }
+
+        // Function body: static assertions then call
+        for (_, const_name, imm_info) in &imm_params {
+            if imm_info.signed {
+                output.push_str(&format!(
+                    "    static_assert_simm_bits!({}, {});\n",
+                    const_name, imm_info.bits
+                ));
+            } else {
+                output.push_str(&format!(
+                    "    static_assert_uimm_bits!({}, {});\n",
+                    const_name, imm_info.bits
+                ));
+            }
+        }
+
+        // Call args: use original order, using const generic names for immediates.
+        // Unsigned const generics (u32) need a cast to i32 for the extern call.
+        let args_str = info
+            .params
+            .iter()
+            .map(|(name, _, imm)| match imm {
+                Some(info) if !info.signed => format!("{} as i32", name.to_uppercase()),
+                Some(_) => name.to_uppercase(),
+                None => name.clone(),
+            })
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        output.push_str(&format!("    {}({})\n", extern_name, args_str));
+        output.push_str("}\n\n");
+    }
+
+    output
+}
+
+/// Generate the complete scalar.rs file
+fn generate_scalar_file(intrinsics: &[ScalarIntrinsic], output_path: &Path) -> Result<(), String> {
+    let mut output =
+        File::create(output_path).map_err(|e| format!("Failed to create output: {}", e))?;
+
+    writeln!(output, "{}", generate_module_doc()).map_err(|e| e.to_string())?;
+    writeln!(output, "").map_err(|e| e.to_string())?;
+    writeln!(output, "{}", generate_extern_block(intrinsics)).map_err(|e| e.to_string())?;
+    writeln!(output, "{}", generate_functions(intrinsics)).map_err(|e| e.to_string())?;
+
+    // Flush before running rustfmt
+    drop(output);
+
+    // Run rustfmt on the generated file
+    let status = std::process::Command::new("rustfmt")
+        .arg(output_path)
+        .status()
+        .map_err(|e| format!("Failed to run rustfmt: {}", e))?;
+
+    if !status.success() {
+        return Err("rustfmt failed".to_string());
+    }
+
+    Ok(())
+}
+
+fn main() -> Result<(), String> {
+    println!("=== Hexagon Scalar Code Generator ===\n");
+
+    let crate_dir = std::env::var("CARGO_MANIFEST_DIR")
+        .map(std::path::PathBuf::from)
+        .unwrap_or_else(|_| std::env::current_dir().unwrap());
+
+    let header_content = read_header(&crate_dir)?;
+    println!("Read {} bytes", header_content.len());
+
+    let intrinsics = parse_header(&header_content);
+    println!("Parsed {} scalar intrinsics", intrinsics.len());
+
+    let hexagon_dir = crate_dir.join("../core_arch/src/hexagon");
+    let scalar_path = hexagon_dir.join("scalar.rs");
+
+    generate_scalar_file(&intrinsics, &scalar_path)?;
+    println!("Generated scalar.rs at {}", scalar_path.display());
+
+    Ok(())
+}
diff --git a/crates/stdarch-gen-hexagon/Cargo.toml b/crates/stdarch-gen-hexagon/Cargo.toml
new file mode 100644
index 0000000000..397c7816f8
--- /dev/null
+++ b/crates/stdarch-gen-hexagon/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "stdarch-gen-hexagon"
+version = "0.1.0"
+authors = ["The Rust Project Developers"]
+license = "MIT OR Apache-2.0"
+edition = "2021"
+
+[dependencies]
+regex = "1.10"
diff --git a/crates/stdarch-gen-hexagon/hvx_hexagon_protos.h b/crates/stdarch-gen-hexagon/hvx_hexagon_protos.h
new file mode 100644
index 0000000000..19309a40d6
--- /dev/null
+++ b/crates/stdarch-gen-hexagon/hvx_hexagon_protos.h
@@ -0,0 +1,6003 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//===----------------------------------------------------------------------===//
+
+
+#ifndef _HVX_HEXAGON_PROTOS_H_
+#define _HVX_HEXAGON_PROTOS_H_ 1
+
+#ifdef __HVX__
+#if __HVX_LENGTH__ == 128
+#define __BUILTIN_VECTOR_WRAP(a) a ## _128B
+#else
+#define __BUILTIN_VECTOR_WRAP(a) a
+#endif
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Rd32=vextract(Vu32,Rs32)
+   C Intrinsic Prototype: Word32 Q6_R_vextract_VR(HVX_Vector Vu, Word32 Rs)
+   Instruction Type:      LD
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_R_vextract_VR(Vu,Rs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw)(Vu,Rs)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=hi(Vss32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_hi_W(HVX_VectorPair Vss)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_hi_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi)(Vss)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=lo(Vss32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_lo_W(HVX_VectorPair Vss)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_lo_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo)(Vss)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vsplat(Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vsplat_R(Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw)(Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=and(Qs4,Qt4)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_and_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=and(Qs4,!Qt4)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_and_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_and_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=not(Qs4)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_not_Q(HVX_VectorPred Qs)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_not_Q(Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1))),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=or(Qs4,Qt4)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_or_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=or(Qs4,!Qt4)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_or_QQn(HVX_VectorPred Qs, HVX_VectorPred Qt)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_or_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vsetq(Rt32)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq_R(Word32 Rt)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vsetq_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2)(Rt)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=xor(Qs4,Qt4)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_xor_QQ(HVX_VectorPred Qs, HVX_VectorPred Qt)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_xor_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (!Qv4) vmem(Rt32+#s4)=Vs32
+   C Intrinsic Prototype: void Q6_vmem_QnRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs)
+   Instruction Type:      CVI_VM_ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vmem_QnRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (!Qv4) vmem(Rt32+#s4):nt=Vs32
+   C Intrinsic Prototype: void Q6_vmem_QnRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs)
+   Instruction Type:      CVI_VM_ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vmem_QnRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (Qv4) vmem(Rt32+#s4):nt=Vs32
+   C Intrinsic Prototype: void Q6_vmem_QRIV_nt(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs)
+   Instruction Type:      CVI_VM_ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vmem_QRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (Qv4) vmem(Rt32+#s4)=Vs32
+   C Intrinsic Prototype: void Q6_vmem_QRIV(HVX_VectorPred Qv, HVX_Vector* Rt, HVX_Vector Vs)
+   Instruction Type:      CVI_VM_ST
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vmem_QRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vabsdiff(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuh_vabsdiff_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vabsdiff(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vabsdiff_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vub_vabsdiff_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vabsdiff(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vabsdiff_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuh_vabsdiff_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vabsdiff(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vabsdiff_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuw_vabsdiff_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vabs(Vu32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh(HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vabs_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vabs(Vu32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vabs_Vh_sat(HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vabs_Vh_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vabs(Vu32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw(HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vabs_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vabs(Vu32.w):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vabs_Vw_sat(HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vabs_Vw_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vadd(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vadd_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vadd_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.b=vadd(Vuu32.b,Vvv32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wb_vadd_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (!Qv4) Vx32.b+=Vu32.b
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_condacc_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (Qv4) Vx32.b+=Vu32.b
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_condacc_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_condacc_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vadd(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vadd(Vuu32.h,Vvv32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vadd_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (!Qv4) Vx32.h+=Vu32.h
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_condacc_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (Qv4) Vx32.h+=Vu32.h
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_condacc_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_condacc_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vadd(Vu32.h,Vv32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vadd_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vadd(Vuu32.h,Vvv32.h):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vadd_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vadd(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vadd(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vadd_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vadd_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vadd(Vu32.ub,Vv32.ub):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vadd_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.ub=vadd(Vuu32.ub,Vvv32.ub):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vadd_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wub_vadd_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vadd(Vu32.uh,Vv32.uh):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vadd_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vadd_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uh=vadd(Vuu32.uh,Vvv32.uh):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vadd_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wuh_vadd_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vadd(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vadd_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vadd(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vadd_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vadd(Vuu32.w,Vvv32.w)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Ww_vadd_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (!Qv4) Vx32.w+=Vu32.w
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_condacc_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (Qv4) Vx32.w+=Vu32.w
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_condacc_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_condacc_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vadd(Vu32.w,Vv32.w):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vadd_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vadd(Vuu32.w,Vvv32.w):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vadd_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Ww_vadd_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=valign(Vu32,Vv32,Rt8)
+   C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_valign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=valign(Vu32,Vv32,#u3)
+   C Intrinsic Prototype: HVX_Vector Q6_V_valign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_valign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi)(Vu,Vv,Iu3)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vand(Vu32,Vv32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vand_VV(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vand_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vand(Qu4,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vand_QR(HVX_VectorPred Qu, Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vand_QR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32|=vand(Qu4,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQR(HVX_Vector Vx, HVX_VectorPred Qu, Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vandor_VQR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vand(Vu32,Rt32)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vand_VR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Q_vand_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)(Vu,Rt)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vand(Vu32,Rt32)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vandor_QVR(HVX_VectorPred Qx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Q_vandor_QVR(Qx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Rt)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vasl(Vu32.h,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vasl_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vasl(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vasl_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vasl_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vasl(Vu32.w,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vasl_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vasl(Vu32.w,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vaslacc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vaslacc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vasl(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vasl_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vasl_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vasr(Vu32.h,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vasr_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vasr(Vu32.h,Vv32.h,Rt8):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vasr(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vasr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vasr(Vu32.w,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vasr_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vasr(Vu32.w,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vasracc_VwVwR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vasracc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vasr(Vu32.w,Vv32.w,Rt8)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vasr_VwVwR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vasr(Vu32.w,Vv32.w,Rt8):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VwVwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vasr(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vasr_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vasr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=Vu32
+   C Intrinsic Prototype: HVX_Vector Q6_V_equals_V(HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_equals_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32=Vuu32
+   C Intrinsic Prototype: HVX_VectorPair Q6_W_equals_W(HVX_VectorPair Vuu)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_W_equals_W(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp)(Vuu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vavg(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vavg(Vu32.h,Vv32.h):rnd
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vavg_VhVh_rnd(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vavg_VhVh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vavg(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vavg(Vu32.ub,Vv32.ub):rnd
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vavg_VubVub_rnd(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vavg_VubVub_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vavg(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vavg_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vavg(Vu32.uh,Vv32.uh):rnd
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vavg_VuhVuh_rnd(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vavg_VuhVuh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vavg(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vavg(Vu32.w,Vv32.w):rnd
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vavg_VwVw_rnd(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vavg_VwVw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vcl0(Vu32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcl0_Vuh(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vcl0_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vcl0(Vu32.uw)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vcl0_Vuw(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuw_vcl0_Vuw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32=vcombine(Vu32,Vv32)
+   C Intrinsic Prototype: HVX_VectorPair Q6_W_vcombine_VV(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_W_vcombine_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=#0
+   C Intrinsic Prototype: HVX_Vector Q6_V_vzero()
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0)()
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vdeal(Vu32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeal_Vb(HVX_Vector Vu)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vdeal_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vdeale(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vdeale_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vdeale_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vdeal(Vu32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vdeal_Vh(HVX_Vector Vu)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vdeal_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32=vdeal(Vu32,Vv32,Rt8)
+   C Intrinsic Prototype: HVX_VectorPair Q6_W_vdeal_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_W_vdeal_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vdelta(Vu32,Vv32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vdelta_VV(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vdmpy(Vu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpy_VubRb(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vdmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.h+=vdmpy(Vu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vdmpyacc_VhVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vdmpyacc_VhVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vdmpy(Vuu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vdmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h+=vdmpy(Vuu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vdmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vdmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vdmpy(Vu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRb(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpy_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vdmpy(Vu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpyacc_VwVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vdmpy(Vuu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vdmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vdmpy(Vuu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vdmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vdmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vdmpy(Vuu32.h,Rt32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRh_sat(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpy_WhRh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vdmpy(Vuu32.h,Rt32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpyacc_VwWhRh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc)(Vx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vdmpy(Vu32.h,Rt32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRh_sat(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpy_VhRh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpyacc_VwVhRh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vdmpy(Vuu32.h,Rt32.uh,#1):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_WhRuh_sat(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpy_WhRuh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vdmpy(Vuu32.h,Rt32.uh,#1):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwWhRuh_sat(HVX_Vector Vx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpyacc_VwWhRuh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc)(Vx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRuh_sat(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpy_VhRuh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpyacc_VwVhRuh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vdmpy(Vu32.h,Vv32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpy_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vdmpy(Vu32.h,Vv32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhVh_sat(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vdmpyacc_VwVhVh_sat(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uw=vdsad(Vuu32.uh,Rt32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsad_WuhRuh(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vdsad_WuhRuh(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.uw+=vdsad(Vuu32.uh,Rt32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vdsadacc_WuwWuhRuh(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vdsadacc_WuwWuhRuh(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.eq(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eq_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.eq(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.eq(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.eq(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.eq(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eq_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.eq(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.eq(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.eq(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.eq(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eq_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.eq(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.eq(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.eq(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.gt(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gt_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.gt(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.gt(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.gt(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVbVb(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.gt(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gt_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.gt(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.gt(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.gt(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhVh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.gt(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gt_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.gt(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtand_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.gt(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtor_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.gt(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVubVub(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtxacc_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.gt(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gt_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.gt(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtand_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.gt(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtor_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.gt(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuhVuh(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtxacc_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.gt(Vu32.uw,Vv32.uw)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VuwVuw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gt_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.gt(Vu32.uw,Vv32.uw)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtand_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.gt(Vu32.uw,Vv32.uw)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtor_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.gt(Vu32.uw,Vv32.uw)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVuwVuw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtxacc_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.gt(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gt_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.gt(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.gt(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.gt(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVwVw(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w=vinsert(Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vinsert_VwR(HVX_Vector Vx, Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vinsert_VwR(Vx,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr)(Vx,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vlalign(Vu32,Vv32,Rt8)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vlalign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vlalign(Vu32,Vv32,#u3)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vlalign_VVI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vlalign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi)(Vu,Vv,Iu3)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vlsr(Vu32.uh,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vlsr_VuhR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vlsr_VuhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vlsr(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vlsr_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vlsr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vlsr(Vu32.uw,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vlsr_VuwR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuw_vlsr_VuwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vlsr(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vlsr_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vlsr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vlut32_VbVbR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.b|=vlut32(Vu32.b,Vv32.b,Rt8)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbR(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vlut32or_VbVbVbR(Vx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc)(Vx,Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vlut16_VbVhR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h|=vlut16(Vu32.b,Vv32.h,Rt8)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhR(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vlut16or_WhVbVhR(Vxx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc)(Vxx,Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vmax(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmax_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vmax_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vmax(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vmax_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vmax_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vmax(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmax_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vmax_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmax(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmax_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vmax_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vmin(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmin_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vmin_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vmin(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vmin_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vmin_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vmin(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmin_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vmin_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmin(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmin_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vmin_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vmpa(Vuu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubRb(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpa_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h+=vmpa(Vuu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpaacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vmpa(Vuu32.ub,Vvv32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpa_WubWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vmpa(Vuu32.ub,Vvv32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubWub(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpa_WubWub(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vmpa(Vuu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WhRb(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpa_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vmpa(Vuu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpaacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vmpy(Vu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubRb(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h+=vmpy(Vu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubRb(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpyacc_WhVubRb(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc)(Vxx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vmpy(Vu32.ub,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h+=vmpy(Vu32.ub,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVubVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpyacc_WhVubVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vmpy(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h+=vmpy(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpyacc_WhVbVb(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpyacc_WhVbVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmpye(Vu32.w,Vv32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vmpy(Vu32.h,Rt32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhRh(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpy_VhRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vmpy(Vu32.h,Rt32.h):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh_sat(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpyacc_WwVhRh_sat(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc)(Vxx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_rnd_sat(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vmpy_VhRh_s1_rnd_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_sat(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vmpy_VhRh_s1_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vmpy(Vu32.h,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpy_VhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vmpy(Vu32.h,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpyacc_WwVhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vmpy(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpy_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpy_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vmpy(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpyacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vmpy_VhVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmpyieo(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieo_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyieo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vmpyie(Vu32.w,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyieacc_VwVwVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmpyie(Vu32.w,Vv32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyie_VwVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyie_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vmpyie(Vu32.w,Vv32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyieacc_VwVwVuh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyieacc_VwVwVuh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vmpyi(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vmpyi_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.h+=vmpyi(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhVh(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vmpyiacc_VhVhVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vmpyi(Vu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyi_VhRb(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vmpyi_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.h+=vmpyi(Vu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpyiacc_VhVhRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vmpyiacc_VhVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmpyio(Vu32.w,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyio_VwVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyio_VwVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmpyi(Vu32.w,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRb(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyi_VwRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vmpyi(Vu32.w,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyiacc_VwVwRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmpyi(Vu32.w,Rt32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRh(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyi_VwRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vmpyi(Vu32.w,Rt32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyiacc_VwVwRh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyo_VwVh_s1_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyo_VwVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyo_VwVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:rnd:sat:shift
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vmpyo(Vu32.w,Vv32.h):<<1:sat:shift
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uh=vmpy(Vu32.ub,Rt32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubRub(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuh_vmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.uh+=vmpy(Vu32.ub,Rt32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubRub(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuh_vmpyacc_WuhVubRub(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc)(Vxx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uh=vmpy(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuh_vmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.uh+=vmpy(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vmpyacc_WuhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuh_vmpyacc_WuhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uw=vmpy(Vu32.uh,Rt32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhRuh(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vmpy_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.uw+=vmpy(Vu32.uh,Rt32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhRuh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vmpyacc_WuwVuhRuh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc)(Vxx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uw=vmpy(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpy_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vmpy_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.uw+=vmpy(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vmpyacc_WuwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vmpyacc_WuwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vmux(Qt4,Vu32,Vv32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vmux_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vmux_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vnavg(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vnavg_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vnavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vnavg(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vnavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vnavg(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vnavg_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vnavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vnormamt(Vu32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vnormamt_Vh(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vnormamt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vnormamt(Vu32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vnormamt_Vw(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vnormamt_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vnot(Vu32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vnot_V(HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vnot_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vor(Vu32,Vv32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vor_VV(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vpacke(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacke_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vpacke_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vpacke(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacke_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vpacke_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vpack(Vu32.h,Vv32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vpack(Vu32.h,Vv32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vpack_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vpacko(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vpacko_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vpacko_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vpacko(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vpacko_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vpacko_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vpack(Vu32.w,Vv32.w):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vpack(Vu32.w,Vv32.w):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vpack_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vpopcount(Vu32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vpopcount_Vh(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vpopcount_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vrdelta(Vu32,Vv32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vrdelta_VV(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vrdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vrmpy(Vu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubRb(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vrmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vrmpy(Vu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubRb(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vrmpyacc_VwVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vrmpy(Vuu32.ub,Rt32.b,#u1)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpy_WubRbI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vrmpy_WubRbI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi)(Vuu,Rt,Iu1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vrmpy(Vuu32.ub,Rt32.b,#u1)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vrmpyacc_WwWubRbI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vrmpyacc_WwWubRbI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc)(Vxx,Vuu,Rt,Iu1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vrmpy(Vu32.ub,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VubVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vrmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vrmpy(Vu32.ub,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vrmpyacc_VwVubVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vrmpy(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpy_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vrmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vrmpy(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVbVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vrmpyacc_VwVbVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vrmpy(Vu32.ub,Rt32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubRub(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuw_vrmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.uw+=vrmpy(Vu32.ub,Rt32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubRub(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuw_vrmpyacc_VuwVubRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uw=vrmpy(Vuu32.ub,Rt32.ub,#u1)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpy_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vrmpy_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi)(Vuu,Rt,Iu1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.uw+=vrmpy(Vuu32.ub,Rt32.ub,#u1)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrmpyacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vrmpyacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc)(Vxx,Vuu,Rt,Iu1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vrmpy(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpy_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuw_vrmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubVub(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuw_vrmpyacc_VuwVubVub(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vror(Vu32,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vror_VR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vror_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vround(Vu32.h,Vv32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vround(Vu32.h,Vv32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vround(Vu32.w,Vv32.w):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vround(Vu32.w,Vv32.w):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uw=vrsad(Vuu32.ub,Rt32.ub,#u1)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsad_WubRubI(HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vrsad_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi)(Vuu,Rt,Iu1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.uw+=vrsad(Vuu32.ub,Rt32.ub,#u1)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vrsadacc_WuwWubRubI(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt, Word32 Iu1)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wuw_vrsadacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc)(Vxx,Vuu,Rt,Iu1)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vsat(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vsat_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vsat_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vsat(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vsat_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vsat_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vsxt(Vu32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsxt_Vb(HVX_Vector Vu)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vsxt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vsxt(Vu32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsxt_Vh(HVX_Vector Vu)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Ww_vsxt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vshuffe(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffe_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vshuffe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vshuff(Vu32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuff_Vb(HVX_Vector Vu)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vshuff_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vshuffe(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffe_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vshuffe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vshuff(Vu32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuff_Vh(HVX_Vector Vu)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vshuff_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vshuffo(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vshuffo_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vshuffo_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32=vshuff(Vu32,Vv32,Rt8)
+   C Intrinsic Prototype: HVX_VectorPair Q6_W_vshuff_VVR(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_W_vshuff_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.b=vshuffoe(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vshuffoe_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wb_vshuffoe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vshuffoe(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vshuffoe_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vshuffoe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vshuffo(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vshuffo_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vshuffo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vsub(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vsub_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.b=vsub(Vuu32.b,Vvv32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wb_vsub_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (!Qv4) Vx32.b-=Vu32.b
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QnVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_condnac_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (Qv4) Vx32.b-=Vu32.b
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_condnac_QVbVb(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_condnac_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vsub(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vsub(Vuu32.h,Vvv32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vsub_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (!Qv4) Vx32.h-=Vu32.h
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QnVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_condnac_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (Qv4) Vx32.h-=Vu32.h
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_condnac_QVhVh(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_condnac_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vsub(Vu32.h,Vv32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vsub_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vsub_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vsub(Vuu32.h,Vvv32.h):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_WhWh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vsub_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vsub(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vsub(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vsub_VubVub(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vsub_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vsub(Vu32.ub,Vv32.ub):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVub_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vsub_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.ub=vsub(Vuu32.ub,Vvv32.ub):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wub_vsub_WubWub_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wub_vsub_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vsub(Vu32.uh,Vv32.uh):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vsub_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vsub_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uh=vsub(Vuu32.uh,Vvv32.uh):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vsub_WuhWuh_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wuh_vsub_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vsub(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_VuhVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vsub_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vsub(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vsub_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vsub(Vuu32.w,Vvv32.w)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Ww_vsub_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (!Qv4) Vx32.w-=Vu32.w
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QnVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_condnac_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       if (Qv4) Vx32.w-=Vu32.w
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_condnac_QVwVw(HVX_VectorPred Qv, HVX_Vector Vx, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_condnac_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vsub(Vu32.w,Vv32.w):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVw_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vsub_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vsub(Vuu32.w,Vvv32.w):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vsub_WwWw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Ww_vsub_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32=vswap(Qt4,Vu32,Vv32)
+   C Intrinsic Prototype: HVX_VectorPair Q6_W_vswap_QVV(HVX_VectorPred Qt, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_W_vswap_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vtmpy(Vuu32.b,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WbRb(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vtmpy_WbRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h+=vtmpy(Vuu32.b,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWbRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vtmpyacc_WhWbRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vtmpy(Vuu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpy_WubRb(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vtmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h+=vtmpy(Vuu32.ub,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vtmpyacc_WhWubRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vtmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vtmpy(Vuu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpy_WhRb(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vtmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vtmpy(Vuu32.h,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vtmpyacc_WwWhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vtmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vunpack(Vu32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpack_Vb(HVX_Vector Vu)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vunpack_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vunpack(Vu32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpack_Vh(HVX_Vector Vu)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Ww_vunpack_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h|=vunpacko(Vu32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vunpackoor_WhVb(HVX_VectorPair Vxx, HVX_Vector Vu)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vunpackoor_WhVb(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob)(Vxx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w|=vunpacko(Vu32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vunpackoor_WwVh(HVX_VectorPair Vxx, HVX_Vector Vu)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Ww_vunpackoor_WwVh(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh)(Vxx,Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uh=vunpack(Vu32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vunpack_Vub(HVX_Vector Vu)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wuh_vunpack_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uw=vunpack(Vu32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vunpack_Vuh(HVX_Vector Vu)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wuw_vunpack_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vxor(Vu32,Vv32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vxor_VV(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vxor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uh=vzxt(Vu32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuh_vzxt_Vub(HVX_Vector Vu)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wuh_vzxt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 60
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uw=vzxt(Vu32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vzxt_Vuh(HVX_Vector Vu)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wuw_vzxt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 60 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vsplat(Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vsplat_R(Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vb_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb)(Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vsplat(Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vsplat_R(Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath)(Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vsetq2(Rt32)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vsetq2_R(Word32 Rt)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vsetq2_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2)(Rt)),-1)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Qd4.b=vshuffe(Qs4.h,Qt4.h)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Qb_vshuffe_QhQh(HVX_VectorPred Qs, HVX_VectorPred Qt)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Qb_vshuffe_QhQh(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Qd4.h=vshuffe(Qs4.w,Qt4.w)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Qh_vshuffe_QwQw(HVX_VectorPred Qs, HVX_VectorPred Qt)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Qh_vshuffe_QwQw(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vadd(Vu32.b,Vv32.b):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vadd_VbVb_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vadd_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.b=vadd(Vuu32.b,Vvv32.b):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vadd_WbWb_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wb_vadd_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vadd(Vu32.w,Vv32.w,Qx4):carry
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVwQ_carry(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred* Qx)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vadd_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry)(Vu,Vv,Qx)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vadd(vclb(Vu32.h),Vv32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vadd_vclb_VhVh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vadd_vclb_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vadd(vclb(Vu32.w),Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_vclb_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vadd_vclb_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vadd(Vu32.h,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vaddacc_WwVhVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vaddacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h+=vadd(Vu32.ub,Vv32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vaddacc_WhVubVub(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vaddacc_WhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vadd(Vu32.ub,Vv32.b):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vadd_VubVb_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vadd_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vadd(Vu32.uh,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vaddacc_WwVuhVuh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vaddacc_WwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vadd(Vu32.uw,Vv32.uw):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vadd_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuw_vadd_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uw=vadd(Vuu32.uw,Vvv32.uw):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vadd_WuwWuw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wuw_vadd_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vand(!Qu4,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vand_QnR(HVX_VectorPred Qu, Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vand_QnR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vx32|=vand(!Qu4,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vandor_VQnR(HVX_Vector Vx, HVX_VectorPred Qu, Word32 Rt)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vandor_VQnR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vand(!Qv4,Vu32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vand_QnV(HVX_VectorPred Qv, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vand_QnV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vand(Qv4,Vu32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vand_QV(HVX_VectorPred Qv, HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vand_QV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vasr(Vu32.h,Vv32.h,Rt8):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vasr_VhVhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VuwVuwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vasr_VuwVuwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vasr(Vu32.w,Vv32.w,Rt8):rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VwVwR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vlsr(Vu32.ub,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vlsr_VubR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vlsr_VubR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vlut32(Vu32.b,Vv32.b,Rt8):nomatch
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbR_nomatch(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vlut32_VbVbR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vx32.b|=vlut32(Vu32.b,Vv32.b,#u3)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32or_VbVbVbI(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vlut32or_VbVbVbI(Vx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci)(Vx,Vu,Vv,Iu3)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vlut32(Vu32.b,Vv32.b,#u3)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vlut32_VbVbI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3)
+   Instruction Type:      CVI_VP
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vlut32_VbVbI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi)(Vu,Vv,Iu3)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vlut16(Vu32.b,Vv32.h,Rt8):nomatch
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhR_nomatch(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vlut16_VbVhR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h|=vlut16(Vu32.b,Vv32.h,#u3)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16or_WhVbVhI(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vlut16or_WhVbVhI(Vxx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci)(Vxx,Vu,Vv,Iu3)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vlut16(Vu32.b,Vv32.h,#u3)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vlut16_VbVhI(HVX_Vector Vu, HVX_Vector Vv, Word32 Iu3)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wh_vlut16_VbVhI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi)(Vu,Vv,Iu3)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vmax(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vmax_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vmax_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vmin(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vmin_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vmin_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=vmpa(Vuu32.uh,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpa_WuhRb(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpa_WuhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vmpa(Vuu32.uh,Rt32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpaacc_WwWuhRb(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpaacc_WwWuhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vdd32=vmpye(Vu32.w,Vv32.uh)
+   C Intrinsic Prototype: HVX_VectorPair Q6_W_vmpye_VwVuh(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_W_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vmpyi(Vu32.w,Rt32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyi_VwRub(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyi_VwRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vx32.w+=vmpyi(Vu32.w,Rt32.ub)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vmpyiacc_VwVwRub(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vmpyiacc_VwVwRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vxx32+=vmpyo(Vu32.w,Vv32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_W_vmpyoacc_WVwVh(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_W_vmpyoacc_WVwVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vround(Vu32.uh,Vv32.uh):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vround_VuhVuh_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vround_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vround(Vu32.uw,Vv32.uw):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vround_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vround_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vsat(Vu32.uw,Vv32.uw)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vsat_VuwVuw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vsat_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vsub(Vu32.b,Vv32.b):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vsub_VbVb_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vsub_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.b=vsub(Vuu32.b,Vvv32.b):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wb_vsub_WbWb_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wb_vsub_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vsub(Vu32.w,Vv32.w,Qx4):carry
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vsub_VwVwQ_carry(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred* Qx)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vsub_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry)(Vu,Vv,Qx)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vsub(Vu32.ub,Vv32.b):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vsub_VubVb_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vsub_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vsub(Vu32.uw,Vv32.uw):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vsub_VuwVuw_sat(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuw_vsub_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 62
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.uw=vsub(Vuu32.uw,Vvv32.uw):sat
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wuw_vsub_WuwWuw_sat(HVX_VectorPair Vuu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_VA_DV
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wuw_vsub_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv)(Vuu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 62 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vabs(Vu32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vabs_Vb(HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vabs_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vabs(Vu32.b):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vabs_Vb_sat(HVX_Vector Vu)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vabs_Vb_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vx32.h+=vasl(Vu32.h,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vaslacc_VhVhR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vaslacc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vx32.h+=vasr(Vu32.h,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vasracc_VhVhR(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_vasracc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VuhVuhR_rnd_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vasr_VuhVuhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vasr(Vu32.uh,Vv32.uh,Rt8):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_VuhVuhR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vasr_VuhVuhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vasr(Vu32.uw,Vv32.uw,Rt8):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_VuwVuwR_sat(HVX_Vector Vu, HVX_Vector Vv, Word32 Rt)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vasr_VuwVuwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat)(Vu,Vv,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vavg(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vavg_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vavg(Vu32.b,Vv32.b):rnd
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vavg_VbVb_rnd(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vavg_VbVb_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vavg(Vu32.uw,Vv32.uw)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vavg_VuwVuw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuw_vavg_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vavg(Vu32.uw,Vv32.uw):rnd
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vavg_VuwVuw_rnd(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuw_vavg_VuwVuw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vdd32=#0
+   C Intrinsic Prototype: HVX_VectorPair Q6_W_vzero()
+   Instruction Type:      MAPPING
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_W_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0)()
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       vtmp.h=vgather(Rt32,Mu2,Vv32.h).h
+   C Intrinsic Prototype: void Q6_vgather_ARMVh(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_Vector Vv)
+   Instruction Type:      CVI_GATHER
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_vgather_ARMVh(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh)(Rs,Rt,Mu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vv32.h).h
+   C Intrinsic Prototype: void Q6_vgather_AQRMVh(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv)
+   Instruction Type:      CVI_GATHER
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_vgather_AQRMVh(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h
+   C Intrinsic Prototype: void Q6_vgather_ARMWw(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_GATHER_DV
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_vgather_ARMWw(Rs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw)(Rs,Rt,Mu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       if (Qs4) vtmp.h=vgather(Rt32,Mu2,Vvv32.w).h
+   C Intrinsic Prototype: void Q6_vgather_AQRMWw(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv)
+   Instruction Type:      CVI_GATHER_DV
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_vgather_AQRMWw(Rs,Qs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       vtmp.w=vgather(Rt32,Mu2,Vv32.w).w
+   C Intrinsic Prototype: void Q6_vgather_ARMVw(HVX_Vector* Rs, Word32 Rt, Word32 Mu, HVX_Vector Vv)
+   Instruction Type:      CVI_GATHER
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_vgather_ARMVw(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw)(Rs,Rt,Mu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       if (Qs4) vtmp.w=vgather(Rt32,Mu2,Vv32.w).w
+   C Intrinsic Prototype: void Q6_vgather_AQRMVw(HVX_Vector* Rs, HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv)
+   Instruction Type:      CVI_GATHER
+   Execution Slots:       SLOT01
+   ========================================================================== */
+
+#define Q6_vgather_AQRMVw(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vlut4(Vu32.uh,Rtt32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vlut4_VuhPh(HVX_Vector Vu, Word64 Rtt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT2
+   ========================================================================== */
+
+#define Q6_Vh_vlut4_VuhPh(Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4)(Vu,Rtt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.h=vmpa(Vuu32.ub,Rt32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpa_WubRub(HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpa_WubRub(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu)(Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.h+=vmpa(Vuu32.ub,Rt32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wh_vmpaacc_WhWubRub(HVX_VectorPair Vxx, HVX_VectorPair Vuu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wh_vmpaacc_WhWubRub(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc)(Vxx,Vuu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vx32.h=vmpa(Vx32.h,Vu32.h,Rtt32.h):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpa_VhVhVhPh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT2
+   ========================================================================== */
+
+#define Q6_Vh_vmpa_VhVhVhPh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat)(Vx,Vu,Rtt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vx32.h=vmpa(Vx32.h,Vu32.uh,Rtt32.uh):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpa_VhVhVuhPuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT2
+   ========================================================================== */
+
+#define Q6_Vh_vmpa_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat)(Vx,Vu,Rtt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vx32.h=vmps(Vx32.h,Vu32.uh,Rtt32.uh):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vmps_VhVhVuhPuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word64 Rtt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT2
+   ========================================================================== */
+
+#define Q6_Vh_vmps_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat)(Vx,Vu,Rtt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=vmpy(Vu32.h,Rt32.h)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vmpyacc_WwVhRh(HVX_VectorPair Vxx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_vmpyacc_WwVhRh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc)(Vxx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vmpye(Vu32.uh,Rt32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vmpye_VuhRuh(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuw_vmpye_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe)(Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vx32.uw+=vmpye(Vu32.uh,Rt32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vmpyeacc_VuwVuhRuh(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuw_vmpyeacc_VuwVuhRuh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc)(Vx,Vu,Rt)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vnavg(Vu32.b,Vv32.b)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vnavg_VbVb(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_vnavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=prefixsum(Qv4)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_prefixsum_Q(HVX_VectorPred Qv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vb_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1))
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=prefixsum(Qv4)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_prefixsum_Q(HVX_VectorPred Qv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1))
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=prefixsum(Qv4)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_prefixsum_Q(HVX_VectorPred Qv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1))
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       vscatter(Rt32,Mu2,Vv32.h).h=Vw32
+   C Intrinsic Prototype: void Q6_vscatter_RMVhV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw)
+   Instruction Type:      CVI_SCATTER
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vscatter_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh)(Rt,Mu,Vv,Vw)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       vscatter(Rt32,Mu2,Vv32.h).h+=Vw32
+   C Intrinsic Prototype: void Q6_vscatteracc_RMVhV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw)
+   Instruction Type:      CVI_SCATTER
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vscatteracc_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add)(Rt,Mu,Vv,Vw)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       if (Qs4) vscatter(Rt32,Mu2,Vv32.h).h=Vw32
+   C Intrinsic Prototype: void Q6_vscatter_QRMVhV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw)
+   Instruction Type:      CVI_SCATTER
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vscatter_QRMVhV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       vscatter(Rt32,Mu2,Vvv32.w).h=Vw32
+   C Intrinsic Prototype: void Q6_vscatter_RMWwV(Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw)
+   Instruction Type:      CVI_SCATTER_DV
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vscatter_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw)(Rt,Mu,Vvv,Vw)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       vscatter(Rt32,Mu2,Vvv32.w).h+=Vw32
+   C Intrinsic Prototype: void Q6_vscatteracc_RMWwV(Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw)
+   Instruction Type:      CVI_SCATTER_DV
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vscatteracc_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add)(Rt,Mu,Vvv,Vw)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       if (Qs4) vscatter(Rt32,Mu2,Vvv32.w).h=Vw32
+   C Intrinsic Prototype: void Q6_vscatter_QRMWwV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_VectorPair Vvv, HVX_Vector Vw)
+   Instruction Type:      CVI_SCATTER_DV
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vscatter_QRMWwV(Qs,Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv,Vw)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       vscatter(Rt32,Mu2,Vv32.w).w=Vw32
+   C Intrinsic Prototype: void Q6_vscatter_RMVwV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw)
+   Instruction Type:      CVI_SCATTER
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vscatter_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw)(Rt,Mu,Vv,Vw)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       vscatter(Rt32,Mu2,Vv32.w).w+=Vw32
+   C Intrinsic Prototype: void Q6_vscatteracc_RMVwV(Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw)
+   Instruction Type:      CVI_SCATTER
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vscatteracc_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add)(Rt,Mu,Vv,Vw)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 65
+/* ==========================================================================
+   Assembly Syntax:       if (Qs4) vscatter(Rt32,Mu2,Vv32.w).w=Vw32
+   C Intrinsic Prototype: void Q6_vscatter_QRMVwV(HVX_VectorPred Qs, Word32 Rt, Word32 Mu, HVX_Vector Vv, HVX_Vector Vw)
+   Instruction Type:      CVI_SCATTER
+   Execution Slots:       SLOT0
+   ========================================================================== */
+
+#define Q6_vscatter_QRMVwV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw)
+#endif /* __HEXAGON_ARCH___ >= 65 */
+
+#if __HVX_ARCH__ >= 66
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vadd(Vu32.w,Vv32.w,Qs4):carry:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vadd_VwVwQ_carry_sat(HVX_Vector Vu, HVX_Vector Vv, HVX_VectorPred Qs)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vadd_VwVwQ_carry_sat(Vu,Vv,Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat)(Vu,Vv,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1))
+#endif /* __HEXAGON_ARCH___ >= 66 */
+
+#if __HVX_ARCH__ >= 66
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w=vasrinto(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_vasrinto_WwVwVw(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Ww_vasrinto_WwVwVw(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 66 */
+
+#if __HVX_ARCH__ >= 66
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uw=vrotr(Vu32.uw,Vv32.uw)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrotr_VuwVuw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuw_vrotr_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 66 */
+
+#if __HVX_ARCH__ >= 66
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vsatdw(Vu32.w,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vsatdw_VwVw(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vsatdw_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 66 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):h
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpy_WubWbI_h(HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_v6mpy_WubWbI_h(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10)(Vuu,Vvv,Iu2)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):h
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpyacc_WwWubWbI_h(HVX_VectorPair Vxx, HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_v6mpyacc_WwWubWbI_h(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx)(Vxx,Vuu,Vvv,Iu2)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.w=v6mpy(Vuu32.ub,Vvv32.b,#u2):v
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpy_WubWbI_v(HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_v6mpy_WubWbI_v(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10)(Vuu,Vvv,Iu2)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.w+=v6mpy(Vuu32.ub,Vvv32.b,#u2):v
+   C Intrinsic Prototype: HVX_VectorPair Q6_Ww_v6mpyacc_WwWubWbI_v(HVX_VectorPair Vxx, HVX_VectorPair Vuu, HVX_VectorPair Vvv, Word32 Iu2)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Ww_v6mpyacc_WwWubWbI_v(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx)(Vxx,Vuu,Vvv,Iu2)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vabs(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vabs_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vabs_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vabs(Vu32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vabs_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vsf_vabs_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vadd(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vadd(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vadd_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vadd(Vu32.qf16,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vadd_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16_mix)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vadd_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vadd(Vu32.qf32,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vadd_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32_mix)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vadd(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.sf=vadd(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wsf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vadd(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vsf_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vfmv(Vu32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vfmv_Vw(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vw_vfmv_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign_fp)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=Vu32.qf16
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vhf_equals_Vqf16(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=Vuu32.qf32
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Wqf32(HVX_VectorPair Vuu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vhf_equals_Wqf32(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf32)(Vuu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=Vu32.qf32
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_equals_Vqf32(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vsf_equals_Vqf32(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_qf32)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vcvt(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vb_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_b_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=vcvt(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_vcvt_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_h_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.hf=vcvt(Vu32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vb(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Whf_vcvt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_b)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vcvt(Vu32.h)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vh(HVX_Vector Vu)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vcvt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_h)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vcvt(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vcvt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.hf=vcvt(Vu32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vub(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Whf_vcvt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_ub)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vcvt(Vu32.uh)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vuh(HVX_Vector Vu)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vcvt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_uh)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.sf=vcvt(Vu32.hf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vcvt_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wsf_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_sf_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vcvt(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vub_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_ub_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vcvt(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcvt_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_uh_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vdmpy(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vsf_vdmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpyacc_VsfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vsf_vdmpyacc_VsfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vfmax(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vfmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vfmax(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vsf_vfmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vfmin(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vfmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vfmin(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vsf_vfmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vfneg(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfneg_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vfneg_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vfneg(Vu32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfneg_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vsf_vfneg_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.gt(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.gt(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtand_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.gt(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtor_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.gt(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtxacc_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.gt(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf)(Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.gt(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtand_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.gt(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtor_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.gt(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtxacc_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vmax(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vhf_vmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vmax(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vsf_vmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vmin(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vhf_vmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vmin(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VA
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vsf_vmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vmpy(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vx32.hf+=vmpy(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpyacc_VhfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vmpyacc_VhfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf_acc)(Vx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vqf16_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vmpy(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vqf16_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vqf16_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vqf32_vmpy_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wqf32_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wqf32_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wqf32_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_qf16)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vmpy(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vqf32_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.sf=vmpy(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wsf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVhfVhf(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wsf_vmpyacc_WsfVhfVhf(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf_acc)(Vxx,Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vmpy(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vsf_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vsub(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=vsub(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vhf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vsub_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vsub(Vu32.qf16,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vsub_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16_mix)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vsub_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vsub(Vu32.qf32,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vsub_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32_mix)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vsub(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.sf=vsub(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wsf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_hf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 68
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=vsub(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vsf_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_sf)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 68 */
+
+#if __HVX_ARCH__ >= 69
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vasr(Vuu32.uh,Vv32.ub):rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vasr_WuhVub_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubrndsat)(Vuu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 69 */
+
+#if __HVX_ARCH__ >= 69
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vasr(Vuu32.uh,Vv32.ub):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_sat(HVX_VectorPair Vuu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vub_vasr_WuhVub_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubsat)(Vuu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 69 */
+
+#if __HVX_ARCH__ >= 69
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vasr(Vuu32.w,Vv32.uh):rnd:sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vasr_WwVuh_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhrndsat)(Vuu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 69 */
+
+#if __HVX_ARCH__ >= 69
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vasr(Vuu32.w,Vv32.uh):sat
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_sat(HVX_VectorPair Vuu, HVX_Vector Vv)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vuh_vasr_WwVuh_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhsat)(Vuu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 69 */
+
+#if __HVX_ARCH__ >= 69
+/* ==========================================================================
+   Assembly Syntax:       Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16
+   C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmpy_VuhVuh_rs16(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vuh_vmpy_VuhVuh_rs16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhvs)(Vu,Vv)
+#endif /* __HEXAGON_ARCH___ >= 69 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.sf=vadd(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VbfVbf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX_DV Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wsf_vadd_VbfVbf(Vu, Vv)                                             \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_bf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=Vu32.hf
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_equals_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_equals_Vhf(Vu)                                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_h_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vd32.hf=Vu32.h
+   C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Vh(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vhf_equals_Vh(Vu)                                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_h)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vd32.sf=Vu32.w
+   C Intrinsic Prototype: HVX_Vector Q6_Vsf_equals_Vw(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vsf_equals_Vw(Vu)                                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_w)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=Vu32.sf
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_equals_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_equals_Vsf(Vu)                                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_w_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vd32.bf=vcvt(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vbf_vcvt_VsfVsf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vbf_vcvt_VsfVsf(Vu, Vv)                                             \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_bf_sf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.gt(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VbfVbf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VA Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gt_VbfVbf(Vu, Vv)                                            \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)                          \
+  ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf)(Vu, Vv)), -1)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.gt(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVbfVbf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtand_QVbfVbf(Qx, Vu, Vv)                                    \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)                          \
+  ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf_and)(                     \
+       __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,      \
+       Vv)),                                                                   \
+   -1)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.gt(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVbfVbf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtor_QVbfVbf(Qx, Vu, Vv)                                     \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)                          \
+  ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf_or)(                      \
+       __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,      \
+       Vv)),                                                                   \
+   -1)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.gt(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVbfVbf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_gtxacc_QVbfVbf(Qx, Vu, Vv)                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)                          \
+  ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf_xor)(                     \
+       __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,      \
+       Vv)),                                                                   \
+   -1)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vd32.bf=vmax(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vbf_vmax_VbfVbf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX_LATE Execution Slots: SLOT23
+   ========================================================================== */
+
+#define Q6_Vbf_vmax_VbfVbf(Vu, Vv)                                             \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_bf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vd32.bf=vmin(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vbf_vmin_VbfVbf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX_LATE Execution Slots: SLOT23
+   ========================================================================== */
+
+#define Q6_Vbf_vmin_VbfVbf(Vu, Vv)                                             \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_bf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.sf=vmpy(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VbfVbf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX_DV Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wsf_vmpy_VbfVbf(Vu, Vv)                                             \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_bf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.sf+=vmpy(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVbfVbf(HVX_VectorPair
+   Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VX_DV Execution
+   Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wsf_vmpyacc_WsfVbfVbf(Vxx, Vu, Vv)                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_bf_acc)(Vxx, Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 73
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.sf=vsub(Vu32.bf,Vv32.bf)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VbfVbf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX_DV Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Wsf_vsub_VbfVbf(Vu, Vv)                                             \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_bf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 73 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vgetqfext(Vu32.x,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vgetqfext_VR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vgetqfext_VR(Vu, Rt)                                              \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_get_qfext)(Vu, Rt)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vx32|=vgetqfext(Vu32.x,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vgetqfextor_VVR(HVX_Vector Vx,
+   HVX_Vector Vu, Word32 Rt) Instruction Type:      CVI_VX Execution Slots:
+   SLOT23
+   ========================================================================== */
+
+#define Q6_V_vgetqfextor_VVR(Vx, Vu, Rt)                                       \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_get_qfext_oracc)(Vx, Vu, Rt)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.x=vsetqfext(Vu32,Rt32)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vsetqfext_VR(HVX_Vector Vu, Word32 Rt)
+   Instruction Type:      CVI_VX
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vsetqfext_VR(Vu, Rt)                                              \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_set_qfext)(Vu, Rt)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.f8=vabs(Vu32.f8)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vabs_V(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vabs_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_f8)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.hf=vadd(Vu32.f8,Vv32.f8)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vadd_VV(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX_DV Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Whf_vadd_VV(Vu, Vv)                                                 \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_f8)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.b=vcvt2(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt2_VhfVhf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vb_vcvt2_VhfVhf(Vu, Vv)                                             \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_b_hf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.hf=vcvt2(Vu32.b)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt2_Vb(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Whf_vcvt2_Vb(Vu)                                                    \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_hf_b)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.hf=vcvt2(Vu32.ub)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt2_Vub(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Whf_vcvt2_Vub(Vu)                                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_hf_ub)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.ub=vcvt2(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt2_VhfVhf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vub_vcvt2_VhfVhf(Vu, Vv)                                            \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_ub_hf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.f8=vcvt(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector
+   Vv) Instruction Type:      CVI_VX Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vcvt_VhfVhf(Vu, Vv)                                               \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_f8_hf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.hf=vcvt(Vu32.f8)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_V(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_DV
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Whf_vcvt_V(Vu)                                                      \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_f8)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.f8=vfmax(Vu32.f8,Vv32.f8)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vfmax_VV(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vfmax_VV(Vu, Vv)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_f8)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.f8=vfmin(Vu32.f8,Vv32.f8)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vfmin_VV(HVX_Vector Vu, HVX_Vector Vv)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vfmin_VV(Vu, Vv)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_f8)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.f8=vfneg(Vu32.f8)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vfneg_V(HVX_Vector Vu)
+   Instruction Type:      CVI_VX_LATE
+   Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_V_vfneg_V(Vu)                                                       \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_f8)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32=vmerge(Vu32.x,Vv32.w)
+   C Intrinsic Prototype: HVX_Vector Q6_V_vmerge_VVw(HVX_Vector Vu, HVX_Vector
+   Vv) Instruction Type:      CVI_VS Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_vmerge_VVw(Vu, Vv)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmerge_qf)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.hf=vmpy(Vu32.f8,Vv32.f8)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vmpy_VV(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX_DV Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Whf_vmpy_VV(Vu, Vv)                                                 \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_f8)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vxx32.hf+=vmpy(Vu32.f8,Vv32.f8)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vmpyacc_WhfVV(HVX_VectorPair
+   Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VX_DV Execution
+   Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Whf_vmpyacc_WhfVV(Vxx, Vu, Vv)                                      \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_f8_acc)(Vxx, Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vmpy(Vu32.hf,Rt32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_VhfRhf(HVX_Vector Vu, Word32
+   Rt) Instruction Type:      CVI_VX_DV Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vqf16_vmpy_VhfRhf(Vu, Rt)                                           \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_hf)(Vu, Rt)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vmpy(Vu32.qf16,Rt32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Rhf(HVX_Vector Vu,
+   Word32 Rt) Instruction Type:      CVI_VX_DV Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vqf16_vmpy_Vqf16Rhf(Vu, Rt)                                         \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_qf16)(Vu, Rt)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vmpy(Vu32.sf,Rt32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_VsfRsf(HVX_Vector Vu, Word32
+   Rt) Instruction Type:      CVI_VX_DV Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Vqf32_vmpy_VsfRsf(Vu, Rt)                                           \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_sf)(Vu, Rt)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 79
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.hf=vsub(Vu32.f8,Vv32.f8)
+   C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vsub_VV(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VX_DV Execution Slots:       SLOT23
+   ========================================================================== */
+
+#define Q6_Whf_vsub_VV(Vu, Vv)                                                 \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_f8)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 79 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vabs(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vabs_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vabs_Vhf(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf16_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vabs(Vu32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vabs_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vabs_Vqf16(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf16_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vabs(Vu32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vabs_Vqf32(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vabs_Vqf32(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf32_qf32)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vabs(Vu32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vabs_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vabs_Vsf(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_qf32_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32=valign4(Vu32,Vv32,Rt8)
+   C Intrinsic Prototype: HVX_Vector Q6_V_valign4_VVR(HVX_Vector Vu, HVX_Vector
+   Vv, Word32 Rt) Instruction Type:      CVI_VA Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_valign4_VVR(Vu, Vv, Rt)                                           \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valign4)(Vu, Vv, Rt)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.bf=Vuu32.qf32
+   C Intrinsic Prototype: HVX_Vector Q6_Vbf_equals_Wqf32(HVX_VectorPair Vuu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vbf_equals_Wqf32(Vuu)                                               \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_bf_qf32)(Vuu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.f8=Vu32.qf16
+   C Intrinsic Prototype: HVX_Vector Q6_V_equals_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_V_equals_Vqf16(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_f8_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.h=Vu32.hf:rnd
+   C Intrinsic Prototype: HVX_Vector Q6_Vh_equals_Vhf_rnd(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vh_equals_Vhf_rnd(Vu)                                               \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_h_hf_rnd)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vdd32.qf16=Vu32.f8
+   C Intrinsic Prototype: HVX_VectorPair Q6_Wqf16_equals_V(HVX_Vector Vu)
+   Instruction Type:      CVI_VP_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Wqf16_equals_V(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_f8)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=Vu32.hf
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_equals_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_equals_Vhf(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=Vu32.qf16
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_equals_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_equals_Vqf16(Vu)                                              \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf16_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=Vu32.qf32
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_equals_Vqf32(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_equals_Vqf32(Vu)                                              \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf32_qf32)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=Vu32.sf
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_equals_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_equals_Vsf(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_qf32_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.eq(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VhfVhf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VA Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eq_VhfVhf(Vu, Vv)                                            \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf)(Vu, Vv)), -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.eq(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVhfVhf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqand_QVhfVhf(Qx, Vu, Vv)                                    \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_and)(                  \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.eq(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVhfVhf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqor_QVhfVhf(Qx, Vu, Vv)                                     \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_or)(                   \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.eq(Vu32.hf,Vv32.hf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVhfVhf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqxacc_QVhfVhf(Qx, Vu, Vv)                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqhf_xor)(                  \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qd4=vcmp.eq(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eq_VsfVsf(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VA Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eq_VsfVsf(Vu, Vv)                                            \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf)(Vu, Vv)), -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4&=vcmp.eq(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqand_QVsfVsf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqand_QVsfVsf(Qx, Vu, Vv)                                    \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_and)(                  \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4|=vcmp.eq(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqor_QVsfVsf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqor_QVsfVsf(Qx, Vu, Vv)                                     \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_or)(                   \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Qx4^=vcmp.eq(Vu32.sf,Vv32.sf)
+   C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_eqxacc_QVsfVsf(HVX_VectorPred
+   Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type:      CVI_VA Execution
+   Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Q_vcmp_eqxacc_QVsfVsf(Qx, Vu, Vv)                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(                         \
+      (__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqsf_xor)(                  \
+          __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu,   \
+          Vv)),                                                                \
+      -1)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vilog2(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vilog2_Vhf(Vu)                                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vilog2(Vu32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vilog2_Vqf16(Vu)                                                 \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vilog2(Vu32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vqf32(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vilog2_Vqf32(Vu)                                                 \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_qf32)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.w=vilog2(Vu32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vw_vilog2_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vw_vilog2_Vsf(Vu)                                                   \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vilog2_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vneg(Vu32.hf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vneg_Vhf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vneg_Vhf(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf16_hf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vneg(Vu32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vneg_Vqf16(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vneg_Vqf16(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf16_qf16)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vneg(Vu32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vneg_Vqf32(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vneg_Vqf32(Vu)                                                \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf32_qf32)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vneg(Vu32.sf)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vneg_Vsf(HVX_Vector Vu)
+   Instruction Type:      CVI_VS
+   Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vneg_Vsf(Vu)                                                  \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vneg_qf32_sf)(Vu)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf16=vsub(Vu32.hf,Vv32.qf16)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_VhfVqf16(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VS Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf16_vsub_VhfVqf16(Vu, Vv)                                         \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_mix)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#if __HVX_ARCH__ >= 81
+/* ==========================================================================
+   Assembly Syntax:       Vd32.qf32=vsub(Vu32.sf,Vv32.qf32)
+   C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_VsfVqf32(HVX_Vector Vu,
+   HVX_Vector Vv) Instruction Type:      CVI_VS Execution Slots:       SLOT0123
+   ========================================================================== */
+
+#define Q6_Vqf32_vsub_VsfVqf32(Vu, Vv)                                         \
+  __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_mix)(Vu, Vv)
+#endif /* __HEXAGON_ARCH___ >= 81 */
+
+#endif /* __HVX__ */
+
+#endif
diff --git a/crates/stdarch-gen-hexagon/src/main.rs b/crates/stdarch-gen-hexagon/src/main.rs
new file mode 100644
index 0000000000..8a58c66313
--- /dev/null
+++ b/crates/stdarch-gen-hexagon/src/main.rs
@@ -0,0 +1,1724 @@
+//! Hexagon HVX Code Generator
+//!
+//! This generator creates v64.rs and v128.rs from scratch using the LLVM HVX
+//! header file as the sole source of truth. It parses the C intrinsic prototypes
+//! and generates Rust wrapper functions with appropriate attributes.
+//!
+//! The two generated files provide:
+//! - v64.rs: 64-byte vector mode intrinsics (512-bit vectors)
+//! - v128.rs: 128-byte vector mode intrinsics (1024-bit vectors)
+//!
+//! Both modules are available unconditionally, but require the appropriate
+//! target features to actually use the intrinsics.
+//!
+//! Usage:
+//!     cd crates/stdarch-gen-hexagon
+//!     cargo run
+//!     # Output is written to ../core_arch/src/hexagon/v64.rs and v128.rs
+
+use regex::Regex;
+use std::collections::{HashMap, HashSet};
+use std::fs::File;
+use std::io::Write;
+use std::path::Path;
+
+/// Mappings from HVX intrinsics to architecture-independent SIMD intrinsics.
+/// These intrinsics have equivalent semantics and can be lowered to the generic form.
+fn get_simd_intrinsic_mappings() -> HashMap<&'static str, &'static str> {
+    let mut map = HashMap::new();
+    // Bitwise operations (element-size independent)
+    map.insert("vxor", "simd_xor");
+    map.insert("vand", "simd_and");
+    map.insert("vor", "simd_or");
+    // Word (32-bit) arithmetic operations
+    map.insert("vaddw", "simd_add");
+    map.insert("vsubw", "simd_sub");
+    map
+}
+
+/// The tracking issue number for the stdarch_hexagon feature
+const TRACKING_ISSUE: &str = "151523";
+
+/// HVX vector length mode
+#[derive(Debug, Clone, Copy, PartialEq)]
+enum VectorMode {
+    /// 64-byte vectors (512 bits)
+    V64,
+    /// 128-byte vectors (1024 bits)
+    V128,
+}
+
+impl VectorMode {
+    fn bytes(&self) -> u32 {
+        match self {
+            VectorMode::V64 => 64,
+            VectorMode::V128 => 128,
+        }
+    }
+
+    fn bits(&self) -> u32 {
+        self.bytes() * 8
+    }
+
+    fn lanes(&self) -> u32 {
+        self.bytes() / 4 // 32-bit lanes
+    }
+
+    fn target_feature(&self) -> &'static str {
+        match self {
+            VectorMode::V64 => "hvx-length64b",
+            VectorMode::V128 => "hvx-length128b",
+        }
+    }
+}
+
+/// LLVM version the header file is from (for reference)
+/// Source: https://github.com/llvm/llvm-project/blob/llvmorg-22.1.0-rc1/clang/lib/Headers/hvx_hexagon_protos.h
+const LLVM_VERSION: &str = "22.1.0-rc1";
+
+/// Maximum HVX architecture version supported by rustc
+/// Check with: rustc --target=hexagon-unknown-linux-musl --print target-features
+const MAX_SUPPORTED_ARCH: u32 = 79;
+
+/// Local header file path (checked into the repository)
+const HEADER_FILE: &str = "hvx_hexagon_protos.h";
+
+/// Intrinsic information parsed from the LLVM header
+#[derive(Debug, Clone)]
+struct IntrinsicInfo {
+    /// The Q6_* intrinsic name (e.g., "Q6_V_vadd_VV")
+    q6_name: String,
+    /// The LLVM builtin name without prefix (e.g., "V6_vaddb")
+    builtin_name: String,
+    /// The short instruction name for assert_instr (e.g., "vaddb")
+    instr_name: String,
+    /// The assembly syntax from the comment
+    asm_syntax: String,
+    /// Instruction type
+    instr_type: String,
+    /// Execution slots
+    exec_slots: String,
+    /// Minimum HVX architecture version required
+    min_arch: u32,
+    /// Return type
+    return_type: RustType,
+    /// Parameters (name, type)
+    params: Vec<(String, RustType)>,
+    /// Whether this is a compound intrinsic (multiple builtins)
+    is_compound: bool,
+    /// For compound intrinsics: the parsed expression tree
+    compound_expr: Option<CompoundExpr>,
+}
+
+/// Expression tree for compound intrinsics
+#[derive(Debug, Clone)]
+enum CompoundExpr {
+    /// A call to a builtin: (builtin_name without V6_ prefix, arguments)
+    BuiltinCall(String, Vec<CompoundExpr>),
+    /// A parameter reference by name
+    Param(String),
+    /// An integer literal (like -1)
+    IntLiteral(i32),
+}
+
+/// Rust type mappings
+#[derive(Debug, Clone, PartialEq)]
+enum RustType {
+    HvxVector,
+    HvxVectorPair,
+    HvxVectorPred,
+    I32,
+    MutPtrHvxVector,
+    Unit,
+}
+
+impl RustType {
+    fn from_c_type(c_type: &str) -> Option<Self> {
+        match c_type.trim() {
+            "HVX_Vector" => Some(RustType::HvxVector),
+            "HVX_VectorPair" => Some(RustType::HvxVectorPair),
+            "HVX_VectorPred" => Some(RustType::HvxVectorPred),
+            "Word32" => Some(RustType::I32),
+            "HVX_Vector*" => Some(RustType::MutPtrHvxVector),
+            "void" => Some(RustType::Unit),
+            _ => None,
+        }
+    }
+
+    fn to_rust_str(&self) -> &'static str {
+        match self {
+            RustType::HvxVector => "HvxVector",
+            RustType::HvxVectorPair => "HvxVectorPair",
+            RustType::HvxVectorPred => "HvxVectorPred",
+            RustType::I32 => "i32",
+            RustType::MutPtrHvxVector => "*mut HvxVector",
+            RustType::Unit => "()",
+        }
+    }
+
+    fn to_extern_str(&self) -> &'static str {
+        match self {
+            RustType::HvxVector => "HvxVector",
+            RustType::HvxVectorPair => "HvxVectorPair",
+            RustType::HvxVectorPred => "HvxVectorPred",
+            RustType::I32 => "i32",
+            RustType::MutPtrHvxVector => "*mut HvxVector",
+            RustType::Unit => "()",
+        }
+    }
+}
+
+/// Parse a compound macro expression into an expression tree
+fn parse_compound_expr(expr: &str) -> Option<CompoundExpr> {
+    let expr = expr.trim();
+
+    // Try to match an integer literal (like -1)
+    if let Ok(n) = expr.parse::<i32>() {
+        return Some(CompoundExpr::IntLiteral(n));
+    }
+
+    // Try to match a simple parameter name (Vu, Vv, Rt, Qs, Qt, Qx, Vx, etc.)
+    // These are typically short identifiers in the macro
+    if expr.len() <= 3
+        && expr.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
+        && !expr.contains("__")
+    {
+        return Some(CompoundExpr::Param(expr.to_lowercase()));
+    }
+
+    // Check if it's wrapped in extra parens first
+    if expr.starts_with('(') && expr.ends_with(')') {
+        // Check if these parens wrap the entire expression
+        let inner = &expr[1..expr.len() - 1];
+        // Count depth: if after removing outer parens the expression is balanced,
+        // the outer parens were enclosing everything
+        if is_balanced_parens(inner) {
+            // But we also need to verify these aren't part of a function call
+            // If the inner expression is balanced and the whole thing starts with (
+            // and ends with ), it's a paren wrapper
+            let result = parse_compound_expr(inner);
+            if result.is_some() {
+                return result;
+            }
+        }
+    }
+
+    // Try to match __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_xxx)(args)
+    // The args portion may contain nested calls, so we need to find the matching paren
+    if expr.starts_with("__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_") {
+        // Find the end of the builtin name (after V6_)
+        let prefix = "__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_";
+        let after_prefix = &expr[prefix.len()..];
+        if let Some(paren_pos) = after_prefix.find(')') {
+            let builtin_name = &after_prefix[..paren_pos];
+            let rest = &after_prefix[paren_pos + 1..]; // Skip the closing ) of the WRAP
+                                                       // rest should now be "(args)"
+            if rest.starts_with('(') && rest.ends_with(')') {
+                let args_str = &rest[1..rest.len() - 1];
+                let args = parse_compound_args(args_str)?;
+                return Some(CompoundExpr::BuiltinCall(builtin_name.to_string(), args));
+            }
+        }
+    }
+
+    // Try to match __builtin_HEXAGON_V6_xxx(args) without wrap
+    if expr.starts_with("__builtin_HEXAGON_V6_") {
+        let prefix = "__builtin_HEXAGON_V6_";
+        let after_prefix = &expr[prefix.len()..];
+        if let Some(paren_pos) = after_prefix.find('(') {
+            let builtin_name = &after_prefix[..paren_pos];
+            let rest = &after_prefix[paren_pos..];
+            if rest.starts_with('(') && rest.ends_with(')') {
+                let args_str = &rest[1..rest.len() - 1];
+                let args = parse_compound_args(args_str)?;
+                return Some(CompoundExpr::BuiltinCall(builtin_name.to_string(), args));
+            }
+        }
+    }
+
+    None
+}
+
+/// Check if parentheses are balanced in a string
+fn is_balanced_parens(s: &str) -> bool {
+    let mut depth = 0;
+    for c in s.chars() {
+        match c {
+            '(' => depth += 1,
+            ')' => {
+                depth -= 1;
+                if depth < 0 {
+                    return false;
+                }
+            }
+            _ => {}
+        }
+    }
+    depth == 0
+}
+
+/// Parse comma-separated arguments, respecting nested parentheses
+fn parse_compound_args(args_str: &str) -> Option<Vec<CompoundExpr>> {
+    let mut args = Vec::new();
+    let mut current = String::new();
+    let mut depth = 0;
+
+    for c in args_str.chars() {
+        match c {
+            '(' => {
+                depth += 1;
+                current.push(c);
+            }
+            ')' => {
+                depth -= 1;
+                current.push(c);
+            }
+            ',' if depth == 0 => {
+                let arg = current.trim().to_string();
+                if !arg.is_empty() {
+                    args.push(parse_compound_expr(&arg)?);
+                }
+                current.clear();
+            }
+            _ => current.push(c),
+        }
+    }
+
+    // Don't forget the last argument
+    let arg = current.trim().to_string();
+    if !arg.is_empty() {
+        args.push(parse_compound_expr(&arg)?);
+    }
+
+    Some(args)
+}
+
+/// Extract all builtin names used in a compound expression
+fn collect_builtins_from_expr(expr: &CompoundExpr, builtins: &mut HashSet<String>) {
+    match expr {
+        CompoundExpr::BuiltinCall(name, args) => {
+            builtins.insert(name.clone());
+            for arg in args {
+                collect_builtins_from_expr(arg, builtins);
+            }
+        }
+        CompoundExpr::Param(_) | CompoundExpr::IntLiteral(_) => {}
+    }
+}
+
+/// Read the local HVX header file
+fn read_header(crate_dir: &Path) -> Result<String, String> {
+    let header_path = crate_dir.join(HEADER_FILE);
+    println!("Reading HVX header from: {}", header_path.display());
+    println!("  (LLVM version: {})", LLVM_VERSION);
+
+    std::fs::read_to_string(&header_path).map_err(|e| {
+        format!(
+            "Failed to read header file {}: {}",
+            header_path.display(),
+            e
+        )
+    })
+}
+
+/// Parse a C function prototype to extract return type and parameters
+fn parse_prototype(prototype: &str) -> Option<(RustType, Vec<(String, RustType)>)> {
+    // Pattern: ReturnType FunctionName(ParamType1 Param1, ParamType2 Param2, ...)
+    let proto_re = Regex::new(r"(\w+(?:\*)?)\s+Q6_\w+\(([^)]*)\)").unwrap();
+
+    if let Some(caps) = proto_re.captures(prototype) {
+        let return_type_str = caps[1].trim();
+        let params_str = &caps[2];
+
+        let return_type = RustType::from_c_type(return_type_str)?;
+
+        let mut params = Vec::new();
+        if !params_str.trim().is_empty() {
+            // Pattern: Type Name or Type* Name
+            let param_re = Regex::new(r"(\w+\*?)\s+(\w+)").unwrap();
+            for param in params_str.split(',') {
+                let param = param.trim();
+                if let Some(pcaps) = param_re.captures(param) {
+                    let ptype_str = pcaps[1].trim();
+                    let pname = pcaps[2].to_lowercase();
+                    if let Some(ptype) = RustType::from_c_type(ptype_str) {
+                        params.push((pname, ptype));
+                    } else {
+                        return None; // Unknown type
+                    }
+                }
+            }
+        }
+
+        Some((return_type, params))
+    } else {
+        None
+    }
+}
+
+/// Parse the LLVM header file to extract intrinsic information
+fn parse_header(content: &str) -> Vec<IntrinsicInfo> {
+    let mut intrinsics = Vec::new();
+
+    let arch_re = Regex::new(r"#if __HVX_ARCH__ >= (\d+)").unwrap();
+
+    // Regex to extract the simple builtin name from a macro body
+    // Match: __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_xxx)(args)
+    let simple_builtin_re =
+        Regex::new(r"__BUILTIN_VECTOR_WRAP\(__builtin_HEXAGON_(\w+)\)\([^)]*\)\s*$").unwrap();
+
+    // Also handle builtins without VECTOR_WRAP
+    let simple_builtin_re2 = Regex::new(r"__builtin_HEXAGON_(\w+)\([^)]*\)\s*$").unwrap();
+
+    // Regex to extract Q6 name from #define
+    let q6_name_re = Regex::new(r"#define\s+(Q6_\w+)").unwrap();
+
+    // Regex to extract macro expression body
+    let macro_expr_re = Regex::new(r"#define\s+Q6_\w+\([^)]*\)\s+(.+)").unwrap();
+
+    let lines: Vec<&str> = content.lines().collect();
+    let mut current_arch: u32 = 60;
+    let mut i = 0;
+
+    while i < lines.len() {
+        // Track architecture version
+        if let Some(caps) = arch_re.captures(lines[i]) {
+            if let Ok(arch) = caps[1].parse() {
+                current_arch = arch;
+            }
+        }
+
+        // Look for Assembly Syntax comment block
+        if lines[i].contains("Assembly Syntax:") {
+            let mut asm_syntax = String::new();
+            let mut prototype = String::new();
+            let mut instr_type = String::new();
+            let mut exec_slots = String::new();
+
+            // Parse the comment block
+            let mut j = i;
+            while j < lines.len() && !lines[j].starts_with("#define") {
+                let line = lines[j];
+                if line.contains("Assembly Syntax:") {
+                    if let Some(pos) = line.find("Assembly Syntax:") {
+                        asm_syntax = line[pos + 16..].trim().to_string();
+                    }
+                } else if line.contains("C Intrinsic Prototype:") {
+                    if let Some(pos) = line.find("C Intrinsic Prototype:") {
+                        prototype = line[pos + 22..].trim().to_string();
+                    }
+                } else if line.contains("Instruction Type:") {
+                    if let Some(pos) = line.find("Instruction Type:") {
+                        instr_type = line[pos + 17..].trim().to_string();
+                    }
+                } else if line.contains("Execution Slots:") {
+                    if let Some(pos) = line.find("Execution Slots:") {
+                        exec_slots = line[pos + 16..].trim().to_string();
+                    }
+                }
+                j += 1;
+            }
+
+            // Now find the #define line
+            while j < lines.len() && !lines[j].starts_with("#define") {
+                j += 1;
+            }
+
+            if j < lines.len() {
+                let define_line = lines[j];
+
+                // Extract Q6 name and check if it's simple or compound
+                if let Some(caps) = q6_name_re.captures(define_line) {
+                    let q6_name = caps[1].to_string();
+
+                    // Get the full macro body (handle line continuations)
+                    let mut macro_body = define_line.to_string();
+                    let mut k = j;
+                    while macro_body.trim_end().ends_with('\\') && k + 1 < lines.len() {
+                        k += 1;
+                        macro_body.push_str(lines[k]);
+                    }
+
+                    // Try to extract simple builtin name
+                    let builtin_name = simple_builtin_re
+                        .captures(&macro_body)
+                        .or_else(|| simple_builtin_re2.captures(&macro_body))
+                        .map(|bcaps| bcaps[1].to_string());
+
+                    // Check if it's a compound intrinsic (multiple __builtin calls)
+                    let builtin_count = macro_body.matches("__builtin_HEXAGON_").count();
+                    let is_compound = builtin_count > 1;
+
+                    // Parse prototype
+                    if let Some((return_type, params)) = parse_prototype(&prototype) {
+                        if is_compound {
+                            // For compound intrinsics, parse the expression
+                            // Extract the macro body after the parameter list
+                            if let Some(expr_caps) = macro_expr_re.captures(&macro_body) {
+                                let expr_str = expr_caps[1].trim().replace(['\n', '\\'], " ");
+                                let expr_str = expr_str.trim();
+
+                                if let Some(compound_expr) = parse_compound_expr(expr_str) {
+                                    // For compound intrinsics, we use the outermost builtin
+                                    // as the "primary" for the instruction name
+                                    let (primary_builtin, instr_name) = match &compound_expr {
+                                        CompoundExpr::BuiltinCall(name, _) => {
+                                            (name.clone(), name.clone())
+                                        }
+                                        _ => continue,
+                                    };
+
+                                    intrinsics.push(IntrinsicInfo {
+                                        q6_name,
+                                        builtin_name: format!("V6_{}", primary_builtin),
+                                        instr_name,
+                                        asm_syntax,
+                                        instr_type,
+                                        exec_slots,
+                                        min_arch: current_arch,
+                                        return_type,
+                                        params,
+                                        is_compound: true,
+                                        compound_expr: Some(compound_expr),
+                                    });
+                                }
+                            }
+                        } else if let Some(builtin) = builtin_name {
+                            // Extract short instruction name
+                            let instr_name = builtin
+                                .strip_prefix("V6_")
+                                .map(|s| s.to_string())
+                                .unwrap_or_else(|| builtin.clone());
+
+                            intrinsics.push(IntrinsicInfo {
+                                q6_name,
+                                builtin_name: builtin,
+                                instr_name,
+                                asm_syntax,
+                                instr_type,
+                                exec_slots,
+                                min_arch: current_arch,
+                                return_type,
+                                params,
+                                is_compound: false,
+                                compound_expr: None,
+                            });
+                        }
+                    }
+                }
+            }
+            i = j;
+        }
+        i += 1;
+    }
+
+    intrinsics
+}
+
+/// Generate the module documentation
+fn generate_module_doc(mode: VectorMode) -> String {
+    format!(
+        r#"//! Hexagon HVX {bytes}-byte vector mode intrinsics
+//!
+//! This module provides intrinsics for the Hexagon Vector Extensions (HVX)
+//! in {bytes}-byte vector mode ({bits}-bit vectors).
+//!
+//! HVX is a wide vector extension designed for high-performance signal processing.
+//! [Hexagon HVX Programmer's Reference Manual](https://docs.qualcomm.com/doc/80-N2040-61)
+//!
+//! ## Vector Types
+//!
+//! In {bytes}-byte mode:
+//! - `HvxVector` is {bits} bits ({bytes} bytes) containing {lanes} x 32-bit values
+//! - `HvxVectorPair` is {pair_bits} bits ({pair_bytes} bytes)
+//! - `HvxVectorPred` is {bits} bits ({bytes} bytes) for predicate operations
+//!
+//! To use this module, compile with `-C target-feature=+{target_feature}`.
+//!
+//! ## Naming Convention
+//!
+//! Function names preserve the original Q6 naming case because the convention
+//! uses case to distinguish register types:
+//! - `W` (uppercase) = vector pair (`HvxVectorPair`)
+//! - `V` (uppercase) = vector (`HvxVector`)
+//! - `Q` (uppercase) = predicate (`HvxVectorPred`)
+//! - `R` = scalar register (`i32`)
+//!
+//! For example, `Q6_W_vcombine_VV` operates on a vector pair while
+//! `Q6_V_hi_W` extracts a vector from a pair.
+//!
+//! ## Architecture Versions
+//!
+//! Different intrinsics require different HVX architecture versions. Use the
+//! appropriate target feature to enable the required version:
+//! - HVX v60: `-C target-feature=+hvxv60` (basic HVX operations)
+//! - HVX v62: `-C target-feature=+hvxv62`
+//! - HVX v65: `-C target-feature=+hvxv65` (includes floating-point support)
+//! - HVX v66: `-C target-feature=+hvxv66`
+//! - HVX v68: `-C target-feature=+hvxv68`
+//! - HVX v69: `-C target-feature=+hvxv69`
+//! - HVX v73: `-C target-feature=+hvxv73`
+//! - HVX v79: `-C target-feature=+hvxv79`
+//!
+//! Each version includes all features from previous versions.
+"#,
+        bytes = mode.bytes(),
+        bits = mode.bits(),
+        lanes = mode.lanes(),
+        pair_bytes = mode.bytes() * 2,
+        pair_bits = mode.bits() * 2,
+        target_feature = mode.target_feature(),
+    )
+}
+
+/// Generate the type definitions for a specific vector mode
+fn generate_types(mode: VectorMode) -> String {
+    let lanes = mode.lanes();
+    let pair_lanes = lanes * 2;
+    let bits = mode.bits();
+    let bytes = mode.bytes();
+    let pair_bits = bits * 2;
+    let pair_bytes = bytes * 2;
+
+    format!(
+        r#"
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+use crate::intrinsics::simd::{{simd_add, simd_and, simd_or, simd_sub, simd_xor}};
+
+// HVX type definitions for {bytes}-byte vector mode
+types! {{
+    #![unstable(feature = "stdarch_hexagon", issue = "{TRACKING_ISSUE}")]
+
+    /// HVX vector type ({bits} bits / {bytes} bytes)
+    ///
+    /// This type represents a single HVX vector register containing {lanes} x 32-bit values.
+    pub struct HvxVector({lanes} x i32);
+
+    /// HVX vector pair type ({pair_bits} bits / {pair_bytes} bytes)
+    ///
+    /// This type represents a pair of HVX vector registers, often used for
+    /// operations that produce double-width results.
+    pub struct HvxVectorPair({pair_lanes} x i32);
+
+    /// HVX vector predicate type ({bits} bits / {bytes} bytes)
+    ///
+    /// This type represents a predicate vector used for conditional operations.
+    /// Each bit corresponds to a lane in the vector.
+    pub struct HvxVectorPred({lanes} x i32);
+}}
+"#,
+        bytes = bytes,
+        bits = bits,
+        lanes = lanes,
+        pair_bits = pair_bits,
+        pair_bytes = pair_bytes,
+        pair_lanes = pair_lanes,
+        TRACKING_ISSUE = TRACKING_ISSUE,
+    )
+}
+
+/// Builtin signature information for extern declarations
+struct BuiltinSignature {
+    /// The V6_ prefixed name
+    full_name: String,
+    /// The short name (without V6_)
+    short_name: String,
+    /// Return type
+    return_type: RustType,
+    /// Parameter types
+    param_types: Vec<RustType>,
+}
+
+/// Get known signatures for builtins used in compound operations
+/// These are the helper builtins that don't have their own Q6_ wrapper
+fn get_compound_helper_signatures() -> HashMap<String, BuiltinSignature> {
+    let mut map = HashMap::new();
+
+    // vandvrt: HVX_Vector -> i32 -> HVX_Vector
+    // Converts predicate to vector representation. LLVM uses HVX_Vector for both.
+    map.insert(
+        "vandvrt".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vandvrt".to_string(),
+            short_name: "vandvrt".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::I32],
+        },
+    );
+
+    // vandqrt: HVX_Vector -> i32 -> HVX_Vector
+    // Converts vector representation back to predicate. LLVM uses HVX_Vector for both.
+    map.insert(
+        "vandqrt".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vandqrt".to_string(),
+            short_name: "vandqrt".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::I32],
+        },
+    );
+
+    // vandvrt_acc: HVX_Vector -> HVX_Vector -> i32 -> HVX_Vector
+    map.insert(
+        "vandvrt_acc".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vandvrt_acc".to_string(),
+            short_name: "vandvrt_acc".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector, RustType::I32],
+        },
+    );
+
+    // vandqrt_acc: HVX_Vector -> HVX_Vector -> i32 -> HVX_Vector
+    map.insert(
+        "vandqrt_acc".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vandqrt_acc".to_string(),
+            short_name: "vandqrt_acc".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector, RustType::I32],
+        },
+    );
+
+    // pred_and: HVX_Vector -> HVX_Vector -> HVX_Vector
+    map.insert(
+        "pred_and".to_string(),
+        BuiltinSignature {
+            full_name: "V6_pred_and".to_string(),
+            short_name: "pred_and".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector],
+        },
+    );
+
+    // pred_and_n: HVX_Vector -> HVX_Vector -> HVX_Vector
+    map.insert(
+        "pred_and_n".to_string(),
+        BuiltinSignature {
+            full_name: "V6_pred_and_n".to_string(),
+            short_name: "pred_and_n".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector],
+        },
+    );
+
+    // pred_or: HVX_Vector -> HVX_Vector -> HVX_Vector
+    map.insert(
+        "pred_or".to_string(),
+        BuiltinSignature {
+            full_name: "V6_pred_or".to_string(),
+            short_name: "pred_or".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector],
+        },
+    );
+
+    // pred_or_n: HVX_Vector -> HVX_Vector -> HVX_Vector
+    map.insert(
+        "pred_or_n".to_string(),
+        BuiltinSignature {
+            full_name: "V6_pred_or_n".to_string(),
+            short_name: "pred_or_n".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector],
+        },
+    );
+
+    // pred_xor: HVX_Vector -> HVX_Vector -> HVX_Vector
+    map.insert(
+        "pred_xor".to_string(),
+        BuiltinSignature {
+            full_name: "V6_pred_xor".to_string(),
+            short_name: "pred_xor".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector],
+        },
+    );
+
+    // pred_not: HVX_Vector -> HVX_Vector
+    map.insert(
+        "pred_not".to_string(),
+        BuiltinSignature {
+            full_name: "V6_pred_not".to_string(),
+            short_name: "pred_not".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector],
+        },
+    );
+
+    // pred_scalar2: i32 -> HVX_Vector
+    map.insert(
+        "pred_scalar2".to_string(),
+        BuiltinSignature {
+            full_name: "V6_pred_scalar2".to_string(),
+            short_name: "pred_scalar2".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::I32],
+        },
+    );
+
+    // Conditional store operations
+    map.insert(
+        "vS32b_qpred_ai".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vS32b_qpred_ai".to_string(),
+            short_name: "vS32b_qpred_ai".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::MutPtrHvxVector,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    map.insert(
+        "vS32b_nqpred_ai".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vS32b_nqpred_ai".to_string(),
+            short_name: "vS32b_nqpred_ai".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::MutPtrHvxVector,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    map.insert(
+        "vS32b_nt_qpred_ai".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vS32b_nt_qpred_ai".to_string(),
+            short_name: "vS32b_nt_qpred_ai".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::MutPtrHvxVector,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    map.insert(
+        "vS32b_nt_nqpred_ai".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vS32b_nt_nqpred_ai".to_string(),
+            short_name: "vS32b_nt_nqpred_ai".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::MutPtrHvxVector,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    // Conditional accumulation operations
+    for (suffix, _elem) in [("b", "byte"), ("h", "halfword"), ("w", "word")] {
+        // vaddbq, vaddhq, vaddwq
+        map.insert(
+            format!("vadd{}q", suffix),
+            BuiltinSignature {
+                full_name: format!("V6_vadd{}q", suffix),
+                short_name: format!("vadd{}q", suffix),
+                return_type: RustType::HvxVector,
+                param_types: vec![
+                    RustType::HvxVector,
+                    RustType::HvxVector,
+                    RustType::HvxVector,
+                ],
+            },
+        );
+        // vaddbnq, vaddhnq, vaddwnq
+        map.insert(
+            format!("vadd{}nq", suffix),
+            BuiltinSignature {
+                full_name: format!("V6_vadd{}nq", suffix),
+                short_name: format!("vadd{}nq", suffix),
+                return_type: RustType::HvxVector,
+                param_types: vec![
+                    RustType::HvxVector,
+                    RustType::HvxVector,
+                    RustType::HvxVector,
+                ],
+            },
+        );
+    }
+
+    // Comparison operations with accumulation
+    // veqb_and, veqb_or, veqb_xor, etc.
+    for elem in ["b", "h", "w", "ub", "uh", "uw"] {
+        for op in ["and", "or", "xor"] {
+            // veq*_and, veq*_or, veq*_xor
+            map.insert(
+                format!("veq{}_{}", elem, op),
+                BuiltinSignature {
+                    full_name: format!("V6_veq{}_{}", elem, op),
+                    short_name: format!("veq{}_{}", elem, op),
+                    return_type: RustType::HvxVector,
+                    param_types: vec![
+                        RustType::HvxVector,
+                        RustType::HvxVector,
+                        RustType::HvxVector,
+                    ],
+                },
+            );
+            // vgt*_and, vgt*_or, vgt*_xor
+            map.insert(
+                format!("vgt{}_{}", elem, op),
+                BuiltinSignature {
+                    full_name: format!("V6_vgt{}_{}", elem, op),
+                    short_name: format!("vgt{}_{}", elem, op),
+                    return_type: RustType::HvxVector,
+                    param_types: vec![
+                        RustType::HvxVector,
+                        RustType::HvxVector,
+                        RustType::HvxVector,
+                    ],
+                },
+            );
+        }
+    }
+
+    // Floating-point comparison operations (hf = half-float, sf = single-float)
+    for elem in ["hf", "sf"] {
+        // Basic comparison: vgt*
+        map.insert(
+            format!("vgt{}", elem),
+            BuiltinSignature {
+                full_name: format!("V6_vgt{}", elem),
+                short_name: format!("vgt{}", elem),
+                return_type: RustType::HvxVector,
+                param_types: vec![RustType::HvxVector, RustType::HvxVector],
+            },
+        );
+
+        for op in ["and", "or", "xor"] {
+            // vgt*_and, vgt*_or, vgt*_xor
+            map.insert(
+                format!("vgt{}_{}", elem, op),
+                BuiltinSignature {
+                    full_name: format!("V6_vgt{}_{}", elem, op),
+                    short_name: format!("vgt{}_{}", elem, op),
+                    return_type: RustType::HvxVector,
+                    param_types: vec![
+                        RustType::HvxVector,
+                        RustType::HvxVector,
+                        RustType::HvxVector,
+                    ],
+                },
+            );
+        }
+    }
+
+    // Prefix operations with predicate
+    for elem in ["b", "h", "w"] {
+        map.insert(
+            format!("vprefixq{}", elem),
+            BuiltinSignature {
+                full_name: format!("V6_vprefixq{}", elem),
+                short_name: format!("vprefixq{}", elem),
+                return_type: RustType::HvxVector,
+                param_types: vec![RustType::HvxVector],
+            },
+        );
+    }
+
+    // Scatter operations with predicate
+    map.insert(
+        "vscattermhq".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vscattermhq".to_string(),
+            short_name: "vscattermhq".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::I32,
+                RustType::I32,
+                RustType::HvxVector,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    map.insert(
+        "vscattermhwq".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vscattermhwq".to_string(),
+            short_name: "vscattermhwq".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::I32,
+                RustType::I32,
+                RustType::HvxVectorPair,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    map.insert(
+        "vscattermwq".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vscattermwq".to_string(),
+            short_name: "vscattermwq".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::I32,
+                RustType::I32,
+                RustType::HvxVector,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    // Add with carry saturation
+    map.insert(
+        "vaddcarrysat".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vaddcarrysat".to_string(),
+            short_name: "vaddcarrysat".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::HvxVector,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    // Gather operations with predicate
+    map.insert(
+        "vgathermhq".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vgathermhq".to_string(),
+            short_name: "vgathermhq".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::MutPtrHvxVector,
+                RustType::HvxVector,
+                RustType::I32,
+                RustType::I32,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    map.insert(
+        "vgathermhwq".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vgathermhwq".to_string(),
+            short_name: "vgathermhwq".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::MutPtrHvxVector,
+                RustType::HvxVector,
+                RustType::I32,
+                RustType::I32,
+                RustType::HvxVectorPair,
+            ],
+        },
+    );
+
+    map.insert(
+        "vgathermwq".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vgathermwq".to_string(),
+            short_name: "vgathermwq".to_string(),
+            return_type: RustType::Unit,
+            param_types: vec![
+                RustType::MutPtrHvxVector,
+                RustType::HvxVector,
+                RustType::I32,
+                RustType::I32,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    // Basic comparison operations (without accumulation)
+    for elem in ["b", "h", "w", "ub", "uh", "uw"] {
+        // vgt* - greater than
+        map.insert(
+            format!("vgt{}", elem),
+            BuiltinSignature {
+                full_name: format!("V6_vgt{}", elem),
+                short_name: format!("vgt{}", elem),
+                return_type: RustType::HvxVector,
+                param_types: vec![RustType::HvxVector, RustType::HvxVector],
+            },
+        );
+        // veq* - equal
+        map.insert(
+            format!("veq{}", elem),
+            BuiltinSignature {
+                full_name: format!("V6_veq{}", elem),
+                short_name: format!("veq{}", elem),
+                return_type: RustType::HvxVector,
+                param_types: vec![RustType::HvxVector, RustType::HvxVector],
+            },
+        );
+    }
+
+    // Conditional subtraction operations (vsub*q, vsub*nq)
+    for elem in ["b", "h", "w"] {
+        map.insert(
+            format!("vsub{}q", elem),
+            BuiltinSignature {
+                full_name: format!("V6_vsub{}q", elem),
+                short_name: format!("vsub{}q", elem),
+                return_type: RustType::HvxVector,
+                param_types: vec![
+                    RustType::HvxVector,
+                    RustType::HvxVector,
+                    RustType::HvxVector,
+                ],
+            },
+        );
+        map.insert(
+            format!("vsub{}nq", elem),
+            BuiltinSignature {
+                full_name: format!("V6_vsub{}nq", elem),
+                short_name: format!("vsub{}nq", elem),
+                return_type: RustType::HvxVector,
+                param_types: vec![
+                    RustType::HvxVector,
+                    RustType::HvxVector,
+                    RustType::HvxVector,
+                ],
+            },
+        );
+    }
+
+    // vmux - vector mux (select based on predicate)
+    map.insert(
+        "vmux".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vmux".to_string(),
+            short_name: "vmux".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::HvxVector,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    // vswap - vector swap based on predicate
+    map.insert(
+        "vswap".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vswap".to_string(),
+            short_name: "vswap".to_string(),
+            return_type: RustType::HvxVectorPair,
+            param_types: vec![
+                RustType::HvxVector,
+                RustType::HvxVector,
+                RustType::HvxVector,
+            ],
+        },
+    );
+
+    // shuffeq operations - take vectors (internal pred representation) and return vector
+    for elem in ["h", "w"] {
+        map.insert(
+            format!("shuffeq{}", elem),
+            BuiltinSignature {
+                full_name: format!("V6_shuffeq{}", elem),
+                short_name: format!("shuffeq{}", elem),
+                return_type: RustType::HvxVector,
+                param_types: vec![RustType::HvxVector, RustType::HvxVector],
+            },
+        );
+    }
+
+    // Predicate AND with vector operations
+    map.insert(
+        "vandvqv".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vandvqv".to_string(),
+            short_name: "vandvqv".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector],
+        },
+    );
+
+    map.insert(
+        "vandvnqv".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vandvnqv".to_string(),
+            short_name: "vandvnqv".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector],
+        },
+    );
+
+    // vandnqrt and vandnqrt_acc
+    map.insert(
+        "vandnqrt".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vandnqrt".to_string(),
+            short_name: "vandnqrt".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::I32],
+        },
+    );
+
+    map.insert(
+        "vandnqrt_acc".to_string(),
+        BuiltinSignature {
+            full_name: "V6_vandnqrt_acc".to_string(),
+            short_name: "vandnqrt_acc".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::HvxVector, RustType::HvxVector, RustType::I32],
+        },
+    );
+
+    // pred_scalar2v2
+    map.insert(
+        "pred_scalar2v2".to_string(),
+        BuiltinSignature {
+            full_name: "V6_pred_scalar2v2".to_string(),
+            short_name: "pred_scalar2v2".to_string(),
+            return_type: RustType::HvxVector,
+            param_types: vec![RustType::I32],
+        },
+    );
+
+    map
+}
+
+/// Generate extern declarations for all intrinsics for a specific vector mode
+fn generate_extern_block(intrinsics: &[IntrinsicInfo], mode: VectorMode) -> String {
+    let mut output = String::new();
+
+    // Collect unique builtins to avoid duplicates
+    let mut seen_builtins: HashSet<String> = HashSet::new();
+    let mut decls: Vec<(String, String, RustType, Vec<RustType>)> = Vec::new();
+
+    // First, add simple intrinsics
+    for info in intrinsics.iter().filter(|i| !i.is_compound) {
+        if seen_builtins.contains(&info.builtin_name) {
+            continue;
+        }
+        seen_builtins.insert(info.builtin_name.clone());
+
+        let param_types: Vec<RustType> = info.params.iter().map(|(_, t)| t.clone()).collect();
+        decls.push((
+            info.builtin_name.clone(),
+            info.instr_name.clone(),
+            info.return_type.clone(),
+            param_types,
+        ));
+    }
+
+    // Then, collect all builtins used in compound expressions
+    let helper_sigs = get_compound_helper_signatures();
+    let mut compound_builtins: HashSet<String> = HashSet::new();
+
+    for info in intrinsics.iter().filter(|i| i.is_compound) {
+        if let Some(ref expr) = info.compound_expr {
+            collect_builtins_from_expr(expr, &mut compound_builtins);
+        }
+    }
+
+    // Add compound helper builtins
+    let mut missing_builtins = Vec::new();
+    for builtin_name in compound_builtins {
+        let full_name = format!("V6_{}", builtin_name);
+        if seen_builtins.contains(&full_name) {
+            continue;
+        }
+        seen_builtins.insert(full_name.clone());
+
+        if let Some(sig) = helper_sigs.get(&builtin_name) {
+            decls.push((
+                sig.full_name.clone(),
+                sig.short_name.clone(),
+                sig.return_type.clone(),
+                sig.param_types.clone(),
+            ));
+        } else {
+            missing_builtins.push(builtin_name);
+        }
+    }
+
+    // Report missing builtins (for development purposes)
+    if !missing_builtins.is_empty() {
+        eprintln!("Warning: Missing helper signatures for compound builtins:");
+        for name in &missing_builtins {
+            eprintln!("  - {}", name);
+        }
+    }
+
+    // Sort by builtin name for consistent output
+    decls.sort_by(|a, b| a.0.cmp(&b.0));
+
+    // Generate intrinsic declarations for the specified mode
+    output.push_str(&format!(
+        "// LLVM intrinsic declarations for {}-byte vector mode\n",
+        mode.bytes()
+    ));
+    output.push_str("#[allow(improper_ctypes)]\n");
+    output.push_str("unsafe extern \"unadjusted\" {\n");
+
+    for (builtin_name, instr_name, return_type, param_types) in &decls {
+        let base_link = builtin_name.replace('_', ".");
+        // 128-byte mode uses .128B suffix, 64-byte mode doesn't
+        let link_name = if builtin_name.starts_with("V6_") && mode == VectorMode::V128 {
+            format!("llvm.hexagon.{}.128B", base_link)
+        } else {
+            format!("llvm.hexagon.{}", base_link)
+        };
+
+        let params_str = if param_types.is_empty() {
+            String::new()
+        } else {
+            param_types
+                .iter()
+                .map(|t| format!("_: {}", t.to_extern_str()))
+                .collect::<Vec<_>>()
+                .join(", ")
+        };
+
+        let return_str = if *return_type == RustType::Unit {
+            " -> ()".to_string()
+        } else {
+            format!(" -> {}", return_type.to_extern_str())
+        };
+
+        output.push_str(&format!(
+            "    #[link_name = \"{}\"]\n    fn {}({}){};\n",
+            link_name, instr_name, params_str, return_str
+        ));
+    }
+
+    output.push_str("}\n");
+    output
+}
+
+/// Generate Rust code for a compound expression
+/// `params` maps parameter names to their types in the function signature
+/// Get the type of an expression
+fn get_expr_type(
+    expr: &CompoundExpr,
+    params: &HashMap<String, RustType>,
+    helper_sigs: &HashMap<String, BuiltinSignature>,
+) -> Option<RustType> {
+    match expr {
+        CompoundExpr::BuiltinCall(name, _) => {
+            helper_sigs.get(name).map(|sig| sig.return_type.clone())
+        }
+        CompoundExpr::Param(name) => params.get(name).cloned(),
+        CompoundExpr::IntLiteral(_) => Some(RustType::I32),
+    }
+}
+
+fn generate_compound_expr_code(
+    expr: &CompoundExpr,
+    params: &HashMap<String, RustType>,
+    helper_sigs: &HashMap<String, BuiltinSignature>,
+) -> String {
+    match expr {
+        CompoundExpr::BuiltinCall(name, args) => {
+            // Get the expected parameter types for this builtin
+            let expected_types = helper_sigs
+                .get(name)
+                .map(|sig| sig.param_types.clone())
+                .unwrap_or_default();
+
+            let args_code: Vec<String> = args
+                .iter()
+                .enumerate()
+                .map(|(i, arg)| {
+                    let arg_code = generate_compound_expr_code(arg, params, helper_sigs);
+
+                    // Check if we need to transmute this argument
+                    let expected_type = expected_types.get(i);
+                    let actual_type = get_expr_type(arg, params, helper_sigs);
+
+                    // If the builtin expects HvxVector but the arg is HvxVectorPred, transmute
+                    if expected_type == Some(&RustType::HvxVector)
+                        && actual_type == Some(RustType::HvxVectorPred)
+                    {
+                        format!(
+                            "core::mem::transmute::<HvxVectorPred, HvxVector>({})",
+                            arg_code
+                        )
+                    } else {
+                        arg_code
+                    }
+                })
+                .collect();
+            format!("{}({})", name, args_code.join(", "))
+        }
+        CompoundExpr::Param(name) => name.clone(),
+        CompoundExpr::IntLiteral(n) => n.to_string(),
+    }
+}
+
+/// Get the primary instruction name from a compound expression (innermost significant op)
+fn get_compound_primary_instr(expr: &CompoundExpr) -> Option<String> {
+    match expr {
+        CompoundExpr::BuiltinCall(name, args) => {
+            // For vandqrt wrapper, look inside
+            if name == "vandqrt" && !args.is_empty() {
+                if let Some(inner) = get_compound_primary_instr(&args[0]) {
+                    return Some(inner);
+                }
+            }
+            // For store operations, use the store name
+            if name.starts_with("vS32b") {
+                return Some(name.clone());
+            }
+            // For conditional accumulation, use the add name
+            if name.starts_with("vadd") && (name.ends_with("q") || name.ends_with("nq")) {
+                return Some(name.clone());
+            }
+            // For predicate operations
+            if name.starts_with("pred_") {
+                return Some(name.clone());
+            }
+            // For comparison operations with accumulation
+            if (name.starts_with("veq") || name.starts_with("vgt"))
+                && (name.ends_with("_and") || name.ends_with("_or") || name.ends_with("_xor"))
+            {
+                return Some(name.clone());
+            }
+            Some(name.clone())
+        }
+        _ => None,
+    }
+}
+
+/// Get override implementations for specific compound intrinsics.
+/// Some C macros rely on implicit type conversions that don't work with
+/// our stricter Rust types, so we provide corrected implementations.
+fn get_compound_overrides() -> HashMap<&'static str, &'static str> {
+    let mut map = HashMap::new();
+
+    // Q6_V_vand_QR: takes pred, returns vec
+    // Use transmute to convert pred to vec for LLVM, call vandvrt
+    map.insert(
+        "Q6_V_vand_QR",
+        "vandvrt(core::mem::transmute::<HvxVectorPred, HvxVector>(qu), rt)",
+    );
+
+    // Q6_V_vandor_VQR: takes vec and pred, returns vec
+    map.insert(
+        "Q6_V_vandor_VQR",
+        "vandvrt_acc(vx, core::mem::transmute::<HvxVectorPred, HvxVector>(qu), rt)",
+    );
+
+    // Q6_Q_vand_VR: takes vec, returns pred
+    map.insert(
+        "Q6_Q_vand_VR",
+        "core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt(vu, rt))",
+    );
+
+    // Q6_Q_vandor_QVR: takes pred and vec, returns pred
+    map.insert(
+        "Q6_Q_vandor_QVR",
+        "core::mem::transmute::<HvxVector, HvxVectorPred>(vandqrt_acc(core::mem::transmute::<HvxVectorPred, HvxVector>(qx), vu, rt))",
+    );
+
+    map
+}
+
+/// Generate wrapper functions for all intrinsics
+fn generate_functions(intrinsics: &[IntrinsicInfo]) -> String {
+    let mut output = String::new();
+    let simd_mappings = get_simd_intrinsic_mappings();
+
+    // Generate simple intrinsics
+    for info in intrinsics.iter().filter(|i| !i.is_compound) {
+        let rust_name = &info.q6_name;
+
+        // Generate doc comment
+        output.push_str(&format!("/// `{}`\n", info.asm_syntax));
+        output.push_str("///\n");
+        output.push_str(&format!("/// Instruction Type: {}\n", info.instr_type));
+        output.push_str(&format!("/// Execution Slots: {}\n", info.exec_slots));
+
+        // Generate attributes
+        output.push_str("#[inline]\n");
+        output.push_str(&format!(
+            "#[cfg_attr(target_arch = \"hexagon\", target_feature(enable = \"hvxv{}\"))]\n",
+            info.min_arch
+        ));
+
+        // Check if we should use simd intrinsic instead
+        let use_simd = simd_mappings.get(info.instr_name.as_str());
+
+        // assert_instr uses the original instruction name
+        output.push_str(&format!(
+            "#[cfg_attr(test, assert_instr({}))]\n",
+            info.instr_name
+        ));
+
+        output.push_str(&format!(
+            "#[unstable(feature = \"stdarch_hexagon\", issue = \"{}\")]\n",
+            TRACKING_ISSUE
+        ));
+
+        // Generate function signature
+        let params_str = info
+            .params
+            .iter()
+            .map(|(name, ty)| format!("{}: {}", name, ty.to_rust_str()))
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        let return_str = if info.return_type == RustType::Unit {
+            String::new()
+        } else {
+            format!(" -> {}", info.return_type.to_rust_str())
+        };
+
+        output.push_str(&format!(
+            "pub unsafe fn {}({}){} {{\n",
+            rust_name, params_str, return_str
+        ));
+
+        // Generate function body
+        let args_str = info
+            .params
+            .iter()
+            .map(|(name, _)| name.as_str())
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        if let Some(simd_fn) = use_simd {
+            // Use architecture-independent simd intrinsic
+            output.push_str(&format!("    {}({})\n", simd_fn, args_str));
+        } else {
+            // Use the LLVM intrinsic
+            output.push_str(&format!("    {}({})\n", info.instr_name, args_str));
+        }
+
+        output.push_str("}\n\n");
+    }
+
+    // Generate compound intrinsics
+    let helper_sigs = get_compound_helper_signatures();
+    let overrides = get_compound_overrides();
+    for info in intrinsics.iter().filter(|i| i.is_compound) {
+        if let Some(ref compound_expr) = info.compound_expr {
+            let rust_name = &info.q6_name;
+
+            // Get the primary instruction for assert_instr
+            let _primary_instr = get_compound_primary_instr(compound_expr)
+                .unwrap_or_else(|| info.instr_name.clone());
+
+            // Generate doc comment
+            output.push_str(&format!("/// `{}`\n", info.asm_syntax));
+            output.push_str("///\n");
+            output.push_str(
+                "/// This is a compound operation composed of multiple HVX instructions.\n",
+            );
+            if !info.instr_type.is_empty() {
+                output.push_str(&format!("/// Instruction Type: {}\n", info.instr_type));
+            }
+            if !info.exec_slots.is_empty() {
+                output.push_str(&format!("/// Execution Slots: {}\n", info.exec_slots));
+            }
+
+            // Generate attributes
+            output.push_str("#[inline]\n");
+            output.push_str(&format!(
+                "#[cfg_attr(target_arch = \"hexagon\", target_feature(enable = \"hvxv{}\"))]\n",
+                info.min_arch
+            ));
+
+            // For compound ops, we skip assert_instr since they emit multiple instructions
+            // output.push_str(&format!(
+            //     "#[cfg_attr(test, assert_instr({}))]\n",
+            //     primary_instr
+            // ));
+
+            output.push_str(&format!(
+                "#[unstable(feature = \"stdarch_hexagon\", issue = \"{}\")]\n",
+                TRACKING_ISSUE
+            ));
+
+            // Generate function signature
+            let params_str = info
+                .params
+                .iter()
+                .map(|(name, ty)| format!("{}: {}", name, ty.to_rust_str()))
+                .collect::<Vec<_>>()
+                .join(", ");
+
+            let return_str = if info.return_type == RustType::Unit {
+                String::new()
+            } else {
+                format!(" -> {}", info.return_type.to_rust_str())
+            };
+
+            output.push_str(&format!(
+                "pub unsafe fn {}({}){} {{\n",
+                rust_name, params_str, return_str
+            ));
+
+            // Check if we have an override for this intrinsic
+            let body = if let Some(override_body) = overrides.get(info.q6_name.as_str()) {
+                override_body.to_string()
+            } else {
+                // Build param type map for expression code generation
+                let param_types: HashMap<String, RustType> = info.params.iter().cloned().collect();
+                // Generate function body from compound expression
+                let expr_body =
+                    generate_compound_expr_code(compound_expr, &param_types, &helper_sigs);
+
+                // Check if we need to transmute the result
+                let expr_return_type = get_expr_type(compound_expr, &param_types, &helper_sigs);
+                if info.return_type == RustType::HvxVectorPred
+                    && expr_return_type == Some(RustType::HvxVector)
+                {
+                    format!(
+                        "core::mem::transmute::<HvxVector, HvxVectorPred>({})",
+                        expr_body
+                    )
+                } else {
+                    expr_body
+                }
+            };
+            output.push_str(&format!("    {}\n", body));
+
+            output.push_str("}\n\n");
+        }
+    }
+
+    output
+}
+
+/// Generate a module file for a specific vector mode
+fn generate_module_file(
+    intrinsics: &[IntrinsicInfo],
+    output_path: &Path,
+    mode: VectorMode,
+) -> Result<(), String> {
+    let mut output =
+        File::create(output_path).map_err(|e| format!("Failed to create output: {}", e))?;
+
+    writeln!(output, "{}", generate_module_doc(mode)).map_err(|e| e.to_string())?;
+    writeln!(output, "{}", generate_types(mode)).map_err(|e| e.to_string())?;
+    writeln!(output, "{}", generate_extern_block(intrinsics, mode)).map_err(|e| e.to_string())?;
+    writeln!(output, "{}", generate_functions(intrinsics)).map_err(|e| e.to_string())?;
+
+    // Ensure file is flushed before running rustfmt
+    drop(output);
+
+    // Run rustfmt on the generated file
+    let status = std::process::Command::new("rustfmt")
+        .arg(output_path)
+        .status()
+        .map_err(|e| format!("Failed to run rustfmt: {}", e))?;
+
+    if !status.success() {
+        return Err("rustfmt failed".to_string());
+    }
+
+    Ok(())
+}
+
+fn main() -> Result<(), String> {
+    println!("=== Hexagon HVX Code Generator ===\n");
+
+    // Get the crate directory first (needed for both reading header and writing output)
+    let crate_dir = std::env::var("CARGO_MANIFEST_DIR")
+        .map(std::path::PathBuf::from)
+        .unwrap_or_else(|_| std::env::current_dir().unwrap());
+
+    // Read and parse the local LLVM header
+    println!("Step 1: Reading LLVM HVX header...");
+    let header_content = read_header(&crate_dir)?;
+    println!("  Read {} bytes", header_content.len());
+
+    println!("\nStep 2: Parsing intrinsic definitions...");
+    let all_intrinsics = parse_header(&header_content);
+    println!("  Found {} intrinsic definitions", all_intrinsics.len());
+
+    // Filter out intrinsics requiring architecture versions not yet supported by rustc
+    let intrinsics: Vec<_> = all_intrinsics
+        .into_iter()
+        .filter(|i| i.min_arch <= MAX_SUPPORTED_ARCH)
+        .collect();
+    let filtered_count = intrinsics.len();
+    println!(
+        "  Filtered to {} intrinsics (max supported: hvxv{})",
+        filtered_count, MAX_SUPPORTED_ARCH
+    );
+
+    // Count simple vs compound
+    let simple_count = intrinsics.iter().filter(|i| !i.is_compound).count();
+    let compound_count = intrinsics.iter().filter(|i| i.is_compound).count();
+    println!("  Simple intrinsics: {}", simple_count);
+    println!("  Compound intrinsics: {}", compound_count);
+
+    // Print some sample intrinsics for verification
+    println!("\n  Sample simple intrinsics:");
+    for info in intrinsics.iter().filter(|i| !i.is_compound).take(5) {
+        println!(
+            "    {} -> {} ({})",
+            info.q6_name, info.builtin_name, info.asm_syntax
+        );
+    }
+
+    println!("\n  Sample compound intrinsics:");
+    for info in intrinsics.iter().filter(|i| i.is_compound).take(5) {
+        println!("    {} ({})", info.q6_name, info.asm_syntax);
+    }
+
+    // Count architecture versions
+    let mut arch_counts: HashMap<u32, usize> = HashMap::new();
+    for info in &intrinsics {
+        *arch_counts.entry(info.min_arch).or_insert(0) += 1;
+    }
+    println!("\n  By architecture version:");
+    let mut archs: Vec<_> = arch_counts.iter().collect();
+    archs.sort_by_key(|(k, _)| *k);
+    for (arch, count) in archs {
+        println!("    HVX v{}: {} intrinsics", arch, count);
+    }
+
+    // Generate output files
+    let hexagon_dir = crate_dir.join("../core_arch/src/hexagon");
+
+    // Generate v64.rs (64-byte vector mode)
+    let v64_path = hexagon_dir.join("v64.rs");
+    println!("\nStep 3: Generating v64.rs (64-byte mode)...");
+    generate_module_file(&intrinsics, &v64_path, VectorMode::V64)?;
+    println!("  Output: {}", v64_path.display());
+
+    // Generate v128.rs (128-byte vector mode)
+    let v128_path = hexagon_dir.join("v128.rs");
+    println!("\nStep 4: Generating v128.rs (128-byte mode)...");
+    generate_module_file(&intrinsics, &v128_path, VectorMode::V128)?;
+    println!("  Output: {}", v128_path.display());
+
+    println!("\n=== Results ===");
+    println!(
+        "  Generated {} simple wrapper functions per module",
+        simple_count
+    );
+    println!(
+        "  Generated {} compound wrapper functions per module",
+        compound_count
+    );
+    println!(
+        "  Total: {} functions per module",
+        simple_count + compound_count
+    );
+    println!("  Output files: v64.rs, v128.rs");
+
+    Ok(())
+}
diff --git a/crates/stdarch-gen-loongarch/lasx.spec b/crates/stdarch-gen-loongarch/lasx.spec
index e3bdfcb5e9..ae69f19410 100644
--- a/crates/stdarch-gen-loongarch/lasx.spec
+++ b/crates/stdarch-gen-loongarch/lasx.spec
@@ -4,81 +4,97 @@
 // ```
 
 /// lasx_xvsll_b
+impl = portable
 name = lasx_xvsll_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvsll_h
+impl = portable
 name = lasx_xvsll_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvsll_w
+impl = portable
 name = lasx_xvsll_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvsll_d
+impl = portable
 name = lasx_xvsll_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvslli_b
+impl = portable
 name = lasx_xvslli_b
 asm-fmts = xd, xj, ui3
 data-types = V32QI, V32QI, UQI
 
 /// lasx_xvslli_h
+impl = portable
 name = lasx_xvslli_h
 asm-fmts = xd, xj, ui4
 data-types = V16HI, V16HI, UQI
 
 /// lasx_xvslli_w
+impl = portable
 name = lasx_xvslli_w
 asm-fmts = xd, xj, ui5
 data-types = V8SI, V8SI, UQI
 
 /// lasx_xvslli_d
+impl = portable
 name = lasx_xvslli_d
 asm-fmts = xd, xj, ui6
 data-types = V4DI, V4DI, UQI
 
 /// lasx_xvsra_b
+impl = portable
 name = lasx_xvsra_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvsra_h
+impl = portable
 name = lasx_xvsra_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvsra_w
+impl = portable
 name = lasx_xvsra_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvsra_d
+impl = portable
 name = lasx_xvsra_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvsrai_b
+impl = portable
 name = lasx_xvsrai_b
 asm-fmts = xd, xj, ui3
 data-types = V32QI, V32QI, UQI
 
 /// lasx_xvsrai_h
+impl = portable
 name = lasx_xvsrai_h
 asm-fmts = xd, xj, ui4
 data-types = V16HI, V16HI, UQI
 
 /// lasx_xvsrai_w
+impl = portable
 name = lasx_xvsrai_w
 asm-fmts = xd, xj, ui5
 data-types = V8SI, V8SI, UQI
 
 /// lasx_xvsrai_d
+impl = portable
 name = lasx_xvsrai_d
 asm-fmts = xd, xj, ui6
 data-types = V4DI, V4DI, UQI
@@ -124,41 +140,49 @@ asm-fmts = xd, xj, ui6
 data-types = V4DI, V4DI, UQI
 
 /// lasx_xvsrl_b
+impl = portable
 name = lasx_xvsrl_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvsrl_h
+impl = portable
 name = lasx_xvsrl_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvsrl_w
+impl = portable
 name = lasx_xvsrl_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvsrl_d
+impl = portable
 name = lasx_xvsrl_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvsrli_b
+impl = portable
 name = lasx_xvsrli_b
 asm-fmts = xd, xj, ui3
 data-types = V32QI, V32QI, UQI
 
 /// lasx_xvsrli_h
+impl = portable
 name = lasx_xvsrli_h
 asm-fmts = xd, xj, ui4
 data-types = V16HI, V16HI, UQI
 
 /// lasx_xvsrli_w
+impl = portable
 name = lasx_xvsrli_w
 asm-fmts = xd, xj, ui5
 data-types = V8SI, V8SI, UQI
 
 /// lasx_xvsrli_d
+impl = portable
 name = lasx_xvsrli_d
 asm-fmts = xd, xj, ui6
 data-types = V4DI, V4DI, UQI
@@ -204,21 +228,25 @@ asm-fmts = xd, xj, ui6
 data-types = V4DI, V4DI, UQI
 
 /// lasx_xvbitclr_b
+impl = portable
 name = lasx_xvbitclr_b
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvbitclr_h
+impl = portable
 name = lasx_xvbitclr_h
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvbitclr_w
+impl = portable
 name = lasx_xvbitclr_w
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvbitclr_d
+impl = portable
 name = lasx_xvbitclr_d
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
@@ -244,21 +272,25 @@ asm-fmts = xd, xj, ui6
 data-types = UV4DI, UV4DI, UQI
 
 /// lasx_xvbitset_b
+impl = portable
 name = lasx_xvbitset_b
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvbitset_h
+impl = portable
 name = lasx_xvbitset_h
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvbitset_w
+impl = portable
 name = lasx_xvbitset_w
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvbitset_d
+impl = portable
 name = lasx_xvbitset_d
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
@@ -284,21 +316,25 @@ asm-fmts = xd, xj, ui6
 data-types = UV4DI, UV4DI, UQI
 
 /// lasx_xvbitrev_b
+impl = portable
 name = lasx_xvbitrev_b
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvbitrev_h
+impl = portable
 name = lasx_xvbitrev_h
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvbitrev_w
+impl = portable
 name = lasx_xvbitrev_w
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvbitrev_d
+impl = portable
 name = lasx_xvbitrev_d
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
@@ -324,61 +360,73 @@ asm-fmts = xd, xj, ui6
 data-types = UV4DI, UV4DI, UQI
 
 /// lasx_xvadd_b
+impl = portable
 name = lasx_xvadd_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvadd_h
+impl = portable
 name = lasx_xvadd_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvadd_w
+impl = portable
 name = lasx_xvadd_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvadd_d
+impl = portable
 name = lasx_xvadd_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvaddi_bu
+impl = portable
 name = lasx_xvaddi_bu
 asm-fmts = xd, xj, ui5
 data-types = V32QI, V32QI, UQI
 
 /// lasx_xvaddi_hu
+impl = portable
 name = lasx_xvaddi_hu
 asm-fmts = xd, xj, ui5
 data-types = V16HI, V16HI, UQI
 
 /// lasx_xvaddi_wu
+impl = portable
 name = lasx_xvaddi_wu
 asm-fmts = xd, xj, ui5
 data-types = V8SI, V8SI, UQI
 
 /// lasx_xvaddi_du
+impl = portable
 name = lasx_xvaddi_du
 asm-fmts = xd, xj, ui5
 data-types = V4DI, V4DI, UQI
 
 /// lasx_xvsub_b
+impl = portable
 name = lasx_xvsub_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvsub_h
+impl = portable
 name = lasx_xvsub_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvsub_w
+impl = portable
 name = lasx_xvsub_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvsub_d
+impl = portable
 name = lasx_xvsub_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
@@ -404,361 +452,433 @@ asm-fmts = xd, xj, ui5
 data-types = V4DI, V4DI, UQI
 
 /// lasx_xvmax_b
+impl = portable
 name = lasx_xvmax_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvmax_h
+impl = portable
 name = lasx_xvmax_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvmax_w
+impl = portable
 name = lasx_xvmax_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvmax_d
+impl = portable
 name = lasx_xvmax_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvmaxi_b
+impl = portable
 name = lasx_xvmaxi_b
 asm-fmts = xd, xj, si5
 data-types = V32QI, V32QI, QI
 
 /// lasx_xvmaxi_h
+impl = portable
 name = lasx_xvmaxi_h
 asm-fmts = xd, xj, si5
 data-types = V16HI, V16HI, QI
 
 /// lasx_xvmaxi_w
+impl = portable
 name = lasx_xvmaxi_w
 asm-fmts = xd, xj, si5
 data-types = V8SI, V8SI, QI
 
 /// lasx_xvmaxi_d
+impl = portable
 name = lasx_xvmaxi_d
 asm-fmts = xd, xj, si5
 data-types = V4DI, V4DI, QI
 
 /// lasx_xvmax_bu
+impl = portable
 name = lasx_xvmax_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvmax_hu
+impl = portable
 name = lasx_xvmax_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvmax_wu
+impl = portable
 name = lasx_xvmax_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvmax_du
+impl = portable
 name = lasx_xvmax_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
 
 /// lasx_xvmaxi_bu
+impl = portable
 name = lasx_xvmaxi_bu
 asm-fmts = xd, xj, ui5
 data-types = UV32QI, UV32QI, UQI
 
 /// lasx_xvmaxi_hu
+impl = portable
 name = lasx_xvmaxi_hu
 asm-fmts = xd, xj, ui5
 data-types = UV16HI, UV16HI, UQI
 
 /// lasx_xvmaxi_wu
+impl = portable
 name = lasx_xvmaxi_wu
 asm-fmts = xd, xj, ui5
 data-types = UV8SI, UV8SI, UQI
 
 /// lasx_xvmaxi_du
+impl = portable
 name = lasx_xvmaxi_du
 asm-fmts = xd, xj, ui5
 data-types = UV4DI, UV4DI, UQI
 
 /// lasx_xvmin_b
+impl = portable
 name = lasx_xvmin_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvmin_h
+impl = portable
 name = lasx_xvmin_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvmin_w
+impl = portable
 name = lasx_xvmin_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvmin_d
+impl = portable
 name = lasx_xvmin_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvmini_b
+impl = portable
 name = lasx_xvmini_b
 asm-fmts = xd, xj, si5
 data-types = V32QI, V32QI, QI
 
 /// lasx_xvmini_h
+impl = portable
 name = lasx_xvmini_h
 asm-fmts = xd, xj, si5
 data-types = V16HI, V16HI, QI
 
 /// lasx_xvmini_w
+impl = portable
 name = lasx_xvmini_w
 asm-fmts = xd, xj, si5
 data-types = V8SI, V8SI, QI
 
 /// lasx_xvmini_d
+impl = portable
 name = lasx_xvmini_d
 asm-fmts = xd, xj, si5
 data-types = V4DI, V4DI, QI
 
 /// lasx_xvmin_bu
+impl = portable
 name = lasx_xvmin_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvmin_hu
+impl = portable
 name = lasx_xvmin_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvmin_wu
+impl = portable
 name = lasx_xvmin_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvmin_du
+impl = portable
 name = lasx_xvmin_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
 
 /// lasx_xvmini_bu
+impl = portable
 name = lasx_xvmini_bu
 asm-fmts = xd, xj, ui5
 data-types = UV32QI, UV32QI, UQI
 
 /// lasx_xvmini_hu
+impl = portable
 name = lasx_xvmini_hu
 asm-fmts = xd, xj, ui5
 data-types = UV16HI, UV16HI, UQI
 
 /// lasx_xvmini_wu
+impl = portable
 name = lasx_xvmini_wu
 asm-fmts = xd, xj, ui5
 data-types = UV8SI, UV8SI, UQI
 
 /// lasx_xvmini_du
+impl = portable
 name = lasx_xvmini_du
 asm-fmts = xd, xj, ui5
 data-types = UV4DI, UV4DI, UQI
 
 /// lasx_xvseq_b
+impl = portable
 name = lasx_xvseq_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvseq_h
+impl = portable
 name = lasx_xvseq_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvseq_w
+impl = portable
 name = lasx_xvseq_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvseq_d
+impl = portable
 name = lasx_xvseq_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvseqi_b
+impl = portable
 name = lasx_xvseqi_b
 asm-fmts = xd, xj, si5
 data-types = V32QI, V32QI, QI
 
 /// lasx_xvseqi_h
+impl = portable
 name = lasx_xvseqi_h
 asm-fmts = xd, xj, si5
 data-types = V16HI, V16HI, QI
 
 /// lasx_xvseqi_w
+impl = portable
 name = lasx_xvseqi_w
 asm-fmts = xd, xj, si5
 data-types = V8SI, V8SI, QI
 
 /// lasx_xvseqi_d
+impl = portable
 name = lasx_xvseqi_d
 asm-fmts = xd, xj, si5
 data-types = V4DI, V4DI, QI
 
 /// lasx_xvslt_b
+impl = portable
 name = lasx_xvslt_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvslt_h
+impl = portable
 name = lasx_xvslt_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvslt_w
+impl = portable
 name = lasx_xvslt_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvslt_d
+impl = portable
 name = lasx_xvslt_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvslti_b
+impl = portable
 name = lasx_xvslti_b
 asm-fmts = xd, xj, si5
 data-types = V32QI, V32QI, QI
 
 /// lasx_xvslti_h
+impl = portable
 name = lasx_xvslti_h
 asm-fmts = xd, xj, si5
 data-types = V16HI, V16HI, QI
 
 /// lasx_xvslti_w
+impl = portable
 name = lasx_xvslti_w
 asm-fmts = xd, xj, si5
 data-types = V8SI, V8SI, QI
 
 /// lasx_xvslti_d
+impl = portable
 name = lasx_xvslti_d
 asm-fmts = xd, xj, si5
 data-types = V4DI, V4DI, QI
 
 /// lasx_xvslt_bu
+impl = portable
 name = lasx_xvslt_bu
 asm-fmts = xd, xj, xk
 data-types = V32QI, UV32QI, UV32QI
 
 /// lasx_xvslt_hu
+impl = portable
 name = lasx_xvslt_hu
 asm-fmts = xd, xj, xk
 data-types = V16HI, UV16HI, UV16HI
 
 /// lasx_xvslt_wu
+impl = portable
 name = lasx_xvslt_wu
 asm-fmts = xd, xj, xk
 data-types = V8SI, UV8SI, UV8SI
 
 /// lasx_xvslt_du
+impl = portable
 name = lasx_xvslt_du
 asm-fmts = xd, xj, xk
 data-types = V4DI, UV4DI, UV4DI
 
 /// lasx_xvslti_bu
+impl = portable
 name = lasx_xvslti_bu
 asm-fmts = xd, xj, ui5
 data-types = V32QI, UV32QI, UQI
 
 /// lasx_xvslti_hu
+impl = portable
 name = lasx_xvslti_hu
 asm-fmts = xd, xj, ui5
 data-types = V16HI, UV16HI, UQI
 
 /// lasx_xvslti_wu
+impl = portable
 name = lasx_xvslti_wu
 asm-fmts = xd, xj, ui5
 data-types = V8SI, UV8SI, UQI
 
 /// lasx_xvslti_du
+impl = portable
 name = lasx_xvslti_du
 asm-fmts = xd, xj, ui5
 data-types = V4DI, UV4DI, UQI
 
 /// lasx_xvsle_b
+impl = portable
 name = lasx_xvsle_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvsle_h
+impl = portable
 name = lasx_xvsle_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvsle_w
+impl = portable
 name = lasx_xvsle_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvsle_d
+impl = portable
 name = lasx_xvsle_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvslei_b
+impl = portable
 name = lasx_xvslei_b
 asm-fmts = xd, xj, si5
 data-types = V32QI, V32QI, QI
 
 /// lasx_xvslei_h
+impl = portable
 name = lasx_xvslei_h
 asm-fmts = xd, xj, si5
 data-types = V16HI, V16HI, QI
 
 /// lasx_xvslei_w
+impl = portable
 name = lasx_xvslei_w
 asm-fmts = xd, xj, si5
 data-types = V8SI, V8SI, QI
 
 /// lasx_xvslei_d
+impl = portable
 name = lasx_xvslei_d
 asm-fmts = xd, xj, si5
 data-types = V4DI, V4DI, QI
 
 /// lasx_xvsle_bu
+impl = portable
 name = lasx_xvsle_bu
 asm-fmts = xd, xj, xk
 data-types = V32QI, UV32QI, UV32QI
 
 /// lasx_xvsle_hu
+impl = portable
 name = lasx_xvsle_hu
 asm-fmts = xd, xj, xk
 data-types = V16HI, UV16HI, UV16HI
 
 /// lasx_xvsle_wu
+impl = portable
 name = lasx_xvsle_wu
 asm-fmts = xd, xj, xk
 data-types = V8SI, UV8SI, UV8SI
 
 /// lasx_xvsle_du
+impl = portable
 name = lasx_xvsle_du
 asm-fmts = xd, xj, xk
 data-types = V4DI, UV4DI, UV4DI
 
 /// lasx_xvslei_bu
+impl = portable
 name = lasx_xvslei_bu
 asm-fmts = xd, xj, ui5
 data-types = V32QI, UV32QI, UQI
 
 /// lasx_xvslei_hu
+impl = portable
 name = lasx_xvslei_hu
 asm-fmts = xd, xj, ui5
 data-types = V16HI, UV16HI, UQI
 
 /// lasx_xvslei_wu
+impl = portable
 name = lasx_xvslei_wu
 asm-fmts = xd, xj, ui5
 data-types = V8SI, UV8SI, UQI
 
 /// lasx_xvslei_du
+impl = portable
 name = lasx_xvslei_du
 asm-fmts = xd, xj, ui5
 data-types = V4DI, UV4DI, UQI
@@ -804,61 +924,73 @@ asm-fmts = xd, xj, ui6
 data-types = UV4DI, UV4DI, UQI
 
 /// lasx_xvadda_b
+impl = portable
 name = lasx_xvadda_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvadda_h
+impl = portable
 name = lasx_xvadda_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvadda_w
+impl = portable
 name = lasx_xvadda_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvadda_d
+impl = portable
 name = lasx_xvadda_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvsadd_b
+impl = portable
 name = lasx_xvsadd_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvsadd_h
+impl = portable
 name = lasx_xvsadd_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvsadd_w
+impl = portable
 name = lasx_xvsadd_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvsadd_d
+impl = portable
 name = lasx_xvsadd_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvsadd_bu
+impl = portable
 name = lasx_xvsadd_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvsadd_hu
+impl = portable
 name = lasx_xvsadd_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvsadd_wu
+impl = portable
 name = lasx_xvsadd_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvsadd_du
+impl = portable
 name = lasx_xvsadd_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
@@ -944,181 +1076,217 @@ asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
 
 /// lasx_xvssub_b
+impl = portable
 name = lasx_xvssub_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvssub_h
+impl = portable
 name = lasx_xvssub_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvssub_w
+impl = portable
 name = lasx_xvssub_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvssub_d
+impl = portable
 name = lasx_xvssub_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvssub_bu
+impl = portable
 name = lasx_xvssub_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvssub_hu
+impl = portable
 name = lasx_xvssub_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvssub_wu
+impl = portable
 name = lasx_xvssub_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvssub_du
+impl = portable
 name = lasx_xvssub_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
 
 /// lasx_xvabsd_b
+impl = portable
 name = lasx_xvabsd_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvabsd_h
+impl = portable
 name = lasx_xvabsd_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvabsd_w
+impl = portable
 name = lasx_xvabsd_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvabsd_d
+impl = portable
 name = lasx_xvabsd_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvabsd_bu
+impl = portable
 name = lasx_xvabsd_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvabsd_hu
+impl = portable
 name = lasx_xvabsd_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvabsd_wu
+impl = portable
 name = lasx_xvabsd_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvabsd_du
+impl = portable
 name = lasx_xvabsd_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
 
 /// lasx_xvmul_b
+impl = portable
 name = lasx_xvmul_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvmul_h
+impl = portable
 name = lasx_xvmul_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvmul_w
+impl = portable
 name = lasx_xvmul_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvmul_d
+impl = portable
 name = lasx_xvmul_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvmadd_b
+impl = portable
 name = lasx_xvmadd_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI, V32QI
 
 /// lasx_xvmadd_h
+impl = portable
 name = lasx_xvmadd_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI, V16HI
 
 /// lasx_xvmadd_w
+impl = portable
 name = lasx_xvmadd_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI, V8SI
 
 /// lasx_xvmadd_d
+impl = portable
 name = lasx_xvmadd_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI, V4DI
 
 /// lasx_xvmsub_b
+impl = portable
 name = lasx_xvmsub_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI, V32QI
 
 /// lasx_xvmsub_h
+impl = portable
 name = lasx_xvmsub_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI, V16HI
 
 /// lasx_xvmsub_w
+impl = portable
 name = lasx_xvmsub_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI, V8SI
 
 /// lasx_xvmsub_d
+impl = portable
 name = lasx_xvmsub_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI, V4DI
 
 /// lasx_xvdiv_b
+impl = portable
 name = lasx_xvdiv_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvdiv_h
+impl = portable
 name = lasx_xvdiv_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvdiv_w
+impl = portable
 name = lasx_xvdiv_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvdiv_d
+impl = portable
 name = lasx_xvdiv_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvdiv_bu
+impl = portable
 name = lasx_xvdiv_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvdiv_hu
+impl = portable
 name = lasx_xvdiv_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvdiv_wu
+impl = portable
 name = lasx_xvdiv_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvdiv_du
+impl = portable
 name = lasx_xvdiv_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
@@ -1184,41 +1352,49 @@ asm-fmts = xd, xj, xk
 data-types = V4DI, UV8SI, UV8SI
 
 /// lasx_xvmod_b
+impl = portable
 name = lasx_xvmod_b
 asm-fmts = xd, xj, xk
 data-types = V32QI, V32QI, V32QI
 
 /// lasx_xvmod_h
+impl = portable
 name = lasx_xvmod_h
 asm-fmts = xd, xj, xk
 data-types = V16HI, V16HI, V16HI
 
 /// lasx_xvmod_w
+impl = portable
 name = lasx_xvmod_w
 asm-fmts = xd, xj, xk
 data-types = V8SI, V8SI, V8SI
 
 /// lasx_xvmod_d
+impl = portable
 name = lasx_xvmod_d
 asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvmod_bu
+impl = portable
 name = lasx_xvmod_bu
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvmod_hu
+impl = portable
 name = lasx_xvmod_hu
 asm-fmts = xd, xj, xk
 data-types = UV16HI, UV16HI, UV16HI
 
 /// lasx_xvmod_wu
+impl = portable
 name = lasx_xvmod_wu
 asm-fmts = xd, xj, xk
 data-types = UV8SI, UV8SI, UV8SI
 
 /// lasx_xvmod_du
+impl = portable
 name = lasx_xvmod_du
 asm-fmts = xd, xj, xk
 data-types = UV4DI, UV4DI, UV4DI
@@ -1384,6 +1560,7 @@ asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI, V4DI
 
 /// lasx_xvand_v
+impl = portable
 name = lasx_xvand_v
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
@@ -1394,6 +1571,7 @@ asm-fmts = xd, xj, ui8
 data-types = UV32QI, UV32QI, UQI
 
 /// lasx_xvor_v
+impl = portable
 name = lasx_xvor_v
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
@@ -1404,6 +1582,7 @@ asm-fmts = xd, xj, ui8
 data-types = UV32QI, UV32QI, UQI
 
 /// lasx_xvnor_v
+impl = portable
 name = lasx_xvnor_v
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
@@ -1414,6 +1593,7 @@ asm-fmts = xd, xj, ui8
 data-types = UV32QI, UV32QI, UQI
 
 /// lasx_xvxor_v
+impl = portable
 name = lasx_xvxor_v
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
@@ -1449,41 +1629,49 @@ asm-fmts = xd, xj, ui8
 data-types = V8SI, V8SI, USI
 
 /// lasx_xvreplgr2vr_b
+impl = portable
 name = lasx_xvreplgr2vr_b
 asm-fmts = xd, rj
 data-types = V32QI, SI
 
 /// lasx_xvreplgr2vr_h
+impl = portable
 name = lasx_xvreplgr2vr_h
 asm-fmts = xd, rj
 data-types = V16HI, SI
 
 /// lasx_xvreplgr2vr_w
+impl = portable
 name = lasx_xvreplgr2vr_w
 asm-fmts = xd, rj
 data-types = V8SI, SI
 
 /// lasx_xvreplgr2vr_d
+impl = portable
 name = lasx_xvreplgr2vr_d
 asm-fmts = xd, rj
 data-types = V4DI, DI
 
 /// lasx_xvpcnt_b
+impl = portable
 name = lasx_xvpcnt_b
 asm-fmts = xd, xj
 data-types = V32QI, V32QI
 
 /// lasx_xvpcnt_h
+impl = portable
 name = lasx_xvpcnt_h
 asm-fmts = xd, xj
 data-types = V16HI, V16HI
 
 /// lasx_xvpcnt_w
+impl = portable
 name = lasx_xvpcnt_w
 asm-fmts = xd, xj
 data-types = V8SI, V8SI
 
 /// lasx_xvpcnt_d
+impl = portable
 name = lasx_xvpcnt_d
 asm-fmts = xd, xj
 data-types = V4DI, V4DI
@@ -1509,61 +1697,73 @@ asm-fmts = xd, xj
 data-types = V4DI, V4DI
 
 /// lasx_xvclz_b
+impl = portable
 name = lasx_xvclz_b
 asm-fmts = xd, xj
 data-types = V32QI, V32QI
 
 /// lasx_xvclz_h
+impl = portable
 name = lasx_xvclz_h
 asm-fmts = xd, xj
 data-types = V16HI, V16HI
 
 /// lasx_xvclz_w
+impl = portable
 name = lasx_xvclz_w
 asm-fmts = xd, xj
 data-types = V8SI, V8SI
 
 /// lasx_xvclz_d
+impl = portable
 name = lasx_xvclz_d
 asm-fmts = xd, xj
 data-types = V4DI, V4DI
 
 /// lasx_xvfadd_s
+impl = portable
 name = lasx_xvfadd_s
 asm-fmts = xd, xj, xk
 data-types = V8SF, V8SF, V8SF
 
 /// lasx_xvfadd_d
+impl = portable
 name = lasx_xvfadd_d
 asm-fmts = xd, xj, xk
 data-types = V4DF, V4DF, V4DF
 
 /// lasx_xvfsub_s
+impl = portable
 name = lasx_xvfsub_s
 asm-fmts = xd, xj, xk
 data-types = V8SF, V8SF, V8SF
 
 /// lasx_xvfsub_d
+impl = portable
 name = lasx_xvfsub_d
 asm-fmts = xd, xj, xk
 data-types = V4DF, V4DF, V4DF
 
 /// lasx_xvfmul_s
+impl = portable
 name = lasx_xvfmul_s
 asm-fmts = xd, xj, xk
 data-types = V8SF, V8SF, V8SF
 
 /// lasx_xvfmul_d
+impl = portable
 name = lasx_xvfmul_d
 asm-fmts = xd, xj, xk
 data-types = V4DF, V4DF, V4DF
 
 /// lasx_xvfdiv_s
+impl = portable
 name = lasx_xvfdiv_s
 asm-fmts = xd, xj, xk
 data-types = V8SF, V8SF, V8SF
 
 /// lasx_xvfdiv_d
+impl = portable
 name = lasx_xvfdiv_d
 asm-fmts = xd, xj, xk
 data-types = V4DF, V4DF, V4DF
@@ -1629,11 +1829,13 @@ asm-fmts = xd, xj
 data-types = V4DI, V4DF
 
 /// lasx_xvfsqrt_s
+impl = portable
 name = lasx_xvfsqrt_s
 asm-fmts = xd, xj
 data-types = V8SF, V8SF
 
 /// lasx_xvfsqrt_d
+impl = portable
 name = lasx_xvfsqrt_d
 asm-fmts = xd, xj
 data-types = V4DF, V4DF
@@ -1804,26 +2006,31 @@ asm-fmts = xd, xj, ui8
 data-types = V8SI, V8SI, V8SI, USI
 
 /// lasx_xvandn_v
+impl = portable
 name = lasx_xvandn_v
 asm-fmts = xd, xj, xk
 data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvneg_b
+impl = portable
 name = lasx_xvneg_b
 asm-fmts = xd, xj
 data-types = V32QI, V32QI
 
 /// lasx_xvneg_h
+impl = portable
 name = lasx_xvneg_h
 asm-fmts = xd, xj
 data-types = V16HI, V16HI
 
 /// lasx_xvneg_w
+impl = portable
 name = lasx_xvneg_w
 asm-fmts = xd, xj
 data-types = V8SI, V8SI
 
 /// lasx_xvneg_d
+impl = portable
 name = lasx_xvneg_d
 asm-fmts = xd, xj
 data-types = V4DI, V4DI
@@ -2144,41 +2351,49 @@ asm-fmts = xd, xj, xk
 data-types = V4DI, V4DI, V4DI
 
 /// lasx_xvfmadd_s
+impl = portable
 name = lasx_xvfmadd_s
 asm-fmts = xd, xj, xk, xa
 data-types = V8SF, V8SF, V8SF, V8SF
 
 /// lasx_xvfmadd_d
+impl = portable
 name = lasx_xvfmadd_d
 asm-fmts = xd, xj, xk, xa
 data-types = V4DF, V4DF, V4DF, V4DF
 
 /// lasx_xvfmsub_s
+impl = portable
 name = lasx_xvfmsub_s
 asm-fmts = xd, xj, xk, xa
 data-types = V8SF, V8SF, V8SF, V8SF
 
 /// lasx_xvfmsub_d
+impl = portable
 name = lasx_xvfmsub_d
 asm-fmts = xd, xj, xk, xa
 data-types = V4DF, V4DF, V4DF, V4DF
 
 /// lasx_xvfnmadd_s
+impl = portable
 name = lasx_xvfnmadd_s
 asm-fmts = xd, xj, xk, xa
 data-types = V8SF, V8SF, V8SF, V8SF
 
 /// lasx_xvfnmadd_d
+impl = portable
 name = lasx_xvfnmadd_d
 asm-fmts = xd, xj, xk, xa
 data-types = V4DF, V4DF, V4DF, V4DF
 
 /// lasx_xvfnmsub_s
+impl = portable
 name = lasx_xvfnmsub_s
 asm-fmts = xd, xj, xk, xa
 data-types = V8SF, V8SF, V8SF, V8SF
 
 /// lasx_xvfnmsub_d
+impl = portable
 name = lasx_xvfnmsub_d
 asm-fmts = xd, xj, xk, xa
 data-types = V4DF, V4DF, V4DF, V4DF
@@ -2424,9 +2639,10 @@ asm-fmts = xd, xj, xk
 data-types = V8SI, V4DI, V4DI
 
 /// lasx_xvorn_v
+impl = portable
 name = lasx_xvorn_v
 asm-fmts = xd, xj, xk
-data-types = V32QI, V32QI, V32QI
+data-types = UV32QI, UV32QI, UV32QI
 
 /// lasx_xvldi
 name = lasx_xvldi
@@ -2449,11 +2665,13 @@ asm-fmts = xd, xj
 data-types = UV4DI, UV4DI
 
 /// lasx_xvinsgr2vr_w
+impl = portable
 name = lasx_xvinsgr2vr_w
 asm-fmts = xd, rj, ui3
 data-types = V8SI, V8SI, SI, UQI
 
 /// lasx_xvinsgr2vr_d
+impl = portable
 name = lasx_xvinsgr2vr_d
 asm-fmts = xd, rj, ui2
 data-types = V4DI, V4DI, DI, UQI
@@ -2579,21 +2797,25 @@ asm-fmts = xd, rj, si9
 data-types = V4DI, CVPOINTER, SI
 
 /// lasx_xvpickve2gr_w
+impl = portable
 name = lasx_xvpickve2gr_w
 asm-fmts = rd, xj, ui3
 data-types = SI, V8SI, UQI
 
 /// lasx_xvpickve2gr_wu
+impl = portable
 name = lasx_xvpickve2gr_wu
 asm-fmts = rd, xj, ui3
 data-types = USI, V8SI, UQI
 
 /// lasx_xvpickve2gr_d
+impl = portable
 name = lasx_xvpickve2gr_d
 asm-fmts = rd, xj, ui2
 data-types = DI, V4DI, UQI
 
 /// lasx_xvpickve2gr_du
+impl = portable
 name = lasx_xvpickve2gr_du
 asm-fmts = rd, xj, ui2
 data-types = UDI, V4DI, UQI
@@ -3684,22 +3906,116 @@ asm-fmts = xd, xj, ui3
 data-types = V8SF, V8SF, UQI
 
 /// lasx_xvrepli_b
+impl = portable
 name = lasx_xvrepli_b
 asm-fmts = xd, si10
 data-types = V32QI, HI
 
 /// lasx_xvrepli_d
+impl = portable
 name = lasx_xvrepli_d
 asm-fmts = xd, si10
 data-types = V4DI, HI
 
 /// lasx_xvrepli_h
+impl = portable
 name = lasx_xvrepli_h
 asm-fmts = xd, si10
 data-types = V16HI, HI
 
 /// lasx_xvrepli_w
+impl = portable
 name = lasx_xvrepli_w
 asm-fmts = xd, si10
 data-types = V8SI, HI
 
+/// lasx_cast_128_s
+name = lasx_cast_128_s
+asm-fmts = xd, vj
+data-types = V8SF, V4SF
+
+/// lasx_cast_128_d
+name = lasx_cast_128_d
+asm-fmts = xd, vj
+data-types = V4DF, V2DF
+
+/// lasx_cast_128
+name = lasx_cast_128
+asm-fmts = xd, vj
+data-types = V4DI, V2DI
+
+/// lasx_concat_128_s
+name = lasx_concat_128_s
+asm-fmts = xd, vj, vk
+data-types = V8SF, V4SF, V4SF
+
+/// lasx_concat_128_d
+name = lasx_concat_128_d
+asm-fmts = xd, vj, vk
+data-types = V4DF, V2DF, V2DF
+
+/// lasx_concat_128
+name = lasx_concat_128
+asm-fmts = xd, vj, vk
+data-types = V4DI, V2DI, V2DI
+
+/// lasx_extract_128_lo_s
+name = lasx_extract_128_lo_s
+asm-fmts = vd, xj
+data-types = V4SF, V8SF
+
+/// lasx_extract_128_hi_s
+name = lasx_extract_128_hi_s
+asm-fmts = vd, xj
+data-types = V4SF, V8SF
+
+/// lasx_extract_128_lo_d
+name = lasx_extract_128_lo_d
+asm-fmts = vd, xj
+data-types = V2DF, V4DF
+
+/// lasx_extract_128_hi_d
+name = lasx_extract_128_hi_d
+asm-fmts = vd, xj
+data-types = V2DF, V4DF
+
+/// lasx_extract_128_lo
+name = lasx_extract_128_lo
+asm-fmts = vd, xj
+data-types = V2DI, V4DI
+
+/// lasx_extract_128_hi
+name = lasx_extract_128_hi
+asm-fmts = vd, xj
+data-types = V2DI, V4DI
+
+/// lasx_insert_128_lo_s
+name = lasx_insert_128_lo_s
+asm-fmts = xd, xj, vk
+data-types = V8SF, V8SF, V4SF
+
+/// lasx_insert_128_hi_s
+name = lasx_insert_128_hi_s
+asm-fmts = xd, xj, vk
+data-types = V8SF, V8SF, V4SF
+
+/// lasx_insert_128_lo_d
+name = lasx_insert_128_lo_d
+asm-fmts = xd, xj, vk
+data-types = V4DF, V4DF, V2DF
+
+/// lasx_insert_128_hi_d
+name = lasx_insert_128_hi_d
+asm-fmts = xd, xj, vk
+data-types = V4DF, V4DF, V2DF
+
+/// lasx_insert_128_lo
+name = lasx_insert_128_lo
+asm-fmts = xd, xj, vk
+data-types = V4DI, V4DI, V2DI
+
+/// lasx_insert_128_hi
+name = lasx_insert_128_hi
+asm-fmts = xd, xj, vk
+data-types = V4DI, V4DI, V2DI
+
diff --git a/crates/stdarch-gen-loongarch/lasxintrin.h b/crates/stdarch-gen-loongarch/lasxintrin.h
index c525b6106b..02bb97918d 100644
--- a/crates/stdarch-gen-loongarch/lasxintrin.h
+++ b/crates/stdarch-gen-loongarch/lasxintrin.h
@@ -1,10 +1,10 @@
 /*
- * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lasxintrin.h;hb=61f1001f2f4ab9128e5eb6e9a4adbbb0f9f0bc75
+ * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lasxintrin.h;hb=c2013267642fea4a6e89b826940c8aa80a76089d
  */
 
 /* LARCH Loongson ASX intrinsics include file.
 
-   Copyright (C) 2018-2024 Free Software Foundation, Inc.
+   Copyright (C) 2018-2025 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
@@ -27,6 +27,8 @@
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+#include <lsxintrin.h>
+
 #ifndef _GCC_LOONGSON_ASXINTRIN_H
 #define _GCC_LOONGSON_ASXINTRIN_H 1
 
@@ -3568,11 +3570,11 @@ __m256i __lasx_xvssrln_w_d (__m256i _1, __m256i _2)
 }
 
 /* Assembly instruction format:	xd, xj, xk.  */
-/* Data types in instruction templates:  V32QI, V32QI, V32QI.  */
+/* Data types in instruction templates:  UV32QI, UV32QI, UV32QI.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m256i __lasx_xvorn_v (__m256i _1, __m256i _2)
 {
-  return (__m256i)__builtin_lasx_xvorn_v ((v32i8)_1, (v32i8)_2);
+  return (__m256i)__builtin_lasx_xvorn_v ((v32u8)_1, (v32u8)_2);
 }
 
 /* Assembly instruction format:	xd, i13.  */
@@ -5372,5 +5374,159 @@ __m256i __lasx_xvfcmp_sun_s (__m256 _1, __m256 _2)
 #define __lasx_xvrepli_w(/*si10*/ _1) \
   ((__m256i)__builtin_lasx_xvrepli_w ((_1)))
 
+#if defined (__loongarch_asx_sx_conv)
+/* Add builtin interfaces for 128 and 256 vector conversions.
+   For the assembly instruction format of some functions of the following vector
+   conversion, it is not described exactly in accordance with the format of the
+   generated assembly instruction.
+   In the front end of the Rust language, different built-in functions are called
+   by analyzing the format of assembly instructions. The data types of instructions
+   are all defined based on the interfaces of the defined functions, in the
+   following order: output, input... .  */
+/* Assembly instruction format:	xd, vj.  */
+/* Data types in instruction templates:  V8SF, V4SF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_cast_128_s (__m128 _1)
+{
+  return  (__m256)__builtin_lasx_cast_128_s ((v4f32)_1);
+}
+
+/* Assembly instruction format:	xd, vj.  */
+/* Data types in instruction templates:  V4DF, V2DF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_cast_128_d (__m128d _1)
+{
+  return  (__m256d)__builtin_lasx_cast_128_d ((v2f64)_1);
+}
+
+/* Assembly instruction format:	xd, vj.  */
+/* Data types in instruction templates:  V4DI, V2DI.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256i __lasx_cast_128 (__m128i _1)
+{
+  return  (__m256i)__builtin_lasx_cast_128 ((v2i64)_1);
+}
+
+/* Assembly instruction format:	xd, vj, vk.  */
+/* Data types in instruction templates:  V8SF, V4SF, V4SF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_concat_128_s (__m128 _1, __m128 _2)
+{
+  return  (__m256)__builtin_lasx_concat_128_s ((v4f32)_1, (v4f32)_2);
+}
+
+/* Assembly instruction format:	xd, vj, vk.  */
+/* Data types in instruction templates:  V4DF, V2DF, V2DF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_concat_128_d (__m128d _1, __m128d _2)
+{
+  return  (__m256d)__builtin_lasx_concat_128_d ((v2f64)_1, (v2f64)_2);
+}
+
+/* Assembly instruction format:	xd, vj, vk.  */
+/* Data types in instruction templates:  V4DI, V2DI, V2DI.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256i __lasx_concat_128 (__m128i _1, __m128i _2)
+{
+  return  (__m256i)__builtin_lasx_concat_128 ((v2i64)_1, (v2i64)_2);
+}
+
+/* Assembly instruction format:	vd, xj.  */
+/* Data types in instruction templates:  V4SF, V8SF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128 __lasx_extract_128_lo_s (__m256 _1)
+{
+  return  (__m128)__builtin_lasx_extract_128_lo_s ((v8f32)_1);
+}
+
+/* Assembly instruction format:	vd, xj.  */
+/* Data types in instruction templates:  V4SF, V8SF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128 __lasx_extract_128_hi_s (__m256 _1)
+{
+  return  (__m128)__builtin_lasx_extract_128_hi_s ((v8f32)_1);
+}
+
+/* Assembly instruction format:	vd, xj.  */
+/* Data types in instruction templates:  V2DF, V4DF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128d __lasx_extract_128_lo_d (__m256d _1)
+{
+  return  (__m128d)__builtin_lasx_extract_128_lo_d ((v4f64)_1);
+}
+
+/* Assembly instruction format:	vd, xj.  */
+/* Data types in instruction templates:  V2DF, V4DF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128d __lasx_extract_128_hi_d (__m256d _1)
+{
+  return  (__m128d)__builtin_lasx_extract_128_hi_d ((v4f64)_1);
+}
+
+/* Assembly instruction format:	vd, xj.  */
+/* Data types in instruction templates:  V2DI, V4DI.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128i __lasx_extract_128_lo (__m256i _1)
+{
+  return  (__m128i)__builtin_lasx_extract_128_lo ((v4i64)_1);
+}
+
+/* Assembly instruction format:	vd, xj.  */
+/* Data types in instruction templates:  V2DI, V4DI.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128i __lasx_extract_128_hi (__m256i _1)
+{
+  return  (__m128i)__builtin_lasx_extract_128_hi ((v4i64)_1);
+}
+
+/* Assembly instruction format:	xd, xj, vk.  */
+/* Data types in instruction templates:  V8SF, V8SF, V4SF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_insert_128_lo_s (__m256 _1, __m128 _2)
+{
+  return  (__m256)__builtin_lasx_insert_128_lo_s ((v8f32)_1, (v4f32)_2);
+}
+
+/* Assembly instruction format:	xd, xj, vk.  */
+/* Data types in instruction templates:  V8SF, V8SF, V4SF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_insert_128_hi_s (__m256 _1, __m128 _2)
+{
+  return  (__m256)__builtin_lasx_insert_128_hi_s ((v8f32)_1, (v4f32)_2);
+}
+
+/* Assembly instruction format:	xd, xj, vk.  */
+/* Data types in instruction templates:  V4DF, V4DF, V2DF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_insert_128_lo_d (__m256d _1, __m128d _2)
+{
+  return  (__m256d)__builtin_lasx_insert_128_lo_d ((v4f64)_1, (v2f64)_2);
+}
+
+/* Assembly instruction format:	xd, xj, vk.  */
+/* Data types in instruction templates:  V4DF, V4DF, V2DF.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_insert_128_hi_d (__m256d _1, __m128d _2)
+{
+  return  (__m256d)__builtin_lasx_insert_128_hi_d ((v4f64)_1, (v2f64)_2);
+}
+
+/* Assembly instruction format:	xd, xj, vk.  */
+/* Data types in instruction templates:  V4DI, V4DI, V2DI.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256i __lasx_insert_128_lo (__m256i _1, __m128i _2)
+{
+  return  (__m256i)__builtin_lasx_insert_128_lo ((v4i64)_1, (v2i64)_2);
+}
+
+/* Assembly instruction format:	xd, xj, vk.  */
+/* Data types in instruction templates:  V4DI, V4DI, V2DI.  */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256i __lasx_insert_128_hi (__m256i _1, __m128i _2)
+{
+  return  (__m256i)__builtin_lasx_insert_128_hi ((v4i64)_1, (v2i64)_2);
+}
+
+#endif /* defined(__loongarch_asx_sx_conv).  */
 #endif /* defined(__loongarch_asx).  */
 #endif /* _GCC_LOONGSON_ASXINTRIN_H.  */
diff --git a/crates/stdarch-gen-loongarch/lsx.spec b/crates/stdarch-gen-loongarch/lsx.spec
index dc835770d5..48e98d59b6 100644
--- a/crates/stdarch-gen-loongarch/lsx.spec
+++ b/crates/stdarch-gen-loongarch/lsx.spec
@@ -4,81 +4,97 @@
 // ```
 
 /// lsx_vsll_b
+impl = portable
 name = lsx_vsll_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vsll_h
+impl = portable
 name = lsx_vsll_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vsll_w
+impl = portable
 name = lsx_vsll_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vsll_d
+impl = portable
 name = lsx_vsll_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vslli_b
+impl = portable
 name = lsx_vslli_b
 asm-fmts = vd, vj, ui3
 data-types = V16QI, V16QI, UQI
 
 /// lsx_vslli_h
+impl = portable
 name = lsx_vslli_h
 asm-fmts = vd, vj, ui4
 data-types = V8HI, V8HI, UQI
 
 /// lsx_vslli_w
+impl = portable
 name = lsx_vslli_w
 asm-fmts = vd, vj, ui5
 data-types = V4SI, V4SI, UQI
 
 /// lsx_vslli_d
+impl = portable
 name = lsx_vslli_d
 asm-fmts = vd, vj, ui6
 data-types = V2DI, V2DI, UQI
 
 /// lsx_vsra_b
+impl = portable
 name = lsx_vsra_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vsra_h
+impl = portable
 name = lsx_vsra_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vsra_w
+impl = portable
 name = lsx_vsra_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vsra_d
+impl = portable
 name = lsx_vsra_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vsrai_b
+impl = portable
 name = lsx_vsrai_b
 asm-fmts = vd, vj, ui3
 data-types = V16QI, V16QI, UQI
 
 /// lsx_vsrai_h
+impl = portable
 name = lsx_vsrai_h
 asm-fmts = vd, vj, ui4
 data-types = V8HI, V8HI, UQI
 
 /// lsx_vsrai_w
+impl = portable
 name = lsx_vsrai_w
 asm-fmts = vd, vj, ui5
 data-types = V4SI, V4SI, UQI
 
 /// lsx_vsrai_d
+impl = portable
 name = lsx_vsrai_d
 asm-fmts = vd, vj, ui6
 data-types = V2DI, V2DI, UQI
@@ -124,41 +140,49 @@ asm-fmts = vd, vj, ui6
 data-types = V2DI, V2DI, UQI
 
 /// lsx_vsrl_b
+impl = portable
 name = lsx_vsrl_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vsrl_h
+impl = portable
 name = lsx_vsrl_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vsrl_w
+impl = portable
 name = lsx_vsrl_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vsrl_d
+impl = portable
 name = lsx_vsrl_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vsrli_b
+impl = portable
 name = lsx_vsrli_b
 asm-fmts = vd, vj, ui3
 data-types = V16QI, V16QI, UQI
 
 /// lsx_vsrli_h
+impl = portable
 name = lsx_vsrli_h
 asm-fmts = vd, vj, ui4
 data-types = V8HI, V8HI, UQI
 
 /// lsx_vsrli_w
+impl = portable
 name = lsx_vsrli_w
 asm-fmts = vd, vj, ui5
 data-types = V4SI, V4SI, UQI
 
 /// lsx_vsrli_d
+impl = portable
 name = lsx_vsrli_d
 asm-fmts = vd, vj, ui6
 data-types = V2DI, V2DI, UQI
@@ -204,21 +228,25 @@ asm-fmts = vd, vj, ui6
 data-types = V2DI, V2DI, UQI
 
 /// lsx_vbitclr_b
+impl = portable
 name = lsx_vbitclr_b
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vbitclr_h
+impl = portable
 name = lsx_vbitclr_h
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vbitclr_w
+impl = portable
 name = lsx_vbitclr_w
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vbitclr_d
+impl = portable
 name = lsx_vbitclr_d
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
@@ -244,21 +272,25 @@ asm-fmts = vd, vj, ui6
 data-types = UV2DI, UV2DI, UQI
 
 /// lsx_vbitset_b
+impl = portable
 name = lsx_vbitset_b
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vbitset_h
+impl = portable
 name = lsx_vbitset_h
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vbitset_w
+impl = portable
 name = lsx_vbitset_w
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vbitset_d
+impl = portable
 name = lsx_vbitset_d
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
@@ -284,21 +316,25 @@ asm-fmts = vd, vj, ui6
 data-types = UV2DI, UV2DI, UQI
 
 /// lsx_vbitrev_b
+impl = portable
 name = lsx_vbitrev_b
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vbitrev_h
+impl = portable
 name = lsx_vbitrev_h
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vbitrev_w
+impl = portable
 name = lsx_vbitrev_w
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vbitrev_d
+impl = portable
 name = lsx_vbitrev_d
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
@@ -324,61 +360,73 @@ asm-fmts = vd, vj, ui6
 data-types = UV2DI, UV2DI, UQI
 
 /// lsx_vadd_b
+impl = portable
 name = lsx_vadd_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vadd_h
+impl = portable
 name = lsx_vadd_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vadd_w
+impl = portable
 name = lsx_vadd_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vadd_d
+impl = portable
 name = lsx_vadd_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vaddi_bu
+impl = portable
 name = lsx_vaddi_bu
 asm-fmts = vd, vj, ui5
 data-types = V16QI, V16QI, UQI
 
 /// lsx_vaddi_hu
+impl = portable
 name = lsx_vaddi_hu
 asm-fmts = vd, vj, ui5
 data-types = V8HI, V8HI, UQI
 
 /// lsx_vaddi_wu
+impl = portable
 name = lsx_vaddi_wu
 asm-fmts = vd, vj, ui5
 data-types = V4SI, V4SI, UQI
 
 /// lsx_vaddi_du
+impl = portable
 name = lsx_vaddi_du
 asm-fmts = vd, vj, ui5
 data-types = V2DI, V2DI, UQI
 
 /// lsx_vsub_b
+impl = portable
 name = lsx_vsub_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vsub_h
+impl = portable
 name = lsx_vsub_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vsub_w
+impl = portable
 name = lsx_vsub_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vsub_d
+impl = portable
 name = lsx_vsub_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
@@ -404,361 +452,433 @@ asm-fmts = vd, vj, ui5
 data-types = V2DI, V2DI, UQI
 
 /// lsx_vmax_b
+impl = portable
 name = lsx_vmax_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vmax_h
+impl = portable
 name = lsx_vmax_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vmax_w
+impl = portable
 name = lsx_vmax_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vmax_d
+impl = portable
 name = lsx_vmax_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vmaxi_b
+impl = portable
 name = lsx_vmaxi_b
 asm-fmts = vd, vj, si5
 data-types = V16QI, V16QI, QI
 
 /// lsx_vmaxi_h
+impl = portable
 name = lsx_vmaxi_h
 asm-fmts = vd, vj, si5
 data-types = V8HI, V8HI, QI
 
 /// lsx_vmaxi_w
+impl = portable
 name = lsx_vmaxi_w
 asm-fmts = vd, vj, si5
 data-types = V4SI, V4SI, QI
 
 /// lsx_vmaxi_d
+impl = portable
 name = lsx_vmaxi_d
 asm-fmts = vd, vj, si5
 data-types = V2DI, V2DI, QI
 
 /// lsx_vmax_bu
+impl = portable
 name = lsx_vmax_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vmax_hu
+impl = portable
 name = lsx_vmax_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vmax_wu
+impl = portable
 name = lsx_vmax_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vmax_du
+impl = portable
 name = lsx_vmax_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
 
 /// lsx_vmaxi_bu
+impl = portable
 name = lsx_vmaxi_bu
 asm-fmts = vd, vj, ui5
 data-types = UV16QI, UV16QI, UQI
 
 /// lsx_vmaxi_hu
+impl = portable
 name = lsx_vmaxi_hu
 asm-fmts = vd, vj, ui5
 data-types = UV8HI, UV8HI, UQI
 
 /// lsx_vmaxi_wu
+impl = portable
 name = lsx_vmaxi_wu
 asm-fmts = vd, vj, ui5
 data-types = UV4SI, UV4SI, UQI
 
 /// lsx_vmaxi_du
+impl = portable
 name = lsx_vmaxi_du
 asm-fmts = vd, vj, ui5
 data-types = UV2DI, UV2DI, UQI
 
 /// lsx_vmin_b
+impl = portable
 name = lsx_vmin_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vmin_h
+impl = portable
 name = lsx_vmin_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vmin_w
+impl = portable
 name = lsx_vmin_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vmin_d
+impl = portable
 name = lsx_vmin_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vmini_b
+impl = portable
 name = lsx_vmini_b
 asm-fmts = vd, vj, si5
 data-types = V16QI, V16QI, QI
 
 /// lsx_vmini_h
+impl = portable
 name = lsx_vmini_h
 asm-fmts = vd, vj, si5
 data-types = V8HI, V8HI, QI
 
 /// lsx_vmini_w
+impl = portable
 name = lsx_vmini_w
 asm-fmts = vd, vj, si5
 data-types = V4SI, V4SI, QI
 
 /// lsx_vmini_d
+impl = portable
 name = lsx_vmini_d
 asm-fmts = vd, vj, si5
 data-types = V2DI, V2DI, QI
 
 /// lsx_vmin_bu
+impl = portable
 name = lsx_vmin_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vmin_hu
+impl = portable
 name = lsx_vmin_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vmin_wu
+impl = portable
 name = lsx_vmin_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vmin_du
+impl = portable
 name = lsx_vmin_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
 
 /// lsx_vmini_bu
+impl = portable
 name = lsx_vmini_bu
 asm-fmts = vd, vj, ui5
 data-types = UV16QI, UV16QI, UQI
 
 /// lsx_vmini_hu
+impl = portable
 name = lsx_vmini_hu
 asm-fmts = vd, vj, ui5
 data-types = UV8HI, UV8HI, UQI
 
 /// lsx_vmini_wu
+impl = portable
 name = lsx_vmini_wu
 asm-fmts = vd, vj, ui5
 data-types = UV4SI, UV4SI, UQI
 
 /// lsx_vmini_du
+impl = portable
 name = lsx_vmini_du
 asm-fmts = vd, vj, ui5
 data-types = UV2DI, UV2DI, UQI
 
 /// lsx_vseq_b
+impl = portable
 name = lsx_vseq_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vseq_h
+impl = portable
 name = lsx_vseq_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vseq_w
+impl = portable
 name = lsx_vseq_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vseq_d
+impl = portable
 name = lsx_vseq_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vseqi_b
+impl = portable
 name = lsx_vseqi_b
 asm-fmts = vd, vj, si5
 data-types = V16QI, V16QI, QI
 
 /// lsx_vseqi_h
+impl = portable
 name = lsx_vseqi_h
 asm-fmts = vd, vj, si5
 data-types = V8HI, V8HI, QI
 
 /// lsx_vseqi_w
+impl = portable
 name = lsx_vseqi_w
 asm-fmts = vd, vj, si5
 data-types = V4SI, V4SI, QI
 
 /// lsx_vseqi_d
+impl = portable
 name = lsx_vseqi_d
 asm-fmts = vd, vj, si5
 data-types = V2DI, V2DI, QI
 
 /// lsx_vslti_b
+impl = portable
 name = lsx_vslti_b
 asm-fmts = vd, vj, si5
 data-types = V16QI, V16QI, QI
 
 /// lsx_vslt_b
+impl = portable
 name = lsx_vslt_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vslt_h
+impl = portable
 name = lsx_vslt_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vslt_w
+impl = portable
 name = lsx_vslt_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vslt_d
+impl = portable
 name = lsx_vslt_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vslti_h
+impl = portable
 name = lsx_vslti_h
 asm-fmts = vd, vj, si5
 data-types = V8HI, V8HI, QI
 
 /// lsx_vslti_w
+impl = portable
 name = lsx_vslti_w
 asm-fmts = vd, vj, si5
 data-types = V4SI, V4SI, QI
 
 /// lsx_vslti_d
+impl = portable
 name = lsx_vslti_d
 asm-fmts = vd, vj, si5
 data-types = V2DI, V2DI, QI
 
 /// lsx_vslt_bu
+impl = portable
 name = lsx_vslt_bu
 asm-fmts = vd, vj, vk
 data-types = V16QI, UV16QI, UV16QI
 
 /// lsx_vslt_hu
+impl = portable
 name = lsx_vslt_hu
 asm-fmts = vd, vj, vk
 data-types = V8HI, UV8HI, UV8HI
 
 /// lsx_vslt_wu
+impl = portable
 name = lsx_vslt_wu
 asm-fmts = vd, vj, vk
 data-types = V4SI, UV4SI, UV4SI
 
 /// lsx_vslt_du
+impl = portable
 name = lsx_vslt_du
 asm-fmts = vd, vj, vk
 data-types = V2DI, UV2DI, UV2DI
 
 /// lsx_vslti_bu
+impl = portable
 name = lsx_vslti_bu
 asm-fmts = vd, vj, ui5
 data-types = V16QI, UV16QI, UQI
 
 /// lsx_vslti_hu
+impl = portable
 name = lsx_vslti_hu
 asm-fmts = vd, vj, ui5
 data-types = V8HI, UV8HI, UQI
 
 /// lsx_vslti_wu
+impl = portable
 name = lsx_vslti_wu
 asm-fmts = vd, vj, ui5
 data-types = V4SI, UV4SI, UQI
 
 /// lsx_vslti_du
+impl = portable
 name = lsx_vslti_du
 asm-fmts = vd, vj, ui5
 data-types = V2DI, UV2DI, UQI
 
 /// lsx_vsle_b
+impl = portable
 name = lsx_vsle_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vsle_h
+impl = portable
 name = lsx_vsle_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vsle_w
+impl = portable
 name = lsx_vsle_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vsle_d
+impl = portable
 name = lsx_vsle_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vslei_b
+impl = portable
 name = lsx_vslei_b
 asm-fmts = vd, vj, si5
 data-types = V16QI, V16QI, QI
 
 /// lsx_vslei_h
+impl = portable
 name = lsx_vslei_h
 asm-fmts = vd, vj, si5
 data-types = V8HI, V8HI, QI
 
 /// lsx_vslei_w
+impl = portable
 name = lsx_vslei_w
 asm-fmts = vd, vj, si5
 data-types = V4SI, V4SI, QI
 
 /// lsx_vslei_d
+impl = portable
 name = lsx_vslei_d
 asm-fmts = vd, vj, si5
 data-types = V2DI, V2DI, QI
 
 /// lsx_vsle_bu
+impl = portable
 name = lsx_vsle_bu
 asm-fmts = vd, vj, vk
 data-types = V16QI, UV16QI, UV16QI
 
 /// lsx_vsle_hu
+impl = portable
 name = lsx_vsle_hu
 asm-fmts = vd, vj, vk
 data-types = V8HI, UV8HI, UV8HI
 
 /// lsx_vsle_wu
+impl = portable
 name = lsx_vsle_wu
 asm-fmts = vd, vj, vk
 data-types = V4SI, UV4SI, UV4SI
 
 /// lsx_vsle_du
+impl = portable
 name = lsx_vsle_du
 asm-fmts = vd, vj, vk
 data-types = V2DI, UV2DI, UV2DI
 
 /// lsx_vslei_bu
+impl = portable
 name = lsx_vslei_bu
 asm-fmts = vd, vj, ui5
 data-types = V16QI, UV16QI, UQI
 
 /// lsx_vslei_hu
+impl = portable
 name = lsx_vslei_hu
 asm-fmts = vd, vj, ui5
 data-types = V8HI, UV8HI, UQI
 
 /// lsx_vslei_wu
+impl = portable
 name = lsx_vslei_wu
 asm-fmts = vd, vj, ui5
 data-types = V4SI, UV4SI, UQI
 
 /// lsx_vslei_du
+impl = portable
 name = lsx_vslei_du
 asm-fmts = vd, vj, ui5
 data-types = V2DI, UV2DI, UQI
@@ -804,61 +924,73 @@ asm-fmts = vd, vj, ui6
 data-types = UV2DI, UV2DI, UQI
 
 /// lsx_vadda_b
+impl = portable
 name = lsx_vadda_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vadda_h
+impl = portable
 name = lsx_vadda_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vadda_w
+impl = portable
 name = lsx_vadda_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vadda_d
+impl = portable
 name = lsx_vadda_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vsadd_b
+impl = portable
 name = lsx_vsadd_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vsadd_h
+impl = portable
 name = lsx_vsadd_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vsadd_w
+impl = portable
 name = lsx_vsadd_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vsadd_d
+impl = portable
 name = lsx_vsadd_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vsadd_bu
+impl = portable
 name = lsx_vsadd_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vsadd_hu
+impl = portable
 name = lsx_vsadd_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vsadd_wu
+impl = portable
 name = lsx_vsadd_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vsadd_du
+impl = portable
 name = lsx_vsadd_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
@@ -944,181 +1076,217 @@ asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
 
 /// lsx_vssub_b
+impl = portable
 name = lsx_vssub_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vssub_h
+impl = portable
 name = lsx_vssub_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vssub_w
+impl = portable
 name = lsx_vssub_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vssub_d
+impl = portable
 name = lsx_vssub_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vssub_bu
+impl = portable
 name = lsx_vssub_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vssub_hu
+impl = portable
 name = lsx_vssub_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vssub_wu
+impl = portable
 name = lsx_vssub_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vssub_du
+impl = portable
 name = lsx_vssub_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
 
 /// lsx_vabsd_b
+impl = portable
 name = lsx_vabsd_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vabsd_h
+impl = portable
 name = lsx_vabsd_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vabsd_w
+impl = portable
 name = lsx_vabsd_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vabsd_d
+impl = portable
 name = lsx_vabsd_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vabsd_bu
+impl = portable
 name = lsx_vabsd_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vabsd_hu
+impl = portable
 name = lsx_vabsd_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vabsd_wu
+impl = portable
 name = lsx_vabsd_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vabsd_du
+impl = portable
 name = lsx_vabsd_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
 
 /// lsx_vmul_b
+impl = portable
 name = lsx_vmul_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vmul_h
+impl = portable
 name = lsx_vmul_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vmul_w
+impl = portable
 name = lsx_vmul_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vmul_d
+impl = portable
 name = lsx_vmul_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vmadd_b
+impl = portable
 name = lsx_vmadd_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI, V16QI
 
 /// lsx_vmadd_h
+impl = portable
 name = lsx_vmadd_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI, V8HI
 
 /// lsx_vmadd_w
+impl = portable
 name = lsx_vmadd_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI, V4SI
 
 /// lsx_vmadd_d
+impl = portable
 name = lsx_vmadd_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI, V2DI
 
 /// lsx_vmsub_b
+impl = portable
 name = lsx_vmsub_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI, V16QI
 
 /// lsx_vmsub_h
+impl = portable
 name = lsx_vmsub_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI, V8HI
 
 /// lsx_vmsub_w
+impl = portable
 name = lsx_vmsub_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI, V4SI
 
 /// lsx_vmsub_d
+impl = portable
 name = lsx_vmsub_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI, V2DI
 
 /// lsx_vdiv_b
+impl = portable
 name = lsx_vdiv_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vdiv_h
+impl = portable
 name = lsx_vdiv_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vdiv_w
+impl = portable
 name = lsx_vdiv_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vdiv_d
+impl = portable
 name = lsx_vdiv_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vdiv_bu
+impl = portable
 name = lsx_vdiv_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vdiv_hu
+impl = portable
 name = lsx_vdiv_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vdiv_wu
+impl = portable
 name = lsx_vdiv_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vdiv_du
+impl = portable
 name = lsx_vdiv_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
@@ -1184,41 +1352,49 @@ asm-fmts = vd, vj, vk
 data-types = V2DI, UV4SI, UV4SI
 
 /// lsx_vmod_b
+impl = portable
 name = lsx_vmod_b
 asm-fmts = vd, vj, vk
 data-types = V16QI, V16QI, V16QI
 
 /// lsx_vmod_h
+impl = portable
 name = lsx_vmod_h
 asm-fmts = vd, vj, vk
 data-types = V8HI, V8HI, V8HI
 
 /// lsx_vmod_w
+impl = portable
 name = lsx_vmod_w
 asm-fmts = vd, vj, vk
 data-types = V4SI, V4SI, V4SI
 
 /// lsx_vmod_d
+impl = portable
 name = lsx_vmod_d
 asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vmod_bu
+impl = portable
 name = lsx_vmod_bu
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vmod_hu
+impl = portable
 name = lsx_vmod_hu
 asm-fmts = vd, vj, vk
 data-types = UV8HI, UV8HI, UV8HI
 
 /// lsx_vmod_wu
+impl = portable
 name = lsx_vmod_wu
 asm-fmts = vd, vj, vk
 data-types = UV4SI, UV4SI, UV4SI
 
 /// lsx_vmod_du
+impl = portable
 name = lsx_vmod_du
 asm-fmts = vd, vj, vk
 data-types = UV2DI, UV2DI, UV2DI
@@ -1399,6 +1575,7 @@ asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI, V2DI
 
 /// lsx_vand_v
+impl = portable
 name = lsx_vand_v
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
@@ -1409,6 +1586,7 @@ asm-fmts = vd, vj, ui8
 data-types = UV16QI, UV16QI, UQI
 
 /// lsx_vor_v
+impl = portable
 name = lsx_vor_v
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
@@ -1419,6 +1597,7 @@ asm-fmts = vd, vj, ui8
 data-types = UV16QI, UV16QI, UQI
 
 /// lsx_vnor_v
+impl = portable
 name = lsx_vnor_v
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
@@ -1429,6 +1608,7 @@ asm-fmts = vd, vj, ui8
 data-types = UV16QI, UV16QI, UQI
 
 /// lsx_vxor_v
+impl = portable
 name = lsx_vxor_v
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
@@ -1464,41 +1644,49 @@ asm-fmts = vd, vj, ui8
 data-types = V4SI, V4SI, USI
 
 /// lsx_vreplgr2vr_b
+impl = portable
 name = lsx_vreplgr2vr_b
 asm-fmts = vd, rj
 data-types = V16QI, SI
 
 /// lsx_vreplgr2vr_h
+impl = portable
 name = lsx_vreplgr2vr_h
 asm-fmts = vd, rj
 data-types = V8HI, SI
 
 /// lsx_vreplgr2vr_w
+impl = portable
 name = lsx_vreplgr2vr_w
 asm-fmts = vd, rj
 data-types = V4SI, SI
 
 /// lsx_vreplgr2vr_d
+impl = portable
 name = lsx_vreplgr2vr_d
 asm-fmts = vd, rj
 data-types = V2DI, DI
 
 /// lsx_vpcnt_b
+impl = portable
 name = lsx_vpcnt_b
 asm-fmts = vd, vj
 data-types = V16QI, V16QI
 
 /// lsx_vpcnt_h
+impl = portable
 name = lsx_vpcnt_h
 asm-fmts = vd, vj
 data-types = V8HI, V8HI
 
 /// lsx_vpcnt_w
+impl = portable
 name = lsx_vpcnt_w
 asm-fmts = vd, vj
 data-types = V4SI, V4SI
 
 /// lsx_vpcnt_d
+impl = portable
 name = lsx_vpcnt_d
 asm-fmts = vd, vj
 data-types = V2DI, V2DI
@@ -1524,121 +1712,145 @@ asm-fmts = vd, vj
 data-types = V2DI, V2DI
 
 /// lsx_vclz_b
+impl = portable
 name = lsx_vclz_b
 asm-fmts = vd, vj
 data-types = V16QI, V16QI
 
 /// lsx_vclz_h
+impl = portable
 name = lsx_vclz_h
 asm-fmts = vd, vj
 data-types = V8HI, V8HI
 
 /// lsx_vclz_w
+impl = portable
 name = lsx_vclz_w
 asm-fmts = vd, vj
 data-types = V4SI, V4SI
 
 /// lsx_vclz_d
+impl = portable
 name = lsx_vclz_d
 asm-fmts = vd, vj
 data-types = V2DI, V2DI
 
 /// lsx_vpickve2gr_b
+impl = portable
 name = lsx_vpickve2gr_b
 asm-fmts = rd, vj, ui4
 data-types = SI, V16QI, UQI
 
 /// lsx_vpickve2gr_h
+impl = portable
 name = lsx_vpickve2gr_h
 asm-fmts = rd, vj, ui3
 data-types = SI, V8HI, UQI
 
 /// lsx_vpickve2gr_w
+impl = portable
 name = lsx_vpickve2gr_w
 asm-fmts = rd, vj, ui2
 data-types = SI, V4SI, UQI
 
 /// lsx_vpickve2gr_d
+impl = portable
 name = lsx_vpickve2gr_d
 asm-fmts = rd, vj, ui1
 data-types = DI, V2DI, UQI
 
 /// lsx_vpickve2gr_bu
+impl = portable
 name = lsx_vpickve2gr_bu
 asm-fmts = rd, vj, ui4
 data-types = USI, V16QI, UQI
 
 /// lsx_vpickve2gr_hu
+impl = portable
 name = lsx_vpickve2gr_hu
 asm-fmts = rd, vj, ui3
 data-types = USI, V8HI, UQI
 
 /// lsx_vpickve2gr_wu
+impl = portable
 name = lsx_vpickve2gr_wu
 asm-fmts = rd, vj, ui2
 data-types = USI, V4SI, UQI
 
 /// lsx_vpickve2gr_du
+impl = portable
 name = lsx_vpickve2gr_du
 asm-fmts = rd, vj, ui1
 data-types = UDI, V2DI, UQI
 
 /// lsx_vinsgr2vr_b
+impl = portable
 name = lsx_vinsgr2vr_b
 asm-fmts = vd, rj, ui4
 data-types = V16QI, V16QI, SI, UQI
 
 /// lsx_vinsgr2vr_h
+impl = portable
 name = lsx_vinsgr2vr_h
 asm-fmts = vd, rj, ui3
 data-types = V8HI, V8HI, SI, UQI
 
 /// lsx_vinsgr2vr_w
+impl = portable
 name = lsx_vinsgr2vr_w
 asm-fmts = vd, rj, ui2
 data-types = V4SI, V4SI, SI, UQI
 
 /// lsx_vinsgr2vr_d
+impl = portable
 name = lsx_vinsgr2vr_d
 asm-fmts = vd, rj, ui1
 data-types = V2DI, V2DI, DI, UQI
 
 /// lsx_vfadd_s
+impl = portable
 name = lsx_vfadd_s
 asm-fmts = vd, vj, vk
 data-types = V4SF, V4SF, V4SF
 
 /// lsx_vfadd_d
+impl = portable
 name = lsx_vfadd_d
 asm-fmts = vd, vj, vk
 data-types = V2DF, V2DF, V2DF
 
 /// lsx_vfsub_s
+impl = portable
 name = lsx_vfsub_s
 asm-fmts = vd, vj, vk
 data-types = V4SF, V4SF, V4SF
 
 /// lsx_vfsub_d
+impl = portable
 name = lsx_vfsub_d
 asm-fmts = vd, vj, vk
 data-types = V2DF, V2DF, V2DF
 
 /// lsx_vfmul_s
+impl = portable
 name = lsx_vfmul_s
 asm-fmts = vd, vj, vk
 data-types = V4SF, V4SF, V4SF
 
 /// lsx_vfmul_d
+impl = portable
 name = lsx_vfmul_d
 asm-fmts = vd, vj, vk
 data-types = V2DF, V2DF, V2DF
 
 /// lsx_vfdiv_s
+impl = portable
 name = lsx_vfdiv_s
 asm-fmts = vd, vj, vk
 data-types = V4SF, V4SF, V4SF
 
 /// lsx_vfdiv_d
+impl = portable
 name = lsx_vfdiv_d
 asm-fmts = vd, vj, vk
 data-types = V2DF, V2DF, V2DF
@@ -1704,11 +1916,13 @@ asm-fmts = vd, vj
 data-types = V2DI, V2DF
 
 /// lsx_vfsqrt_s
+impl = portable
 name = lsx_vfsqrt_s
 asm-fmts = vd, vj
 data-types = V4SF, V4SF
 
 /// lsx_vfsqrt_d
+impl = portable
 name = lsx_vfsqrt_d
 asm-fmts = vd, vj
 data-types = V2DF, V2DF
@@ -1854,26 +2068,31 @@ asm-fmts = vd, vj
 data-types = V2DF, UV2DI
 
 /// lsx_vandn_v
+impl = portable
 name = lsx_vandn_v
 asm-fmts = vd, vj, vk
 data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vneg_b
+impl = portable
 name = lsx_vneg_b
 asm-fmts = vd, vj
 data-types = V16QI, V16QI
 
 /// lsx_vneg_h
+impl = portable
 name = lsx_vneg_h
 asm-fmts = vd, vj
 data-types = V8HI, V8HI
 
 /// lsx_vneg_w
+impl = portable
 name = lsx_vneg_w
 asm-fmts = vd, vj
 data-types = V4SI, V4SI
 
 /// lsx_vneg_d
+impl = portable
 name = lsx_vneg_d
 asm-fmts = vd, vj
 data-types = V2DI, V2DI
@@ -2194,41 +2413,49 @@ asm-fmts = vd, vj, vk
 data-types = V2DI, V2DI, V2DI
 
 /// lsx_vfmadd_s
+impl = portable
 name = lsx_vfmadd_s
 asm-fmts = vd, vj, vk, va
 data-types = V4SF, V4SF, V4SF, V4SF
 
 /// lsx_vfmadd_d
+impl = portable
 name = lsx_vfmadd_d
 asm-fmts = vd, vj, vk, va
 data-types = V2DF, V2DF, V2DF, V2DF
 
 /// lsx_vfmsub_s
+impl = portable
 name = lsx_vfmsub_s
 asm-fmts = vd, vj, vk, va
 data-types = V4SF, V4SF, V4SF, V4SF
 
 /// lsx_vfmsub_d
+impl = portable
 name = lsx_vfmsub_d
 asm-fmts = vd, vj, vk, va
 data-types = V2DF, V2DF, V2DF, V2DF
 
 /// lsx_vfnmadd_s
+impl = portable
 name = lsx_vfnmadd_s
 asm-fmts = vd, vj, vk, va
 data-types = V4SF, V4SF, V4SF, V4SF
 
 /// lsx_vfnmadd_d
+impl = portable
 name = lsx_vfnmadd_d
 asm-fmts = vd, vj, vk, va
 data-types = V2DF, V2DF, V2DF, V2DF
 
 /// lsx_vfnmsub_s
+impl = portable
 name = lsx_vfnmsub_s
 asm-fmts = vd, vj, vk, va
 data-types = V4SF, V4SF, V4SF, V4SF
 
 /// lsx_vfnmsub_d
+impl = portable
 name = lsx_vfnmsub_d
 asm-fmts = vd, vj, vk, va
 data-types = V2DF, V2DF, V2DF, V2DF
@@ -3284,9 +3511,10 @@ asm-fmts = vd, vj, vk
 data-types = V4SI, V2DI, V2DI
 
 /// lsx_vorn_v
+impl = portable
 name = lsx_vorn_v
 asm-fmts = vd, vj, vk
-data-types = V16QI, V16QI, V16QI
+data-types = UV16QI, UV16QI, UV16QI
 
 /// lsx_vldi
 name = lsx_vldi
@@ -3584,21 +3812,25 @@ asm-fmts = vd, vj, vk
 data-types = V4SI, V4SF, V4SF
 
 /// lsx_vrepli_b
+impl = portable
 name = lsx_vrepli_b
 asm-fmts = vd, si10
 data-types = V16QI, HI
 
 /// lsx_vrepli_d
+impl = portable
 name = lsx_vrepli_d
 asm-fmts = vd, si10
 data-types = V2DI, HI
 
 /// lsx_vrepli_h
+impl = portable
 name = lsx_vrepli_h
 asm-fmts = vd, si10
 data-types = V8HI, HI
 
 /// lsx_vrepli_w
+impl = portable
 name = lsx_vrepli_w
 asm-fmts = vd, si10
 data-types = V4SI, HI
diff --git a/crates/stdarch-gen-loongarch/lsxintrin.h b/crates/stdarch-gen-loongarch/lsxintrin.h
index 943f2df913..66b7c7e218 100644
--- a/crates/stdarch-gen-loongarch/lsxintrin.h
+++ b/crates/stdarch-gen-loongarch/lsxintrin.h
@@ -1,10 +1,10 @@
 /*
- * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lsxintrin.h;hb=61f1001f2f4ab9128e5eb6e9a4adbbb0f9f0bc75
+ * https://gcc.gnu.org/git/?p=gcc.git;a=blob_plain;f=gcc/config/loongarch/lsxintrin.h;hb=6441eb6dc020faae0672ea724dfdb38c6a9bf6a1
  */
 
 /* LARCH Loongson SX intrinsics include file.
 
-   Copyright (C) 2018-2024 Free Software Foundation, Inc.
+   Copyright (C) 2018-2025 Free Software Foundation, Inc.
 
    This file is part of GCC.
 
@@ -4749,11 +4749,11 @@ __m128i __lsx_vssrln_w_d (__m128i _1, __m128i _2)
 }
 
 /* Assembly instruction format:	vd, vj, vk.  */
-/* Data types in instruction templates:  V16QI, V16QI, V16QI.  */
+/* Data types in instruction templates:  UV16QI, UV16QI, UV16QI.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m128i __lsx_vorn_v (__m128i _1, __m128i _2)
 {
-  return (__m128i)__builtin_lsx_vorn_v ((v16i8)_1, (v16i8)_2);
+  return (__m128i)__builtin_lsx_vorn_v ((v16u8)_1, (v16u8)_2);
 }
 
 /* Assembly instruction format:	vd, i13.  */
diff --git a/crates/stdarch-gen-loongarch/src/main.rs b/crates/stdarch-gen-loongarch/src/main.rs
index 5076064ffc..8c4d4e13ec 100644
--- a/crates/stdarch-gen-loongarch/src/main.rs
+++ b/crates/stdarch-gen-loongarch/src/main.rs
@@ -1,3 +1,4 @@
+use std::collections::HashSet;
 use std::env;
 use std::fmt;
 use std::fs::File;
@@ -90,6 +91,14 @@ impl TargetFeature {
     }
 }
 
+fn portable_intrinsics() -> HashSet<&'static str> {
+    include_str!("portable-intrinsics.txt")
+        .lines()
+        .map(str::trim)
+        .filter(|line| !line.is_empty() && !line.starts_with('#'))
+        .collect()
+}
+
 fn gen_spec(in_file: String, ext_name: &str) -> io::Result<()> {
     let f = File::open(in_file.clone()).unwrap_or_else(|_| panic!("Failed to open {in_file}"));
     let f = BufReader::new(f);
@@ -105,6 +114,7 @@ fn gen_spec(in_file: String, ext_name: &str) -> io::Result<()> {
     let mut asm_fmts = String::new();
     let mut data_types = String::new();
     let fn_pat = format!("__{ext_name}_");
+    let portable_intrinsics = portable_intrinsics();
     for line in f.lines() {
         let line = line.unwrap();
         if line.is_empty() {
@@ -121,6 +131,9 @@ fn gen_spec(in_file: String, ext_name: &str) -> io::Result<()> {
             let e = line.find('(').unwrap();
             let name = line.get(s + 2..e).unwrap().trim().to_string();
             out.push_str(&format!("/// {name}\n"));
+            if portable_intrinsics.contains(name.as_str()) {
+                out.push_str("impl = portable\n");
+            }
             out.push_str(&format!("name = {name}\n"));
             out.push_str(&format!("asm-fmts = {asm_fmts}\n"));
             out.push_str(&format!("data-types = {data_types}\n"));
@@ -146,6 +159,7 @@ fn gen_bind(in_file: String, ext_name: &str) -> io::Result<()> {
     let mut link_function_str = String::new();
     let mut function_str = String::new();
     let mut out = String::new();
+    let mut skip = false;
 
     out.push_str(&format!(
         r#"// This code is automatically generated. DO NOT MODIFY.
@@ -157,7 +171,7 @@ fn gen_bind(in_file: String, ext_name: &str) -> io::Result<()> {
 // ```
 
 use crate::mem::transmute;
-use super::types::*;
+use super::super::*;
 "#
     ));
 
@@ -173,7 +187,9 @@ unsafe extern "unadjusted" {
         if line.is_empty() {
             continue;
         }
-        if let Some(name) = line.strip_prefix("name = ") {
+        if line.starts_with("impl = portable") {
+            skip = true;
+        } else if let Some(name) = line.strip_prefix("name = ") {
             current_name = Some(String::from(name));
         } else if line.starts_with("asm-fmts = ") {
             asm_fmts = line[10..]
@@ -210,6 +226,11 @@ unsafe extern "unadjusted" {
                 panic!("DEBUG: line: {0} len: {1}", line, data_types.len());
             }
 
+            if skip {
+                skip = false;
+                continue;
+            }
+
             let (link_function, function) =
                 gen_bind_body(&current_name, &asm_fmts, &in_t, out_t, para_num, target);
             link_function_str.push_str(&link_function);
@@ -571,21 +592,21 @@ fn gen_bind_body(
     } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "SI" {
         call_params = match asm_fmts[2].as_str() {
             "si12" => format!(
-                "static_assert_simm_bits!(IMM_S12, 12);\n    {unsafe_start}transmute(__{current_name}(transmute(a), mem_addr, IMM_S12)){unsafe_end}"
+                "static_assert_simm_bits!(IMM_S12, 12);\n    {unsafe_start}__{current_name}(transmute(a), mem_addr, IMM_S12){unsafe_end}"
             ),
             _ => panic!("unsupported assembly format: {}", asm_fmts[2]),
         };
     } else if para_num == 3 && in_t[1] == "CVPOINTER" && in_t[2] == "DI" {
         call_params = match asm_fmts[2].as_str() {
             "rk" => format!(
-                "{unsafe_start}transmute(__{current_name}(transmute(a), mem_addr, transmute(b))){unsafe_end}"
+                "{unsafe_start}__{current_name}(transmute(a), mem_addr, transmute(b)){unsafe_end}"
             ),
             _ => panic!("unsupported assembly format: {}", asm_fmts[2]),
         };
     } else if para_num == 4 {
         call_params = match (asm_fmts[2].as_str(), current_name.chars().last().unwrap()) {
             ("si8", t) => format!(
-                "static_assert_simm_bits!(IMM_S8, 8);\n    static_assert_uimm_bits!(IMM{0}, {0});\n    {unsafe_start}transmute(__{current_name}(transmute(a), mem_addr, IMM_S8, IMM{0})){unsafe_end}",
+                "static_assert_simm_bits!(IMM_S8, 8);\n    static_assert_uimm_bits!(IMM{0}, {0});\n    {unsafe_start}__{current_name}(transmute(a), mem_addr, IMM_S8, IMM{0}){unsafe_end}",
                 type_to_imm(t)
             ),
             (_, _) => panic!(
@@ -847,6 +868,7 @@ union v4df
     out.push_str("    printf(\"    core_arch::{loongarch64::*, simd::*},\\n\");\n");
     out.push_str("    printf(\"    mem::transmute,\\n\");\n");
     out.push_str("    printf(\"};\\n\");\n");
+    out.push_str("    printf(\"use std::hint::black_box;\\n\");\n");
     out.push_str("    printf(\"use stdarch_test::simd_test;\\n\");\n");
     out.push_str(&call_function_str);
     out.push_str("    return 0;\n");
@@ -1323,10 +1345,10 @@ fn gen_test_body(
             _ => "unsupported parameter number".to_string(),
         };
         let mut as_params = match para_num {
-            1 => "(transmute(a))".to_string(),
-            2 => "(transmute(a), transmute(b))".to_string(),
-            3 => "(transmute(a), transmute(b), transmute(c))".to_string(),
-            4 => "(transmute(a), transmute(b), transmute(c), transmute(d))".to_string(),
+            1 => "(black_box(transmute(a)))".to_string(),
+            2 => "(black_box(transmute(a)), black_box(transmute(b)))".to_string(),
+            3 => "(black_box(transmute(a)), black_box(transmute(b)), black_box(transmute(c)))".to_string(),
+            4 => "(black_box(transmute(a)), black_box(transmute(b)), black_box(transmute(c)), black_box(transmute(d)))".to_string(),
             _ => panic!("unsupported parameter number"),
         };
         let mut as_args = String::new();
@@ -1356,9 +1378,9 @@ fn gen_test_body(
         {
             fn_params = "(a)".to_string();
             if in_t[0] == "SI" {
-                as_params = "(%d)".to_string();
+                as_params = "(black_box(%d))".to_string();
             } else {
-                as_params = "(%ld)".to_string();
+                as_params = "(black_box(%ld))".to_string();
             }
             as_args = ", a".to_string();
         } else if para_num == 2 && (in_t[1] == "UQI" || in_t[1] == "USI") {
@@ -1370,7 +1392,7 @@ fn gen_test_body(
                 );
                 let val = rand_u32(asm_fmts[2].get(2..).unwrap().parse::<u8>().unwrap());
                 fn_params = format!("(a.v, {val})");
-                as_params = format!("::<{val}>(transmute(a))");
+                as_params = format!("::<{val}>(black_box(transmute(a)))");
             } else {
                 panic!("unsupported assembly format: {}", asm_fmts[2]);
             }
@@ -1383,13 +1405,13 @@ fn gen_test_body(
                 );
                 let val = rand_i32(asm_fmts[2].get(2..).unwrap().parse::<u8>().unwrap());
                 fn_params = format!("(a.v, {val})");
-                as_params = format!("::<{val}>(transmute(a))");
+                as_params = format!("::<{val}>(black_box(transmute(a)))");
             } else {
                 panic!("unsupported assembly format: {}", asm_fmts[2]);
             }
         } else if para_num == 2 && in_t[1] == "SI" && asm_fmts[2].starts_with("rk") {
             fn_params = "(a.v, b)".to_string();
-            as_params = "(transmute(a), %d)".to_string();
+            as_params = "(black_box(transmute(a)), %d)".to_string();
             as_args = ", b".to_string();
         } else if para_num == 2 && in_t[0] == "CVPOINTER" && in_t[1] == "SI" {
             if asm_fmts[2].starts_with("si") {
@@ -1441,7 +1463,7 @@ fn gen_test_body(
                 let ival = rand_i32(32);
                 let uval = rand_u32(asm_fmts[2].get(2..).unwrap().parse::<u8>().unwrap());
                 fn_params = format!("(a.v, {ival}, {uval})");
-                as_params = format!("::<{uval}>(transmute(a), {ival})");
+                as_params = format!("::<{uval}>(black_box(transmute(a)), {ival})");
             } else {
                 panic!("unsupported assembly format: {}", asm_fmts[2]);
             }
@@ -1456,7 +1478,7 @@ fn gen_test_body(
                 );
                 let val = rand_u32(asm_fmts[2].get(2..).unwrap().parse::<u8>().unwrap());
                 fn_params = format!("(a.v, b.v, {val})");
-                as_params = format!("::<{val}>(transmute(a), transmute(b))");
+                as_params = format!("::<{val}>(black_box(transmute(a)), black_box(transmute(b)))");
             } else {
                 panic!("unsupported assembly format: {}", asm_fmts[2]);
             }
@@ -1478,7 +1500,7 @@ fn gen_test_body(
                     type_to_ct(in_t[1])
                 );
                 fn_params = "(a.v, b, 0)".to_string();
-                as_params = "::<0>(transmute(a), o.as_mut_ptr())".to_string();
+                as_params = "::<0>(black_box(transmute(a)), o.as_mut_ptr())".to_string();
             } else {
                 panic!("unsupported assembly format: {}", asm_fmts[2]);
             }
@@ -1500,7 +1522,7 @@ fn gen_test_body(
                     type_to_ct(in_t[1])
                 );
                 fn_params = "(a.v, b, 0)".to_string();
-                as_params = "(transmute(a), o.as_mut_ptr(), 0)".to_string();
+                as_params = "(black_box(transmute(a)), o.as_mut_ptr(), 0)".to_string();
             } else {
                 panic!("unsupported assembly format: {}", asm_fmts[2]);
             }
@@ -1524,7 +1546,7 @@ fn gen_test_body(
                     );
                     let val = rand_u32(type_to_imm(t).try_into().unwrap());
                     fn_params = format!("(a.v, b, 0, {val})");
-                    as_params = format!("::<0, {val}>(transmute(a), o.as_mut_ptr())");
+                    as_params = format!("::<0, {val}>(black_box(transmute(a)), o.as_mut_ptr())");
                 }
                 (_, _) => panic!(
                     "unsupported assembly format: {} for {}",
@@ -1551,6 +1573,10 @@ fn gen_test_body(
                 format!(
                     "    printf(\"\\n    {current_name}{as_params};\\n    assert_eq!(r, transmute(o));\\n\"{as_args});"
                 )
+            } else if current_name.starts_with("lasx_cast_128") {
+                format!(
+                    "    printf(\"\\n    assert_eq!(r.as_array()[0..2], transmute::<_, i64x4>({current_name}{as_params}).as_array()[0..2]);\\n\"{as_args});"
+                )
             } else {
                 format!(
                     "    printf(\"\\n    assert_eq!(r, transmute({current_name}{as_params}));\\n\"{as_args});"
diff --git a/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
new file mode 100644
index 0000000000..e9b7b04f27
--- /dev/null
+++ b/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt
@@ -0,0 +1,461 @@
+# LSX intrinsics
+lsx_vsll_b
+lsx_vsll_h
+lsx_vsll_w
+lsx_vsll_d
+lsx_vslli_b
+lsx_vslli_h
+lsx_vslli_w
+lsx_vslli_d
+lsx_vsra_b
+lsx_vsra_h
+lsx_vsra_w
+lsx_vsra_d
+lsx_vsrai_b
+lsx_vsrai_h
+lsx_vsrai_w
+lsx_vsrai_d
+lsx_vsrl_b
+lsx_vsrl_h
+lsx_vsrl_w
+lsx_vsrl_d
+lsx_vsrli_b
+lsx_vsrli_h
+lsx_vsrli_w
+lsx_vsrli_d
+lsx_vadd_b
+lsx_vadd_h
+lsx_vadd_w
+lsx_vadd_d
+lsx_vaddi_bu
+lsx_vaddi_hu
+lsx_vaddi_wu
+lsx_vaddi_du
+lsx_vsub_b
+lsx_vsub_h
+lsx_vsub_w
+lsx_vsub_d
+lsx_vmax_b
+lsx_vmax_h
+lsx_vmax_w
+lsx_vmax_d
+lsx_vmaxi_b
+lsx_vmaxi_h
+lsx_vmaxi_w
+lsx_vmaxi_d
+lsx_vmax_bu
+lsx_vmax_hu
+lsx_vmax_wu
+lsx_vmax_du
+lsx_vmaxi_bu
+lsx_vmaxi_hu
+lsx_vmaxi_wu
+lsx_vmaxi_du
+lsx_vmin_b
+lsx_vmin_h
+lsx_vmin_w
+lsx_vmin_d
+lsx_vmini_b
+lsx_vmini_h
+lsx_vmini_w
+lsx_vmini_d
+lsx_vmin_bu
+lsx_vmin_hu
+lsx_vmin_wu
+lsx_vmin_du
+lsx_vmini_bu
+lsx_vmini_hu
+lsx_vmini_wu
+lsx_vmini_du
+lsx_vseq_b
+lsx_vseq_h
+lsx_vseq_w
+lsx_vseq_d
+lsx_vseqi_b
+lsx_vseqi_h
+lsx_vseqi_w
+lsx_vseqi_d
+lsx_vslt_b
+lsx_vslt_h
+lsx_vslt_w
+lsx_vslt_d
+lsx_vslti_b
+lsx_vslti_h
+lsx_vslti_w
+lsx_vslti_d
+lsx_vslt_bu
+lsx_vslt_hu
+lsx_vslt_wu
+lsx_vslt_du
+lsx_vslti_bu
+lsx_vslti_hu
+lsx_vslti_wu
+lsx_vslti_du
+lsx_vsle_b
+lsx_vsle_h
+lsx_vsle_w
+lsx_vsle_d
+lsx_vslei_b
+lsx_vslei_h
+lsx_vslei_w
+lsx_vslei_d
+lsx_vsle_bu
+lsx_vsle_hu
+lsx_vsle_wu
+lsx_vsle_du
+lsx_vslei_bu
+lsx_vslei_hu
+lsx_vslei_wu
+lsx_vslei_du
+lsx_vmul_b
+lsx_vmul_h
+lsx_vmul_w
+lsx_vmul_d
+lsx_vdiv_b
+lsx_vdiv_h
+lsx_vdiv_w
+lsx_vdiv_d
+lsx_vdiv_bu
+lsx_vdiv_hu
+lsx_vdiv_wu
+lsx_vdiv_du
+lsx_vmod_b
+lsx_vmod_h
+lsx_vmod_w
+lsx_vmod_d
+lsx_vmod_bu
+lsx_vmod_hu
+lsx_vmod_wu
+lsx_vmod_du
+lsx_vmadd_b
+lsx_vmadd_h
+lsx_vmadd_w
+lsx_vmadd_d
+lsx_vmsub_b
+lsx_vmsub_h
+lsx_vmsub_w
+lsx_vmsub_d
+lsx_vand_v
+lsx_vor_v
+lsx_vnor_v
+lsx_vxor_v
+lsx_vpcnt_b
+lsx_vpcnt_h
+lsx_vpcnt_w
+lsx_vpcnt_d
+lsx_vclz_b
+lsx_vclz_h
+lsx_vclz_w
+lsx_vclz_d
+lsx_vreplgr2vr_b
+lsx_vreplgr2vr_h
+lsx_vreplgr2vr_w
+lsx_vreplgr2vr_d
+lsx_vpickve2gr_b
+lsx_vpickve2gr_h
+lsx_vpickve2gr_w
+lsx_vpickve2gr_d
+lsx_vpickve2gr_bu
+lsx_vpickve2gr_hu
+lsx_vpickve2gr_wu
+lsx_vpickve2gr_du
+lsx_vinsgr2vr_b
+lsx_vinsgr2vr_h
+lsx_vinsgr2vr_w
+lsx_vinsgr2vr_d
+lsx_vfadd_s
+lsx_vfadd_d
+lsx_vfsub_s
+lsx_vfsub_d
+lsx_vfmul_s
+lsx_vfmul_d
+lsx_vfdiv_s
+lsx_vfdiv_d
+lsx_vfsqrt_s
+lsx_vfsqrt_d
+lsx_vandn_v
+lsx_vneg_b
+lsx_vneg_h
+lsx_vneg_w
+lsx_vneg_d
+lsx_vfmadd_s
+lsx_vfmadd_d
+lsx_vfmsub_s
+lsx_vfmsub_d
+lsx_vfnmadd_s
+lsx_vfnmadd_d
+lsx_vfnmsub_s
+lsx_vfnmsub_d
+lsx_vorn_v
+lsx_vrepli_b
+lsx_vrepli_h
+lsx_vrepli_w
+lsx_vrepli_d
+lsx_vbitclr_b
+lsx_vbitclr_h
+lsx_vbitclr_w
+lsx_vbitclr_d
+lsx_vbitset_b
+lsx_vbitset_h
+lsx_vbitset_w
+lsx_vbitset_d
+lsx_vbitrev_b
+lsx_vbitrev_h
+lsx_vbitrev_w
+lsx_vbitrev_d
+lsx_vsadd_b
+lsx_vsadd_h
+lsx_vsadd_w
+lsx_vsadd_d
+lsx_vsadd_bu
+lsx_vsadd_hu
+lsx_vsadd_wu
+lsx_vsadd_du
+lsx_vssub_b
+lsx_vssub_h
+lsx_vssub_w
+lsx_vssub_d
+lsx_vssub_bu
+lsx_vssub_hu
+lsx_vssub_wu
+lsx_vssub_du
+lsx_vadda_b
+lsx_vadda_h
+lsx_vadda_w
+lsx_vadda_d
+lsx_vabsd_b
+lsx_vabsd_h
+lsx_vabsd_w
+lsx_vabsd_d
+lsx_vabsd_bu
+lsx_vabsd_hu
+lsx_vabsd_wu
+lsx_vabsd_du
+
+# LASX intrinsics
+lasx_xvsll_b
+lasx_xvsll_h
+lasx_xvsll_w
+lasx_xvsll_d
+lasx_xvslli_b
+lasx_xvslli_h
+lasx_xvslli_w
+lasx_xvslli_d
+lasx_xvsra_b
+lasx_xvsra_h
+lasx_xvsra_w
+lasx_xvsra_d
+lasx_xvsrai_b
+lasx_xvsrai_h
+lasx_xvsrai_w
+lasx_xvsrai_d
+lasx_xvsrl_b
+lasx_xvsrl_h
+lasx_xvsrl_w
+lasx_xvsrl_d
+lasx_xvsrli_b
+lasx_xvsrli_h
+lasx_xvsrli_w
+lasx_xvsrli_d
+lasx_xvadd_b
+lasx_xvadd_h
+lasx_xvadd_w
+lasx_xvadd_d
+lasx_xvaddi_bu
+lasx_xvaddi_hu
+lasx_xvaddi_wu
+lasx_xvaddi_du
+lasx_xvsub_b
+lasx_xvsub_h
+lasx_xvsub_w
+lasx_xvsub_d
+lasx_xvmax_b
+lasx_xvmax_h
+lasx_xvmax_w
+lasx_xvmax_d
+lasx_xvmaxi_b
+lasx_xvmaxi_h
+lasx_xvmaxi_w
+lasx_xvmaxi_d
+lasx_xvmax_bu
+lasx_xvmax_hu
+lasx_xvmax_wu
+lasx_xvmax_du
+lasx_xvmaxi_bu
+lasx_xvmaxi_hu
+lasx_xvmaxi_wu
+lasx_xvmaxi_du
+lasx_xvmin_b
+lasx_xvmin_h
+lasx_xvmin_w
+lasx_xvmin_d
+lasx_xvmini_b
+lasx_xvmini_h
+lasx_xvmini_w
+lasx_xvmini_d
+lasx_xvmin_bu
+lasx_xvmin_hu
+lasx_xvmin_wu
+lasx_xvmin_du
+lasx_xvmini_bu
+lasx_xvmini_hu
+lasx_xvmini_wu
+lasx_xvmini_du
+lasx_xvseq_b
+lasx_xvseq_h
+lasx_xvseq_w
+lasx_xvseq_d
+lasx_xvseqi_b
+lasx_xvseqi_h
+lasx_xvseqi_w
+lasx_xvseqi_d
+lasx_xvslt_b
+lasx_xvslt_h
+lasx_xvslt_w
+lasx_xvslt_d
+lasx_xvslti_b
+lasx_xvslti_h
+lasx_xvslti_w
+lasx_xvslti_d
+lasx_xvslt_bu
+lasx_xvslt_hu
+lasx_xvslt_wu
+lasx_xvslt_du
+lasx_xvslti_bu
+lasx_xvslti_hu
+lasx_xvslti_wu
+lasx_xvslti_du
+lasx_xvsle_b
+lasx_xvsle_h
+lasx_xvsle_w
+lasx_xvsle_d
+lasx_xvslei_b
+lasx_xvslei_h
+lasx_xvslei_w
+lasx_xvslei_d
+lasx_xvsle_bu
+lasx_xvsle_hu
+lasx_xvsle_wu
+lasx_xvsle_du
+lasx_xvslei_bu
+lasx_xvslei_hu
+lasx_xvslei_wu
+lasx_xvslei_du
+lasx_xvmul_b
+lasx_xvmul_h
+lasx_xvmul_w
+lasx_xvmul_d
+lasx_xvdiv_b
+lasx_xvdiv_h
+lasx_xvdiv_w
+lasx_xvdiv_d
+lasx_xvdiv_bu
+lasx_xvdiv_hu
+lasx_xvdiv_wu
+lasx_xvdiv_du
+lasx_xvmod_b
+lasx_xvmod_h
+lasx_xvmod_w
+lasx_xvmod_d
+lasx_xvmod_bu
+lasx_xvmod_hu
+lasx_xvmod_wu
+lasx_xvmod_du
+lasx_xvmadd_b
+lasx_xvmadd_h
+lasx_xvmadd_w
+lasx_xvmadd_d
+lasx_xvmsub_b
+lasx_xvmsub_h
+lasx_xvmsub_w
+lasx_xvmsub_d
+lasx_xvand_v
+lasx_xvor_v
+lasx_xvnor_v
+lasx_xvxor_v
+lasx_xvpcnt_b
+lasx_xvpcnt_h
+lasx_xvpcnt_w
+lasx_xvpcnt_d
+lasx_xvclz_b
+lasx_xvclz_h
+lasx_xvclz_w
+lasx_xvclz_d
+lasx_xvreplgr2vr_b
+lasx_xvreplgr2vr_h
+lasx_xvreplgr2vr_w
+lasx_xvreplgr2vr_d
+lasx_xvpickve2gr_w
+lasx_xvpickve2gr_d
+lasx_xvpickve2gr_wu
+lasx_xvpickve2gr_du
+lasx_xvinsgr2vr_w
+lasx_xvinsgr2vr_d
+lasx_xvfadd_s
+lasx_xvfadd_d
+lasx_xvfsub_s
+lasx_xvfsub_d
+lasx_xvfmul_s
+lasx_xvfmul_d
+lasx_xvfdiv_s
+lasx_xvfdiv_d
+lasx_xvfsqrt_s
+lasx_xvfsqrt_d
+lasx_xvandn_v
+lasx_xvneg_b
+lasx_xvneg_h
+lasx_xvneg_w
+lasx_xvneg_d
+lasx_xvfmadd_s
+lasx_xvfmadd_d
+lasx_xvfmsub_s
+lasx_xvfmsub_d
+lasx_xvfnmadd_s
+lasx_xvfnmadd_d
+lasx_xvfnmsub_s
+lasx_xvfnmsub_d
+lasx_xvorn_v
+lasx_xvrepli_b
+lasx_xvrepli_h
+lasx_xvrepli_w
+lasx_xvrepli_d
+lasx_xvbitclr_b
+lasx_xvbitclr_h
+lasx_xvbitclr_w
+lasx_xvbitclr_d
+lasx_xvbitset_b
+lasx_xvbitset_h
+lasx_xvbitset_w
+lasx_xvbitset_d
+lasx_xvbitrev_b
+lasx_xvbitrev_h
+lasx_xvbitrev_w
+lasx_xvbitrev_d
+lasx_xvsadd_b
+lasx_xvsadd_h
+lasx_xvsadd_w
+lasx_xvsadd_d
+lasx_xvsadd_bu
+lasx_xvsadd_hu
+lasx_xvsadd_wu
+lasx_xvsadd_du
+lasx_xvssub_b
+lasx_xvssub_h
+lasx_xvssub_w
+lasx_xvssub_d
+lasx_xvssub_bu
+lasx_xvssub_hu
+lasx_xvssub_wu
+lasx_xvssub_du
+lasx_xvadda_b
+lasx_xvadda_h
+lasx_xvadda_w
+lasx_xvadda_d
+lasx_xvabsd_b
+lasx_xvabsd_h
+lasx_xvabsd_w
+lasx_xvabsd_d
+lasx_xvabsd_bu
+lasx_xvabsd_hu
+lasx_xvabsd_wu
+lasx_xvabsd_du
diff --git a/crates/stdarch-test/src/disassembly.rs b/crates/stdarch-test/src/disassembly.rs
index 4c136cff02..7cf657baa0 100644
--- a/crates/stdarch-test/src/disassembly.rs
+++ b/crates/stdarch-test/src/disassembly.rs
@@ -78,7 +78,7 @@ pub(crate) fn disassemble_myself() -> HashSet<Function> {
     let objdump = env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string());
     let add_args = if cfg!(target_vendor = "apple") && cfg!(target_arch = "aarch64") {
         // Target features need to be enabled for LLVM objdump on Darwin ARM64
-        vec!["--mattr=+v8.6a,+crypto,+tme"]
+        vec!["--mattr=+v8.6a,+crypto"]
     } else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
         vec!["--mattr=+zk,+zks,+zbc,+zbb"]
     } else {
@@ -158,16 +158,26 @@ fn parse(output: &str) -> HashSet<Function> {
             };
 
             if cfg!(any(target_arch = "aarch64", target_arch = "arm64ec")) {
-                // Normalize [us]shll.* ..., #0 instructions to the preferred form: [us]xtl.* ...
-                // as neither LLVM objdump nor dumpbin does that.
-                // See https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/UXTL--UXTL2--Unsigned-extend-Long--an-alias-of-USHLL--USHLL2-
-                // and https://developer.arm.com/documentation/ddi0602/latest/SIMD-FP-Instructions/SXTL--SXTL2--Signed-extend-Long--an-alias-of-SSHLL--SSHLL2-
-                // for details.
+                // Normalize `[us]shll{2}.* ..., #0` instructions to the preferred
+                // form: `[us]xtl{2}.* ...` as neither LLVM objdump nor dumpbin does that.
+                //
+                // SVE has `[us]shll[tb]` instructions that don't have an equivalent alias.
+                //
+                // See Arm documentation for details:
+                //
+                // - https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/UXTL--UXTL2--Unsigned-extend-long--an-alias-of-USHLL--USHLL2-?lang=en
+                // - https://developer.arm.com/documentation/ddi0602/2026-03/SIMD-FP-Instructions/SXTL--SXTL2--Signed-extend-long--an-alias-of-SSHLL--SSHLL2-?lang=en
                 fn is_shll(instr: &str) -> bool {
                     if cfg!(target_env = "msvc") {
-                        instr.starts_with("ushll") || instr.starts_with("sshll")
+                        instr == "ushll"
+                            || instr == "ushll2"
+                            || instr == "sshll"
+                            || instr == "sshll2"
                     } else {
-                        instr.starts_with("ushll.") || instr.starts_with("sshll.")
+                        instr == "ushll."
+                            || instr == "ushll2."
+                            || instr == "sshll."
+                            || instr == "sshll2."
                     }
                 }
                 match (parts.first(), parts.last()) {
diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs
index c81f5f45bc..f7304ab326 100644
--- a/crates/stdarch-verify/src/lib.rs
+++ b/crates/stdarch-verify/src/lib.rs
@@ -120,6 +120,13 @@ fn functions(input: TokenStream, dirs: &[&str]) -> TokenStream {
                 );
             }
 
+            // Newer intrinsics don't have `rustc_legacy_const_generics` - assume they belong at
+            // the end of the argument list
+            if required_const.is_empty() && legacy_const_generics.is_empty() {
+                legacy_const_generics =
+                    (arguments.len()..(arguments.len() + const_arguments.len())).collect();
+            }
+
             // The list of required consts, used to verify the arguments, comes from either the
             // `rustc_args_required_const` or the `rustc_legacy_const_generics` attribute.
             let required_const = if required_const.is_empty() {
@@ -136,14 +143,14 @@ fn functions(input: TokenStream, dirs: &[&str]) -> TokenStream {
                 arguments.insert(idx, ty);
             }
 
-            // strip leading underscore from fn name when building a test
-            // _mm_foo -> mm_foo such that the test name is test_mm_foo.
-            let test_name_string = format!("{name}");
-            let mut test_name_id = test_name_string.as_str();
-            while test_name_id.starts_with('_') {
-                test_name_id = &test_name_id[1..];
-            }
-            let has_test = tests.contains(&format!("test_{test_name_id}"));
+            // Strip leading underscore from fn name when building a test
+            // `_mm_foo` -> `mm_foo` such that the test name is `test_mm_foo`.
+            let test_name = name.to_string();
+            let test_name = test_name.trim_start_matches('_');
+            let has_test = tests.contains(&format!("test_{test_name}"))
+                // SVE load/store tests start with `test` or `_with_`
+                || tests.iter().any(|t| t.starts_with(&format!("test_{test_name}"))
+                                        || t.ends_with(&format!("_with_{test_name}")));
 
             let doc = find_doc(&f.attrs);
 
@@ -347,6 +354,50 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
             "v4f32" => quote! { &v4f32 },
             "v2f64" => quote! { &v2f64 },
 
+            "svbool_t" => quote! { &SVBOOL },
+            "svint8_t" => quote! { &SVI8 },
+            "svint8x2_t" => quote! { &SVI8X2 },
+            "svint8x3_t" => quote! { &SVI8X3 },
+            "svint8x4_t" => quote! { &SVI8X4 },
+            "svint16_t" => quote! { &SVI16 },
+            "svint16x2_t" => quote! { &SVI16X2 },
+            "svint16x3_t" => quote! { &SVI16X3 },
+            "svint16x4_t" => quote! { &SVI16X4 },
+            "svint32_t" => quote! { &SVI32 },
+            "svint32x2_t" => quote! { &SVI32X2 },
+            "svint32x3_t" => quote! { &SVI32X3 },
+            "svint32x4_t" => quote! { &SVI32X4 },
+            "svint64_t" => quote! { &SVI64 },
+            "svint64x2_t" => quote! { &SVI64X2 },
+            "svint64x3_t" => quote! { &SVI64X3 },
+            "svint64x4_t" => quote! { &SVI64X4 },
+            "svuint8_t" => quote! { &SVU8 },
+            "svuint8x2_t" => quote! { &SVU8X2 },
+            "svuint8x3_t" => quote! { &SVU8X3 },
+            "svuint8x4_t" => quote! { &SVU8X4 },
+            "svuint16_t" => quote! { &SVU16 },
+            "svuint16x2_t" => quote! { &SVU16X2 },
+            "svuint16x3_t" => quote! { &SVU16X3 },
+            "svuint16x4_t" => quote! { &SVU16X4 },
+            "svuint32_t" => quote! { &SVU32 },
+            "svuint32x2_t" => quote! { &SVU32X2 },
+            "svuint32x3_t" => quote! { &SVU32X3 },
+            "svuint32x4_t" => quote! { &SVU32X4 },
+            "svuint64_t" => quote! { &SVU64 },
+            "svuint64x2_t" => quote! { &SVU64X2 },
+            "svuint64x3_t" => quote! { &SVU64X3 },
+            "svuint64x4_t" => quote! { &SVU64X4 },
+            "svfloat32_t" => quote! { &SVF32 },
+            "svfloat32x2_t" => quote! { &SVF32X2 },
+            "svfloat32x3_t" => quote! { &SVF32X3 },
+            "svfloat32x4_t" => quote! { &SVF32X4 },
+            "svfloat64_t" => quote! { &SVF64 },
+            "svfloat64x2_t" => quote! { &SVF64X2 },
+            "svfloat64x3_t" => quote! { &SVF64X3 },
+            "svfloat64x4_t" => quote! { &SVF64X4 },
+            "svprfop" => quote! { &SVPRFOP },
+            "svpattern" => quote! { &SVPATTERN },
+
             // Generic types
             "T" => quote! { &GENERICT },
             "U" => quote! { &GENERICU },
diff --git a/crates/stdarch-verify/tests/arm.rs b/crates/stdarch-verify/tests/arm.rs
index a35b8175fb..6ba9c17c48 100644
--- a/crates/stdarch-verify/tests/arm.rs
+++ b/crates/stdarch-verify/tests/arm.rs
@@ -16,6 +16,7 @@ struct Function {
     doc: &'static str,
 }
 
+static BOOL: Type = Type::PrimBool;
 static F16: Type = Type::PrimFloat(16);
 static F32: Type = Type::PrimFloat(32);
 static F64: Type = Type::PrimFloat(64);
@@ -28,6 +29,7 @@ static U32: Type = Type::PrimUnsigned(32);
 static U64: Type = Type::PrimUnsigned(64);
 static U8: Type = Type::PrimUnsigned(8);
 static NEVER: Type = Type::Never;
+static VOID: Type = Type::Void;
 static GENERICT: Type = Type::GenericParam("T");
 static GENERICU: Type = Type::GenericParam("U");
 
@@ -151,19 +153,78 @@ static U8X8X2: Type = Type::U(8, 8, 2);
 static U8X8X3: Type = Type::U(8, 8, 3);
 static U8X8X4: Type = Type::U(8, 8, 4);
 
+static SVBOOL: Type = Type::Pred(1);
+static SVBOOLX2: Type = Type::Pred(2);
+static SVBOOLX3: Type = Type::Pred(3);
+static SVBOOLX4: Type = Type::Pred(4);
+static SVCOUNT: Type = Type::Pred(1);
+static SVF16: Type = Type::SVF(16, 1);
+static SVF16X2: Type = Type::SVF(16, 2);
+static SVF16X3: Type = Type::SVF(16, 3);
+static SVF16X4: Type = Type::SVF(16, 4);
+static SVF32: Type = Type::SVF(32, 1);
+static SVF32X2: Type = Type::SVF(32, 2);
+static SVF32X3: Type = Type::SVF(32, 3);
+static SVF32X4: Type = Type::SVF(32, 4);
+static SVF64: Type = Type::SVF(64, 1);
+static SVF64X2: Type = Type::SVF(64, 2);
+static SVF64X3: Type = Type::SVF(64, 3);
+static SVF64X4: Type = Type::SVF(64, 4);
+static SVI8: Type = Type::SVI(8, 1);
+static SVI8X2: Type = Type::SVI(8, 2);
+static SVI8X3: Type = Type::SVI(8, 3);
+static SVI8X4: Type = Type::SVI(8, 4);
+static SVI16: Type = Type::SVI(16, 1);
+static SVI16X2: Type = Type::SVI(16, 2);
+static SVI16X3: Type = Type::SVI(16, 3);
+static SVI16X4: Type = Type::SVI(16, 4);
+static SVI32: Type = Type::SVI(32, 1);
+static SVI32X2: Type = Type::SVI(32, 2);
+static SVI32X3: Type = Type::SVI(32, 3);
+static SVI32X4: Type = Type::SVI(32, 4);
+static SVI64: Type = Type::SVI(64, 1);
+static SVI64X2: Type = Type::SVI(64, 2);
+static SVI64X3: Type = Type::SVI(64, 3);
+static SVI64X4: Type = Type::SVI(64, 4);
+static SVU8: Type = Type::SVU(8, 1);
+static SVU8X2: Type = Type::SVU(8, 2);
+static SVU8X3: Type = Type::SVU(8, 3);
+static SVU8X4: Type = Type::SVU(8, 4);
+static SVU16: Type = Type::SVU(16, 1);
+static SVU16X2: Type = Type::SVU(16, 2);
+static SVU16X3: Type = Type::SVU(16, 3);
+static SVU16X4: Type = Type::SVU(16, 4);
+static SVU32: Type = Type::SVU(32, 1);
+static SVU32X2: Type = Type::SVU(32, 2);
+static SVU32X3: Type = Type::SVU(32, 3);
+static SVU32X4: Type = Type::SVU(32, 4);
+static SVU64: Type = Type::SVU(64, 1);
+static SVU64X2: Type = Type::SVU(64, 2);
+static SVU64X3: Type = Type::SVU(64, 3);
+static SVU64X4: Type = Type::SVU(64, 4);
+static SVPRFOP: Type = Type::Enum("svprfop");
+static SVPATTERN: Type = Type::Enum("svpattern");
+
 #[derive(Debug, Copy, Clone, PartialEq)]
 enum Type {
+    Void,
+    PrimBool,
     PrimFloat(u8),
     PrimSigned(u8),
     PrimUnsigned(u8),
     PrimPoly(u8),
     MutPtr(&'static Type),
     ConstPtr(&'static Type),
+    Enum(&'static str),
     GenericParam(&'static str),
     I(u8, u8, u8),
     U(u8, u8, u8),
     P(u8, u8, u8),
     F(u8, u8, u8),
+    Pred(u8),
+    SVI(u8, u8),
+    SVU(u8, u8),
+    SVF(u8, u8),
     Never,
 }
 
@@ -182,231 +243,20 @@ fn verify_all_signatures() {
 
     let mut all_valid = true;
     for rust in FUNCTIONS {
+        // Most SVE intrinsics just rely on the intrinsics test tool for validation
         if !rust.has_test {
-            let skip = [
-                "vaddq_s64",
-                "vaddq_u64",
-                "vrsqrte_f32",
-                "vtbl1_s8",
-                "vtbl1_u8",
-                "vtbl1_p8",
-                "vtbl2_s8",
-                "vtbl2_u8",
-                "vtbl2_p8",
-                "vtbl3_s8",
-                "vtbl3_u8",
-                "vtbl3_p8",
-                "vtbl4_s8",
-                "vtbl4_u8",
-                "vtbl4_p8",
-                "vtbx1_s8",
-                "vtbx1_u8",
-                "vtbx1_p8",
-                "vtbx2_s8",
-                "vtbx2_u8",
-                "vtbx2_p8",
-                "vtbx3_s8",
-                "vtbx3_u8",
-                "vtbx3_p8",
-                "vtbx4_s8",
-                "vtbx4_u8",
-                "vtbx4_p8",
-                "udf",
-                "_clz_u8",
-                "_clz_u16",
-                "_clz_u32",
-                "_rbit_u32",
-                "_rev_u16",
-                "_rev_u32",
-                "__breakpoint",
-                "vpminq_f32",
-                "vpminq_f64",
-                "vpmaxq_f32",
-                "vpmaxq_f64",
-                "vcombine_s8",
-                "vcombine_s16",
-                "vcombine_s32",
-                "vcombine_s64",
-                "vcombine_u8",
-                "vcombine_u16",
-                "vcombine_u32",
-                "vcombine_u64",
-                "vcombine_p64",
-                "vcombine_f32",
-                "vcombine_p8",
-                "vcombine_p16",
-                "vcombine_f64",
-                "vtbl1_s8",
-                "vtbl1_u8",
-                "vtbl1_p8",
-                "vtbl2_s8",
-                "vtbl2_u8",
-                "vtbl2_p8",
-                "vtbl3_s8",
-                "vtbl3_u8",
-                "vtbl3_p8",
-                "vtbl4_s8",
-                "vtbl4_u8",
-                "vtbl4_p8",
-                "vtbx1_s8",
-                "vtbx1_u8",
-                "vtbx1_p8",
-                "vtbx2_s8",
-                "vtbx2_u8",
-                "vtbx2_p8",
-                "vtbx3_s8",
-                "vtbx3_u8",
-                "vtbx3_p8",
-                "vtbx4_s8",
-                "vtbx4_u8",
-                "vtbx4_p8",
-                "vqtbl1_s8",
-                "vqtbl1q_s8",
-                "vqtbl1_u8",
-                "vqtbl1q_u8",
-                "vqtbl1_p8",
-                "vqtbl1q_p8",
-                "vqtbx1_s8",
-                "vqtbx1q_s8",
-                "vqtbx1_u8",
-                "vqtbx1q_u8",
-                "vqtbx1_p8",
-                "vqtbx1q_p8",
-                "vqtbl2_s8",
-                "vqtbl2q_s8",
-                "vqtbl2_u8",
-                "vqtbl2q_u8",
-                "vqtbl2_p8",
-                "vqtbl2q_p8",
-                "vqtbx2_s8",
-                "vqtbx2q_s8",
-                "vqtbx2_u8",
-                "vqtbx2q_u8",
-                "vqtbx2_p8",
-                "vqtbx2q_p8",
-                "vqtbl3_s8",
-                "vqtbl3q_s8",
-                "vqtbl3_u8",
-                "vqtbl3q_u8",
-                "vqtbl3_p8",
-                "vqtbl3q_p8",
-                "vqtbx3_s8",
-                "vqtbx3q_s8",
-                "vqtbx3_u8",
-                "vqtbx3q_u8",
-                "vqtbx3_p8",
-                "vqtbx3q_p8",
-                "vqtbl4_s8",
-                "vqtbl4q_s8",
-                "vqtbl4_u8",
-                "vqtbl4q_u8",
-                "vqtbl4_p8",
-                "vqtbl4q_p8",
-                "vqtbx4_s8",
-                "vqtbx4q_s8",
-                "vqtbx4_u8",
-                "vqtbx4q_u8",
-                "vqtbx4_p8",
-                "vqtbx4q_p8",
-                "brk",
-                "_rev_u64",
-                "_clz_u64",
-                "_rbit_u64",
-                "_cls_u32",
-                "_cls_u64",
-                "_prefetch",
-                "vsli_n_s8",
-                "vsliq_n_s8",
-                "vsli_n_s16",
-                "vsliq_n_s16",
-                "vsli_n_s32",
-                "vsliq_n_s32",
-                "vsli_n_s64",
-                "vsliq_n_s64",
-                "vsli_n_u8",
-                "vsliq_n_u8",
-                "vsli_n_u16",
-                "vsliq_n_u16",
-                "vsli_n_u32",
-                "vsliq_n_u32",
-                "vsli_n_u64",
-                "vsliq_n_u64",
-                "vsli_n_p8",
-                "vsliq_n_p8",
-                "vsli_n_p16",
-                "vsliq_n_p16",
-                "vsli_n_p64",
-                "vsliq_n_p64",
-                "vsri_n_s8",
-                "vsriq_n_s8",
-                "vsri_n_s16",
-                "vsriq_n_s16",
-                "vsri_n_s32",
-                "vsriq_n_s32",
-                "vsri_n_s64",
-                "vsriq_n_s64",
-                "vsri_n_u8",
-                "vsriq_n_u8",
-                "vsri_n_u16",
-                "vsriq_n_u16",
-                "vsri_n_u32",
-                "vsriq_n_u32",
-                "vsri_n_u64",
-                "vsriq_n_u64",
-                "vsri_n_p8",
-                "vsriq_n_p8",
-                "vsri_n_p16",
-                "vsriq_n_p16",
-                "vsri_n_p64",
-                "vsriq_n_p64",
-                "__smulbb",
-                "__smultb",
-                "__smulbt",
-                "__smultt",
-                "__smulwb",
-                "__smulwt",
-                "__qadd",
-                "__qsub",
-                "__qdbl",
-                "__smlabb",
-                "__smlabt",
-                "__smlatb",
-                "__smlatt",
-                "__smlawb",
-                "__smlawt",
-                "__qadd8",
-                "__qsub8",
-                "__qsub16",
-                "__qadd16",
-                "__qasx",
-                "__qsax",
-                "__sadd16",
-                "__sadd8",
-                "__smlad",
-                "__smlsd",
-                "__sasx",
-                "__sel",
-                "__shadd8",
-                "__shadd16",
-                "__shsub8",
-                "__usub8",
-                "__ssub8",
-                "__shsub16",
-                "__smuad",
-                "__smuadx",
-                "__smusd",
-                "__smusdx",
-                "__usad8",
-                "__usada8",
-                "__ldrex",
-                "__strex",
-                "__ldrexb",
-                "__strexb",
-                "__ldrexh",
-                "__strexh",
-                "__clrex",
-                "__dbg",
-            ];
+            if !SKIP_RUNTIME_TESTS.contains(&rust.name)
+                // Most run-time tests are handled by the intrinsic-test tool, except for
+                // load/stores (which have generated tests)
+                && (!rust.name.starts_with("sv") || rust.name.starts_with("svld")
+                    || rust.name.starts_with("svst"))
+                // The load/store test generator can't handle these cases yet
+                && (!rust.name.contains("_u32base_") || rust.name.contains("index") || rust.name.contains("offset"))
+                && !(rust.name.starts_with("svldff1") && rust.name.contains("gather"))
+            {
+                println!("missing run-time test for `{}`", rust.name);
+                all_valid = false;
+            }
         }
 
         // Skip some intrinsics that aren't NEON and are located in different
@@ -444,8 +294,8 @@ fn verify_all_signatures() {
                     && !rust.file.ends_with("v6.rs\"")
                     && !rust.file.ends_with("v7.rs\"")
                     && !rust.file.ends_with("v8.rs\"")
-                    && !rust.file.ends_with("tme.rs\"")
                     && !rust.file.ends_with("mte.rs\"")
+                    && !rust.file.ends_with("rand.rs\"")
                     && !rust.file.ends_with("ex.rs\"")
                     && !skip_intrinsic_verify.contains(&rust.name)
                 {
@@ -479,12 +329,21 @@ fn matches(rust: &Function, arm: &Intrinsic) -> Result<(), String> {
     let mut nconst = 0;
     let iter = rust.arguments.iter().zip(&arm.arguments).enumerate();
     for (i, (rust_ty, (arm, arm_const))) in iter {
-        if *rust_ty != arm {
-            bail!("mismatched arguments: {rust_ty:?} != {arm:?}")
+        match (*rust_ty, arm) {
+            // SVE uses generic type parameters to handle void pointers
+            (Type::ConstPtr(Type::GenericParam("T")), Type::ConstPtr(Type::Void)) => (),
+            // SVE const generics use i32 over u64 for usability reasons
+            (Type::PrimSigned(32), Type::PrimUnsigned(64)) if rust.required_const.contains(&i) => {
+                ()
+            }
+            // svset doesn't have its const argument last as we assumed when building the Function
+            _ if rust.name.starts_with("svset") => (),
+            (x, y) if x == y => (),
+            _ => bail!("mismatched arguments: {rust_ty:?} != {arm:?}"),
         }
         if *arm_const {
             nconst += 1;
-            if !rust.required_const.contains(&i) {
+            if !rust.required_const.contains(&i) && !rust.name.starts_with("svset") {
                 bail!("argument const mismatch");
             }
         }
@@ -493,7 +352,10 @@ fn matches(rust: &Function, arm: &Intrinsic) -> Result<(), String> {
         bail!("wrong number of const arguments");
     }
 
-    if rust.instrs.is_empty() {
+    if rust.instrs.is_empty()
+        && arm.instruction != ""
+        && !SKIP_ASSERT_INSTR_TESTS.contains(&rust.name)
+    {
         bail!(
             "instruction not listed for `{}`, but arm lists {:?}",
             rust.name,
@@ -532,7 +394,7 @@ fn matches(rust: &Function, arm: &Intrinsic) -> Result<(), String> {
     Ok(())
 }
 
-#[derive(PartialEq)]
+#[derive(Debug, PartialEq)]
 struct Intrinsic {
     name: String,
     ret: Option<Type>,
@@ -547,7 +409,7 @@ struct JsonIntrinsic {
     arguments: Vec<String>,
     return_type: ReturnType,
     #[serde(default)]
-    instructions: Vec<Vec<String>>,
+    instructions: Option<Vec<Vec<String>>>,
 }
 
 #[derive(Deserialize, Debug)]
@@ -566,6 +428,8 @@ fn parse_intrinsics(intrinsics: Vec<JsonIntrinsic>) -> HashMap<String, Intrinsic
 
 fn parse_intrinsic(mut intr: JsonIntrinsic) -> Intrinsic {
     let name = intr.name;
+    // Remove '[' and ']' so that intrinsics of the form `svwhilerw[_s16]` becomes `svwhilerw_s16`.
+    let name = name.replace('[', "").replace(']', "");
     let ret = if intr.return_type.value == "void" {
         None
     } else {
@@ -574,18 +438,24 @@ fn parse_intrinsic(mut intr: JsonIntrinsic) -> Intrinsic {
 
     // This ignores multiple instructions and different optional sequences for now to mimic
     // the old HTML scraping behaviour
-    let instruction = intr.instructions.swap_remove(0).swap_remove(0);
+    let instruction = intr
+        .instructions
+        .map_or(String::new(), |mut i| i.swap_remove(0).swap_remove(0));
 
     let arguments = intr
         .arguments
         .iter()
         .map(|s| {
-            let (ty, konst) = match s.strip_prefix("const") {
-                Some(stripped) => (stripped.trim_start(), true),
-                None => (s.as_str(), false),
+            let ty = if let Some(i) = s.find('*') {
+                &s[..i + 1]
+            } else {
+                s.rsplit_once(' ').unwrap().0.trim_start_matches("const ")
             };
-            let ty = ty.rsplit_once(' ').unwrap().0;
-            (parse_ty(ty), konst)
+            let ty = parse_ty(ty);
+            let konst = s.contains("const") && !matches!(ty, Type::ConstPtr(_))
+                || s.starts_with("enum")
+                || s.rsplit_once(" ").unwrap().1.starts_with("imm");
+            (ty, konst)
         })
         .collect::<Vec<_>>();
 
@@ -598,18 +468,27 @@ fn parse_intrinsic(mut intr: JsonIntrinsic) -> Intrinsic {
 }
 
 fn parse_ty(s: &str) -> Type {
-    let suffix = " const *";
-    if let Some(base) = s.strip_suffix(suffix) {
-        Type::ConstPtr(parse_ty_base(base))
-    } else if let Some(base) = s.strip_suffix(" *") {
-        Type::MutPtr(parse_ty_base(base))
+    if let Some(ty) = s.strip_suffix("*") {
+        let ty = ty.trim();
+        if let Some(ty) = ty.strip_prefix("const") {
+            // SVE intrinsics are west-const (`const int8_t *`)
+            Type::ConstPtr(parse_ty_base(ty))
+        } else if let Some(ty) = ty.strip_suffix("const") {
+            // Neon intrinsics are east-const (`int8_t const *`)
+            Type::ConstPtr(parse_ty_base(ty))
+        } else {
+            Type::MutPtr(parse_ty_base(ty))
+        }
     } else {
         *parse_ty_base(s)
     }
 }
 
 fn parse_ty_base(s: &str) -> &'static Type {
+    let s = s.trim();
     match s {
+        "bool" => &BOOL,
+        "void" => &VOID,
         "float16_t" => &F16,
         "float16x4_t" => &F16X4,
         "float16x4x2_t" => &F16X4X2,
@@ -739,7 +618,4655 @@ fn parse_ty_base(s: &str) -> &'static Type {
         "uint8x8x2_t" => &U8X8X2,
         "uint8x8x3_t" => &U8X8X3,
         "uint8x8x4_t" => &U8X8X4,
+        "svbool_t" => &SVBOOL,
+        "svboolx2_t" => &SVBOOLX2,
+        "svboolx3_t" => &SVBOOLX3,
+        "svboolx4_t" => &SVBOOLX4,
+        "svcount_t" => &SVCOUNT,
+        "svfloat16_t" => &SVF16,
+        "svfloat16x2_t" => &SVF16X2,
+        "svfloat16x3_t" => &SVF16X3,
+        "svfloat16x4_t" => &SVF16X4,
+        "svfloat32_t" => &SVF32,
+        "svfloat32x2_t" => &SVF32X2,
+        "svfloat32x3_t" => &SVF32X3,
+        "svfloat32x4_t" => &SVF32X4,
+        "svfloat64_t" => &SVF64,
+        "svfloat64x2_t" => &SVF64X2,
+        "svfloat64x3_t" => &SVF64X3,
+        "svfloat64x4_t" => &SVF64X4,
+        "svint8_t" => &SVI8,
+        "svint8x2_t" => &SVI8X2,
+        "svint8x3_t" => &SVI8X3,
+        "svint8x4_t" => &SVI8X4,
+        "svint16_t" => &SVI16,
+        "svint16x2_t" => &SVI16X2,
+        "svint16x3_t" => &SVI16X3,
+        "svint16x4_t" => &SVI16X4,
+        "svint32_t" => &SVI32,
+        "svint32x2_t" => &SVI32X2,
+        "svint32x3_t" => &SVI32X3,
+        "svint32x4_t" => &SVI32X4,
+        "svint64_t" => &SVI64,
+        "svint64x2_t" => &SVI64X2,
+        "svint64x3_t" => &SVI64X3,
+        "svint64x4_t" => &SVI64X4,
+        "svuint8_t" => &SVU8,
+        "svuint8x2_t" => &SVU8X2,
+        "svuint8x3_t" => &SVU8X3,
+        "svuint8x4_t" => &SVU8X4,
+        "svuint16_t" => &SVU16,
+        "svuint16x2_t" => &SVU16X2,
+        "svuint16x3_t" => &SVU16X3,
+        "svuint16x4_t" => &SVU16X4,
+        "svuint32_t" => &SVU32,
+        "svuint32x2_t" => &SVU32X2,
+        "svuint32x3_t" => &SVU32X3,
+        "svuint32x4_t" => &SVU32X4,
+        "svuint64_t" => &SVU64,
+        "svuint64x2_t" => &SVU64X2,
+        "svuint64x3_t" => &SVU64X3,
+        "svuint64x4_t" => &SVU64X4,
+        "enum svprfop" => &SVPRFOP,
+        "enum svpattern" => &SVPATTERN,
 
         _ => panic!("failed to parse json type {s:?}"),
     }
 }
+
+// FIXME(arm-maintainers): Some tests require new rustc intrinsics in order to generate
+// the appropriate instruction, though they do have the correct behaviour - these will be fixed
+// but are disabled for now.
+static SKIP_ASSERT_INSTR_TESTS: &'static [&'static str] = &["svpfalse_b"];
+
+// FIXME(arm-maintainers): With the advent of the `intrinsic-test` tool, new tests of this kind
+// are no longer being added and just adding to this list indefinitely isn't the best solution for
+// dealing with that.
+static SKIP_RUNTIME_TESTS: &'static [&'static str] = &[
+    "vaddq_s64",
+    "vaddq_u64",
+    "vrsqrte_f32",
+    "vtbl1_s8",
+    "vtbl1_u8",
+    "vtbl1_p8",
+    "vtbl2_s8",
+    "vtbl2_u8",
+    "vtbl2_p8",
+    "vtbl3_s8",
+    "vtbl3_u8",
+    "vtbl3_p8",
+    "vtbl4_s8",
+    "vtbl4_u8",
+    "vtbl4_p8",
+    "vtbx1_s8",
+    "vtbx1_u8",
+    "vtbx1_p8",
+    "vtbx2_s8",
+    "vtbx2_u8",
+    "vtbx2_p8",
+    "vtbx3_s8",
+    "vtbx3_u8",
+    "vtbx3_p8",
+    "vtbx4_s8",
+    "vtbx4_u8",
+    "vtbx4_p8",
+    "udf",
+    "_clz_u8",
+    "_clz_u16",
+    "_clz_u32",
+    "_rbit_u32",
+    "_rev_u16",
+    "_rev_u32",
+    "__breakpoint",
+    "vpminq_f32",
+    "vpminq_f64",
+    "vpmaxq_f32",
+    "vpmaxq_f64",
+    "vcombine_s8",
+    "vcombine_s16",
+    "vcombine_s32",
+    "vcombine_s64",
+    "vcombine_u8",
+    "vcombine_u16",
+    "vcombine_u32",
+    "vcombine_u64",
+    "vcombine_p64",
+    "vcombine_f32",
+    "vcombine_p8",
+    "vcombine_p16",
+    "vcombine_f64",
+    "vtbl1_s8",
+    "vtbl1_u8",
+    "vtbl1_p8",
+    "vtbl2_s8",
+    "vtbl2_u8",
+    "vtbl2_p8",
+    "vtbl3_s8",
+    "vtbl3_u8",
+    "vtbl3_p8",
+    "vtbl4_s8",
+    "vtbl4_u8",
+    "vtbl4_p8",
+    "vtbx1_s8",
+    "vtbx1_u8",
+    "vtbx1_p8",
+    "vtbx2_s8",
+    "vtbx2_u8",
+    "vtbx2_p8",
+    "vtbx3_s8",
+    "vtbx3_u8",
+    "vtbx3_p8",
+    "vtbx4_s8",
+    "vtbx4_u8",
+    "vtbx4_p8",
+    "vqtbl1_s8",
+    "vqtbl1q_s8",
+    "vqtbl1_u8",
+    "vqtbl1q_u8",
+    "vqtbl1_p8",
+    "vqtbl1q_p8",
+    "vqtbx1_s8",
+    "vqtbx1q_s8",
+    "vqtbx1_u8",
+    "vqtbx1q_u8",
+    "vqtbx1_p8",
+    "vqtbx1q_p8",
+    "vqtbl2_s8",
+    "vqtbl2q_s8",
+    "vqtbl2_u8",
+    "vqtbl2q_u8",
+    "vqtbl2_p8",
+    "vqtbl2q_p8",
+    "vqtbx2_s8",
+    "vqtbx2q_s8",
+    "vqtbx2_u8",
+    "vqtbx2q_u8",
+    "vqtbx2_p8",
+    "vqtbx2q_p8",
+    "vqtbl3_s8",
+    "vqtbl3q_s8",
+    "vqtbl3_u8",
+    "vqtbl3q_u8",
+    "vqtbl3_p8",
+    "vqtbl3q_p8",
+    "vqtbx3_s8",
+    "vqtbx3q_s8",
+    "vqtbx3_u8",
+    "vqtbx3q_u8",
+    "vqtbx3_p8",
+    "vqtbx3q_p8",
+    "vqtbl4_s8",
+    "vqtbl4q_s8",
+    "vqtbl4_u8",
+    "vqtbl4q_u8",
+    "vqtbl4_p8",
+    "vqtbl4q_p8",
+    "vqtbx4_s8",
+    "vqtbx4q_s8",
+    "vqtbx4_u8",
+    "vqtbx4q_u8",
+    "vqtbx4_p8",
+    "vqtbx4q_p8",
+    "brk",
+    "_rev_u64",
+    "_clz_u64",
+    "_rbit_u64",
+    "_cls_u32",
+    "_cls_u64",
+    "_prefetch",
+    "vsli_n_s8",
+    "vsliq_n_s8",
+    "vsli_n_s16",
+    "vsliq_n_s16",
+    "vsli_n_s32",
+    "vsliq_n_s32",
+    "vsli_n_s64",
+    "vsliq_n_s64",
+    "vsli_n_u8",
+    "vsliq_n_u8",
+    "vsli_n_u16",
+    "vsliq_n_u16",
+    "vsli_n_u32",
+    "vsliq_n_u32",
+    "vsli_n_u64",
+    "vsliq_n_u64",
+    "vsli_n_p8",
+    "vsliq_n_p8",
+    "vsli_n_p16",
+    "vsliq_n_p16",
+    "vsli_n_p64",
+    "vsliq_n_p64",
+    "vsri_n_s8",
+    "vsriq_n_s8",
+    "vsri_n_s16",
+    "vsriq_n_s16",
+    "vsri_n_s32",
+    "vsriq_n_s32",
+    "vsri_n_s64",
+    "vsriq_n_s64",
+    "vsri_n_u8",
+    "vsriq_n_u8",
+    "vsri_n_u16",
+    "vsriq_n_u16",
+    "vsri_n_u32",
+    "vsriq_n_u32",
+    "vsri_n_u64",
+    "vsriq_n_u64",
+    "vsri_n_p8",
+    "vsriq_n_p8",
+    "vsri_n_p16",
+    "vsriq_n_p16",
+    "vsri_n_p64",
+    "vsriq_n_p64",
+    "__smulbb",
+    "__smultb",
+    "__smulbt",
+    "__smultt",
+    "__smulwb",
+    "__smulwt",
+    "__qadd",
+    "__qsub",
+    "__qdbl",
+    "__smlabb",
+    "__smlabt",
+    "__smlatb",
+    "__smlatt",
+    "__smlawb",
+    "__smlawt",
+    "__qadd8",
+    "__qsub8",
+    "__qsub16",
+    "__qadd16",
+    "__qasx",
+    "__qsax",
+    "__sadd16",
+    "__sadd8",
+    "__smlad",
+    "__smlsd",
+    "__sasx",
+    "__sel",
+    "__shadd8",
+    "__shadd16",
+    "__shsub8",
+    "__usub8",
+    "__ssub8",
+    "__shsub16",
+    "__smuad",
+    "__smuadx",
+    "__smusd",
+    "__smusdx",
+    "__usad8",
+    "__usada8",
+    "__ldrex",
+    "__strex",
+    "__ldrexb",
+    "__strexb",
+    "__ldrexh",
+    "__strexh",
+    "__clrex",
+    "__dbg",
+    "__crc32cd",
+    "__crc32d",
+    "__jcvt",
+    "vabal_high_s8",
+    "vabal_high_s16",
+    "vabal_high_s32",
+    "vabal_high_u8",
+    "vabal_high_u16",
+    "vabal_high_u32",
+    "vabd_f64",
+    "vabdq_f64",
+    "vabdd_f64",
+    "vabds_f32",
+    "vabdh_f16",
+    "vabdl_high_s16",
+    "vabdl_high_s32",
+    "vabdl_high_s8",
+    "vabdl_high_u8",
+    "vabdl_high_u16",
+    "vabdl_high_u32",
+    "vabs_f64",
+    "vabsq_f64",
+    "vabs_s64",
+    "vabsq_s64",
+    "vabsd_s64",
+    "vaddlv_s16",
+    "vaddlvq_s16",
+    "vaddlvq_s32",
+    "vaddlv_s32",
+    "vaddlv_s8",
+    "vaddlvq_s8",
+    "vaddlv_u16",
+    "vaddlvq_u16",
+    "vaddlvq_u32",
+    "vaddlv_u32",
+    "vaddlv_u8",
+    "vaddlvq_u8",
+    "vaddv_f32",
+    "vaddvq_f32",
+    "vaddvq_f64",
+    "vaddv_s32",
+    "vaddv_s8",
+    "vaddvq_s8",
+    "vaddv_s16",
+    "vaddvq_s16",
+    "vaddvq_s32",
+    "vaddv_u32",
+    "vaddv_u8",
+    "vaddvq_u8",
+    "vaddv_u16",
+    "vaddvq_u16",
+    "vaddvq_u32",
+    "vaddvq_s64",
+    "vaddvq_u64",
+    "vamax_f16",
+    "vamaxq_f16",
+    "vamax_f32",
+    "vamaxq_f32",
+    "vamaxq_f64",
+    "vamin_f16",
+    "vaminq_f16",
+    "vamin_f32",
+    "vaminq_f32",
+    "vaminq_f64",
+    "vbcaxq_s8",
+    "vbcaxq_s16",
+    "vbcaxq_s32",
+    "vbcaxq_s64",
+    "vbcaxq_u8",
+    "vbcaxq_u16",
+    "vbcaxq_u32",
+    "vbcaxq_u64",
+    "vcadd_rot270_f16",
+    "vcaddq_rot270_f16",
+    "vcadd_rot270_f32",
+    "vcaddq_rot270_f32",
+    "vcaddq_rot270_f64",
+    "vcadd_rot90_f16",
+    "vcaddq_rot90_f16",
+    "vcadd_rot90_f32",
+    "vcaddq_rot90_f32",
+    "vcaddq_rot90_f64",
+    "vcage_f64",
+    "vcageq_f64",
+    "vcaged_f64",
+    "vcages_f32",
+    "vcageh_f16",
+    "vcagt_f64",
+    "vcagtq_f64",
+    "vcagtd_f64",
+    "vcagts_f32",
+    "vcagth_f16",
+    "vcale_f64",
+    "vcaleq_f64",
+    "vcaled_f64",
+    "vcales_f32",
+    "vcaleh_f16",
+    "vcalt_f64",
+    "vcaltq_f64",
+    "vcaltd_f64",
+    "vcalts_f32",
+    "vcalth_f16",
+    "vceq_f64",
+    "vceqq_f64",
+    "vceq_s64",
+    "vceqq_s64",
+    "vceq_u64",
+    "vceqq_u64",
+    "vceq_p64",
+    "vceqq_p64",
+    "vceqd_f64",
+    "vceqs_f32",
+    "vceqd_s64",
+    "vceqd_u64",
+    "vceqh_f16",
+    "vceqz_f16",
+    "vceqzq_f16",
+    "vceqz_f32",
+    "vceqzq_f32",
+    "vceqz_f64",
+    "vceqzq_f64",
+    "vceqz_s8",
+    "vceqzq_s8",
+    "vceqz_s16",
+    "vceqzq_s16",
+    "vceqz_s32",
+    "vceqzq_s32",
+    "vceqz_s64",
+    "vceqzq_s64",
+    "vceqz_p8",
+    "vceqzq_p8",
+    "vceqz_p64",
+    "vceqzq_p64",
+    "vceqz_u8",
+    "vceqzq_u8",
+    "vceqz_u16",
+    "vceqzq_u16",
+    "vceqz_u32",
+    "vceqzq_u32",
+    "vceqz_u64",
+    "vceqzq_u64",
+    "vceqzd_s64",
+    "vceqzd_u64",
+    "vceqzh_f16",
+    "vceqzs_f32",
+    "vceqzd_f64",
+    "vcge_f64",
+    "vcgeq_f64",
+    "vcge_s64",
+    "vcgeq_s64",
+    "vcge_u64",
+    "vcgeq_u64",
+    "vcged_f64",
+    "vcges_f32",
+    "vcged_s64",
+    "vcged_u64",
+    "vcgeh_f16",
+    "vcgez_f32",
+    "vcgezq_f32",
+    "vcgez_f64",
+    "vcgezq_f64",
+    "vcgez_s8",
+    "vcgezq_s8",
+    "vcgez_s16",
+    "vcgezq_s16",
+    "vcgez_s32",
+    "vcgezq_s32",
+    "vcgez_s64",
+    "vcgezq_s64",
+    "vcgezd_f64",
+    "vcgezs_f32",
+    "vcgezd_s64",
+    "vcgezh_f16",
+    "vcgt_f64",
+    "vcgtq_f64",
+    "vcgt_s64",
+    "vcgtq_s64",
+    "vcgt_u64",
+    "vcgtq_u64",
+    "vcgtd_f64",
+    "vcgts_f32",
+    "vcgtd_s64",
+    "vcgtd_u64",
+    "vcgth_f16",
+    "vcgtz_f32",
+    "vcgtzq_f32",
+    "vcgtz_f64",
+    "vcgtzq_f64",
+    "vcgtz_s8",
+    "vcgtzq_s8",
+    "vcgtz_s16",
+    "vcgtzq_s16",
+    "vcgtz_s32",
+    "vcgtzq_s32",
+    "vcgtz_s64",
+    "vcgtzq_s64",
+    "vcgtzd_f64",
+    "vcgtzs_f32",
+    "vcgtzd_s64",
+    "vcgtzh_f16",
+    "vcle_f64",
+    "vcleq_f64",
+    "vcle_s64",
+    "vcleq_s64",
+    "vcle_u64",
+    "vcleq_u64",
+    "vcled_f64",
+    "vcles_f32",
+    "vcled_u64",
+    "vcled_s64",
+    "vcleh_f16",
+    "vclez_f32",
+    "vclezq_f32",
+    "vclez_f64",
+    "vclezq_f64",
+    "vclez_s8",
+    "vclezq_s8",
+    "vclez_s16",
+    "vclezq_s16",
+    "vclez_s32",
+    "vclezq_s32",
+    "vclez_s64",
+    "vclezq_s64",
+    "vclezd_f64",
+    "vclezs_f32",
+    "vclezd_s64",
+    "vclezh_f16",
+    "vclt_f64",
+    "vcltq_f64",
+    "vclt_s64",
+    "vcltq_s64",
+    "vclt_u64",
+    "vcltq_u64",
+    "vcltd_u64",
+    "vcltd_s64",
+    "vclth_f16",
+    "vclts_f32",
+    "vcltd_f64",
+    "vcltz_f32",
+    "vcltzq_f32",
+    "vcltz_f64",
+    "vcltzq_f64",
+    "vcltz_s8",
+    "vcltzq_s8",
+    "vcltz_s16",
+    "vcltzq_s16",
+    "vcltz_s32",
+    "vcltzq_s32",
+    "vcltz_s64",
+    "vcltzq_s64",
+    "vcltzd_f64",
+    "vcltzs_f32",
+    "vcltzd_s64",
+    "vcltzh_f16",
+    "vcmla_f16",
+    "vcmlaq_f16",
+    "vcmla_f32",
+    "vcmlaq_f32",
+    "vcmlaq_f64",
+    "vcmla_lane_f16",
+    "vcmlaq_lane_f16",
+    "vcmla_lane_f32",
+    "vcmlaq_lane_f32",
+    "vcmla_laneq_f16",
+    "vcmlaq_laneq_f16",
+    "vcmla_laneq_f32",
+    "vcmlaq_laneq_f32",
+    "vcmla_rot180_f16",
+    "vcmlaq_rot180_f16",
+    "vcmla_rot180_f32",
+    "vcmlaq_rot180_f32",
+    "vcmlaq_rot180_f64",
+    "vcmla_rot180_lane_f16",
+    "vcmlaq_rot180_lane_f16",
+    "vcmla_rot180_lane_f32",
+    "vcmlaq_rot180_lane_f32",
+    "vcmla_rot180_laneq_f16",
+    "vcmlaq_rot180_laneq_f16",
+    "vcmla_rot180_laneq_f32",
+    "vcmlaq_rot180_laneq_f32",
+    "vcmla_rot270_f16",
+    "vcmlaq_rot270_f16",
+    "vcmla_rot270_f32",
+    "vcmlaq_rot270_f32",
+    "vcmlaq_rot270_f64",
+    "vcmla_rot270_lane_f16",
+    "vcmlaq_rot270_lane_f16",
+    "vcmla_rot270_lane_f32",
+    "vcmlaq_rot270_lane_f32",
+    "vcmla_rot270_laneq_f16",
+    "vcmlaq_rot270_laneq_f16",
+    "vcmla_rot270_laneq_f32",
+    "vcmlaq_rot270_laneq_f32",
+    "vcmla_rot90_f16",
+    "vcmlaq_rot90_f16",
+    "vcmla_rot90_f32",
+    "vcmlaq_rot90_f32",
+    "vcmlaq_rot90_f64",
+    "vcmla_rot90_lane_f16",
+    "vcmlaq_rot90_lane_f16",
+    "vcmla_rot90_lane_f32",
+    "vcmlaq_rot90_lane_f32",
+    "vcmla_rot90_laneq_f16",
+    "vcmlaq_rot90_laneq_f16",
+    "vcmla_rot90_laneq_f32",
+    "vcmlaq_rot90_laneq_f32",
+    "vcopy_lane_f32",
+    "vcopy_lane_s8",
+    "vcopy_lane_s16",
+    "vcopy_lane_s32",
+    "vcopy_lane_u8",
+    "vcopy_lane_u16",
+    "vcopy_lane_u32",
+    "vcopy_lane_p8",
+    "vcopy_lane_p16",
+    "vcopy_laneq_f32",
+    "vcopy_laneq_s8",
+    "vcopy_laneq_s16",
+    "vcopy_laneq_s32",
+    "vcopy_laneq_u8",
+    "vcopy_laneq_u16",
+    "vcopy_laneq_u32",
+    "vcopy_laneq_p8",
+    "vcopy_laneq_p16",
+    "vcopyq_lane_f32",
+    "vcopyq_lane_f64",
+    "vcopyq_lane_s64",
+    "vcopyq_lane_u64",
+    "vcopyq_lane_p64",
+    "vcopyq_lane_s8",
+    "vcopyq_lane_s16",
+    "vcopyq_lane_s32",
+    "vcopyq_lane_u8",
+    "vcopyq_lane_u16",
+    "vcopyq_lane_u32",
+    "vcopyq_lane_p8",
+    "vcopyq_lane_p16",
+    "vcopyq_laneq_f32",
+    "vcopyq_laneq_f64",
+    "vcopyq_laneq_s8",
+    "vcopyq_laneq_s16",
+    "vcopyq_laneq_s32",
+    "vcopyq_laneq_s64",
+    "vcopyq_laneq_u8",
+    "vcopyq_laneq_u16",
+    "vcopyq_laneq_u32",
+    "vcopyq_laneq_u64",
+    "vcopyq_laneq_p8",
+    "vcopyq_laneq_p16",
+    "vcopyq_laneq_p64",
+    "vcreate_f64",
+    "vcvt_f32_f64",
+    "vcvt_f64_f32",
+    "vcvt_f64_s64",
+    "vcvtq_f64_s64",
+    "vcvt_f64_u64",
+    "vcvtq_f64_u64",
+    "vcvt_high_f16_f32",
+    "vcvt_high_f32_f16",
+    "vcvt_high_f32_f64",
+    "vcvt_high_f64_f32",
+    "vcvt_n_f64_s64",
+    "vcvtq_n_f64_s64",
+    "vcvt_n_f64_u64",
+    "vcvtq_n_f64_u64",
+    "vcvt_n_s64_f64",
+    "vcvtq_n_s64_f64",
+    "vcvt_n_u64_f64",
+    "vcvtq_n_u64_f64",
+    "vcvt_s64_f64",
+    "vcvtq_s64_f64",
+    "vcvt_u64_f64",
+    "vcvtq_u64_f64",
+    "vcvta_s16_f16",
+    "vcvtaq_s16_f16",
+    "vcvta_s32_f32",
+    "vcvtaq_s32_f32",
+    "vcvta_s64_f64",
+    "vcvtaq_s64_f64",
+    "vcvta_u16_f16",
+    "vcvtaq_u16_f16",
+    "vcvta_u32_f32",
+    "vcvtaq_u32_f32",
+    "vcvta_u64_f64",
+    "vcvtaq_u64_f64",
+    "vcvtah_s16_f16",
+    "vcvtah_s32_f16",
+    "vcvtah_s64_f16",
+    "vcvtah_u16_f16",
+    "vcvtah_u32_f16",
+    "vcvtah_u64_f16",
+    "vcvtas_s32_f32",
+    "vcvtad_s64_f64",
+    "vcvtas_u32_f32",
+    "vcvtad_u64_f64",
+    "vcvtd_f64_s64",
+    "vcvts_f32_s32",
+    "vcvth_f16_s16",
+    "vcvth_f16_s32",
+    "vcvth_f16_s64",
+    "vcvth_f16_u16",
+    "vcvth_f16_u32",
+    "vcvth_f16_u64",
+    "vcvth_n_f16_s16",
+    "vcvth_n_f16_s32",
+    "vcvth_n_f16_s64",
+    "vcvth_n_f16_u16",
+    "vcvth_n_f16_u32",
+    "vcvth_n_f16_u64",
+    "vcvth_n_s16_f16",
+    "vcvth_n_s32_f16",
+    "vcvth_n_s64_f16",
+    "vcvth_n_u16_f16",
+    "vcvth_n_u32_f16",
+    "vcvth_n_u64_f16",
+    "vcvth_s16_f16",
+    "vcvth_s32_f16",
+    "vcvth_s64_f16",
+    "vcvth_u16_f16",
+    "vcvth_u32_f16",
+    "vcvth_u64_f16",
+    "vcvtm_s16_f16",
+    "vcvtmq_s16_f16",
+    "vcvtm_s32_f32",
+    "vcvtmq_s32_f32",
+    "vcvtm_s64_f64",
+    "vcvtmq_s64_f64",
+    "vcvtm_u16_f16",
+    "vcvtmq_u16_f16",
+    "vcvtm_u32_f32",
+    "vcvtmq_u32_f32",
+    "vcvtm_u64_f64",
+    "vcvtmq_u64_f64",
+    "vcvtmh_s16_f16",
+    "vcvtmh_s32_f16",
+    "vcvtmh_s64_f16",
+    "vcvtmh_u16_f16",
+    "vcvtmh_u32_f16",
+    "vcvtmh_u64_f16",
+    "vcvtms_s32_f32",
+    "vcvtmd_s64_f64",
+    "vcvtms_u32_f32",
+    "vcvtmd_u64_f64",
+    "vcvtn_s16_f16",
+    "vcvtnq_s16_f16",
+    "vcvtn_s32_f32",
+    "vcvtnq_s32_f32",
+    "vcvtn_s64_f64",
+    "vcvtnq_s64_f64",
+    "vcvtn_u16_f16",
+    "vcvtnq_u16_f16",
+    "vcvtn_u32_f32",
+    "vcvtnq_u32_f32",
+    "vcvtn_u64_f64",
+    "vcvtnq_u64_f64",
+    "vcvtnh_s16_f16",
+    "vcvtnh_s32_f16",
+    "vcvtnh_s64_f16",
+    "vcvtnh_u16_f16",
+    "vcvtnh_u32_f16",
+    "vcvtnh_u64_f16",
+    "vcvtns_s32_f32",
+    "vcvtnd_s64_f64",
+    "vcvtns_u32_f32",
+    "vcvtnd_u64_f64",
+    "vcvtp_s16_f16",
+    "vcvtpq_s16_f16",
+    "vcvtp_s32_f32",
+    "vcvtpq_s32_f32",
+    "vcvtp_s64_f64",
+    "vcvtpq_s64_f64",
+    "vcvtp_u16_f16",
+    "vcvtpq_u16_f16",
+    "vcvtp_u32_f32",
+    "vcvtpq_u32_f32",
+    "vcvtp_u64_f64",
+    "vcvtpq_u64_f64",
+    "vcvtph_s16_f16",
+    "vcvtph_s32_f16",
+    "vcvtph_s64_f16",
+    "vcvtph_u16_f16",
+    "vcvtph_u32_f16",
+    "vcvtph_u64_f16",
+    "vcvtps_s32_f32",
+    "vcvtpd_s64_f64",
+    "vcvtps_u32_f32",
+    "vcvtpd_u64_f64",
+    "vcvts_f32_u32",
+    "vcvtd_f64_u64",
+    "vcvts_n_f32_s32",
+    "vcvtd_n_f64_s64",
+    "vcvts_n_f32_u32",
+    "vcvtd_n_f64_u64",
+    "vcvts_n_s32_f32",
+    "vcvtd_n_s64_f64",
+    "vcvts_n_u32_f32",
+    "vcvtd_n_u64_f64",
+    "vcvts_s32_f32",
+    "vcvtd_s64_f64",
+    "vcvts_u32_f32",
+    "vcvtd_u64_f64",
+    "vcvtx_f32_f64",
+    "vcvtx_high_f32_f64",
+    "vcvtxd_f32_f64",
+    "vdiv_f16",
+    "vdivq_f16",
+    "vdiv_f32",
+    "vdivq_f32",
+    "vdiv_f64",
+    "vdivq_f64",
+    "vdivh_f16",
+    "vdup_lane_f64",
+    "vdup_lane_p64",
+    "vdup_laneq_f64",
+    "vdup_laneq_p64",
+    "vdupb_lane_s8",
+    "vduph_laneq_s16",
+    "vdupb_lane_u8",
+    "vduph_laneq_u16",
+    "vdupb_lane_p8",
+    "vduph_laneq_p16",
+    "vdupb_laneq_s8",
+    "vdupb_laneq_u8",
+    "vdupb_laneq_p8",
+    "vdupd_lane_f64",
+    "vdupd_lane_s64",
+    "vdupd_lane_u64",
+    "vduph_lane_f16",
+    "vduph_laneq_f16",
+    "vdupq_lane_f64",
+    "vdupq_lane_p64",
+    "vdupq_laneq_f64",
+    "vdupq_laneq_p64",
+    "vdups_lane_f32",
+    "vdupd_laneq_f64",
+    "vdups_lane_s32",
+    "vdupd_laneq_s64",
+    "vdups_lane_u32",
+    "vdupd_laneq_u64",
+    "vdups_laneq_f32",
+    "vduph_lane_s16",
+    "vdups_laneq_s32",
+    "vduph_lane_u16",
+    "vdups_laneq_u32",
+    "vduph_lane_p16",
+    "veor3q_s8",
+    "veor3q_s16",
+    "veor3q_s32",
+    "veor3q_s64",
+    "veor3q_u8",
+    "veor3q_u16",
+    "veor3q_u32",
+    "veor3q_u64",
+    "vextq_f64",
+    "vextq_p64",
+    "vfma_f64",
+    "vfma_lane_f16",
+    "vfma_laneq_f16",
+    "vfmaq_lane_f16",
+    "vfmaq_laneq_f16",
+    "vfma_lane_f32",
+    "vfma_laneq_f32",
+    "vfmaq_lane_f32",
+    "vfmaq_laneq_f32",
+    "vfmaq_laneq_f64",
+    "vfma_lane_f64",
+    "vfma_laneq_f64",
+    "vfma_n_f16",
+    "vfmaq_n_f16",
+    "vfma_n_f64",
+    "vfmad_lane_f64",
+    "vfmah_f16",
+    "vfmah_lane_f16",
+    "vfmah_laneq_f16",
+    "vfmaq_f64",
+    "vfmaq_lane_f64",
+    "vfmaq_n_f64",
+    "vfmas_lane_f32",
+    "vfmas_laneq_f32",
+    "vfmad_laneq_f64",
+    "vfmlal_high_f16",
+    "vfmlalq_high_f16",
+    "vfmlal_lane_high_f16",
+    "vfmlal_laneq_high_f16",
+    "vfmlalq_lane_high_f16",
+    "vfmlalq_laneq_high_f16",
+    "vfmlal_lane_low_f16",
+    "vfmlal_laneq_low_f16",
+    "vfmlalq_lane_low_f16",
+    "vfmlalq_laneq_low_f16",
+    "vfmlal_low_f16",
+    "vfmlalq_low_f16",
+    "vfmlsl_high_f16",
+    "vfmlslq_high_f16",
+    "vfmlsl_lane_high_f16",
+    "vfmlsl_laneq_high_f16",
+    "vfmlslq_lane_high_f16",
+    "vfmlslq_laneq_high_f16",
+    "vfmlsl_lane_low_f16",
+    "vfmlsl_laneq_low_f16",
+    "vfmlslq_lane_low_f16",
+    "vfmlslq_laneq_low_f16",
+    "vfmlsl_low_f16",
+    "vfmlslq_low_f16",
+    "vfms_f64",
+    "vfms_lane_f16",
+    "vfms_laneq_f16",
+    "vfmsq_lane_f16",
+    "vfmsq_laneq_f16",
+    "vfms_lane_f32",
+    "vfms_laneq_f32",
+    "vfmsq_lane_f32",
+    "vfmsq_laneq_f32",
+    "vfmsq_laneq_f64",
+    "vfms_lane_f64",
+    "vfms_laneq_f64",
+    "vfms_n_f16",
+    "vfmsq_n_f16",
+    "vfms_n_f64",
+    "vfmsh_f16",
+    "vfmsh_lane_f16",
+    "vfmsh_laneq_f16",
+    "vfmsq_f64",
+    "vfmsq_lane_f64",
+    "vfmsq_n_f64",
+    "vfmss_lane_f32",
+    "vfmss_laneq_f32",
+    "vfmsd_lane_f64",
+    "vfmsd_laneq_f64",
+    "vld1_f16",
+    "vld1q_f16",
+    "vld1_f64_x2",
+    "vld1_f64_x3",
+    "vld1_f64_x4",
+    "vld1q_f64_x2",
+    "vld1q_f64_x3",
+    "vld1q_f64_x4",
+    "vld2_dup_f64",
+    "vld2q_dup_f64",
+    "vld2q_dup_s64",
+    "vld2_f64",
+    "vld2_lane_f64",
+    "vld2_lane_s64",
+    "vld2_lane_p64",
+    "vld2_lane_u64",
+    "vld2q_dup_p64",
+    "vld2q_dup_p64",
+    "vld2q_dup_u64",
+    "vld2q_dup_u64",
+    "vld2q_f64",
+    "vld2q_s64",
+    "vld2q_lane_f64",
+    "vld2q_lane_s8",
+    "vld2q_lane_s64",
+    "vld2q_lane_p64",
+    "vld2q_lane_u8",
+    "vld2q_lane_u64",
+    "vld2q_lane_p8",
+    "vld2q_p64",
+    "vld2q_p64",
+    "vld2q_u64",
+    "vld3_dup_f64",
+    "vld3q_dup_f64",
+    "vld3q_dup_s64",
+    "vld3_f64",
+    "vld3_lane_f64",
+    "vld3_lane_p64",
+    "vld3_lane_s64",
+    "vld3_lane_u64",
+    "vld3q_dup_p64",
+    "vld3q_dup_p64",
+    "vld3q_dup_u64",
+    "vld3q_dup_u64",
+    "vld3q_f64",
+    "vld3q_s64",
+    "vld3q_lane_f64",
+    "vld3q_lane_p64",
+    "vld3q_lane_s8",
+    "vld3q_lane_s64",
+    "vld3q_lane_u8",
+    "vld3q_lane_u64",
+    "vld3q_lane_p8",
+    "vld3q_p64",
+    "vld3q_p64",
+    "vld3q_u64",
+    "vld4_dup_f64",
+    "vld4q_dup_f64",
+    "vld4q_dup_s64",
+    "vld4_f64",
+    "vld4_lane_f64",
+    "vld4_lane_s64",
+    "vld4_lane_p64",
+    "vld4_lane_u64",
+    "vld4q_dup_p64",
+    "vld4q_dup_p64",
+    "vld4q_dup_u64",
+    "vld4q_dup_u64",
+    "vld4q_f64",
+    "vld4q_s64",
+    "vld4q_lane_f64",
+    "vld4q_lane_s8",
+    "vld4q_lane_s64",
+    "vld4q_lane_p64",
+    "vld4q_lane_u8",
+    "vld4q_lane_u64",
+    "vld4q_lane_p8",
+    "vld4q_p64",
+    "vld4q_p64",
+    "vld4q_u64",
+    "vldap1_lane_s64",
+    "vldap1q_lane_s64",
+    "vldap1q_lane_f64",
+    "vldap1_lane_u64",
+    "vldap1q_lane_u64",
+    "vldap1_lane_p64",
+    "vldap1q_lane_p64",
+    "vluti2_lane_f16",
+    "vluti2q_lane_f16",
+    "vluti2_lane_u8",
+    "vluti2q_lane_u8",
+    "vluti2_lane_u16",
+    "vluti2q_lane_u16",
+    "vluti2_lane_p8",
+    "vluti2q_lane_p8",
+    "vluti2_lane_p16",
+    "vluti2q_lane_p16",
+    "vluti2_lane_s8",
+    "vluti2q_lane_s8",
+    "vluti2_lane_s16",
+    "vluti2q_lane_s16",
+    "vluti2_laneq_f16",
+    "vluti2q_laneq_f16",
+    "vluti2_laneq_u8",
+    "vluti2q_laneq_u8",
+    "vluti2_laneq_u16",
+    "vluti2q_laneq_u16",
+    "vluti2_laneq_p8",
+    "vluti2q_laneq_p8",
+    "vluti2_laneq_p16",
+    "vluti2q_laneq_p16",
+    "vluti2_laneq_s8",
+    "vluti2q_laneq_s8",
+    "vluti2_laneq_s16",
+    "vluti2q_laneq_s16",
+    "vluti4q_lane_f16_x2",
+    "vluti4q_lane_u16_x2",
+    "vluti4q_lane_p16_x2",
+    "vluti4q_lane_s16_x2",
+    "vluti4q_lane_s8",
+    "vluti4q_lane_u8",
+    "vluti4q_lane_p8",
+    "vluti4q_laneq_f16_x2",
+    "vluti4q_laneq_u16_x2",
+    "vluti4q_laneq_p16_x2",
+    "vluti4q_laneq_s16_x2",
+    "vluti4q_laneq_s8",
+    "vluti4q_laneq_u8",
+    "vluti4q_laneq_p8",
+    "vmax_f64",
+    "vmaxq_f64",
+    "vmaxh_f16",
+    "vmaxnm_f64",
+    "vmaxnmq_f64",
+    "vmaxnmh_f16",
+    "vmaxnmv_f16",
+    "vmaxnmvq_f16",
+    "vmaxnmv_f32",
+    "vmaxnmvq_f64",
+    "vmaxnmvq_f32",
+    "vmaxv_f16",
+    "vmaxvq_f16",
+    "vmaxv_f32",
+    "vmaxvq_f32",
+    "vmaxvq_f64",
+    "vmaxv_s8",
+    "vmaxvq_s8",
+    "vmaxv_s16",
+    "vmaxvq_s16",
+    "vmaxv_s32",
+    "vmaxvq_s32",
+    "vmaxv_u8",
+    "vmaxvq_u8",
+    "vmaxv_u16",
+    "vmaxvq_u16",
+    "vmaxv_u32",
+    "vmaxvq_u32",
+    "vmin_f64",
+    "vminq_f64",
+    "vminh_f16",
+    "vminnm_f64",
+    "vminnmq_f64",
+    "vminnmh_f16",
+    "vminnmv_f16",
+    "vminnmvq_f16",
+    "vminnmv_f32",
+    "vminnmvq_f64",
+    "vminnmvq_f32",
+    "vminv_f16",
+    "vminvq_f16",
+    "vminv_f32",
+    "vminvq_f32",
+    "vminvq_f64",
+    "vminv_s8",
+    "vminvq_s8",
+    "vminv_s16",
+    "vminvq_s16",
+    "vminv_s32",
+    "vminvq_s32",
+    "vminv_u8",
+    "vminvq_u8",
+    "vminv_u16",
+    "vminvq_u16",
+    "vminv_u32",
+    "vminvq_u32",
+    "vmla_f64",
+    "vmlaq_f64",
+    "vmlal_high_lane_s16",
+    "vmlal_high_laneq_s16",
+    "vmlal_high_lane_s32",
+    "vmlal_high_laneq_s32",
+    "vmlal_high_lane_u16",
+    "vmlal_high_laneq_u16",
+    "vmlal_high_lane_u32",
+    "vmlal_high_laneq_u32",
+    "vmlal_high_n_s16",
+    "vmlal_high_n_s32",
+    "vmlal_high_n_u16",
+    "vmlal_high_n_u32",
+    "vmlal_high_s8",
+    "vmlal_high_s16",
+    "vmlal_high_s32",
+    "vmlal_high_u8",
+    "vmlal_high_u16",
+    "vmlal_high_u32",
+    "vmls_f64",
+    "vmlsq_f64",
+    "vmlsl_high_lane_s16",
+    "vmlsl_high_laneq_s16",
+    "vmlsl_high_lane_s32",
+    "vmlsl_high_laneq_s32",
+    "vmlsl_high_lane_u16",
+    "vmlsl_high_laneq_u16",
+    "vmlsl_high_lane_u32",
+    "vmlsl_high_laneq_u32",
+    "vmlsl_high_n_s16",
+    "vmlsl_high_n_s32",
+    "vmlsl_high_n_u16",
+    "vmlsl_high_n_u32",
+    "vmlsl_high_s8",
+    "vmlsl_high_s16",
+    "vmlsl_high_s32",
+    "vmlsl_high_u8",
+    "vmlsl_high_u16",
+    "vmlsl_high_u32",
+    "vmovl_high_s8",
+    "vmovl_high_s16",
+    "vmovl_high_s32",
+    "vmovl_high_u8",
+    "vmovl_high_u16",
+    "vmovl_high_u32",
+    "vmovn_high_s16",
+    "vmovn_high_s32",
+    "vmovn_high_s64",
+    "vmovn_high_u16",
+    "vmovn_high_u32",
+    "vmovn_high_u64",
+    "vmul_f64",
+    "vmulq_f64",
+    "vmul_lane_f64",
+    "vmul_laneq_f16",
+    "vmulq_laneq_f16",
+    "vmul_laneq_f64",
+    "vmul_n_f64",
+    "vmulq_n_f64",
+    "vmuld_lane_f64",
+    "vmulh_f16",
+    "vmulh_lane_f16",
+    "vmulh_laneq_f16",
+    "vmull_high_lane_s16",
+    "vmull_high_laneq_s16",
+    "vmull_high_lane_s32",
+    "vmull_high_laneq_s32",
+    "vmull_high_lane_u16",
+    "vmull_high_laneq_u16",
+    "vmull_high_lane_u32",
+    "vmull_high_laneq_u32",
+    "vmull_high_n_s16",
+    "vmull_high_n_s32",
+    "vmull_high_n_u16",
+    "vmull_high_n_u32",
+    "vmull_high_p64",
+    "vmull_high_p8",
+    "vmull_high_s8",
+    "vmull_high_s16",
+    "vmull_high_s32",
+    "vmull_high_u8",
+    "vmull_high_u16",
+    "vmull_high_u32",
+    "vmull_p64",
+    "vmulq_lane_f64",
+    "vmulq_laneq_f64",
+    "vmuls_lane_f32",
+    "vmuls_laneq_f32",
+    "vmuld_laneq_f64",
+    "vmulx_f16",
+    "vmulxq_f16",
+    "vmulx_f32",
+    "vmulxq_f32",
+    "vmulx_f64",
+    "vmulxq_f64",
+    "vmulx_lane_f16",
+    "vmulx_laneq_f16",
+    "vmulxq_lane_f16",
+    "vmulxq_laneq_f16",
+    "vmulx_lane_f32",
+    "vmulx_laneq_f32",
+    "vmulxq_lane_f32",
+    "vmulxq_laneq_f32",
+    "vmulxq_laneq_f64",
+    "vmulx_lane_f64",
+    "vmulx_laneq_f64",
+    "vmulx_n_f16",
+    "vmulxq_n_f16",
+    "vmulxd_f64",
+    "vmulxs_f32",
+    "vmulxd_lane_f64",
+    "vmulxd_laneq_f64",
+    "vmulxs_lane_f32",
+    "vmulxs_laneq_f32",
+    "vmulxh_f16",
+    "vmulxh_lane_f16",
+    "vmulxh_laneq_f16",
+    "vmulxq_lane_f64",
+    "vneg_f64",
+    "vnegq_f64",
+    "vneg_s64",
+    "vnegq_s64",
+    "vnegd_s64",
+    "vnegh_f16",
+    "vpaddd_f64",
+    "vpadds_f32",
+    "vpaddd_s64",
+    "vpaddd_u64",
+    "vpaddq_f16",
+    "vpaddq_f32",
+    "vpaddq_f64",
+    "vpaddq_s8",
+    "vpaddq_s16",
+    "vpaddq_s32",
+    "vpaddq_s64",
+    "vpaddq_u8",
+    "vpaddq_u16",
+    "vpaddq_u32",
+    "vpaddq_u64",
+    "vpmax_f16",
+    "vpmaxq_f16",
+    "vpmaxnm_f16",
+    "vpmaxnmq_f16",
+    "vpmaxnm_f32",
+    "vpmaxnmq_f32",
+    "vpmaxnmq_f64",
+    "vpmaxnmqd_f64",
+    "vpmaxnms_f32",
+    "vpmaxq_s8",
+    "vpmaxq_s16",
+    "vpmaxq_s32",
+    "vpmaxq_u8",
+    "vpmaxq_u16",
+    "vpmaxq_u32",
+    "vpmaxqd_f64",
+    "vpmaxs_f32",
+    "vpmin_f16",
+    "vpminq_f16",
+    "vpminnm_f16",
+    "vpminnmq_f16",
+    "vpminnm_f32",
+    "vpminnmq_f32",
+    "vpminnmq_f64",
+    "vpminnmqd_f64",
+    "vpminnms_f32",
+    "vpminq_s8",
+    "vpminq_s16",
+    "vpminq_s32",
+    "vpminq_u8",
+    "vpminq_u16",
+    "vpminq_u32",
+    "vpminqd_f64",
+    "vpmins_f32",
+    "vqabs_s64",
+    "vqabsq_s64",
+    "vqabsb_s8",
+    "vqabsh_s16",
+    "vqabss_s32",
+    "vqabsd_s64",
+    "vqaddb_s8",
+    "vqaddh_s16",
+    "vqaddb_u8",
+    "vqaddh_u16",
+    "vqadds_s32",
+    "vqaddd_s64",
+    "vqadds_u32",
+    "vqaddd_u64",
+    "vqdmlal_high_lane_s16",
+    "vqdmlal_high_laneq_s16",
+    "vqdmlal_high_lane_s32",
+    "vqdmlal_high_laneq_s32",
+    "vqdmlal_high_n_s16",
+    "vqdmlal_high_s16",
+    "vqdmlal_high_n_s32",
+    "vqdmlal_high_s32",
+    "vqdmlal_laneq_s16",
+    "vqdmlal_laneq_s32",
+    "vqdmlalh_lane_s16",
+    "vqdmlalh_laneq_s16",
+    "vqdmlals_lane_s32",
+    "vqdmlals_laneq_s32",
+    "vqdmlalh_s16",
+    "vqdmlals_s32",
+    "vqdmlsl_high_lane_s16",
+    "vqdmlsl_high_laneq_s16",
+    "vqdmlsl_high_lane_s32",
+    "vqdmlsl_high_laneq_s32",
+    "vqdmlsl_high_n_s16",
+    "vqdmlsl_high_s16",
+    "vqdmlsl_high_n_s32",
+    "vqdmlsl_high_s32",
+    "vqdmlsl_laneq_s16",
+    "vqdmlsl_laneq_s32",
+    "vqdmlslh_lane_s16",
+    "vqdmlslh_laneq_s16",
+    "vqdmlsls_lane_s32",
+    "vqdmlsls_laneq_s32",
+    "vqdmlslh_s16",
+    "vqdmlsls_s32",
+    "vqdmulh_lane_s16",
+    "vqdmulhq_lane_s16",
+    "vqdmulh_lane_s32",
+    "vqdmulhq_lane_s32",
+    "vqdmulhh_lane_s16",
+    "vqdmulhh_laneq_s16",
+    "vqdmulhh_s16",
+    "vqdmulhs_s32",
+    "vqdmulhs_lane_s32",
+    "vqdmulhs_laneq_s32",
+    "vqdmull_high_lane_s16",
+    "vqdmull_high_laneq_s32",
+    "vqdmull_high_lane_s32",
+    "vqdmull_high_laneq_s16",
+    "vqdmull_high_n_s16",
+    "vqdmull_high_n_s32",
+    "vqdmull_high_s16",
+    "vqdmull_high_s32",
+    "vqdmull_laneq_s16",
+    "vqdmull_laneq_s32",
+    "vqdmullh_lane_s16",
+    "vqdmulls_laneq_s32",
+    "vqdmullh_laneq_s16",
+    "vqdmullh_s16",
+    "vqdmulls_lane_s32",
+    "vqdmulls_s32",
+    "vqmovn_high_s16",
+    "vqmovn_high_s32",
+    "vqmovn_high_s64",
+    "vqmovn_high_u16",
+    "vqmovn_high_u32",
+    "vqmovn_high_u64",
+    "vqmovnd_s64",
+    "vqmovnd_u64",
+    "vqmovnh_s16",
+    "vqmovns_s32",
+    "vqmovnh_u16",
+    "vqmovns_u32",
+    "vqmovun_high_s16",
+    "vqmovun_high_s32",
+    "vqmovun_high_s64",
+    "vqmovunh_s16",
+    "vqmovuns_s32",
+    "vqmovund_s64",
+    "vqneg_s64",
+    "vqnegq_s64",
+    "vqnegb_s8",
+    "vqnegh_s16",
+    "vqnegs_s32",
+    "vqnegd_s64",
+    "vqrdmlah_lane_s16",
+    "vqrdmlah_lane_s32",
+    "vqrdmlah_laneq_s16",
+    "vqrdmlah_laneq_s32",
+    "vqrdmlahq_lane_s16",
+    "vqrdmlahq_lane_s32",
+    "vqrdmlahq_laneq_s16",
+    "vqrdmlahq_laneq_s32",
+    "vqrdmlah_s16",
+    "vqrdmlahq_s16",
+    "vqrdmlah_s32",
+    "vqrdmlahq_s32",
+    "vqrdmlahh_lane_s16",
+    "vqrdmlahh_laneq_s16",
+    "vqrdmlahs_lane_s32",
+    "vqrdmlahs_laneq_s32",
+    "vqrdmlahh_s16",
+    "vqrdmlahs_s32",
+    "vqrdmlsh_lane_s16",
+    "vqrdmlsh_lane_s32",
+    "vqrdmlsh_laneq_s16",
+    "vqrdmlsh_laneq_s32",
+    "vqrdmlshq_lane_s16",
+    "vqrdmlshq_lane_s32",
+    "vqrdmlshq_laneq_s16",
+    "vqrdmlshq_laneq_s32",
+    "vqrdmlsh_s16",
+    "vqrdmlshq_s16",
+    "vqrdmlsh_s32",
+    "vqrdmlshq_s32",
+    "vqrdmlshh_lane_s16",
+    "vqrdmlshh_laneq_s16",
+    "vqrdmlshs_lane_s32",
+    "vqrdmlshs_laneq_s32",
+    "vqrdmlshh_s16",
+    "vqrdmlshs_s32",
+    "vqrdmulhh_lane_s16",
+    "vqrdmulhh_laneq_s16",
+    "vqrdmulhs_lane_s32",
+    "vqrdmulhs_laneq_s32",
+    "vqrdmulhh_s16",
+    "vqrdmulhs_s32",
+    "vqrshlb_s8",
+    "vqrshlh_s16",
+    "vqrshlb_u8",
+    "vqrshlh_u16",
+    "vqrshld_s64",
+    "vqrshls_s32",
+    "vqrshls_u32",
+    "vqrshld_u64",
+    "vqrshrn_high_n_s16",
+    "vqrshrn_high_n_s32",
+    "vqrshrn_high_n_s64",
+    "vqrshrn_high_n_u16",
+    "vqrshrn_high_n_u32",
+    "vqrshrn_high_n_u64",
+    "vqrshrnd_n_u64",
+    "vqrshrnh_n_u16",
+    "vqrshrns_n_u32",
+    "vqrshrnh_n_s16",
+    "vqrshrns_n_s32",
+    "vqrshrnd_n_s64",
+    "vqrshrun_high_n_s16",
+    "vqrshrun_high_n_s32",
+    "vqrshrun_high_n_s64",
+    "vqrshrund_n_s64",
+    "vqrshrunh_n_s16",
+    "vqrshruns_n_s32",
+    "vqshlb_n_s8",
+    "vqshld_n_s64",
+    "vqshlh_n_s16",
+    "vqshls_n_s32",
+    "vqshlb_n_u8",
+    "vqshld_n_u64",
+    "vqshlh_n_u16",
+    "vqshls_n_u32",
+    "vqshlb_s8",
+    "vqshlh_s16",
+    "vqshls_s32",
+    "vqshlb_u8",
+    "vqshlh_u16",
+    "vqshls_u32",
+    "vqshld_s64",
+    "vqshld_u64",
+    "vqshlub_n_s8",
+    "vqshlud_n_s64",
+    "vqshluh_n_s16",
+    "vqshlus_n_s32",
+    "vqshrn_high_n_s16",
+    "vqshrn_high_n_s32",
+    "vqshrn_high_n_s64",
+    "vqshrn_high_n_u16",
+    "vqshrn_high_n_u32",
+    "vqshrn_high_n_u64",
+    "vqshrnd_n_s64",
+    "vqshrnd_n_u64",
+    "vqshrnh_n_s16",
+    "vqshrns_n_s32",
+    "vqshrnh_n_u16",
+    "vqshrns_n_u32",
+    "vqshrun_high_n_s16",
+    "vqshrun_high_n_s32",
+    "vqshrun_high_n_s64",
+    "vqshrund_n_s64",
+    "vqshrunh_n_s16",
+    "vqshruns_n_s32",
+    "vqsubb_s8",
+    "vqsubh_s16",
+    "vqsubb_u8",
+    "vqsubh_u16",
+    "vqsubs_s32",
+    "vqsubd_s64",
+    "vqsubs_u32",
+    "vqsubd_u64",
+    "vrax1q_u64",
+    "vrbit_s8",
+    "vrbitq_s8",
+    "vrbit_u8",
+    "vrbit_u8",
+    "vrbitq_u8",
+    "vrbitq_u8",
+    "vrbit_p8",
+    "vrbit_p8",
+    "vrbitq_p8",
+    "vrbitq_p8",
+    "vrecpe_f64",
+    "vrecpeq_f64",
+    "vrecped_f64",
+    "vrecpes_f32",
+    "vrecpeh_f16",
+    "vrecps_f64",
+    "vrecpsq_f64",
+    "vrecpsd_f64",
+    "vrecpss_f32",
+    "vrecpsh_f16",
+    "vrecpxd_f64",
+    "vrecpxs_f32",
+    "vrecpxh_f16",
+    "vreinterpret_f64_f16",
+    "vreinterpret_f64_f16",
+    "vreinterpretq_f64_f16",
+    "vreinterpretq_f64_f16",
+    "vreinterpret_f16_f64",
+    "vreinterpret_f16_f64",
+    "vreinterpretq_f16_f64",
+    "vreinterpretq_f16_f64",
+    "vreinterpretq_f64_p128",
+    "vreinterpretq_f64_p128",
+    "vreinterpret_f64_f32",
+    "vreinterpret_f64_f32",
+    "vreinterpret_p64_f32",
+    "vreinterpret_p64_f32",
+    "vreinterpretq_f64_f32",
+    "vreinterpretq_f64_f32",
+    "vreinterpretq_p64_f32",
+    "vreinterpretq_p64_f32",
+    "vreinterpret_f32_f64",
+    "vreinterpret_f32_f64",
+    "vreinterpret_s8_f64",
+    "vreinterpret_s8_f64",
+    "vreinterpret_s16_f64",
+    "vreinterpret_s16_f64",
+    "vreinterpret_s32_f64",
+    "vreinterpret_s32_f64",
+    "vreinterpret_s64_f64",
+    "vreinterpret_u8_f64",
+    "vreinterpret_u8_f64",
+    "vreinterpret_u16_f64",
+    "vreinterpret_u16_f64",
+    "vreinterpret_u32_f64",
+    "vreinterpret_u32_f64",
+    "vreinterpret_u64_f64",
+    "vreinterpret_p8_f64",
+    "vreinterpret_p8_f64",
+    "vreinterpret_p16_f64",
+    "vreinterpret_p16_f64",
+    "vreinterpret_p64_f64",
+    "vreinterpretq_p128_f64",
+    "vreinterpretq_p128_f64",
+    "vreinterpretq_f32_f64",
+    "vreinterpretq_f32_f64",
+    "vreinterpretq_s8_f64",
+    "vreinterpretq_s8_f64",
+    "vreinterpretq_s16_f64",
+    "vreinterpretq_s16_f64",
+    "vreinterpretq_s32_f64",
+    "vreinterpretq_s32_f64",
+    "vreinterpretq_s64_f64",
+    "vreinterpretq_s64_f64",
+    "vreinterpretq_u8_f64",
+    "vreinterpretq_u8_f64",
+    "vreinterpretq_u16_f64",
+    "vreinterpretq_u16_f64",
+    "vreinterpretq_u32_f64",
+    "vreinterpretq_u32_f64",
+    "vreinterpretq_u64_f64",
+    "vreinterpretq_u64_f64",
+    "vreinterpretq_p8_f64",
+    "vreinterpretq_p8_f64",
+    "vreinterpretq_p16_f64",
+    "vreinterpretq_p16_f64",
+    "vreinterpretq_p64_f64",
+    "vreinterpretq_p64_f64",
+    "vreinterpret_f64_s8",
+    "vreinterpret_f64_s8",
+    "vreinterpretq_f64_s8",
+    "vreinterpretq_f64_s8",
+    "vreinterpret_f64_s16",
+    "vreinterpret_f64_s16",
+    "vreinterpretq_f64_s16",
+    "vreinterpretq_f64_s16",
+    "vreinterpret_f64_s32",
+    "vreinterpret_f64_s32",
+    "vreinterpretq_f64_s32",
+    "vreinterpretq_f64_s32",
+    "vreinterpret_f64_s64",
+    "vreinterpret_p64_s64",
+    "vreinterpretq_f64_s64",
+    "vreinterpretq_f64_s64",
+    "vreinterpretq_p64_s64",
+    "vreinterpretq_p64_s64",
+    "vreinterpret_f64_u8",
+    "vreinterpret_f64_u8",
+    "vreinterpretq_f64_u8",
+    "vreinterpretq_f64_u8",
+    "vreinterpret_f64_u16",
+    "vreinterpret_f64_u16",
+    "vreinterpretq_f64_u16",
+    "vreinterpretq_f64_u16",
+    "vreinterpret_f64_u32",
+    "vreinterpret_f64_u32",
+    "vreinterpretq_f64_u32",
+    "vreinterpretq_f64_u32",
+    "vreinterpret_f64_u64",
+    "vreinterpret_p64_u64",
+    "vreinterpretq_f64_u64",
+    "vreinterpretq_f64_u64",
+    "vreinterpretq_p64_u64",
+    "vreinterpretq_p64_u64",
+    "vreinterpret_f64_p8",
+    "vreinterpret_f64_p8",
+    "vreinterpretq_f64_p8",
+    "vreinterpretq_f64_p8",
+    "vreinterpret_f64_p16",
+    "vreinterpret_f64_p16",
+    "vreinterpretq_f64_p16",
+    "vreinterpretq_f64_p16",
+    "vreinterpret_f32_p64",
+    "vreinterpret_f32_p64",
+    "vreinterpret_f64_p64",
+    "vreinterpret_s64_p64",
+    "vreinterpret_u64_p64",
+    "vreinterpretq_f32_p64",
+    "vreinterpretq_f32_p64",
+    "vreinterpretq_f64_p64",
+    "vreinterpretq_f64_p64",
+    "vreinterpretq_s64_p64",
+    "vreinterpretq_s64_p64",
+    "vreinterpretq_u64_p64",
+    "vreinterpretq_u64_p64",
+    "vrnd32x_f32",
+    "vrnd32xq_f32",
+    "vrnd32xq_f64",
+    "vrnd32x_f64",
+    "vrnd32z_f32",
+    "vrnd32zq_f32",
+    "vrnd32zq_f64",
+    "vrnd32z_f64",
+    "vrnd64x_f32",
+    "vrnd64xq_f32",
+    "vrnd64xq_f64",
+    "vrnd64x_f64",
+    "vrnd64z_f32",
+    "vrnd64zq_f32",
+    "vrnd64zq_f64",
+    "vrnd64z_f64",
+    "vrnd_f16",
+    "vrndq_f16",
+    "vrnd_f32",
+    "vrndq_f32",
+    "vrnd_f64",
+    "vrndq_f64",
+    "vrnda_f16",
+    "vrndaq_f16",
+    "vrnda_f32",
+    "vrndaq_f32",
+    "vrnda_f64",
+    "vrndaq_f64",
+    "vrndah_f16",
+    "vrndh_f16",
+    "vrndi_f16",
+    "vrndiq_f16",
+    "vrndi_f32",
+    "vrndiq_f32",
+    "vrndi_f64",
+    "vrndiq_f64",
+    "vrndih_f16",
+    "vrndm_f16",
+    "vrndmq_f16",
+    "vrndm_f32",
+    "vrndmq_f32",
+    "vrndm_f64",
+    "vrndmq_f64",
+    "vrndmh_f16",
+    "vrndn_f64",
+    "vrndnq_f64",
+    "vrndnh_f16",
+    "vrndns_f32",
+    "vrndp_f16",
+    "vrndpq_f16",
+    "vrndp_f32",
+    "vrndpq_f32",
+    "vrndp_f64",
+    "vrndpq_f64",
+    "vrndph_f16",
+    "vrndx_f16",
+    "vrndxq_f16",
+    "vrndx_f32",
+    "vrndxq_f32",
+    "vrndx_f64",
+    "vrndxq_f64",
+    "vrndxh_f16",
+    "vrshld_s64",
+    "vrshld_u64",
+    "vrshrd_n_s64",
+    "vrshrd_n_u64",
+    "vrshrn_high_n_s16",
+    "vrshrn_high_n_s32",
+    "vrshrn_high_n_s64",
+    "vrshrn_high_n_u16",
+    "vrshrn_high_n_u32",
+    "vrshrn_high_n_u64",
+    "vrsqrte_f64",
+    "vrsqrteq_f64",
+    "vrsqrted_f64",
+    "vrsqrtes_f32",
+    "vrsqrteh_f16",
+    "vrsqrts_f64",
+    "vrsqrtsq_f64",
+    "vrsqrtsd_f64",
+    "vrsqrtss_f32",
+    "vrsqrtsh_f16",
+    "vrsrad_n_s64",
+    "vrsrad_n_u64",
+    "vrsubhn_high_s16",
+    "vrsubhn_high_s32",
+    "vrsubhn_high_s64",
+    "vrsubhn_high_u16",
+    "vrsubhn_high_u32",
+    "vrsubhn_high_u64",
+    "vrsubhn_high_s16",
+    "vrsubhn_high_s32",
+    "vrsubhn_high_s64",
+    "vrsubhn_high_u16",
+    "vrsubhn_high_u32",
+    "vrsubhn_high_u64",
+    "vscale_f16",
+    "vscaleq_f16",
+    "vscale_f32",
+    "vscaleq_f32",
+    "vscaleq_f64",
+    "vset_lane_f64",
+    "vsetq_lane_f64",
+    "vsha512h2q_u64",
+    "vsha512hq_u64",
+    "vsha512su0q_u64",
+    "vsha512su1q_u64",
+    "vshld_s64",
+    "vshld_u64",
+    "vshll_high_n_s8",
+    "vshll_high_n_s16",
+    "vshll_high_n_s32",
+    "vshll_high_n_u8",
+    "vshll_high_n_u16",
+    "vshll_high_n_u32",
+    "vshrn_high_n_s16",
+    "vshrn_high_n_s32",
+    "vshrn_high_n_s64",
+    "vshrn_high_n_u16",
+    "vshrn_high_n_u32",
+    "vshrn_high_n_u64",
+    "vslid_n_s64",
+    "vslid_n_u64",
+    "vsm3partw1q_u32",
+    "vsm3partw2q_u32",
+    "vsm3ss1q_u32",
+    "vsm3tt1aq_u32",
+    "vsm3tt1bq_u32",
+    "vsm3tt2aq_u32",
+    "vsm3tt2bq_u32",
+    "vsm4ekeyq_u32",
+    "vsm4eq_u32",
+    "vsqadd_u8",
+    "vsqaddq_u8",
+    "vsqadd_u16",
+    "vsqaddq_u16",
+    "vsqadd_u32",
+    "vsqaddq_u32",
+    "vsqadd_u64",
+    "vsqaddq_u64",
+    "vsqaddb_u8",
+    "vsqaddh_u16",
+    "vsqaddd_u64",
+    "vsqadds_u32",
+    "vsqrt_f16",
+    "vsqrtq_f16",
+    "vsqrt_f32",
+    "vsqrtq_f32",
+    "vsqrt_f64",
+    "vsqrtq_f64",
+    "vsqrth_f16",
+    "vsrid_n_s64",
+    "vsrid_n_u64",
+    "vst1_f16",
+    "vst1q_f16",
+    "vst1_f64_x2",
+    "vst1q_f64_x2",
+    "vst1_f64_x3",
+    "vst1q_f64_x3",
+    "vst1_f64_x4",
+    "vst1q_f64_x4",
+    "vst1_lane_f64",
+    "vst1q_lane_f64",
+    "vst2_f64",
+    "vst2_lane_f64",
+    "vst2_lane_s64",
+    "vst2_lane_p64",
+    "vst2_lane_u64",
+    "vst2q_f64",
+    "vst2q_s64",
+    "vst2q_lane_f64",
+    "vst2q_lane_s8",
+    "vst2q_lane_s64",
+    "vst2q_lane_p64",
+    "vst2q_lane_u8",
+    "vst2q_lane_u64",
+    "vst2q_lane_p8",
+    "vst2q_p64",
+    "vst2q_u64",
+    "vst3_f64",
+    "vst3_lane_f64",
+    "vst3_lane_s64",
+    "vst3_lane_p64",
+    "vst3_lane_u64",
+    "vst3q_f64",
+    "vst3q_s64",
+    "vst3q_lane_f64",
+    "vst3q_lane_s8",
+    "vst3q_lane_s64",
+    "vst3q_lane_p64",
+    "vst3q_lane_u8",
+    "vst3q_lane_u64",
+    "vst3q_lane_p8",
+    "vst3q_p64",
+    "vst3q_u64",
+    "vst4_f64",
+    "vst4_lane_f64",
+    "vst4_lane_s64",
+    "vst4_lane_p64",
+    "vst4_lane_u64",
+    "vst4q_f64",
+    "vst4q_s64",
+    "vst4q_lane_f64",
+    "vst4q_lane_s8",
+    "vst4q_lane_s64",
+    "vst4q_lane_p64",
+    "vst4q_lane_u8",
+    "vst4q_lane_u64",
+    "vst4q_lane_p8",
+    "vst4q_p64",
+    "vst4q_u64",
+    "vstl1_lane_f64",
+    "vstl1q_lane_f64",
+    "vstl1_lane_u64",
+    "vstl1q_lane_u64",
+    "vstl1_lane_p64",
+    "vstl1q_lane_p64",
+    "vstl1_lane_s64",
+    "vstl1q_lane_s64",
+    "vsub_f64",
+    "vsubq_f64",
+    "vsubd_s64",
+    "vsubd_u64",
+    "vsubh_f16",
+    "vsubl_high_s8",
+    "vsubl_high_s16",
+    "vsubl_high_s32",
+    "vsubl_high_u8",
+    "vsubl_high_u16",
+    "vsubl_high_u32",
+    "vsubw_high_s8",
+    "vsubw_high_s16",
+    "vsubw_high_s32",
+    "vsubw_high_u8",
+    "vsubw_high_u16",
+    "vsubw_high_u32",
+    "vtrn1_f16",
+    "vtrn1q_f16",
+    "vtrn1_f32",
+    "vtrn1q_f64",
+    "vtrn1_s32",
+    "vtrn1q_s64",
+    "vtrn1_u32",
+    "vtrn1q_u64",
+    "vtrn1q_p64",
+    "vtrn1q_f32",
+    "vtrn1_s8",
+    "vtrn1q_s8",
+    "vtrn1_s16",
+    "vtrn1q_s16",
+    "vtrn1q_s32",
+    "vtrn1_u8",
+    "vtrn1q_u8",
+    "vtrn1_u16",
+    "vtrn1q_u16",
+    "vtrn1q_u32",
+    "vtrn1_p8",
+    "vtrn1q_p8",
+    "vtrn1_p16",
+    "vtrn1q_p16",
+    "vtrn2_f16",
+    "vtrn2q_f16",
+    "vtrn2_f32",
+    "vtrn2q_f64",
+    "vtrn2_s32",
+    "vtrn2q_s64",
+    "vtrn2_u32",
+    "vtrn2q_u64",
+    "vtrn2q_p64",
+    "vtrn2q_f32",
+    "vtrn2_s8",
+    "vtrn2q_s8",
+    "vtrn2_s16",
+    "vtrn2q_s16",
+    "vtrn2q_s32",
+    "vtrn2_u8",
+    "vtrn2q_u8",
+    "vtrn2_u16",
+    "vtrn2q_u16",
+    "vtrn2q_u32",
+    "vtrn2_p8",
+    "vtrn2q_p8",
+    "vtrn2_p16",
+    "vtrn2q_p16",
+    "vtst_s64",
+    "vtstq_s64",
+    "vtst_p64",
+    "vtstq_p64",
+    "vtst_u64",
+    "vtstq_u64",
+    "vtstd_s64",
+    "vtstd_u64",
+    "vuqadd_s8",
+    "vuqaddq_s8",
+    "vuqadd_s16",
+    "vuqaddq_s16",
+    "vuqadd_s32",
+    "vuqaddq_s32",
+    "vuqadd_s64",
+    "vuqaddq_s64",
+    "vuqaddb_s8",
+    "vuqaddh_s16",
+    "vuqaddd_s64",
+    "vuqadds_s32",
+    "vuzp1_f16",
+    "vuzp1q_f16",
+    "vuzp1_f32",
+    "vuzp1q_f64",
+    "vuzp1_s32",
+    "vuzp1q_s64",
+    "vuzp1_u32",
+    "vuzp1q_u64",
+    "vuzp1q_p64",
+    "vuzp1q_f32",
+    "vuzp1_s8",
+    "vuzp1q_s8",
+    "vuzp1_s16",
+    "vuzp1q_s16",
+    "vuzp1q_s32",
+    "vuzp1_u8",
+    "vuzp1q_u8",
+    "vuzp1_u16",
+    "vuzp1q_u16",
+    "vuzp1q_u32",
+    "vuzp1_p8",
+    "vuzp1q_p8",
+    "vuzp1_p16",
+    "vuzp1q_p16",
+    "vuzp2_f16",
+    "vuzp2q_f16",
+    "vuzp2_f32",
+    "vuzp2q_f64",
+    "vuzp2_s32",
+    "vuzp2q_s64",
+    "vuzp2_u32",
+    "vuzp2q_u64",
+    "vuzp2q_p64",
+    "vuzp2q_f32",
+    "vuzp2_s8",
+    "vuzp2q_s8",
+    "vuzp2_s16",
+    "vuzp2q_s16",
+    "vuzp2q_s32",
+    "vuzp2_u8",
+    "vuzp2q_u8",
+    "vuzp2_u16",
+    "vuzp2q_u16",
+    "vuzp2q_u32",
+    "vuzp2_p8",
+    "vuzp2q_p8",
+    "vuzp2_p16",
+    "vuzp2q_p16",
+    "vxarq_u64",
+    "vzip1_f16",
+    "vzip1q_f16",
+    "vzip1_f32",
+    "vzip1q_f32",
+    "vzip1q_f64",
+    "vzip1_s8",
+    "vzip1q_s8",
+    "vzip1_s16",
+    "vzip1q_s16",
+    "vzip1_s32",
+    "vzip1q_s32",
+    "vzip1q_s64",
+    "vzip1_u8",
+    "vzip1q_u8",
+    "vzip1_u16",
+    "vzip1q_u16",
+    "vzip1_u32",
+    "vzip1q_u32",
+    "vzip1q_u64",
+    "vzip1_p8",
+    "vzip1q_p8",
+    "vzip1_p16",
+    "vzip1q_p16",
+    "vzip1q_p64",
+    "vzip2_f16",
+    "vzip2q_f16",
+    "vzip2_f32",
+    "vzip2q_f32",
+    "vzip2q_f64",
+    "vzip2_s8",
+    "vzip2q_s8",
+    "vzip2_s16",
+    "vzip2q_s16",
+    "vzip2_s32",
+    "vzip2q_s32",
+    "vzip2q_s64",
+    "vzip2_u8",
+    "vzip2q_u8",
+    "vzip2_u16",
+    "vzip2q_u16",
+    "vzip2_u32",
+    "vzip2q_u32",
+    "vzip2q_u64",
+    "vzip2_p8",
+    "vzip2q_p8",
+    "vzip2_p16",
+    "vzip2q_p16",
+    "vzip2q_p64",
+    "__crc32b",
+    "__crc32cb",
+    "__crc32cd",
+    "__crc32ch",
+    "__crc32cw",
+    "__crc32d",
+    "__crc32h",
+    "__crc32w",
+    "vabal_s8",
+    "vabal_s16",
+    "vabal_s32",
+    "vabal_u8",
+    "vabal_u16",
+    "vabal_u32",
+    "vabd_f16",
+    "vabdq_f16",
+    "vabd_f32",
+    "vabdq_f32",
+    "vabd_s8",
+    "vabdq_s8",
+    "vabd_s16",
+    "vabdq_s16",
+    "vabd_s32",
+    "vabdq_s32",
+    "vabd_u8",
+    "vabdq_u8",
+    "vabd_u16",
+    "vabdq_u16",
+    "vabd_u32",
+    "vabdq_u32",
+    "vabdl_s8",
+    "vabdl_s16",
+    "vabdl_s32",
+    "vabdl_u8",
+    "vabdl_u16",
+    "vabdl_u32",
+    "vabs_f16",
+    "vabsq_f16",
+    "vabs_f32",
+    "vabsq_f32",
+    "vabs_s8",
+    "vabsq_s8",
+    "vabs_s16",
+    "vabsq_s16",
+    "vabs_s32",
+    "vabsq_s32",
+    "vabsh_f16",
+    "vadd_f16",
+    "vaddq_f16",
+    "vadd_p8",
+    "vaddq_p8",
+    "vadd_p16",
+    "vaddq_p16",
+    "vadd_p64",
+    "vaddq_p64",
+    "vaddh_f16",
+    "vaddhn_high_s16",
+    "vaddhn_high_s32",
+    "vaddhn_high_s64",
+    "vaddhn_high_u16",
+    "vaddhn_high_u32",
+    "vaddhn_high_u64",
+    "vaddhn_s16",
+    "vaddhn_s32",
+    "vaddhn_s64",
+    "vaddhn_u16",
+    "vaddhn_u32",
+    "vaddhn_u64",
+    "vaddq_p128",
+    "vaesdq_u8",
+    "vaeseq_u8",
+    "vaesimcq_u8",
+    "vaesmcq_u8",
+    "vbsl_f16",
+    "vbslq_f16",
+    "vcage_f16",
+    "vcageq_f16",
+    "vcage_f32",
+    "vcageq_f32",
+    "vcagt_f16",
+    "vcagtq_f16",
+    "vcagt_f32",
+    "vcagtq_f32",
+    "vcale_f16",
+    "vcaleq_f16",
+    "vcale_f32",
+    "vcaleq_f32",
+    "vcalt_f16",
+    "vcaltq_f16",
+    "vcalt_f32",
+    "vcaltq_f32",
+    "vceq_f16",
+    "vceqq_f16",
+    "vceq_p8",
+    "vceqq_p8",
+    "vcge_f16",
+    "vcgeq_f16",
+    "vcgez_f16",
+    "vcgezq_f16",
+    "vcgt_f16",
+    "vcgtq_f16",
+    "vcgtz_f16",
+    "vcgtzq_f16",
+    "vcle_f16",
+    "vcleq_f16",
+    "vclez_f16",
+    "vclezq_f16",
+    "vcls_s8",
+    "vclsq_s8",
+    "vcls_s16",
+    "vclsq_s16",
+    "vcls_s32",
+    "vclsq_s32",
+    "vcls_u8",
+    "vclsq_u8",
+    "vcls_u16",
+    "vclsq_u16",
+    "vcls_u32",
+    "vclsq_u32",
+    "vclt_f16",
+    "vcltq_f16",
+    "vcltz_f16",
+    "vcltzq_f16",
+    "vclz_s8",
+    "vclzq_s8",
+    "vclz_s16",
+    "vclzq_s16",
+    "vclz_s32",
+    "vclzq_s32",
+    "vclz_u16",
+    "vclz_u16",
+    "vclzq_u16",
+    "vclzq_u16",
+    "vclz_u32",
+    "vclz_u32",
+    "vclzq_u32",
+    "vclzq_u32",
+    "vclz_u8",
+    "vclz_u8",
+    "vclzq_u8",
+    "vclzq_u8",
+    "vcnt_s8",
+    "vcntq_s8",
+    "vcnt_u8",
+    "vcnt_u8",
+    "vcntq_u8",
+    "vcntq_u8",
+    "vcnt_p8",
+    "vcnt_p8",
+    "vcntq_p8",
+    "vcntq_p8",
+    "vcombine_f16",
+    "vcreate_f16",
+    "vcreate_f16",
+    "vcreate_f32",
+    "vcreate_f32",
+    "vcreate_s8",
+    "vcreate_s8",
+    "vcreate_s16",
+    "vcreate_s16",
+    "vcreate_s32",
+    "vcreate_s32",
+    "vcreate_s64",
+    "vcreate_u8",
+    "vcreate_u8",
+    "vcreate_u16",
+    "vcreate_u16",
+    "vcreate_u32",
+    "vcreate_u32",
+    "vcreate_u64",
+    "vcreate_p8",
+    "vcreate_p8",
+    "vcreate_p16",
+    "vcreate_p16",
+    "vcreate_p64",
+    "vcvt_f16_f32",
+    "vcvt_f16_s16",
+    "vcvtq_f16_s16",
+    "vcvt_f16_u16",
+    "vcvtq_f16_u16",
+    "vcvt_f32_f16",
+    "vcvt_f32_s32",
+    "vcvtq_f32_s32",
+    "vcvt_f32_u32",
+    "vcvtq_f32_u32",
+    "vcvt_n_f16_s16",
+    "vcvtq_n_f16_s16",
+    "vcvt_n_f16_u16",
+    "vcvtq_n_f16_u16",
+    "vcvt_n_f32_s32",
+    "vcvtq_n_f32_s32",
+    "vcvt_n_f32_s32",
+    "vcvtq_n_f32_s32",
+    "vcvt_n_f32_u32",
+    "vcvtq_n_f32_u32",
+    "vcvt_n_f32_u32",
+    "vcvtq_n_f32_u32",
+    "vcvt_n_s16_f16",
+    "vcvtq_n_s16_f16",
+    "vcvt_n_s32_f32",
+    "vcvtq_n_s32_f32",
+    "vcvt_n_s32_f32",
+    "vcvtq_n_s32_f32",
+    "vcvt_n_u16_f16",
+    "vcvtq_n_u16_f16",
+    "vcvt_n_u32_f32",
+    "vcvtq_n_u32_f32",
+    "vcvt_n_u32_f32",
+    "vcvtq_n_u32_f32",
+    "vcvt_s16_f16",
+    "vcvtq_s16_f16",
+    "vcvt_s32_f32",
+    "vcvtq_s32_f32",
+    "vcvt_u16_f16",
+    "vcvtq_u16_f16",
+    "vcvt_u32_f32",
+    "vcvtq_u32_f32",
+    "vdot_lane_s32",
+    "vdot_lane_s32",
+    "vdotq_lane_s32",
+    "vdotq_lane_s32",
+    "vdot_lane_u32",
+    "vdot_lane_u32",
+    "vdotq_lane_u32",
+    "vdotq_lane_u32",
+    "vdot_laneq_s32",
+    "vdot_laneq_s32",
+    "vdotq_laneq_s32",
+    "vdotq_laneq_s32",
+    "vdot_laneq_u32",
+    "vdot_laneq_u32",
+    "vdotq_laneq_u32",
+    "vdotq_laneq_u32",
+    "vdot_s32",
+    "vdotq_s32",
+    "vdot_u32",
+    "vdotq_u32",
+    "vdup_lane_f16",
+    "vdupq_lane_f16",
+    "vdup_lane_f32",
+    "vdup_lane_s32",
+    "vdup_lane_u32",
+    "vdupq_lane_f32",
+    "vdupq_lane_s32",
+    "vdupq_lane_u32",
+    "vdup_lane_p16",
+    "vdup_lane_s16",
+    "vdup_lane_u16",
+    "vdupq_lane_p16",
+    "vdupq_lane_s16",
+    "vdupq_lane_u16",
+    "vdup_lane_p8",
+    "vdup_lane_s8",
+    "vdup_lane_u8",
+    "vdupq_lane_p8",
+    "vdupq_lane_s8",
+    "vdupq_lane_u8",
+    "vdup_lane_s64",
+    "vdup_lane_u64",
+    "vdup_laneq_f16",
+    "vdupq_laneq_f16",
+    "vdup_laneq_f32",
+    "vdup_laneq_s32",
+    "vdup_laneq_u32",
+    "vdupq_laneq_f32",
+    "vdupq_laneq_s32",
+    "vdupq_laneq_u32",
+    "vdup_laneq_p16",
+    "vdup_laneq_s16",
+    "vdup_laneq_u16",
+    "vdupq_laneq_p16",
+    "vdupq_laneq_s16",
+    "vdupq_laneq_u16",
+    "vdup_laneq_p8",
+    "vdup_laneq_s8",
+    "vdup_laneq_u8",
+    "vdupq_laneq_p8",
+    "vdupq_laneq_s8",
+    "vdupq_laneq_u8",
+    "vdup_laneq_s64",
+    "vdup_laneq_u64",
+    "vdup_n_f16",
+    "vdupq_n_f16",
+    "vdupq_lane_s64",
+    "vdupq_lane_u64",
+    "vdupq_laneq_s64",
+    "vdupq_laneq_u64",
+    "vext_f16",
+    "vext_f32",
+    "vext_s32",
+    "vext_u32",
+    "vext_s8",
+    "vextq_s16",
+    "vext_u8",
+    "vextq_u16",
+    "vext_p8",
+    "vextq_p16",
+    "vextq_f16",
+    "vextq_f32",
+    "vext_s16",
+    "vextq_s32",
+    "vext_u16",
+    "vextq_u32",
+    "vext_p16",
+    "vextq_s64",
+    "vextq_u64",
+    "vextq_s8",
+    "vextq_u8",
+    "vextq_p8",
+    "vfma_f16",
+    "vfmaq_f16",
+    "vfma_f32",
+    "vfmaq_f32",
+    "vfma_n_f32",
+    "vfmaq_n_f32",
+    "vfms_f16",
+    "vfmsq_f16",
+    "vfms_f32",
+    "vfmsq_f32",
+    "vfms_n_f32",
+    "vfmsq_n_f32",
+    "vget_high_f16",
+    "vget_low_f16",
+    "vget_lane_f16",
+    "vgetq_lane_f16",
+    "vld1_dup_f16",
+    "vld1q_dup_f16",
+    "vld1_f16",
+    "vld1_f16",
+    "vld1q_f16",
+    "vld1q_f16",
+    "vld1_f16_x2",
+    "vld1_f16_x3",
+    "vld1_f16_x4",
+    "vld1q_f16_x2",
+    "vld1q_f16_x3",
+    "vld1q_f16_x4",
+    "vld1_f32_x2",
+    "vld1_f32_x3",
+    "vld1_f32_x4",
+    "vld1q_f32_x2",
+    "vld1q_f32_x3",
+    "vld1q_f32_x4",
+    "vld1_lane_f16",
+    "vld1q_lane_f16",
+    "vld1_p64_x2",
+    "vld1_p64_x3",
+    "vld1_p64_x4",
+    "vld1q_p64_x2",
+    "vld1q_p64_x3",
+    "vld1q_p64_x4",
+    "vld1_s8_x2",
+    "vld1_s8_x3",
+    "vld1_s8_x4",
+    "vld1q_s8_x2",
+    "vld1q_s8_x3",
+    "vld1q_s8_x4",
+    "vld1_s16_x2",
+    "vld1_s16_x3",
+    "vld1_s16_x4",
+    "vld1q_s16_x2",
+    "vld1q_s16_x3",
+    "vld1q_s16_x4",
+    "vld1_s32_x2",
+    "vld1_s32_x3",
+    "vld1_s32_x4",
+    "vld1q_s32_x2",
+    "vld1q_s32_x3",
+    "vld1q_s32_x4",
+    "vld1_s64_x2",
+    "vld1_s64_x3",
+    "vld1_s64_x4",
+    "vld1q_s64_x2",
+    "vld1q_s64_x3",
+    "vld1q_s64_x4",
+    "vld1_u8_x2",
+    "vld1_u8_x3",
+    "vld1_u8_x4",
+    "vld1q_u8_x2",
+    "vld1q_u8_x3",
+    "vld1q_u8_x4",
+    "vld1_u16_x2",
+    "vld1_u16_x3",
+    "vld1_u16_x4",
+    "vld1q_u16_x2",
+    "vld1q_u16_x3",
+    "vld1q_u16_x4",
+    "vld1_u32_x2",
+    "vld1_u32_x3",
+    "vld1_u32_x4",
+    "vld1q_u32_x2",
+    "vld1q_u32_x3",
+    "vld1q_u32_x4",
+    "vld1_u64_x2",
+    "vld1_u64_x3",
+    "vld1_u64_x4",
+    "vld1q_u64_x2",
+    "vld1q_u64_x3",
+    "vld1q_u64_x4",
+    "vld1_p8_x2",
+    "vld1_p8_x3",
+    "vld1_p8_x4",
+    "vld1q_p8_x2",
+    "vld1q_p8_x3",
+    "vld1q_p8_x4",
+    "vld1_p16_x2",
+    "vld1_p16_x3",
+    "vld1_p16_x4",
+    "vld1q_p16_x2",
+    "vld1q_p16_x3",
+    "vld1q_p16_x4",
+    "vld2_dup_f16",
+    "vld2q_dup_f16",
+    "vld2_dup_f16",
+    "vld2q_dup_f16",
+    "vld2_dup_f32",
+    "vld2q_dup_f32",
+    "vld2_dup_s8",
+    "vld2q_dup_s8",
+    "vld2_dup_s16",
+    "vld2q_dup_s16",
+    "vld2_dup_s32",
+    "vld2q_dup_s32",
+    "vld2_dup_f32",
+    "vld2q_dup_f32",
+    "vld2_dup_s8",
+    "vld2q_dup_s8",
+    "vld2_dup_s16",
+    "vld2q_dup_s16",
+    "vld2_dup_s32",
+    "vld2q_dup_s32",
+    "vld2_dup_p64",
+    "vld2_dup_s64",
+    "vld2_dup_s64",
+    "vld2_dup_u64",
+    "vld2_dup_u8",
+    "vld2_dup_u8",
+    "vld2q_dup_u8",
+    "vld2q_dup_u8",
+    "vld2_dup_u16",
+    "vld2_dup_u16",
+    "vld2q_dup_u16",
+    "vld2q_dup_u16",
+    "vld2_dup_u32",
+    "vld2_dup_u32",
+    "vld2q_dup_u32",
+    "vld2q_dup_u32",
+    "vld2_dup_p8",
+    "vld2_dup_p8",
+    "vld2q_dup_p8",
+    "vld2q_dup_p8",
+    "vld2_dup_p16",
+    "vld2_dup_p16",
+    "vld2q_dup_p16",
+    "vld2q_dup_p16",
+    "vld2_f16",
+    "vld2q_f16",
+    "vld2_f16",
+    "vld2q_f16",
+    "vld2_f32",
+    "vld2q_f32",
+    "vld2_s8",
+    "vld2q_s8",
+    "vld2_s16",
+    "vld2q_s16",
+    "vld2_s32",
+    "vld2q_s32",
+    "vld2_f32",
+    "vld2q_f32",
+    "vld2_s8",
+    "vld2q_s8",
+    "vld2_s16",
+    "vld2q_s16",
+    "vld2_s32",
+    "vld2q_s32",
+    "vld2_lane_f16",
+    "vld2q_lane_f16",
+    "vld2_lane_f16",
+    "vld2q_lane_f16",
+    "vld2_lane_f32",
+    "vld2q_lane_f32",
+    "vld2_lane_s8",
+    "vld2_lane_s16",
+    "vld2q_lane_s16",
+    "vld2_lane_s32",
+    "vld2q_lane_s32",
+    "vld2_lane_f32",
+    "vld2q_lane_f32",
+    "vld2q_lane_s16",
+    "vld2q_lane_s32",
+    "vld2_lane_s8",
+    "vld2_lane_s16",
+    "vld2_lane_s32",
+    "vld2_lane_u8",
+    "vld2_lane_u16",
+    "vld2q_lane_u16",
+    "vld2_lane_u32",
+    "vld2q_lane_u32",
+    "vld2_lane_p8",
+    "vld2_lane_p16",
+    "vld2q_lane_p16",
+    "vld2_p64",
+    "vld2_s64",
+    "vld2_s64",
+    "vld2_u64",
+    "vld2_u8",
+    "vld2q_u8",
+    "vld2_u16",
+    "vld2q_u16",
+    "vld2_u32",
+    "vld2q_u32",
+    "vld2_p8",
+    "vld2q_p8",
+    "vld2_p16",
+    "vld2q_p16",
+    "vld3_dup_f16",
+    "vld3q_dup_f16",
+    "vld3_dup_f16",
+    "vld3q_dup_f16",
+    "vld3_dup_f32",
+    "vld3q_dup_f32",
+    "vld3_dup_s8",
+    "vld3q_dup_s8",
+    "vld3_dup_s16",
+    "vld3q_dup_s16",
+    "vld3_dup_s32",
+    "vld3q_dup_s32",
+    "vld3_dup_s64",
+    "vld3_dup_f32",
+    "vld3q_dup_f32",
+    "vld3_dup_s8",
+    "vld3q_dup_s8",
+    "vld3_dup_s16",
+    "vld3q_dup_s16",
+    "vld3_dup_s32",
+    "vld3q_dup_s32",
+    "vld3_dup_p64",
+    "vld3_dup_s64",
+    "vld3_dup_u64",
+    "vld3_dup_u8",
+    "vld3_dup_u8",
+    "vld3q_dup_u8",
+    "vld3q_dup_u8",
+    "vld3_dup_u16",
+    "vld3_dup_u16",
+    "vld3q_dup_u16",
+    "vld3q_dup_u16",
+    "vld3_dup_u32",
+    "vld3_dup_u32",
+    "vld3q_dup_u32",
+    "vld3q_dup_u32",
+    "vld3_dup_p8",
+    "vld3_dup_p8",
+    "vld3q_dup_p8",
+    "vld3q_dup_p8",
+    "vld3_dup_p16",
+    "vld3_dup_p16",
+    "vld3q_dup_p16",
+    "vld3q_dup_p16",
+    "vld3_f16",
+    "vld3q_f16",
+    "vld3_f16",
+    "vld3q_f16",
+    "vld3_f32",
+    "vld3q_f32",
+    "vld3_s8",
+    "vld3q_s8",
+    "vld3_s16",
+    "vld3q_s16",
+    "vld3_s32",
+    "vld3q_s32",
+    "vld3_f32",
+    "vld3q_f32",
+    "vld3_s8",
+    "vld3q_s8",
+    "vld3_s16",
+    "vld3q_s16",
+    "vld3_s32",
+    "vld3q_s32",
+    "vld3_lane_f16",
+    "vld3q_lane_f16",
+    "vld3_lane_f16",
+    "vld3q_lane_f16",
+    "vld3_lane_f32",
+    "vld3q_lane_f32",
+    "vld3_lane_f32",
+    "vld3_lane_s8",
+    "vld3_lane_s16",
+    "vld3q_lane_s16",
+    "vld3_lane_s32",
+    "vld3q_lane_s32",
+    "vld3_lane_s8",
+    "vld3_lane_s16",
+    "vld3q_lane_s16",
+    "vld3_lane_s32",
+    "vld3q_lane_s32",
+    "vld3_lane_u8",
+    "vld3_lane_u16",
+    "vld3q_lane_u16",
+    "vld3_lane_u32",
+    "vld3q_lane_u32",
+    "vld3_lane_p8",
+    "vld3_lane_p16",
+    "vld3q_lane_p16",
+    "vld3_p64",
+    "vld3_s64",
+    "vld3_s64",
+    "vld3_u64",
+    "vld3_u8",
+    "vld3q_u8",
+    "vld3_u16",
+    "vld3q_u16",
+    "vld3_u32",
+    "vld3q_u32",
+    "vld3_p8",
+    "vld3q_p8",
+    "vld3_p16",
+    "vld3q_p16",
+    "vld3q_lane_f32",
+    "vld4_dup_f16",
+    "vld4q_dup_f16",
+    "vld4_dup_f16",
+    "vld4q_dup_f16",
+    "vld4_dup_f32",
+    "vld4q_dup_f32",
+    "vld4_dup_s8",
+    "vld4q_dup_s8",
+    "vld4_dup_s16",
+    "vld4q_dup_s16",
+    "vld4_dup_s32",
+    "vld4q_dup_s32",
+    "vld4_dup_f32",
+    "vld4q_dup_f32",
+    "vld4_dup_s8",
+    "vld4q_dup_s8",
+    "vld4_dup_s16",
+    "vld4q_dup_s16",
+    "vld4_dup_s32",
+    "vld4q_dup_s32",
+    "vld4_dup_s64",
+    "vld4_dup_p64",
+    "vld4_dup_s64",
+    "vld4_dup_u64",
+    "vld4_dup_u8",
+    "vld4_dup_u8",
+    "vld4q_dup_u8",
+    "vld4q_dup_u8",
+    "vld4_dup_u16",
+    "vld4_dup_u16",
+    "vld4q_dup_u16",
+    "vld4q_dup_u16",
+    "vld4_dup_u32",
+    "vld4_dup_u32",
+    "vld4q_dup_u32",
+    "vld4q_dup_u32",
+    "vld4_dup_p8",
+    "vld4_dup_p8",
+    "vld4q_dup_p8",
+    "vld4q_dup_p8",
+    "vld4_dup_p16",
+    "vld4_dup_p16",
+    "vld4q_dup_p16",
+    "vld4q_dup_p16",
+    "vld4_f16",
+    "vld4q_f16",
+    "vld4_f16",
+    "vld4q_f16",
+    "vld4_f32",
+    "vld4q_f32",
+    "vld4_s8",
+    "vld4q_s8",
+    "vld4_s16",
+    "vld4q_s16",
+    "vld4_s32",
+    "vld4q_s32",
+    "vld4_f32",
+    "vld4q_f32",
+    "vld4_s8",
+    "vld4q_s8",
+    "vld4_s16",
+    "vld4q_s16",
+    "vld4_s32",
+    "vld4q_s32",
+    "vld4_lane_f16",
+    "vld4q_lane_f16",
+    "vld4_lane_f16",
+    "vld4q_lane_f16",
+    "vld4_lane_f32",
+    "vld4q_lane_f32",
+    "vld4_lane_s8",
+    "vld4_lane_s16",
+    "vld4q_lane_s16",
+    "vld4_lane_s32",
+    "vld4q_lane_s32",
+    "vld4_lane_f32",
+    "vld4q_lane_f32",
+    "vld4_lane_s8",
+    "vld4_lane_s16",
+    "vld4q_lane_s16",
+    "vld4_lane_s32",
+    "vld4q_lane_s32",
+    "vld4_lane_u8",
+    "vld4_lane_u16",
+    "vld4q_lane_u16",
+    "vld4_lane_u32",
+    "vld4q_lane_u32",
+    "vld4_lane_p8",
+    "vld4_lane_p16",
+    "vld4q_lane_p16",
+    "vld4_p64",
+    "vld4_s64",
+    "vld4_s64",
+    "vld4_u64",
+    "vld4_u8",
+    "vld4q_u8",
+    "vld4_u16",
+    "vld4q_u16",
+    "vld4_u32",
+    "vld4q_u32",
+    "vld4_p8",
+    "vld4q_p8",
+    "vld4_p16",
+    "vld4q_p16",
+    "vmax_f16",
+    "vmaxq_f16",
+    "vmax_f32",
+    "vmaxq_f32",
+    "vmax_s8",
+    "vmaxq_s8",
+    "vmax_s16",
+    "vmaxq_s16",
+    "vmax_s32",
+    "vmaxq_s32",
+    "vmax_u8",
+    "vmaxq_u8",
+    "vmax_u16",
+    "vmaxq_u16",
+    "vmax_u32",
+    "vmaxq_u32",
+    "vmaxnm_f16",
+    "vmaxnmq_f16",
+    "vmaxnm_f32",
+    "vmaxnmq_f32",
+    "vmin_f16",
+    "vminq_f16",
+    "vmin_f32",
+    "vminq_f32",
+    "vmin_s8",
+    "vminq_s8",
+    "vmin_s16",
+    "vminq_s16",
+    "vmin_s32",
+    "vminq_s32",
+    "vmin_u8",
+    "vminq_u8",
+    "vmin_u16",
+    "vminq_u16",
+    "vmin_u32",
+    "vminq_u32",
+    "vminnm_f16",
+    "vminnmq_f16",
+    "vminnm_f32",
+    "vminnmq_f32",
+    "vmla_f32",
+    "vmlaq_f32",
+    "vmla_lane_f32",
+    "vmla_laneq_f32",
+    "vmlaq_lane_f32",
+    "vmlaq_laneq_f32",
+    "vmla_lane_s16",
+    "vmla_lane_u16",
+    "vmla_laneq_s16",
+    "vmla_laneq_u16",
+    "vmlaq_lane_s16",
+    "vmlaq_lane_u16",
+    "vmlaq_laneq_s16",
+    "vmlaq_laneq_u16",
+    "vmla_lane_s32",
+    "vmla_lane_u32",
+    "vmla_laneq_s32",
+    "vmla_laneq_u32",
+    "vmlaq_lane_s32",
+    "vmlaq_lane_u32",
+    "vmlaq_laneq_s32",
+    "vmlaq_laneq_u32",
+    "vmla_n_f32",
+    "vmlaq_n_f32",
+    "vmla_n_s16",
+    "vmlaq_n_s16",
+    "vmla_n_u16",
+    "vmlaq_n_u16",
+    "vmla_n_s32",
+    "vmlaq_n_s32",
+    "vmla_n_u32",
+    "vmlaq_n_u32",
+    "vmla_s8",
+    "vmlaq_s8",
+    "vmla_s16",
+    "vmlaq_s16",
+    "vmla_s32",
+    "vmlaq_s32",
+    "vmla_u8",
+    "vmlaq_u8",
+    "vmla_u16",
+    "vmlaq_u16",
+    "vmla_u32",
+    "vmlaq_u32",
+    "vmlal_lane_s16",
+    "vmlal_laneq_s16",
+    "vmlal_lane_s32",
+    "vmlal_laneq_s32",
+    "vmlal_lane_u16",
+    "vmlal_laneq_u16",
+    "vmlal_lane_u32",
+    "vmlal_laneq_u32",
+    "vmlal_n_s16",
+    "vmlal_n_s32",
+    "vmlal_n_u16",
+    "vmlal_n_u32",
+    "vmlal_s8",
+    "vmlal_s16",
+    "vmlal_s32",
+    "vmlal_u8",
+    "vmlal_u16",
+    "vmlal_u32",
+    "vmls_f32",
+    "vmlsq_f32",
+    "vmls_lane_f32",
+    "vmls_laneq_f32",
+    "vmlsq_lane_f32",
+    "vmlsq_laneq_f32",
+    "vmls_lane_s16",
+    "vmls_lane_u16",
+    "vmls_laneq_s16",
+    "vmls_laneq_u16",
+    "vmlsq_lane_s16",
+    "vmlsq_lane_u16",
+    "vmlsq_laneq_s16",
+    "vmlsq_laneq_u16",
+    "vmls_lane_s32",
+    "vmls_lane_u32",
+    "vmls_laneq_s32",
+    "vmls_laneq_u32",
+    "vmlsq_lane_s32",
+    "vmlsq_lane_u32",
+    "vmlsq_laneq_s32",
+    "vmlsq_laneq_u32",
+    "vmls_n_f32",
+    "vmlsq_n_f32",
+    "vmls_n_s16",
+    "vmlsq_n_s16",
+    "vmls_n_u16",
+    "vmlsq_n_u16",
+    "vmls_n_s32",
+    "vmlsq_n_s32",
+    "vmls_n_u32",
+    "vmlsq_n_u32",
+    "vmls_s8",
+    "vmlsq_s8",
+    "vmls_s16",
+    "vmlsq_s16",
+    "vmls_s32",
+    "vmlsq_s32",
+    "vmls_u8",
+    "vmlsq_u8",
+    "vmls_u16",
+    "vmlsq_u16",
+    "vmls_u32",
+    "vmlsq_u32",
+    "vmlsl_lane_s16",
+    "vmlsl_laneq_s16",
+    "vmlsl_lane_s32",
+    "vmlsl_laneq_s32",
+    "vmlsl_lane_u16",
+    "vmlsl_laneq_u16",
+    "vmlsl_lane_u32",
+    "vmlsl_laneq_u32",
+    "vmlsl_n_s16",
+    "vmlsl_n_s32",
+    "vmlsl_n_u16",
+    "vmlsl_n_u32",
+    "vmlsl_s8",
+    "vmlsl_s16",
+    "vmlsl_s32",
+    "vmlsl_u8",
+    "vmlsl_u16",
+    "vmlsl_u32",
+    "vmmlaq_s32",
+    "vmmlaq_u32",
+    "vmov_n_f16",
+    "vmovq_n_f16",
+    "vmul_f16",
+    "vmulq_f16",
+    "vmul_lane_f16",
+    "vmulq_lane_f16",
+    "vmul_lane_f32",
+    "vmul_laneq_f32",
+    "vmulq_lane_f32",
+    "vmulq_laneq_f32",
+    "vmul_lane_s16",
+    "vmulq_lane_s16",
+    "vmul_lane_s32",
+    "vmulq_lane_s32",
+    "vmul_lane_u16",
+    "vmulq_lane_u16",
+    "vmul_lane_u32",
+    "vmulq_lane_u32",
+    "vmul_laneq_s16",
+    "vmulq_laneq_s16",
+    "vmul_laneq_s32",
+    "vmulq_laneq_s32",
+    "vmul_laneq_u16",
+    "vmulq_laneq_u16",
+    "vmul_laneq_u32",
+    "vmulq_laneq_u32",
+    "vmul_n_f16",
+    "vmulq_n_f16",
+    "vmul_n_f32",
+    "vmulq_n_f32",
+    "vmul_n_s16",
+    "vmulq_n_s16",
+    "vmul_n_s32",
+    "vmulq_n_s32",
+    "vmul_n_u16",
+    "vmulq_n_u16",
+    "vmul_n_u32",
+    "vmulq_n_u32",
+    "vmul_p8",
+    "vmulq_p8",
+    "vmull_lane_s16",
+    "vmull_laneq_s16",
+    "vmull_lane_s32",
+    "vmull_laneq_s32",
+    "vmull_lane_u16",
+    "vmull_laneq_u16",
+    "vmull_lane_u32",
+    "vmull_laneq_u32",
+    "vmull_n_s16",
+    "vmull_n_s32",
+    "vmull_n_u16",
+    "vmull_n_u32",
+    "vmull_p8",
+    "vmull_s16",
+    "vmull_s32",
+    "vmull_s8",
+    "vmull_u8",
+    "vmull_u16",
+    "vmull_u32",
+    "vneg_f16",
+    "vnegq_f16",
+    "vneg_f32",
+    "vnegq_f32",
+    "vneg_s8",
+    "vnegq_s8",
+    "vneg_s16",
+    "vnegq_s16",
+    "vneg_s32",
+    "vnegq_s32",
+    "vpadal_s8",
+    "vpadalq_s8",
+    "vpadal_s16",
+    "vpadalq_s16",
+    "vpadal_s32",
+    "vpadalq_s32",
+    "vpadal_u8",
+    "vpadalq_u8",
+    "vpadal_u16",
+    "vpadalq_u16",
+    "vpadal_u32",
+    "vpadalq_u32",
+    "vpadd_f16",
+    "vpadd_f32",
+    "vpadd_s8",
+    "vpadd_s16",
+    "vpadd_s32",
+    "vpadd_u8",
+    "vpadd_u8",
+    "vpadd_u16",
+    "vpadd_u16",
+    "vpadd_u32",
+    "vpadd_u32",
+    "vpaddl_s8",
+    "vpaddlq_s8",
+    "vpaddl_s16",
+    "vpaddlq_s16",
+    "vpaddl_s32",
+    "vpaddlq_s32",
+    "vpaddl_u8",
+    "vpaddlq_u8",
+    "vpaddl_u16",
+    "vpaddlq_u16",
+    "vpaddl_u32",
+    "vpaddlq_u32",
+    "vpmax_f32",
+    "vpmax_s8",
+    "vpmax_s16",
+    "vpmax_s32",
+    "vpmax_u8",
+    "vpmax_u16",
+    "vpmax_u32",
+    "vpmin_f32",
+    "vpmin_s8",
+    "vpmin_s16",
+    "vpmin_s32",
+    "vpmin_u8",
+    "vpmin_u16",
+    "vpmin_u32",
+    "vqabs_s8",
+    "vqabsq_s8",
+    "vqabs_s16",
+    "vqabsq_s16",
+    "vqabs_s32",
+    "vqabsq_s32",
+    "vqadd_s64",
+    "vqaddq_s64",
+    "vqadd_u64",
+    "vqaddq_u64",
+    "vqdmlal_lane_s16",
+    "vqdmlal_lane_s32",
+    "vqdmlal_n_s16",
+    "vqdmlal_n_s32",
+    "vqdmlal_s16",
+    "vqdmlal_s32",
+    "vqdmlsl_lane_s16",
+    "vqdmlsl_lane_s32",
+    "vqdmlsl_n_s16",
+    "vqdmlsl_n_s32",
+    "vqdmlsl_s16",
+    "vqdmlsl_s32",
+    "vqdmulh_laneq_s16",
+    "vqdmulhq_laneq_s16",
+    "vqdmulh_laneq_s32",
+    "vqdmulhq_laneq_s32",
+    "vqdmulh_n_s16",
+    "vqdmulhq_n_s16",
+    "vqdmulh_n_s32",
+    "vqdmulhq_n_s32",
+    "vqdmulh_s16",
+    "vqdmulhq_s16",
+    "vqdmulh_s32",
+    "vqdmulhq_s32",
+    "vqdmull_lane_s16",
+    "vqdmull_lane_s32",
+    "vqdmull_n_s16",
+    "vqdmull_n_s32",
+    "vqdmull_s16",
+    "vqdmull_s32",
+    "vqmovn_s16",
+    "vqmovn_s32",
+    "vqmovn_s64",
+    "vqmovn_u16",
+    "vqmovn_u32",
+    "vqmovn_u64",
+    "vqmovun_s16",
+    "vqmovun_s32",
+    "vqmovun_s64",
+    "vqneg_s8",
+    "vqnegq_s8",
+    "vqneg_s16",
+    "vqnegq_s16",
+    "vqneg_s32",
+    "vqnegq_s32",
+    "vqrdmulh_lane_s16",
+    "vqrdmulh_lane_s32",
+    "vqrdmulh_laneq_s16",
+    "vqrdmulh_laneq_s32",
+    "vqrdmulhq_lane_s16",
+    "vqrdmulhq_lane_s32",
+    "vqrdmulhq_laneq_s16",
+    "vqrdmulhq_laneq_s32",
+    "vqrdmulh_n_s16",
+    "vqrdmulhq_n_s16",
+    "vqrdmulh_n_s32",
+    "vqrdmulhq_n_s32",
+    "vqrdmulh_s16",
+    "vqrdmulhq_s16",
+    "vqrdmulh_s32",
+    "vqrdmulhq_s32",
+    "vqrshl_s8",
+    "vqrshlq_s8",
+    "vqrshl_s16",
+    "vqrshlq_s16",
+    "vqrshl_s32",
+    "vqrshlq_s32",
+    "vqrshl_s64",
+    "vqrshlq_s64",
+    "vqrshl_u8",
+    "vqrshlq_u8",
+    "vqrshl_u16",
+    "vqrshlq_u16",
+    "vqrshl_u32",
+    "vqrshlq_u32",
+    "vqrshl_u64",
+    "vqrshlq_u64",
+    "vqrshrn_n_s16",
+    "vqrshrn_n_s32",
+    "vqrshrn_n_s64",
+    "vqrshrn_n_s16",
+    "vqrshrn_n_s32",
+    "vqrshrn_n_s64",
+    "vqrshrn_n_u16",
+    "vqrshrn_n_u32",
+    "vqrshrn_n_u64",
+    "vqrshrn_n_u16",
+    "vqrshrn_n_u32",
+    "vqrshrn_n_u64",
+    "vqrshrun_n_s16",
+    "vqrshrun_n_s32",
+    "vqrshrun_n_s64",
+    "vqrshrun_n_s16",
+    "vqrshrun_n_s32",
+    "vqrshrun_n_s64",
+    "vqshl_n_s8",
+    "vqshlq_n_s8",
+    "vqshl_n_s16",
+    "vqshlq_n_s16",
+    "vqshl_n_s32",
+    "vqshlq_n_s32",
+    "vqshl_n_s64",
+    "vqshlq_n_s64",
+    "vqshl_n_u8",
+    "vqshlq_n_u8",
+    "vqshl_n_u16",
+    "vqshlq_n_u16",
+    "vqshl_n_u32",
+    "vqshlq_n_u32",
+    "vqshl_n_u64",
+    "vqshlq_n_u64",
+    "vqshl_s8",
+    "vqshlq_s8",
+    "vqshl_s16",
+    "vqshlq_s16",
+    "vqshl_s32",
+    "vqshlq_s32",
+    "vqshl_s64",
+    "vqshlq_s64",
+    "vqshl_u8",
+    "vqshlq_u8",
+    "vqshl_u16",
+    "vqshlq_u16",
+    "vqshl_u32",
+    "vqshlq_u32",
+    "vqshl_u64",
+    "vqshlq_u64",
+    "vqshlu_n_s8",
+    "vqshluq_n_s8",
+    "vqshlu_n_s16",
+    "vqshluq_n_s16",
+    "vqshlu_n_s32",
+    "vqshluq_n_s32",
+    "vqshlu_n_s64",
+    "vqshluq_n_s64",
+    "vqshlu_n_s8",
+    "vqshluq_n_s8",
+    "vqshlu_n_s16",
+    "vqshluq_n_s16",
+    "vqshlu_n_s32",
+    "vqshluq_n_s32",
+    "vqshlu_n_s64",
+    "vqshluq_n_s64",
+    "vqshrn_n_s16",
+    "vqshrn_n_s32",
+    "vqshrn_n_s64",
+    "vqshrn_n_s16",
+    "vqshrn_n_s32",
+    "vqshrn_n_s64",
+    "vqshrn_n_u16",
+    "vqshrn_n_u32",
+    "vqshrn_n_u64",
+    "vqshrn_n_u16",
+    "vqshrn_n_u32",
+    "vqshrn_n_u64",
+    "vqshrun_n_s16",
+    "vqshrun_n_s32",
+    "vqshrun_n_s64",
+    "vqshrun_n_s16",
+    "vqshrun_n_s32",
+    "vqshrun_n_s64",
+    "vqsub_s64",
+    "vqsubq_s64",
+    "vqsub_u64",
+    "vqsubq_u64",
+    "vraddhn_high_s16",
+    "vraddhn_high_s32",
+    "vraddhn_high_s64",
+    "vraddhn_high_u16",
+    "vraddhn_high_u32",
+    "vraddhn_high_u64",
+    "vraddhn_s16",
+    "vraddhn_s32",
+    "vraddhn_s64",
+    "vraddhn_u16",
+    "vraddhn_u16",
+    "vraddhn_u32",
+    "vraddhn_u32",
+    "vraddhn_u64",
+    "vraddhn_u64",
+    "vrecpe_f16",
+    "vrecpeq_f16",
+    "vrecpe_f32",
+    "vrecpeq_f32",
+    "vrecpe_u32",
+    "vrecpeq_u32",
+    "vrecps_f16",
+    "vrecpsq_f16",
+    "vrecps_f32",
+    "vrecpsq_f32",
+    "vreinterpret_f32_f16",
+    "vreinterpret_f32_f16",
+    "vreinterpret_s8_f16",
+    "vreinterpret_s8_f16",
+    "vreinterpret_s16_f16",
+    "vreinterpret_s16_f16",
+    "vreinterpret_s32_f16",
+    "vreinterpret_s32_f16",
+    "vreinterpret_s64_f16",
+    "vreinterpret_s64_f16",
+    "vreinterpret_u8_f16",
+    "vreinterpret_u8_f16",
+    "vreinterpret_u16_f16",
+    "vreinterpret_u16_f16",
+    "vreinterpret_u32_f16",
+    "vreinterpret_u32_f16",
+    "vreinterpret_u64_f16",
+    "vreinterpret_u64_f16",
+    "vreinterpret_p8_f16",
+    "vreinterpret_p8_f16",
+    "vreinterpret_p16_f16",
+    "vreinterpret_p16_f16",
+    "vreinterpretq_f32_f16",
+    "vreinterpretq_f32_f16",
+    "vreinterpretq_s8_f16",
+    "vreinterpretq_s8_f16",
+    "vreinterpretq_s16_f16",
+    "vreinterpretq_s16_f16",
+    "vreinterpretq_s32_f16",
+    "vreinterpretq_s32_f16",
+    "vreinterpretq_s64_f16",
+    "vreinterpretq_s64_f16",
+    "vreinterpretq_u8_f16",
+    "vreinterpretq_u8_f16",
+    "vreinterpretq_u16_f16",
+    "vreinterpretq_u16_f16",
+    "vreinterpretq_u32_f16",
+    "vreinterpretq_u32_f16",
+    "vreinterpretq_u64_f16",
+    "vreinterpretq_u64_f16",
+    "vreinterpretq_p8_f16",
+    "vreinterpretq_p8_f16",
+    "vreinterpretq_p16_f16",
+    "vreinterpretq_p16_f16",
+    "vreinterpret_f16_f32",
+    "vreinterpret_f16_f32",
+    "vreinterpretq_f16_f32",
+    "vreinterpretq_f16_f32",
+    "vreinterpret_f16_s8",
+    "vreinterpret_f16_s8",
+    "vreinterpretq_f16_s8",
+    "vreinterpretq_f16_s8",
+    "vreinterpret_f16_s16",
+    "vreinterpret_f16_s16",
+    "vreinterpretq_f16_s16",
+    "vreinterpretq_f16_s16",
+    "vreinterpret_f16_s32",
+    "vreinterpret_f16_s32",
+    "vreinterpretq_f16_s32",
+    "vreinterpretq_f16_s32",
+    "vreinterpret_f16_s64",
+    "vreinterpret_f16_s64",
+    "vreinterpretq_f16_s64",
+    "vreinterpretq_f16_s64",
+    "vreinterpret_f16_u8",
+    "vreinterpret_f16_u8",
+    "vreinterpretq_f16_u8",
+    "vreinterpretq_f16_u8",
+    "vreinterpret_f16_u16",
+    "vreinterpret_f16_u16",
+    "vreinterpretq_f16_u16",
+    "vreinterpretq_f16_u16",
+    "vreinterpret_f16_u32",
+    "vreinterpret_f16_u32",
+    "vreinterpretq_f16_u32",
+    "vreinterpretq_f16_u32",
+    "vreinterpret_f16_u64",
+    "vreinterpret_f16_u64",
+    "vreinterpretq_f16_u64",
+    "vreinterpretq_f16_u64",
+    "vreinterpret_f16_p8",
+    "vreinterpret_f16_p8",
+    "vreinterpretq_f16_p8",
+    "vreinterpretq_f16_p8",
+    "vreinterpret_f16_p16",
+    "vreinterpret_f16_p16",
+    "vreinterpretq_f16_p16",
+    "vreinterpretq_f16_p16",
+    "vreinterpretq_f16_p128",
+    "vreinterpretq_f16_p128",
+    "vreinterpret_p64_f16",
+    "vreinterpret_p64_f16",
+    "vreinterpretq_p128_f16",
+    "vreinterpretq_p128_f16",
+    "vreinterpretq_p64_f16",
+    "vreinterpretq_p64_f16",
+    "vreinterpret_f16_p64",
+    "vreinterpret_f16_p64",
+    "vreinterpretq_f16_p64",
+    "vreinterpretq_f16_p64",
+    "vreinterpretq_f32_p128",
+    "vreinterpretq_f32_p128",
+    "vreinterpret_s8_f32",
+    "vreinterpret_s8_f32",
+    "vreinterpret_s16_f32",
+    "vreinterpret_s16_f32",
+    "vreinterpret_s32_f32",
+    "vreinterpret_s32_f32",
+    "vreinterpret_s64_f32",
+    "vreinterpret_s64_f32",
+    "vreinterpret_u8_f32",
+    "vreinterpret_u8_f32",
+    "vreinterpret_u16_f32",
+    "vreinterpret_u16_f32",
+    "vreinterpret_u32_f32",
+    "vreinterpret_u32_f32",
+    "vreinterpret_u64_f32",
+    "vreinterpret_u64_f32",
+    "vreinterpret_p8_f32",
+    "vreinterpret_p8_f32",
+    "vreinterpret_p16_f32",
+    "vreinterpret_p16_f32",
+    "vreinterpretq_p128_f32",
+    "vreinterpretq_p128_f32",
+    "vreinterpretq_s8_f32",
+    "vreinterpretq_s8_f32",
+    "vreinterpretq_s16_f32",
+    "vreinterpretq_s16_f32",
+    "vreinterpretq_s32_f32",
+    "vreinterpretq_s32_f32",
+    "vreinterpretq_s64_f32",
+    "vreinterpretq_s64_f32",
+    "vreinterpretq_u8_f32",
+    "vreinterpretq_u8_f32",
+    "vreinterpretq_u16_f32",
+    "vreinterpretq_u16_f32",
+    "vreinterpretq_u32_f32",
+    "vreinterpretq_u32_f32",
+    "vreinterpretq_u64_f32",
+    "vreinterpretq_u64_f32",
+    "vreinterpretq_p8_f32",
+    "vreinterpretq_p8_f32",
+    "vreinterpretq_p16_f32",
+    "vreinterpretq_p16_f32",
+    "vreinterpret_f32_s8",
+    "vreinterpret_f32_s8",
+    "vreinterpret_s16_s8",
+    "vreinterpret_s16_s8",
+    "vreinterpret_s32_s8",
+    "vreinterpret_s32_s8",
+    "vreinterpret_s64_s8",
+    "vreinterpret_s64_s8",
+    "vreinterpret_u8_s8",
+    "vreinterpret_u8_s8",
+    "vreinterpret_u16_s8",
+    "vreinterpret_u16_s8",
+    "vreinterpret_u32_s8",
+    "vreinterpret_u32_s8",
+    "vreinterpret_u64_s8",
+    "vreinterpret_u64_s8",
+    "vreinterpret_p8_s8",
+    "vreinterpret_p8_s8",
+    "vreinterpret_p16_s8",
+    "vreinterpret_p16_s8",
+    "vreinterpretq_f32_s8",
+    "vreinterpretq_f32_s8",
+    "vreinterpretq_s16_s8",
+    "vreinterpretq_s16_s8",
+    "vreinterpretq_s32_s8",
+    "vreinterpretq_s32_s8",
+    "vreinterpretq_s64_s8",
+    "vreinterpretq_s64_s8",
+    "vreinterpretq_u8_s8",
+    "vreinterpretq_u8_s8",
+    "vreinterpretq_u16_s8",
+    "vreinterpretq_u16_s8",
+    "vreinterpretq_u32_s8",
+    "vreinterpretq_u32_s8",
+    "vreinterpretq_u64_s8",
+    "vreinterpretq_u64_s8",
+    "vreinterpretq_p8_s8",
+    "vreinterpretq_p8_s8",
+    "vreinterpretq_p16_s8",
+    "vreinterpretq_p16_s8",
+    "vreinterpret_f32_s16",
+    "vreinterpret_f32_s16",
+    "vreinterpret_s8_s16",
+    "vreinterpret_s8_s16",
+    "vreinterpret_s32_s16",
+    "vreinterpret_s32_s16",
+    "vreinterpret_s64_s16",
+    "vreinterpret_s64_s16",
+    "vreinterpret_u8_s16",
+    "vreinterpret_u8_s16",
+    "vreinterpret_u16_s16",
+    "vreinterpret_u16_s16",
+    "vreinterpret_u32_s16",
+    "vreinterpret_u32_s16",
+    "vreinterpret_u64_s16",
+    "vreinterpret_u64_s16",
+    "vreinterpret_p8_s16",
+    "vreinterpret_p8_s16",
+    "vreinterpret_p16_s16",
+    "vreinterpret_p16_s16",
+    "vreinterpretq_f32_s16",
+    "vreinterpretq_f32_s16",
+    "vreinterpretq_s8_s16",
+    "vreinterpretq_s8_s16",
+    "vreinterpretq_s32_s16",
+    "vreinterpretq_s32_s16",
+    "vreinterpretq_s64_s16",
+    "vreinterpretq_s64_s16",
+    "vreinterpretq_u8_s16",
+    "vreinterpretq_u8_s16",
+    "vreinterpretq_u16_s16",
+    "vreinterpretq_u16_s16",
+    "vreinterpretq_u32_s16",
+    "vreinterpretq_u32_s16",
+    "vreinterpretq_u64_s16",
+    "vreinterpretq_u64_s16",
+    "vreinterpretq_p8_s16",
+    "vreinterpretq_p8_s16",
+    "vreinterpretq_p16_s16",
+    "vreinterpretq_p16_s16",
+    "vreinterpret_f32_s32",
+    "vreinterpret_f32_s32",
+    "vreinterpret_s8_s32",
+    "vreinterpret_s8_s32",
+    "vreinterpret_s16_s32",
+    "vreinterpret_s16_s32",
+    "vreinterpret_s64_s32",
+    "vreinterpret_s64_s32",
+    "vreinterpret_u8_s32",
+    "vreinterpret_u8_s32",
+    "vreinterpret_u16_s32",
+    "vreinterpret_u16_s32",
+    "vreinterpret_u32_s32",
+    "vreinterpret_u32_s32",
+    "vreinterpret_u64_s32",
+    "vreinterpret_u64_s32",
+    "vreinterpret_p8_s32",
+    "vreinterpret_p8_s32",
+    "vreinterpret_p16_s32",
+    "vreinterpret_p16_s32",
+    "vreinterpretq_f32_s32",
+    "vreinterpretq_f32_s32",
+    "vreinterpretq_s8_s32",
+    "vreinterpretq_s8_s32",
+    "vreinterpretq_s16_s32",
+    "vreinterpretq_s16_s32",
+    "vreinterpretq_s64_s32",
+    "vreinterpretq_s64_s32",
+    "vreinterpretq_u8_s32",
+    "vreinterpretq_u8_s32",
+    "vreinterpretq_u16_s32",
+    "vreinterpretq_u16_s32",
+    "vreinterpretq_u32_s32",
+    "vreinterpretq_u32_s32",
+    "vreinterpretq_u64_s32",
+    "vreinterpretq_u64_s32",
+    "vreinterpretq_p8_s32",
+    "vreinterpretq_p8_s32",
+    "vreinterpretq_p16_s32",
+    "vreinterpretq_p16_s32",
+    "vreinterpret_f32_s64",
+    "vreinterpret_f32_s64",
+    "vreinterpret_s8_s64",
+    "vreinterpret_s8_s64",
+    "vreinterpret_s16_s64",
+    "vreinterpret_s16_s64",
+    "vreinterpret_s32_s64",
+    "vreinterpret_s32_s64",
+    "vreinterpret_u8_s64",
+    "vreinterpret_u8_s64",
+    "vreinterpret_u16_s64",
+    "vreinterpret_u16_s64",
+    "vreinterpret_u32_s64",
+    "vreinterpret_u32_s64",
+    "vreinterpret_u64_s64",
+    "vreinterpret_p8_s64",
+    "vreinterpret_p8_s64",
+    "vreinterpret_p16_s64",
+    "vreinterpret_p16_s64",
+    "vreinterpretq_f32_s64",
+    "vreinterpretq_f32_s64",
+    "vreinterpretq_s8_s64",
+    "vreinterpretq_s8_s64",
+    "vreinterpretq_s16_s64",
+    "vreinterpretq_s16_s64",
+    "vreinterpretq_s32_s64",
+    "vreinterpretq_s32_s64",
+    "vreinterpretq_u8_s64",
+    "vreinterpretq_u8_s64",
+    "vreinterpretq_u16_s64",
+    "vreinterpretq_u16_s64",
+    "vreinterpretq_u32_s64",
+    "vreinterpretq_u32_s64",
+    "vreinterpretq_u64_s64",
+    "vreinterpretq_u64_s64",
+    "vreinterpretq_p8_s64",
+    "vreinterpretq_p8_s64",
+    "vreinterpretq_p16_s64",
+    "vreinterpretq_p16_s64",
+    "vreinterpret_f32_u8",
+    "vreinterpret_f32_u8",
+    "vreinterpret_s8_u8",
+    "vreinterpret_s8_u8",
+    "vreinterpret_s16_u8",
+    "vreinterpret_s16_u8",
+    "vreinterpret_s32_u8",
+    "vreinterpret_s32_u8",
+    "vreinterpret_s64_u8",
+    "vreinterpret_s64_u8",
+    "vreinterpret_u16_u8",
+    "vreinterpret_u16_u8",
+    "vreinterpret_u32_u8",
+    "vreinterpret_u32_u8",
+    "vreinterpret_u64_u8",
+    "vreinterpret_u64_u8",
+    "vreinterpret_p8_u8",
+    "vreinterpret_p8_u8",
+    "vreinterpret_p16_u8",
+    "vreinterpret_p16_u8",
+    "vreinterpretq_f32_u8",
+    "vreinterpretq_f32_u8",
+    "vreinterpretq_s8_u8",
+    "vreinterpretq_s8_u8",
+    "vreinterpretq_s16_u8",
+    "vreinterpretq_s16_u8",
+    "vreinterpretq_s32_u8",
+    "vreinterpretq_s32_u8",
+    "vreinterpretq_s64_u8",
+    "vreinterpretq_s64_u8",
+    "vreinterpretq_u16_u8",
+    "vreinterpretq_u16_u8",
+    "vreinterpretq_u32_u8",
+    "vreinterpretq_u32_u8",
+    "vreinterpretq_u64_u8",
+    "vreinterpretq_u64_u8",
+    "vreinterpretq_p8_u8",
+    "vreinterpretq_p8_u8",
+    "vreinterpretq_p16_u8",
+    "vreinterpretq_p16_u8",
+    "vreinterpret_f32_u16",
+    "vreinterpret_f32_u16",
+    "vreinterpret_s8_u16",
+    "vreinterpret_s8_u16",
+    "vreinterpret_s16_u16",
+    "vreinterpret_s16_u16",
+    "vreinterpret_s32_u16",
+    "vreinterpret_s32_u16",
+    "vreinterpret_s64_u16",
+    "vreinterpret_s64_u16",
+    "vreinterpret_u8_u16",
+    "vreinterpret_u8_u16",
+    "vreinterpret_u32_u16",
+    "vreinterpret_u32_u16",
+    "vreinterpret_u64_u16",
+    "vreinterpret_u64_u16",
+    "vreinterpret_p8_u16",
+    "vreinterpret_p8_u16",
+    "vreinterpret_p16_u16",
+    "vreinterpret_p16_u16",
+    "vreinterpretq_f32_u16",
+    "vreinterpretq_f32_u16",
+    "vreinterpretq_s8_u16",
+    "vreinterpretq_s8_u16",
+    "vreinterpretq_s16_u16",
+    "vreinterpretq_s16_u16",
+    "vreinterpretq_s32_u16",
+    "vreinterpretq_s32_u16",
+    "vreinterpretq_s64_u16",
+    "vreinterpretq_s64_u16",
+    "vreinterpretq_u8_u16",
+    "vreinterpretq_u8_u16",
+    "vreinterpretq_u32_u16",
+    "vreinterpretq_u32_u16",
+    "vreinterpretq_u64_u16",
+    "vreinterpretq_u64_u16",
+    "vreinterpretq_p8_u16",
+    "vreinterpretq_p8_u16",
+    "vreinterpretq_p16_u16",
+    "vreinterpretq_p16_u16",
+    "vreinterpret_f32_u32",
+    "vreinterpret_f32_u32",
+    "vreinterpret_s8_u32",
+    "vreinterpret_s8_u32",
+    "vreinterpret_s16_u32",
+    "vreinterpret_s16_u32",
+    "vreinterpret_s32_u32",
+    "vreinterpret_s32_u32",
+    "vreinterpret_s64_u32",
+    "vreinterpret_s64_u32",
+    "vreinterpret_u8_u32",
+    "vreinterpret_u8_u32",
+    "vreinterpret_u16_u32",
+    "vreinterpret_u16_u32",
+    "vreinterpret_u64_u32",
+    "vreinterpret_u64_u32",
+    "vreinterpret_p8_u32",
+    "vreinterpret_p8_u32",
+    "vreinterpret_p16_u32",
+    "vreinterpret_p16_u32",
+    "vreinterpretq_f32_u32",
+    "vreinterpretq_f32_u32",
+    "vreinterpretq_s8_u32",
+    "vreinterpretq_s8_u32",
+    "vreinterpretq_s16_u32",
+    "vreinterpretq_s16_u32",
+    "vreinterpretq_s32_u32",
+    "vreinterpretq_s32_u32",
+    "vreinterpretq_s64_u32",
+    "vreinterpretq_s64_u32",
+    "vreinterpretq_u8_u32",
+    "vreinterpretq_u8_u32",
+    "vreinterpretq_u16_u32",
+    "vreinterpretq_u16_u32",
+    "vreinterpretq_u64_u32",
+    "vreinterpretq_u64_u32",
+    "vreinterpretq_p8_u32",
+    "vreinterpretq_p8_u32",
+    "vreinterpretq_p16_u32",
+    "vreinterpretq_p16_u32",
+    "vreinterpret_f32_u64",
+    "vreinterpret_f32_u64",
+    "vreinterpret_s8_u64",
+    "vreinterpret_s8_u64",
+    "vreinterpret_s16_u64",
+    "vreinterpret_s16_u64",
+    "vreinterpret_s32_u64",
+    "vreinterpret_s32_u64",
+    "vreinterpret_s64_u64",
+    "vreinterpret_u8_u64",
+    "vreinterpret_u8_u64",
+    "vreinterpret_u16_u64",
+    "vreinterpret_u16_u64",
+    "vreinterpret_u32_u64",
+    "vreinterpret_u32_u64",
+    "vreinterpret_p8_u64",
+    "vreinterpret_p8_u64",
+    "vreinterpret_p16_u64",
+    "vreinterpret_p16_u64",
+    "vreinterpretq_f32_u64",
+    "vreinterpretq_f32_u64",
+    "vreinterpretq_s8_u64",
+    "vreinterpretq_s8_u64",
+    "vreinterpretq_s16_u64",
+    "vreinterpretq_s16_u64",
+    "vreinterpretq_s32_u64",
+    "vreinterpretq_s32_u64",
+    "vreinterpretq_s64_u64",
+    "vreinterpretq_s64_u64",
+    "vreinterpretq_u8_u64",
+    "vreinterpretq_u8_u64",
+    "vreinterpretq_u16_u64",
+    "vreinterpretq_u16_u64",
+    "vreinterpretq_u32_u64",
+    "vreinterpretq_u32_u64",
+    "vreinterpretq_p8_u64",
+    "vreinterpretq_p8_u64",
+    "vreinterpretq_p16_u64",
+    "vreinterpretq_p16_u64",
+    "vreinterpret_f32_p8",
+    "vreinterpret_f32_p8",
+    "vreinterpret_s8_p8",
+    "vreinterpret_s8_p8",
+    "vreinterpret_s16_p8",
+    "vreinterpret_s16_p8",
+    "vreinterpret_s32_p8",
+    "vreinterpret_s32_p8",
+    "vreinterpret_s64_p8",
+    "vreinterpret_s64_p8",
+    "vreinterpret_u8_p8",
+    "vreinterpret_u8_p8",
+    "vreinterpret_u16_p8",
+    "vreinterpret_u16_p8",
+    "vreinterpret_u32_p8",
+    "vreinterpret_u32_p8",
+    "vreinterpret_u64_p8",
+    "vreinterpret_u64_p8",
+    "vreinterpret_p16_p8",
+    "vreinterpret_p16_p8",
+    "vreinterpretq_f32_p8",
+    "vreinterpretq_f32_p8",
+    "vreinterpretq_s8_p8",
+    "vreinterpretq_s8_p8",
+    "vreinterpretq_s16_p8",
+    "vreinterpretq_s16_p8",
+    "vreinterpretq_s32_p8",
+    "vreinterpretq_s32_p8",
+    "vreinterpretq_s64_p8",
+    "vreinterpretq_s64_p8",
+    "vreinterpretq_u8_p8",
+    "vreinterpretq_u8_p8",
+    "vreinterpretq_u16_p8",
+    "vreinterpretq_u16_p8",
+    "vreinterpretq_u32_p8",
+    "vreinterpretq_u32_p8",
+    "vreinterpretq_u64_p8",
+    "vreinterpretq_u64_p8",
+    "vreinterpretq_p16_p8",
+    "vreinterpretq_p16_p8",
+    "vreinterpret_f32_p16",
+    "vreinterpret_f32_p16",
+    "vreinterpret_s8_p16",
+    "vreinterpret_s8_p16",
+    "vreinterpret_s16_p16",
+    "vreinterpret_s16_p16",
+    "vreinterpret_s32_p16",
+    "vreinterpret_s32_p16",
+    "vreinterpret_s64_p16",
+    "vreinterpret_s64_p16",
+    "vreinterpret_u8_p16",
+    "vreinterpret_u8_p16",
+    "vreinterpret_u16_p16",
+    "vreinterpret_u16_p16",
+    "vreinterpret_u32_p16",
+    "vreinterpret_u32_p16",
+    "vreinterpret_u64_p16",
+    "vreinterpret_u64_p16",
+    "vreinterpret_p8_p16",
+    "vreinterpret_p8_p16",
+    "vreinterpretq_f32_p16",
+    "vreinterpretq_f32_p16",
+    "vreinterpretq_s8_p16",
+    "vreinterpretq_s8_p16",
+    "vreinterpretq_s16_p16",
+    "vreinterpretq_s16_p16",
+    "vreinterpretq_s32_p16",
+    "vreinterpretq_s32_p16",
+    "vreinterpretq_s64_p16",
+    "vreinterpretq_s64_p16",
+    "vreinterpretq_u8_p16",
+    "vreinterpretq_u8_p16",
+    "vreinterpretq_u16_p16",
+    "vreinterpretq_u16_p16",
+    "vreinterpretq_u32_p16",
+    "vreinterpretq_u32_p16",
+    "vreinterpretq_u64_p16",
+    "vreinterpretq_u64_p16",
+    "vreinterpretq_p8_p16",
+    "vreinterpretq_p8_p16",
+    "vreinterpretq_s8_p128",
+    "vreinterpretq_s8_p128",
+    "vreinterpretq_s16_p128",
+    "vreinterpretq_s16_p128",
+    "vreinterpretq_s32_p128",
+    "vreinterpretq_s32_p128",
+    "vreinterpretq_s64_p128",
+    "vreinterpretq_s64_p128",
+    "vreinterpretq_u8_p128",
+    "vreinterpretq_u8_p128",
+    "vreinterpretq_u16_p128",
+    "vreinterpretq_u16_p128",
+    "vreinterpretq_u32_p128",
+    "vreinterpretq_u32_p128",
+    "vreinterpretq_u64_p128",
+    "vreinterpretq_u64_p128",
+    "vreinterpretq_p8_p128",
+    "vreinterpretq_p8_p128",
+    "vreinterpretq_p16_p128",
+    "vreinterpretq_p16_p128",
+    "vreinterpretq_p64_p128",
+    "vreinterpretq_p64_p128",
+    "vreinterpret_p64_s8",
+    "vreinterpret_p64_s8",
+    "vreinterpretq_p128_s8",
+    "vreinterpretq_p128_s8",
+    "vreinterpretq_p64_s8",
+    "vreinterpretq_p64_s8",
+    "vreinterpret_p64_s16",
+    "vreinterpret_p64_s16",
+    "vreinterpretq_p128_s16",
+    "vreinterpretq_p128_s16",
+    "vreinterpretq_p64_s16",
+    "vreinterpretq_p64_s16",
+    "vreinterpret_p64_s32",
+    "vreinterpret_p64_s32",
+    "vreinterpretq_p128_s32",
+    "vreinterpretq_p128_s32",
+    "vreinterpretq_p64_s32",
+    "vreinterpretq_p64_s32",
+    "vreinterpretq_p128_s64",
+    "vreinterpretq_p128_s64",
+    "vreinterpret_p64_u8",
+    "vreinterpret_p64_u8",
+    "vreinterpretq_p128_u8",
+    "vreinterpretq_p128_u8",
+    "vreinterpretq_p64_u8",
+    "vreinterpretq_p64_u8",
+    "vreinterpret_p64_u16",
+    "vreinterpret_p64_u16",
+    "vreinterpretq_p128_u16",
+    "vreinterpretq_p128_u16",
+    "vreinterpretq_p64_u16",
+    "vreinterpretq_p64_u16",
+    "vreinterpret_p64_u32",
+    "vreinterpret_p64_u32",
+    "vreinterpretq_p128_u32",
+    "vreinterpretq_p128_u32",
+    "vreinterpretq_p64_u32",
+    "vreinterpretq_p64_u32",
+    "vreinterpretq_p128_u64",
+    "vreinterpretq_p128_u64",
+    "vreinterpret_p64_p8",
+    "vreinterpret_p64_p8",
+    "vreinterpretq_p128_p8",
+    "vreinterpretq_p128_p8",
+    "vreinterpretq_p64_p8",
+    "vreinterpretq_p64_p8",
+    "vreinterpret_p64_p16",
+    "vreinterpret_p64_p16",
+    "vreinterpretq_p128_p16",
+    "vreinterpretq_p128_p16",
+    "vreinterpretq_p64_p16",
+    "vreinterpretq_p64_p16",
+    "vreinterpret_s8_p64",
+    "vreinterpret_s8_p64",
+    "vreinterpret_s16_p64",
+    "vreinterpret_s16_p64",
+    "vreinterpret_s32_p64",
+    "vreinterpret_s32_p64",
+    "vreinterpret_u8_p64",
+    "vreinterpret_u8_p64",
+    "vreinterpret_u16_p64",
+    "vreinterpret_u16_p64",
+    "vreinterpret_u32_p64",
+    "vreinterpret_u32_p64",
+    "vreinterpret_p8_p64",
+    "vreinterpret_p8_p64",
+    "vreinterpret_p16_p64",
+    "vreinterpret_p16_p64",
+    "vreinterpretq_p128_p64",
+    "vreinterpretq_p128_p64",
+    "vreinterpretq_s8_p64",
+    "vreinterpretq_s8_p64",
+    "vreinterpretq_s16_p64",
+    "vreinterpretq_s16_p64",
+    "vreinterpretq_s32_p64",
+    "vreinterpretq_s32_p64",
+    "vreinterpretq_u8_p64",
+    "vreinterpretq_u8_p64",
+    "vreinterpretq_u16_p64",
+    "vreinterpretq_u16_p64",
+    "vreinterpretq_u32_p64",
+    "vreinterpretq_u32_p64",
+    "vreinterpretq_p8_p64",
+    "vreinterpretq_p8_p64",
+    "vreinterpretq_p16_p64",
+    "vreinterpretq_p16_p64",
+    "vrev64_f16",
+    "vrev64q_f16",
+    "vrndn_f16",
+    "vrndnq_f16",
+    "vrndn_f32",
+    "vrndnq_f32",
+    "vrshl_s8",
+    "vrshlq_s8",
+    "vrshl_s16",
+    "vrshlq_s16",
+    "vrshl_s32",
+    "vrshlq_s32",
+    "vrshl_s64",
+    "vrshlq_s64",
+    "vrshl_u8",
+    "vrshlq_u8",
+    "vrshl_u16",
+    "vrshlq_u16",
+    "vrshl_u32",
+    "vrshlq_u32",
+    "vrshl_u64",
+    "vrshlq_u64",
+    "vrshr_n_s8",
+    "vrshrq_n_s8",
+    "vrshr_n_s16",
+    "vrshrq_n_s16",
+    "vrshr_n_s32",
+    "vrshrq_n_s32",
+    "vrshr_n_s64",
+    "vrshrq_n_s64",
+    "vrshr_n_u8",
+    "vrshrq_n_u8",
+    "vrshr_n_u16",
+    "vrshrq_n_u16",
+    "vrshr_n_u32",
+    "vrshrq_n_u32",
+    "vrshr_n_u64",
+    "vrshrq_n_u64",
+    "vrshrn_n_s16",
+    "vrshrn_n_s32",
+    "vrshrn_n_s64",
+    "vrshrn_n_s16",
+    "vrshrn_n_s32",
+    "vrshrn_n_s64",
+    "vrshrn_n_u16",
+    "vrshrn_n_u32",
+    "vrshrn_n_u64",
+    "vrsqrte_f16",
+    "vrsqrteq_f16",
+    "vrsqrteq_f32",
+    "vrsqrte_u32",
+    "vrsqrteq_u32",
+    "vrsqrts_f16",
+    "vrsqrtsq_f16",
+    "vrsqrts_f32",
+    "vrsqrtsq_f32",
+    "vrsra_n_s8",
+    "vrsraq_n_s8",
+    "vrsra_n_s16",
+    "vrsraq_n_s16",
+    "vrsra_n_s32",
+    "vrsraq_n_s32",
+    "vrsra_n_s64",
+    "vrsraq_n_s64",
+    "vrsra_n_u8",
+    "vrsraq_n_u8",
+    "vrsra_n_u16",
+    "vrsraq_n_u16",
+    "vrsra_n_u32",
+    "vrsraq_n_u32",
+    "vrsra_n_u64",
+    "vrsraq_n_u64",
+    "vrsubhn_s16",
+    "vrsubhn_s32",
+    "vrsubhn_s64",
+    "vrsubhn_u16",
+    "vrsubhn_u16",
+    "vrsubhn_u32",
+    "vrsubhn_u32",
+    "vrsubhn_u64",
+    "vrsubhn_u64",
+    "vset_lane_f16",
+    "vsetq_lane_f16",
+    "vset_lane_f32",
+    "vsetq_lane_f32",
+    "vset_lane_s8",
+    "vsetq_lane_s8",
+    "vset_lane_s16",
+    "vsetq_lane_s16",
+    "vset_lane_s32",
+    "vsetq_lane_s32",
+    "vsetq_lane_s64",
+    "vset_lane_u8",
+    "vsetq_lane_u8",
+    "vset_lane_u16",
+    "vsetq_lane_u16",
+    "vset_lane_u32",
+    "vsetq_lane_u32",
+    "vsetq_lane_u64",
+    "vset_lane_p8",
+    "vsetq_lane_p8",
+    "vset_lane_p16",
+    "vsetq_lane_p16",
+    "vset_lane_p64",
+    "vset_lane_s64",
+    "vset_lane_u64",
+    "vsetq_lane_p64",
+    "vsha1cq_u32",
+    "vsha1h_u32",
+    "vsha1mq_u32",
+    "vsha1pq_u32",
+    "vsha1su0q_u32",
+    "vsha1su1q_u32",
+    "vsha256h2q_u32",
+    "vsha256hq_u32",
+    "vsha256su0q_u32",
+    "vsha256su1q_u32",
+    "vshl_n_s8",
+    "vshlq_n_s8",
+    "vshl_n_s16",
+    "vshlq_n_s16",
+    "vshl_n_s32",
+    "vshlq_n_s32",
+    "vshl_n_s64",
+    "vshlq_n_s64",
+    "vshl_n_u8",
+    "vshlq_n_u8",
+    "vshl_n_u16",
+    "vshlq_n_u16",
+    "vshl_n_u32",
+    "vshlq_n_u32",
+    "vshl_n_u64",
+    "vshlq_n_u64",
+    "vshl_s8",
+    "vshlq_s8",
+    "vshl_s16",
+    "vshlq_s16",
+    "vshl_s32",
+    "vshlq_s32",
+    "vshl_s64",
+    "vshlq_s64",
+    "vshl_u8",
+    "vshlq_u8",
+    "vshl_u16",
+    "vshlq_u16",
+    "vshl_u32",
+    "vshlq_u32",
+    "vshl_u64",
+    "vshlq_u64",
+    "vshll_n_s16",
+    "vshll_n_s32",
+    "vshll_n_s8",
+    "vshll_n_u16",
+    "vshll_n_u32",
+    "vshll_n_u8",
+    "vshr_n_s8",
+    "vshrq_n_s8",
+    "vshr_n_s16",
+    "vshrq_n_s16",
+    "vshr_n_s32",
+    "vshrq_n_s32",
+    "vshr_n_s64",
+    "vshrq_n_s64",
+    "vshr_n_u8",
+    "vshrq_n_u8",
+    "vshr_n_u16",
+    "vshrq_n_u16",
+    "vshr_n_u32",
+    "vshrq_n_u32",
+    "vshr_n_u64",
+    "vshrq_n_u64",
+    "vshrn_n_s16",
+    "vshrn_n_s32",
+    "vshrn_n_s64",
+    "vshrn_n_u16",
+    "vshrn_n_u32",
+    "vshrn_n_u64",
+    "vsra_n_s8",
+    "vsraq_n_s8",
+    "vsra_n_s16",
+    "vsraq_n_s16",
+    "vsra_n_s32",
+    "vsraq_n_s32",
+    "vsra_n_s64",
+    "vsraq_n_s64",
+    "vsra_n_u8",
+    "vsraq_n_u8",
+    "vsra_n_u16",
+    "vsraq_n_u16",
+    "vsra_n_u32",
+    "vsraq_n_u32",
+    "vsra_n_u64",
+    "vsraq_n_u64",
+    "vst1_f16",
+    "vst1q_f16",
+    "vst1_f16_x2",
+    "vst1q_f16_x2",
+    "vst1_f16_x2",
+    "vst1q_f16_x2",
+    "vst1_f16_x3",
+    "vst1q_f16_x3",
+    "vst1_f16_x3",
+    "vst1q_f16_x3",
+    "vst1_f16_x4",
+    "vst1q_f16_x4",
+    "vst1_f16_x4",
+    "vst1q_f16_x4",
+    "vst1_f32_x2",
+    "vst1q_f32_x2",
+    "vst1_f32_x2",
+    "vst1q_f32_x2",
+    "vst1_f32_x3",
+    "vst1q_f32_x3",
+    "vst1_f32_x4",
+    "vst1q_f32_x4",
+    "vst1_f32_x4",
+    "vst1q_f32_x4",
+    "vst1_lane_f16",
+    "vst1q_lane_f16",
+    "vst1_lane_f32",
+    "vst1q_lane_f32",
+    "vst1_lane_s8",
+    "vst1q_lane_s8",
+    "vst1_lane_s16",
+    "vst1q_lane_s16",
+    "vst1_lane_s32",
+    "vst1q_lane_s32",
+    "vst1q_lane_s64",
+    "vst1_lane_u8",
+    "vst1q_lane_u8",
+    "vst1_lane_u16",
+    "vst1q_lane_u16",
+    "vst1_lane_u32",
+    "vst1q_lane_u32",
+    "vst1q_lane_u64",
+    "vst1_lane_p8",
+    "vst1q_lane_p8",
+    "vst1_lane_p16",
+    "vst1q_lane_p16",
+    "vst1_lane_p64",
+    "vst1_lane_s64",
+    "vst1_lane_u64",
+    "vst1_p64_x2",
+    "vst1_p64_x3",
+    "vst1_p64_x4",
+    "vst1q_p64_x2",
+    "vst1q_p64_x3",
+    "vst1q_p64_x4",
+    "vst1_s8_x2",
+    "vst1q_s8_x2",
+    "vst1_s16_x2",
+    "vst1q_s16_x2",
+    "vst1_s32_x2",
+    "vst1q_s32_x2",
+    "vst1_s64_x2",
+    "vst1q_s64_x2",
+    "vst1_s8_x2",
+    "vst1q_s8_x2",
+    "vst1_s16_x2",
+    "vst1q_s16_x2",
+    "vst1_s32_x2",
+    "vst1q_s32_x2",
+    "vst1_s64_x2",
+    "vst1q_s64_x2",
+    "vst1_s8_x3",
+    "vst1q_s8_x3",
+    "vst1_s16_x3",
+    "vst1q_s16_x3",
+    "vst1_s32_x3",
+    "vst1q_s32_x3",
+    "vst1_s64_x3",
+    "vst1q_s64_x3",
+    "vst1_s8_x3",
+    "vst1q_s8_x3",
+    "vst1_s16_x3",
+    "vst1q_s16_x3",
+    "vst1_s32_x3",
+    "vst1q_s32_x3",
+    "vst1_s64_x3",
+    "vst1q_s64_x3",
+    "vst1_s8_x4",
+    "vst1q_s8_x4",
+    "vst1_s16_x4",
+    "vst1q_s16_x4",
+    "vst1_s32_x4",
+    "vst1q_s32_x4",
+    "vst1_s64_x4",
+    "vst1q_s64_x4",
+    "vst1_s8_x4",
+    "vst1q_s8_x4",
+    "vst1_s16_x4",
+    "vst1q_s16_x4",
+    "vst1_s32_x4",
+    "vst1q_s32_x4",
+    "vst1_s64_x4",
+    "vst1q_s64_x4",
+    "vst1_u8_x2",
+    "vst1_u8_x3",
+    "vst1_u8_x4",
+    "vst1q_u8_x2",
+    "vst1q_u8_x3",
+    "vst1q_u8_x4",
+    "vst1_u16_x2",
+    "vst1_u16_x3",
+    "vst1_u16_x4",
+    "vst1q_u16_x2",
+    "vst1q_u16_x3",
+    "vst1q_u16_x4",
+    "vst1_u32_x2",
+    "vst1_u32_x3",
+    "vst1_u32_x4",
+    "vst1q_u32_x2",
+    "vst1q_u32_x3",
+    "vst1q_u32_x4",
+    "vst1_u64_x2",
+    "vst1_u64_x3",
+    "vst1_u64_x4",
+    "vst1q_u64_x2",
+    "vst1q_u64_x3",
+    "vst1q_u64_x4",
+    "vst1_p8_x2",
+    "vst1_p8_x3",
+    "vst1_p8_x4",
+    "vst1q_p8_x2",
+    "vst1q_p8_x3",
+    "vst1q_p8_x4",
+    "vst1_p16_x2",
+    "vst1_p16_x3",
+    "vst1_p16_x4",
+    "vst1q_p16_x2",
+    "vst1q_p16_x3",
+    "vst1q_p16_x4",
+    "vst1q_lane_p64",
+    "vst2_f16",
+    "vst2q_f16",
+    "vst2_f16",
+    "vst2q_f16",
+    "vst2_f32",
+    "vst2q_f32",
+    "vst2_s8",
+    "vst2q_s8",
+    "vst2_s16",
+    "vst2q_s16",
+    "vst2_s32",
+    "vst2q_s32",
+    "vst2_f32",
+    "vst2q_f32",
+    "vst2_s8",
+    "vst2q_s8",
+    "vst2_s16",
+    "vst2q_s16",
+    "vst2_s32",
+    "vst2q_s32",
+    "vst2_lane_f16",
+    "vst2q_lane_f16",
+    "vst2_lane_f16",
+    "vst2q_lane_f16",
+    "vst2_lane_f32",
+    "vst2q_lane_f32",
+    "vst2_lane_s8",
+    "vst2_lane_s16",
+    "vst2q_lane_s16",
+    "vst2_lane_s32",
+    "vst2q_lane_s32",
+    "vst2_lane_f32",
+    "vst2q_lane_f32",
+    "vst2_lane_s8",
+    "vst2_lane_s16",
+    "vst2q_lane_s16",
+    "vst2_lane_s32",
+    "vst2q_lane_s32",
+    "vst2_lane_u8",
+    "vst2_lane_u16",
+    "vst2q_lane_u16",
+    "vst2_lane_u32",
+    "vst2q_lane_u32",
+    "vst2_lane_p8",
+    "vst2_lane_p16",
+    "vst2q_lane_p16",
+    "vst2_p64",
+    "vst2_s64",
+    "vst2_s64",
+    "vst2_u64",
+    "vst2_u8",
+    "vst2q_u8",
+    "vst2_u16",
+    "vst2q_u16",
+    "vst2_u32",
+    "vst2q_u32",
+    "vst2_p8",
+    "vst2q_p8",
+    "vst2_p16",
+    "vst2q_p16",
+    "vst3_f16",
+    "vst3q_f16",
+    "vst3_f16",
+    "vst3q_f16",
+    "vst3_f32",
+    "vst3q_f32",
+    "vst3_s8",
+    "vst3q_s8",
+    "vst3_s16",
+    "vst3q_s16",
+    "vst3_s32",
+    "vst3q_s32",
+    "vst3_f32",
+    "vst3q_f32",
+    "vst3_s8",
+    "vst3q_s8",
+    "vst3_s16",
+    "vst3q_s16",
+    "vst3_s32",
+    "vst3q_s32",
+    "vst3_lane_f16",
+    "vst3q_lane_f16",
+    "vst3_lane_f16",
+    "vst3q_lane_f16",
+    "vst3_lane_f32",
+    "vst3q_lane_f32",
+    "vst3_lane_s8",
+    "vst3_lane_s16",
+    "vst3q_lane_s16",
+    "vst3_lane_s32",
+    "vst3q_lane_s32",
+    "vst3_lane_f32",
+    "vst3q_lane_f32",
+    "vst3_lane_s8",
+    "vst3_lane_s16",
+    "vst3q_lane_s16",
+    "vst3_lane_s32",
+    "vst3q_lane_s32",
+    "vst3_lane_u8",
+    "vst3_lane_u16",
+    "vst3q_lane_u16",
+    "vst3_lane_u32",
+    "vst3q_lane_u32",
+    "vst3_lane_p8",
+    "vst3_lane_p16",
+    "vst3q_lane_p16",
+    "vst3_p64",
+    "vst3_s64",
+    "vst3_s64",
+    "vst3_u64",
+    "vst3_u8",
+    "vst3q_u8",
+    "vst3_u16",
+    "vst3q_u16",
+    "vst3_u32",
+    "vst3q_u32",
+    "vst3_p8",
+    "vst3q_p8",
+    "vst3_p16",
+    "vst3q_p16",
+    "vst4_f16",
+    "vst4q_f16",
+    "vst4_f16",
+    "vst4q_f16",
+    "vst4_f32",
+    "vst4q_f32",
+    "vst4_s8",
+    "vst4q_s8",
+    "vst4_s16",
+    "vst4q_s16",
+    "vst4_s32",
+    "vst4q_s32",
+    "vst4_f32",
+    "vst4q_f32",
+    "vst4_s8",
+    "vst4q_s8",
+    "vst4_s16",
+    "vst4q_s16",
+    "vst4_s32",
+    "vst4q_s32",
+    "vst4_lane_f16",
+    "vst4q_lane_f16",
+    "vst4_lane_f16",
+    "vst4q_lane_f16",
+    "vst4_lane_f32",
+    "vst4q_lane_f32",
+    "vst4_lane_s8",
+    "vst4_lane_s16",
+    "vst4q_lane_s16",
+    "vst4_lane_s32",
+    "vst4q_lane_s32",
+    "vst4_lane_f32",
+    "vst4q_lane_f32",
+    "vst4_lane_s8",
+    "vst4_lane_s16",
+    "vst4q_lane_s16",
+    "vst4_lane_s32",
+    "vst4q_lane_s32",
+    "vst4_lane_u8",
+    "vst4_lane_u16",
+    "vst4q_lane_u16",
+    "vst4_lane_u32",
+    "vst4q_lane_u32",
+    "vst4_lane_p8",
+    "vst4_lane_p16",
+    "vst4q_lane_p16",
+    "vst4_p64",
+    "vst4_s64",
+    "vst4_s64",
+    "vst4_u64",
+    "vst4_u8",
+    "vst4q_u8",
+    "vst4_u16",
+    "vst4q_u16",
+    "vst4_u32",
+    "vst4q_u32",
+    "vst4_p8",
+    "vst4q_p8",
+    "vst4_p16",
+    "vst4q_p16",
+    "vsub_f16",
+    "vsubq_f16",
+    "vsub_s64",
+    "vsubq_s64",
+    "vsub_u64",
+    "vsubq_u64",
+    "vsubhn_high_s16",
+    "vsubhn_high_s32",
+    "vsubhn_high_s64",
+    "vsubhn_high_u16",
+    "vsubhn_high_u32",
+    "vsubhn_high_u64",
+    "vsubhn_s16",
+    "vsubhn_s32",
+    "vsubhn_s64",
+    "vsubhn_u16",
+    "vsubhn_u32",
+    "vsubhn_u64",
+    "vsubl_s8",
+    "vsubl_s16",
+    "vsubl_s32",
+    "vsubl_u8",
+    "vsubl_u16",
+    "vsubl_u32",
+    "vsubw_s8",
+    "vsubw_s16",
+    "vsubw_s32",
+    "vsubw_u8",
+    "vsubw_u16",
+    "vsubw_u32",
+    "vsudot_lane_s32",
+    "vsudot_lane_s32",
+    "vsudotq_lane_s32",
+    "vsudotq_lane_s32",
+    "vsudot_laneq_s32",
+    "vsudotq_laneq_s32",
+    "vtrn_f16",
+    "vtrnq_f16",
+    "vtrn_f32",
+    "vtrn_s32",
+    "vtrn_u32",
+    "vtrnq_f32",
+    "vtrn_s8",
+    "vtrnq_s8",
+    "vtrn_s16",
+    "vtrnq_s16",
+    "vtrnq_s32",
+    "vtrn_u8",
+    "vtrnq_u8",
+    "vtrn_u16",
+    "vtrnq_u16",
+    "vtrnq_u32",
+    "vtrn_p8",
+    "vtrnq_p8",
+    "vtrn_p16",
+    "vtrnq_p16",
+    "vtst_s8",
+    "vtstq_s8",
+    "vtst_s16",
+    "vtstq_s16",
+    "vtst_s32",
+    "vtstq_s32",
+    "vtst_p8",
+    "vtstq_p8",
+    "vtst_p16",
+    "vtstq_p16",
+    "vtst_u8",
+    "vtstq_u8",
+    "vtst_u16",
+    "vtstq_u16",
+    "vtst_u32",
+    "vtstq_u32",
+    "vusdot_lane_s32",
+    "vusdot_lane_s32",
+    "vusdotq_lane_s32",
+    "vusdotq_lane_s32",
+    "vusdot_laneq_s32",
+    "vusdot_laneq_s32",
+    "vusdotq_laneq_s32",
+    "vusdotq_laneq_s32",
+    "vusdot_s32",
+    "vusdotq_s32",
+    "vusmmlaq_s32",
+    "vuzp_f16",
+    "vuzpq_f16",
+    "vuzp_f32",
+    "vuzp_s32",
+    "vuzp_u32",
+    "vuzpq_f32",
+    "vuzp_s8",
+    "vuzpq_s8",
+    "vuzp_s16",
+    "vuzpq_s16",
+    "vuzpq_s32",
+    "vuzp_u8",
+    "vuzpq_u8",
+    "vuzp_u16",
+    "vuzpq_u16",
+    "vuzpq_u32",
+    "vuzp_p8",
+    "vuzpq_p8",
+    "vuzp_p16",
+    "vuzpq_p16",
+    "vzip_f16",
+    "vzipq_f16",
+    "vzip_f32",
+    "vzip_s32",
+    "vzip_u32",
+    "vzip_s8",
+    "vzip_s16",
+    "vzip_u8",
+    "vzip_u16",
+    "vzip_p8",
+    "vzip_p16",
+    "vzipq_f32",
+    "vzipq_s8",
+    "vzipq_s16",
+    "vzipq_s32",
+    "vzipq_u8",
+    "vzipq_u16",
+    "vzipq_u32",
+    "vzipq_p8",
+    "vzipq_p16",
+    "__rndr",
+    "__rndrrs",
+    "vcopy_laneq_f64",
+    "vcopy_laneq_f64",
+    "vcopy_laneq_s64",
+    "vcopy_laneq_s64",
+    "vcopy_laneq_u64",
+    "vcopy_laneq_u64",
+    "vcopy_laneq_p64",
+    "vcopy_laneq_p64",
+    "vget_high_f64",
+    "vget_high_f64",
+    "vget_high_p64",
+    "vget_high_p64",
+    "vget_low_f64",
+    "vget_low_f64",
+    "vget_low_p64",
+    "vget_low_p64",
+    "vgetq_lane_f64",
+    "vgetq_lane_f64",
+    "vaddl_high_s16",
+    "vaddl_high_s16",
+    "vaddl_high_s32",
+    "vaddl_high_s32",
+    "vaddl_high_s8",
+    "vaddl_high_s8",
+    "vaddl_high_u16",
+    "vaddl_high_u16",
+    "vaddl_high_u32",
+    "vaddl_high_u32",
+    "vaddl_high_u8",
+    "vaddl_high_u8",
+    "vget_high_f32",
+    "vget_high_f32",
+    "vget_high_p16",
+    "vget_high_p16",
+    "vget_high_p8",
+    "vget_high_p8",
+    "vget_high_s16",
+    "vget_high_s16",
+    "vget_high_s32",
+    "vget_high_s32",
+    "vget_high_s8",
+    "vget_high_s8",
+    "vget_high_u16",
+    "vget_high_u16",
+    "vget_high_u32",
+    "vget_high_u32",
+    "vget_high_u8",
+    "vget_high_u8",
+    "vget_high_s64",
+    "vget_high_s64",
+    "vget_high_u64",
+    "vget_high_u64",
+    "vget_lane_f32",
+    "vget_lane_f32",
+    "vget_lane_p16",
+    "vget_lane_p16",
+    "vget_lane_p8",
+    "vget_lane_p8",
+    "vget_lane_s16",
+    "vget_lane_s16",
+    "vget_lane_s32",
+    "vget_lane_s32",
+    "vget_lane_s8",
+    "vget_lane_s8",
+    "vget_lane_u16",
+    "vget_lane_u16",
+    "vget_lane_u32",
+    "vget_lane_u32",
+    "vget_lane_u8",
+    "vget_lane_u8",
+    "vgetq_lane_f32",
+    "vgetq_lane_f32",
+    "vgetq_lane_p16",
+    "vgetq_lane_p16",
+    "vgetq_lane_p64",
+    "vgetq_lane_p64",
+    "vgetq_lane_p8",
+    "vgetq_lane_p8",
+    "vgetq_lane_s16",
+    "vgetq_lane_s16",
+    "vgetq_lane_s32",
+    "vgetq_lane_s32",
+    "vgetq_lane_s64",
+    "vgetq_lane_s64",
+    "vgetq_lane_s8",
+    "vgetq_lane_s8",
+    "vgetq_lane_u16",
+    "vgetq_lane_u16",
+    "vgetq_lane_u32",
+    "vgetq_lane_u32",
+    "vgetq_lane_u8",
+    "vgetq_lane_u8",
+    "vget_lane_p64",
+    "vget_lane_s64",
+    "vget_lane_u64",
+    "vget_low_f32",
+    "vget_low_f32",
+    "vget_low_p16",
+    "vget_low_p16",
+    "vget_low_p8",
+    "vget_low_p8",
+    "vget_low_s16",
+    "vget_low_s16",
+    "vget_low_s32",
+    "vget_low_s32",
+    "vget_low_s8",
+    "vget_low_s8",
+    "vget_low_u16",
+    "vget_low_u16",
+    "vget_low_u32",
+    "vget_low_u32",
+    "vget_low_u8",
+    "vget_low_u8",
+    "vget_low_s64",
+    "vget_low_s64",
+    "vget_low_u64",
+    "vget_low_u64",
+    "vaddw_high_s16",
+    "vaddw_high_s32",
+    "vaddw_high_s8",
+    "vaddw_high_u16",
+    "vaddw_high_u32",
+    "vaddw_high_u8",
+    "vgetq_lane_u64",
+];
diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs
index 5a98db980b..024a873de1 100644
--- a/crates/stdarch-verify/tests/x86-intel.rs
+++ b/crates/stdarch-verify/tests/x86-intel.rs
@@ -211,9 +211,10 @@ fn verify_all_signatures() {
                 "_rdseed64_step",
                 // Prefetch
                 "_mm_prefetch",
+                "_m_prefetchrs",
                 // CMPXCHG
                 "cmpxchg16b",
-                // Undefined
+                // Undefined,
                 "_mm_undefined_ps",
                 "_mm_undefined_pd",
                 "_mm_undefined_si128",
@@ -246,13 +247,9 @@ fn verify_all_signatures() {
                 "_xend",
                 "_xabort_code",
                 // Aliases
-                "_mm_comige_ss",
                 "_mm_cvt_ss2si",
                 "_mm_cvtt_ss2si",
                 "_mm_cvt_si2ss",
-                "_mm_set_ps1",
-                "_mm_load_ps1",
-                "_mm_store_ps1",
                 "_mm_bslli_si128",
                 "_mm_bsrli_si128",
                 "_bextr2_u32",
@@ -304,6 +301,14 @@ fn verify_all_signatures() {
             if feature.contains("sse4a") || feature.contains("tbm") {
                 continue;
             }
+
+            // FIXME: these have not been added to Intrinsics Guide yet
+            if ["amx-avx512", "amx-fp8", "amx-movrs", "amx-tf32", "movrs"]
+                .iter()
+                .any(|f| feature.contains(f))
+            {
+                continue;
+            }
         }
 
         let intel = match map.remove(rust.name) {
diff --git a/examples/Cargo.toml b/examples/Cargo.toml
index 61451edee8..c4fc4c7e37 100644
--- a/examples/Cargo.toml
+++ b/examples/Cargo.toml
@@ -23,6 +23,11 @@ path = "hex.rs"
 name = "connect5"
 path = "connect5.rs"
 
+# Hexagon-only: requires --target hexagon-unknown-linux-musl
+[[bin]]
+name = "gaussian"
+path = "gaussian.rs"
+
 [[example]]
 name = "wasm"
 crate-type = ["cdylib"]
diff --git a/examples/gaussian.rs b/examples/gaussian.rs
new file mode 100644
index 0000000000..f41f753986
--- /dev/null
+++ b/examples/gaussian.rs
@@ -0,0 +1,352 @@
+//! Hexagon HVX Gaussian 3x3 blur example
+//!
+//! This example demonstrates the use of Hexagon HVX intrinsics to implement
+//! a 3x3 Gaussian blur filter on unsigned 8-bit images.
+//!
+//! The 3x3 Gaussian kernel is:
+//!     1 2 1
+//!     2 4 2  / 16
+//!     1 2 1
+//!
+//! This is a separable filter: `[1 2 1]^T * [1 2 1] / 16`.
+//!
+//! On Hexagon targets, this implementation uses `HvxVectorPair` for widening
+//! arithmetic to achieve full precision in the Gaussian computation, avoiding
+//! the approximation errors of byte-averaging approaches. On other targets,
+//! it runs a reference implementation in pure Rust.
+//!
+//! # Building and Running (Hexagon)
+//!
+//! To build (requires Hexagon toolchain):
+//!
+//!     RUSTFLAGS="-C target-feature=+hvxv62,+hvx-length128b \
+//!         -C linker=hexagon-unknown-linux-musl-clang" \
+//!         cargo +nightly build -p stdarch_examples --bin gaussian \
+//!         --target hexagon-unknown-linux-musl \
+//!         -Zbuild-std -Zbuild-std-features=llvm-libunwind
+//!
+//! To run under QEMU:
+//!
+//!     qemu-hexagon -L <sysroot>/target/hexagon-unknown-linux-musl \
+//!         target/hexagon-unknown-linux-musl/debug/gaussian
+//!
+//! # Building and Running (Other targets)
+//!
+//!     cargo +nightly run -p stdarch_examples --bin gaussian
+
+#![cfg_attr(target_arch = "hexagon", feature(stdarch_hexagon))]
+#![cfg_attr(target_arch = "hexagon", feature(hexagon_target_feature))]
+#![allow(
+    unsafe_op_in_unsafe_fn,
+    clippy::unwrap_used,
+    clippy::print_stdout,
+    clippy::missing_docs_in_private_items,
+    clippy::cast_possible_wrap,
+    clippy::cast_ptr_alignment
+)]
+
+/// Image width - must be multiple of HVX vector length on Hexagon
+const WIDTH: usize = 256;
+const HEIGHT: usize = 16;
+
+// ============================================================================
+// Hexagon HVX implementation
+// ============================================================================
+
+#[cfg(target_arch = "hexagon")]
+mod hvx {
+    #[cfg(not(target_feature = "hvx-length128b"))]
+    use core_arch::arch::hexagon::v64::*;
+    #[cfg(target_feature = "hvx-length128b")]
+    use core_arch::arch::hexagon::v128::*;
+
+    /// Vector length in bytes for HVX 128-byte mode
+    #[cfg(target_feature = "hvx-length128b")]
+    const VLEN: usize = 128;
+
+    /// Vector length in bytes for HVX 64-byte mode
+    #[cfg(not(target_feature = "hvx-length128b"))]
+    const VLEN: usize = 64;
+
+    /// Vertical 1-2-1 filter pass using HvxVectorPair widening arithmetic
+    ///
+    /// Computes: dst[x] = (row_above[x] + 2*center[x] + row_below[x] + 2) >> 2
+    ///
+    /// Uses HvxVectorPair to widen u8 to u16 for precise arithmetic, avoiding
+    /// the rounding errors of byte-averaging approximations.
+    ///
+    /// # Safety
+    ///
+    /// - `src` must point to the center row with valid data at -stride and +stride
+    /// - `dst` must point to a valid output buffer for `width` bytes
+    /// - `width` must be a multiple of VLEN
+    /// - All pointers must be HVX-aligned (128-byte for 128B mode)
+    #[target_feature(enable = "hvxv62")]
+    unsafe fn vertical_121_pass(src: *const u8, stride: isize, width: usize, dst: *mut u8) {
+        let inp0 = src.offset(-stride) as *const HvxVector;
+        let inp1 = src as *const HvxVector;
+        let inp2 = src.offset(stride) as *const HvxVector;
+        let outp = dst as *mut HvxVector;
+
+        let n_chunks = width / VLEN;
+        for i in 0..n_chunks {
+            let above = *inp0.add(i);
+            let center = *inp1.add(i);
+            let below = *inp2.add(i);
+
+            // Widen above + below to 16-bit using HvxVectorPair
+            // Q6_Wh_vadd_VubVub: adds two u8 vectors, producing u16 results in a pair
+            let above_plus_below: HvxVectorPair = Q6_Wh_vadd_VubVub(above, below);
+
+            // Widen center * 2 (add center to itself)
+            let center_x2: HvxVectorPair = Q6_Wh_vadd_VubVub(center, center);
+
+            // Add them: (above + below) + (center * 2) = above + 2*center + below
+            let sum: HvxVectorPair = Q6_Wh_vadd_WhWh(above_plus_below, center_x2);
+
+            // Extract high and low vectors from the pair (each contains u16 values)
+            let sum_lo = Q6_V_lo_W(sum); // Lower 64 elements as i16
+            let sum_hi = Q6_V_hi_W(sum); // Upper 64 elements as i16
+
+            // Arithmetic right shift by 2 (divide by 4) with rounding
+            // Add 2 for rounding before shift: (sum + 2) >> 2
+            let two = Q6_Vh_vsplat_R(2);
+            let sum_lo_rounded = Q6_Vh_vadd_VhVh(sum_lo, two);
+            let sum_hi_rounded = Q6_Vh_vadd_VhVh(sum_hi, two);
+            let shifted_lo = Q6_Vh_vasr_VhVh(sum_lo_rounded, two);
+            let shifted_hi = Q6_Vh_vasr_VhVh(sum_hi_rounded, two);
+
+            // Pack back to u8 with saturation: takes hi and lo halfword vectors,
+            // saturates to u8, and interleaves them back to original order
+            let result = Q6_Vub_vsat_VhVh(shifted_hi, shifted_lo);
+
+            *outp.add(i) = result;
+        }
+    }
+
+    /// Horizontal 1-2-1 filter pass using HvxVectorPair widening arithmetic
+    ///
+    /// Computes: dst[x] = (src[x-1] + 2*src[x] + src[x+1] + 2) >> 2
+    ///
+    /// Uses `valign` and `vlalign` to shift vectors by 1 byte for neighbor access,
+    /// then HvxVectorPair for precise widening arithmetic.
+    ///
+    /// # Safety
+    ///
+    /// - `src` and `dst` must point to valid buffers of `width` bytes
+    /// - `width` must be a multiple of VLEN
+    /// - All pointers must be HVX-aligned
+    #[target_feature(enable = "hvxv62")]
+    unsafe fn horizontal_121_pass(src: *const u8, width: usize, dst: *mut u8) {
+        let inp = src as *const HvxVector;
+        let outp = dst as *mut HvxVector;
+
+        let n_chunks = width / VLEN;
+        let mut prev = Q6_V_vzero();
+
+        for i in 0..n_chunks {
+            let curr = *inp.add(i);
+            let next = if i + 1 < n_chunks {
+                *inp.add(i + 1)
+            } else {
+                Q6_V_vzero()
+            };
+
+            // Left neighbor (x-1): shift curr right by 1 byte, filling from prev
+            let left = Q6_V_vlalign_VVR(curr, prev, 1);
+
+            // Right neighbor (x+1): shift curr left by 1 byte, filling from next
+            let right = Q6_V_valign_VVR(next, curr, 1);
+
+            // Widen left + right to 16-bit
+            let left_plus_right: HvxVectorPair = Q6_Wh_vadd_VubVub(left, right);
+
+            // Widen center * 2
+            let center_x2: HvxVectorPair = Q6_Wh_vadd_VubVub(curr, curr);
+
+            // Add: left + 2*center + right
+            let sum: HvxVectorPair = Q6_Wh_vadd_WhWh(left_plus_right, center_x2);
+
+            // Extract high and low vectors
+            let sum_lo = Q6_V_lo_W(sum);
+            let sum_hi = Q6_V_hi_W(sum);
+
+            // Arithmetic right shift by 2 with rounding
+            let two = Q6_Vh_vsplat_R(2);
+            let sum_lo_rounded = Q6_Vh_vadd_VhVh(sum_lo, two);
+            let sum_hi_rounded = Q6_Vh_vadd_VhVh(sum_hi, two);
+            let shifted_lo = Q6_Vh_vasr_VhVh(sum_lo_rounded, two);
+            let shifted_hi = Q6_Vh_vasr_VhVh(sum_hi_rounded, two);
+
+            // Pack back to u8 with saturation
+            let result = Q6_Vub_vsat_VhVh(shifted_hi, shifted_lo);
+
+            *outp.add(i) = result;
+
+            prev = curr;
+        }
+    }
+
+    /// Apply Gaussian 3x3 blur to an entire image using separable filtering
+    ///
+    /// Two-pass approach:
+    /// 1. Vertical pass: apply 1-2-1 filter across rows
+    /// 2. Horizontal pass: apply 1-2-1 filter across columns
+    ///
+    /// Combined effect: 3x3 Gaussian kernel [1 2 1; 2 4 2; 1 2 1] / 16
+    ///
+    /// # Safety
+    ///
+    /// - `src` and `dst` must point to valid image buffers of `stride * height` bytes
+    /// - `tmp` must point to a valid temporary buffer of `width` bytes, HVX-aligned
+    /// - `width` must be a multiple of VLEN and >= VLEN
+    /// - `stride` must be >= `width`
+    /// - All buffers must be HVX-aligned (128-byte for 128B mode)
+    #[target_feature(enable = "hvxv62")]
+    pub unsafe fn gaussian3x3u8(
+        src: *const u8,
+        stride: usize,
+        width: usize,
+        height: usize,
+        dst: *mut u8,
+        tmp: *mut u8,
+    ) {
+        let stride_i = stride as isize;
+
+        // Process interior rows (skip first and last which lack vertical neighbors)
+        for y in 1..height - 1 {
+            let row_src = src.offset(y as isize * stride_i);
+            let row_dst = dst.offset(y as isize * stride_i);
+
+            // Pass 1: vertical 1-2-1 into tmp
+            vertical_121_pass(row_src, stride_i, width, tmp);
+
+            // Pass 2: horizontal 1-2-1 from tmp into dst
+            horizontal_121_pass(tmp, width, row_dst);
+        }
+    }
+}
+
+// ============================================================================
+// Reference implementation (works on all targets)
+// ============================================================================
+
+/// Reference implementation of Gaussian 3x3 blur
+///
+/// Kernel:
+///     1 2 1
+///     2 4 2  / 16
+///     1 2 1
+fn gaussian3x3u8_reference(src: &[u8], stride: usize, width: usize, height: usize, dst: &mut [u8]) {
+    for y in 1..height - 1 {
+        for x in 1..width - 1 {
+            // Compute column sums (vertical 1-2-1 weights)
+            let mut col = [0u32; 3];
+            for i in 0..3 {
+                col[i] = 1 * src[(y - 1) * stride + x - 1 + i] as u32
+                    + 2 * src[y * stride + x - 1 + i] as u32
+                    + 1 * src[(y + 1) * stride + x - 1 + i] as u32;
+            }
+            // Apply horizontal 1-2-1 weights and normalize
+            // (1*col[0] + 2*col[1] + 1*col[2] + 8) / 16
+            dst[y * stride + x] = ((1 * col[0] + 2 * col[1] + 1 * col[2] + 8) >> 4) as u8;
+        }
+    }
+}
+
+/// Generate deterministic test pattern
+fn generate_test_pattern(buf: &mut [u8], width: usize, height: usize) {
+    for y in 0..height {
+        for x in 0..width {
+            buf[y * width + x] = ((x + y * 7) % 256) as u8;
+        }
+    }
+}
+
+// ============================================================================
+// Main: runs HVX + reference on Hexagon, reference-only on other targets
+// ============================================================================
+
+#[cfg(target_arch = "hexagon")]
+fn main() {
+    // Aligned buffers for HVX
+    #[repr(align(128))]
+    struct AlignedBuf<const N: usize>([u8; N]);
+
+    let mut src = AlignedBuf::<{ WIDTH * HEIGHT }>([0u8; WIDTH * HEIGHT]);
+    let mut dst_hvx = AlignedBuf::<{ WIDTH * HEIGHT }>([0u8; WIDTH * HEIGHT]);
+    let mut tmp = AlignedBuf::<{ WIDTH }>([0u8; WIDTH]);
+    let mut dst_ref = vec![0u8; WIDTH * HEIGHT];
+
+    // Generate test pattern
+    generate_test_pattern(&mut src.0, WIDTH, HEIGHT);
+
+    // Run HVX implementation
+    unsafe {
+        hvx::gaussian3x3u8(
+            src.0.as_ptr(),
+            WIDTH,
+            WIDTH,
+            HEIGHT,
+            dst_hvx.0.as_mut_ptr(),
+            tmp.0.as_mut_ptr(),
+        );
+    }
+
+    // Run reference
+    gaussian3x3u8_reference(&src.0, WIDTH, WIDTH, HEIGHT, &mut dst_ref);
+
+    // Verify HVX matches reference (allowing small rounding differences).
+    use core_arch::arch::hexagon::scalar::{Q6_R_abs_R, Q6_R_max_RR};
+    let mut max_diff = 0i32;
+    for y in 1..HEIGHT - 1 {
+        for x in 1..WIDTH - 1 {
+            let idx = y * WIDTH + x;
+            let diff = unsafe { Q6_R_abs_R(dst_hvx.0[idx] as i32 - dst_ref[idx] as i32) };
+            max_diff = unsafe { Q6_R_max_RR(max_diff, diff) };
+            // Allow up to 1 LSB difference due to rounding
+            assert!(
+                diff <= 1,
+                "HVX differs from reference at ({}, {}): hvx={}, ref={}, diff={}",
+                x,
+                y,
+                dst_hvx.0[idx],
+                dst_ref[idx],
+                diff
+            );
+        }
+    }
+
+    println!(
+        "Gaussian 3x3 HVX test passed! Max difference from reference: {}",
+        max_diff
+    );
+}
+
+#[cfg(not(target_arch = "hexagon"))]
+fn main() {
+    let mut src = vec![0u8; WIDTH * HEIGHT];
+    let mut dst = vec![0u8; WIDTH * HEIGHT];
+
+    // Generate test pattern
+    generate_test_pattern(&mut src, WIDTH, HEIGHT);
+
+    // Run reference implementation
+    gaussian3x3u8_reference(&src, WIDTH, WIDTH, HEIGHT, &mut dst);
+
+    // Verify output is non-trivial (blurred values differ from input)
+    let mut changed = 0;
+    for y in 1..HEIGHT - 1 {
+        for x in 1..WIDTH - 1 {
+            let idx = y * WIDTH + x;
+            if src[idx] != dst[idx] {
+                changed += 1;
+            }
+        }
+    }
+
+    println!(
+        "Gaussian 3x3 reference test passed! {} pixels changed by blur",
+        changed
+    );
+}
diff --git a/examples/hex.rs b/examples/hex.rs
index 621f55bc09..21827b375a 100644
--- a/examples/hex.rs
+++ b/examples/hex.rs
@@ -13,7 +13,6 @@
 //! and you should see `746573740a` get printed out.
 
 #![allow(internal_features)]
-#![feature(wasm_target_feature)]
 #![cfg_attr(test, feature(test))]
 #![cfg_attr(
     any(target_arch = "x86", target_arch = "x86_64"),
diff --git a/intrinsics_data/arm_intrinsics.json b/intrinsics_data/arm_intrinsics.json
index 19c655cd6d..fab6da7f2c 100644
--- a/intrinsics_data/arm_intrinsics.json
+++ b/intrinsics_data/arm_intrinsics.json
@@ -224,223 +224,235 @@
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaba_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_n_s16]",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b",
-      "int16x4_t c"
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.H"
       },
-      "c": {
-        "register": "Vm.4H"
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
         "SABA"
+      ],
+      [
+        "MOVPRFX",
+        "SABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaba_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_n_s32]",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b",
-      "int32x2_t c"
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.S"
       },
-      "c": {
-        "register": "Vm.2S"
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
         "SABA"
+      ],
+      [
+        "MOVPRFX",
+        "SABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaba_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_n_s64]",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b",
-      "int8x8_t c"
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D"
       },
-      "c": {
-        "register": "Vm.8B"
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
         "SABA"
+      ],
+      [
+        "MOVPRFX",
+        "SABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaba_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_n_s8]",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b",
-      "uint16x4_t c"
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.B"
       },
-      "c": {
-        "register": "Vm.4H"
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABA"
+        "SABA"
+      ],
+      [
+        "MOVPRFX",
+        "SABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaba_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_n_u16]",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b",
-      "uint32x2_t c"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.H"
       },
-      "c": {
-        "register": "Vm.2S"
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
         "UABA"
+      ],
+      [
+        "MOVPRFX",
+        "UABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaba_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_n_u32]",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b",
-      "uint8x8_t c"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S"
       },
-      "c": {
-        "register": "Vm.8B"
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
         "UABA"
+      ],
+      [
+        "MOVPRFX",
+        "UABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_high_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_n_u64]",
     "arguments": [
-      "int32x4_t a",
-      "int16x8_t b",
-      "int16x8_t c"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.D"
       },
-      "c": {
-        "register": "Vm.8H"
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
@@ -448,30 +460,34 @@
     ],
     "instructions": [
       [
-        "SABAL2"
+        "UABA"
+      ],
+      [
+        "MOVPRFX",
+        "UABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_high_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_n_u8]",
     "arguments": [
-      "int64x2_t a",
-      "int32x4_t b",
-      "int32x4_t c"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.B"
       },
-      "c": {
-        "register": "Vm.4S"
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
@@ -479,30 +495,34 @@
     ],
     "instructions": [
       [
-        "SABAL2"
+        "UABA"
+      ],
+      [
+        "MOVPRFX",
+        "UABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_high_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_s16]",
     "arguments": [
-      "int16x8_t a",
-      "int8x16_t b",
-      "int8x16_t c"
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.H"
       },
-      "c": {
-        "register": "Vm.16B"
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -510,30 +530,34 @@
     ],
     "instructions": [
       [
-        "SABAL2"
+        "SABA"
+      ],
+      [
+        "MOVPRFX",
+        "SABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_high_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_s32]",
     "arguments": [
-      "uint32x4_t a",
-      "uint16x8_t b",
-      "uint16x8_t c"
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.S"
       },
-      "c": {
-        "register": "Vm.8H"
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -541,30 +565,34 @@
     ],
     "instructions": [
       [
-        "UABAL2"
+        "SABA"
+      ],
+      [
+        "MOVPRFX",
+        "SABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_high_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_s64]",
     "arguments": [
-      "uint64x2_t a",
-      "uint32x4_t b",
-      "uint32x4_t c"
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.D"
       },
-      "c": {
-        "register": "Vm.4S"
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
@@ -572,30 +600,34 @@
     ],
     "instructions": [
       [
-        "UABAL2"
+        "SABA"
+      ],
+      [
+        "MOVPRFX",
+        "SABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_high_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_s8]",
     "arguments": [
-      "uint16x8_t a",
-      "uint8x16_t b",
-      "uint8x16_t c"
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.B"
       },
-      "c": {
-        "register": "Vm.16B"
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -603,479 +635,524 @@
     ],
     "instructions": [
       [
-        "UABAL2"
+        "SABA"
+      ],
+      [
+        "MOVPRFX",
+        "SABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_u16]",
     "arguments": [
-      "int32x4_t a",
-      "int16x4_t b",
-      "int16x4_t c"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.H"
       },
-      "c": {
-        "register": "Vm.4H"
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABAL"
+        "UABA"
+      ],
+      [
+        "MOVPRFX",
+        "UABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_u32]",
     "arguments": [
-      "int64x2_t a",
-      "int32x2_t b",
-      "int32x2_t c"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.S"
       },
-      "c": {
-        "register": "Vm.2S"
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABAL"
+        "UABA"
+      ],
+      [
+        "MOVPRFX",
+        "UABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_u64]",
     "arguments": [
-      "int16x8_t a",
-      "int8x8_t b",
-      "int8x8_t c"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D"
       },
-      "c": {
-        "register": "Vm.8B"
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABAL"
+        "UABA"
+      ],
+      [
+        "MOVPRFX",
+        "UABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaba[_u8]",
     "arguments": [
-      "uint32x4_t a",
-      "uint16x4_t b",
-      "uint16x4_t c"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.B"
       },
-      "c": {
-        "register": "Vm.4H"
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABAL"
+        "UABA"
+      ],
+      [
+        "MOVPRFX",
+        "UABA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_n_s16]",
     "arguments": [
-      "uint64x2_t a",
-      "uint32x2_t b",
-      "uint32x2_t c"
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.B"
       },
-      "c": {
-        "register": "Vm.2S"
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABAL"
+        "SABALB"
+      ],
+      [
+        "MOVPRFX",
+        "SABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabal_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_n_s32]",
     "arguments": [
-      "uint16x8_t a",
-      "uint8x8_t b",
-      "uint8x8_t c"
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.H"
       },
-      "c": {
-        "register": "Vm.8B"
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABAL"
+        "SABALB"
+      ],
+      [
+        "MOVPRFX",
+        "SABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabaq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_n_s64]",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b",
-      "int16x8_t c"
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.S"
       },
-      "c": {
-        "register": "Vm.8H"
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABA"
+        "SABALB"
+      ],
+      [
+        "MOVPRFX",
+        "SABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabaq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_n_u16]",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b",
-      "int32x4_t c"
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.B"
       },
-      "c": {
-        "register": "Vm.4S"
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABA"
+        "UABALB"
+      ],
+      [
+        "MOVPRFX",
+        "UABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabaq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_n_u32]",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b",
-      "int8x16_t c"
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.H"
       },
-      "c": {
-        "register": "Vm.16B"
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABA"
+        "UABALB"
+      ],
+      [
+        "MOVPRFX",
+        "UABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabaq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_n_u64]",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b",
-      "uint16x8_t c"
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.S"
       },
-      "c": {
-        "register": "Vm.8H"
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABA"
+        "UABALB"
+      ],
+      [
+        "MOVPRFX",
+        "UABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabaq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_s16]",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b",
-      "uint32x4_t c"
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.B"
       },
-      "c": {
-        "register": "Vm.4S"
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABA"
+        "SABALB"
+      ],
+      [
+        "MOVPRFX",
+        "SABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabaq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_s32]",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b",
-      "uint8x16_t c"
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.H"
       },
-      "c": {
-        "register": "Vm.16B"
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABA"
+        "SABALB"
+      ],
+      [
+        "MOVPRFX",
+        "SABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabd_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_s64]",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FABD"
+        "SABALB"
+      ],
+      [
+        "MOVPRFX",
+        "SABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabd_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_u16]",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FABD"
+        "UABALB"
+      ],
+      [
+        "MOVPRFX",
+        "UABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabd_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_u32]",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -1083,200 +1160,244 @@
     ],
     "instructions": [
       [
-        "FABD"
+        "UABALB"
+      ],
+      [
+        "MOVPRFX",
+        "UABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabd_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalb[_u64]",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABD"
+        "UABALB"
+      ],
+      [
+        "MOVPRFX",
+        "UABALB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabd_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_n_s16]",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABD"
+        "SABALT"
+      ],
+      [
+        "MOVPRFX",
+        "SABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabd_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_n_s32]",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABD"
+        "SABALT"
+      ],
+      [
+        "MOVPRFX",
+        "SABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabd_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_n_s64]",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABD"
+        "SABALT"
+      ],
+      [
+        "MOVPRFX",
+        "SABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabd_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_n_u16]",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABD"
+        "UABALT"
+      ],
+      [
+        "MOVPRFX",
+        "UABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabd_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_n_u32]",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABD"
+        "UABALT"
+      ],
+      [
+        "MOVPRFX",
+        "UABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdd_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_n_u64]",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
@@ -1284,26 +1405,34 @@
     ],
     "instructions": [
       [
-        "FABD"
+        "UABALT"
+      ],
+      [
+        "MOVPRFX",
+        "UABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdh_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_s16]",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Hm"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -1311,26 +1440,34 @@
     ],
     "instructions": [
       [
-        "FABD"
+        "SABALT"
+      ],
+      [
+        "MOVPRFX",
+        "SABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_high_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_s32]",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -1338,26 +1475,34 @@
     ],
     "instructions": [
       [
-        "SABDL2"
+        "SABALT"
+      ],
+      [
+        "MOVPRFX",
+        "SABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_high_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_s64]",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -1365,26 +1510,34 @@
     ],
     "instructions": [
       [
-        "SABDL2"
+        "SABALT"
+      ],
+      [
+        "MOVPRFX",
+        "SABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_high_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_u16]",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -1392,26 +1545,34 @@
     ],
     "instructions": [
       [
-        "SABDL2"
+        "UABALT"
+      ],
+      [
+        "MOVPRFX",
+        "UABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_high_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_u32]",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -1419,26 +1580,34 @@
     ],
     "instructions": [
       [
-        "UABDL2"
+        "UABALT"
+      ],
+      [
+        "MOVPRFX",
+        "UABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_high_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabalt[_u64]",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -1446,26 +1615,34 @@
     ],
     "instructions": [
       [
-        "UABDL2"
+        "UABALT"
+      ],
+      [
+        "MOVPRFX",
+        "UABALT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_high_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_f16]_m",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -1473,257 +1650,326 @@
     ],
     "instructions": [
       [
-        "UABDL2"
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_f16]_x",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABDL"
+        "FABD"
+      ],
+      [
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_f16]_z",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABDL"
+        "MOVPRFX",
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_f32]_m",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABDL"
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_f32]_x",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABDL"
+        "FABD"
+      ],
+      [
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_f32]_z",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABDL"
+        "MOVPRFX",
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdl_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_f64]_m",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABDL"
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_f64]_x",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
         "FABD"
+      ],
+      [
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_f64]_z",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
+        "MOVPRFX",
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
         "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_f16]_m",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -1732,199 +1978,251 @@
     "instructions": [
       [
         "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_f16]_x",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABD"
+        "FABD"
+      ],
+      [
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_f16]_z",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABD"
+        "MOVPRFX",
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_f32]_m",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SABD"
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_f32]_x",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABD"
+        "FABD"
+      ],
+      [
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_f32]_z",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABD"
+        "MOVPRFX",
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabdq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_f64]_m",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UABD"
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabds_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_f64]_x",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Sm"
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -1933,70 +2231,107 @@
     "instructions": [
       [
         "FABD"
+      ],
+      [
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabs_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_f64]_z",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FABS"
+        "MOVPRFX",
+        "FABD"
+      ],
+      [
+        "MOVPRFX",
+        "FABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabs_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s16]_m",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FABS"
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabs_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s16]_x",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -2004,72 +2339,108 @@
     ],
     "instructions": [
       [
-        "FABS"
+        "SABD"
+      ],
+      [
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabs_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s16]_z",
     "arguments": [
-      "int16x4_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ABS"
+        "MOVPRFX",
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabs_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s32]_m",
     "arguments": [
-      "int32x2_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ABS"
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabs_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s32]_x",
     "arguments": [
-      "int64x1_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -2077,47 +2448,73 @@
     ],
     "instructions": [
       [
-        "ABS"
+        "SABD"
+      ],
+      [
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabs_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s32]_z",
     "arguments": [
-      "int8x8_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ABS"
+        "MOVPRFX",
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabsd_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s64]_m",
     "arguments": [
-      "int64_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -2125,95 +2522,143 @@
     ],
     "instructions": [
       [
-        "ABS"
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabsh_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s64]_x",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FABS"
+        "SABD"
+      ],
+      [
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabsq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s64]_z",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FABS"
+        "MOVPRFX",
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabsq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s8]_m",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FABS"
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabsq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s8]_x",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -2221,72 +2666,108 @@
     ],
     "instructions": [
       [
-        "FABS"
+        "SABD"
+      ],
+      [
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabsq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_s8]_z",
     "arguments": [
-      "int16x8_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ABS"
+        "MOVPRFX",
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabsq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u16]_m",
     "arguments": [
-      "int32x4_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ABS"
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabsq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u16]_x",
     "arguments": [
-      "int64x2_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -2294,108 +2775,146 @@
     ],
     "instructions": [
       [
-        "ABS"
+        "UABD"
+      ],
+      [
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vabsq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u16]_z",
     "arguments": [
-      "int8x16_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ABS"
+        "MOVPRFX",
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u32]_m",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FADD"
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u32]_x",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FADD"
+        "UABD"
+      ],
+      [
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u32]_z",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -2403,345 +2922,435 @@
     ],
     "instructions": [
       [
-        "FADD"
+        "MOVPRFX",
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u64]_m",
     "arguments": [
-      "poly16x4_t a",
-      "poly16x4_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u64]_x",
     "arguments": [
-      "poly64x1_t a",
-      "poly64x1_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "UABD"
+      ],
+      [
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u64]_z",
     "arguments": [
-      "poly8x8_t a",
-      "poly8x8_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "MOVPRFX",
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u8]_m",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u8]_x",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "UABD"
+      ],
+      [
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_n_u8]_z",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "MOVPRFX",
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s16]_m",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s16]_x",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "SABD"
+      ],
+      [
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s16]_z",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "MOVPRFX",
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s32]_m",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vadd_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s32]_x",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "SABD"
+      ],
+      [
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddd_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s32]_z",
     "arguments": [
-      "int64_t a",
-      "int64_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -2749,26 +3358,35 @@
     ],
     "instructions": [
       [
-        "ADD"
+        "MOVPRFX",
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddd_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s64]_m",
     "arguments": [
-      "uint64_t a",
-      "uint64_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -2776,58 +3394,72 @@
     ],
     "instructions": [
       [
-        "ADD"
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddh_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s64]_x",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Hm"
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FADD"
+        "SABD"
+      ],
+      [
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_high_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s64]_z",
     "arguments": [
-      "int8x8_t r",
-      "int16x8_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.D"
       },
-      "r": {
-        "register": "Vd.8B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -2835,30 +3467,35 @@
     ],
     "instructions": [
       [
-        "ADDHN2"
+        "MOVPRFX",
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_high_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s8]_m",
     "arguments": [
-      "int16x4_t r",
-      "int32x4_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.B"
       },
-      "r": {
-        "register": "Vd.4H"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -2866,30 +3503,34 @@
     ],
     "instructions": [
       [
-        "ADDHN2"
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_high_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s8]_x",
     "arguments": [
-      "int32x2_t r",
-      "int64x2_t a",
-      "int64x2_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
       },
-      "r": {
-        "register": "Vd.2S"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -2897,30 +3538,37 @@
     ],
     "instructions": [
       [
-        "ADDHN2"
+        "SABD"
+      ],
+      [
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_high_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_s8]_z",
     "arguments": [
-      "uint8x8_t r",
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.B"
       },
-      "r": {
-        "register": "Vd.8B"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -2928,30 +3576,35 @@
     ],
     "instructions": [
       [
-        "ADDHN2"
+        "MOVPRFX",
+        "SABD"
+      ],
+      [
+        "MOVPRFX",
+        "SABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_high_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u16]_m",
     "arguments": [
-      "uint16x4_t r",
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.H"
       },
-      "r": {
-        "register": "Vd.4H"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -2959,30 +3612,34 @@
     ],
     "instructions": [
       [
-        "ADDHN2"
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_high_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u16]_x",
     "arguments": [
-      "uint32x2_t r",
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
       },
-      "r": {
-        "register": "Vd.2S"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -2990,200 +3647,255 @@
     ],
     "instructions": [
       [
-        "ADDHN2"
+        "UABD"
+      ],
+      [
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u16]_z",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADDHN"
+        "MOVPRFX",
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u32]_m",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADDHN"
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u32]_x",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADDHN"
+        "UABD"
+      ],
+      [
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u32]_z",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADDHN"
+        "MOVPRFX",
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u64]_m",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADDHN"
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddhn_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u64]_x",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADDHN"
+        "UABD"
+      ],
+      [
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_high_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u64]_z",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -3191,26 +3903,35 @@
     ],
     "instructions": [
       [
-        "SADDL2"
+        "MOVPRFX",
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_high_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u8]_m",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -3218,26 +3939,34 @@
     ],
     "instructions": [
       [
-        "SADDL2"
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_high_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u8]_x",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -3245,26 +3974,37 @@
     ],
     "instructions": [
       [
-        "SADDL2"
+        "UABD"
+      ],
+      [
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_high_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svabd[_u8]_z",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -3272,26 +4012,31 @@
     ],
     "instructions": [
       [
-        "UADDL2"
+        "MOVPRFX",
+        "UABD"
+      ],
+      [
+        "MOVPRFX",
+        "UABD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_high_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_n_s16]",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
@@ -3299,26 +4044,26 @@
     ],
     "instructions": [
       [
-        "UADDL2"
+        "SABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_high_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_n_s32]",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -3326,196 +4071,188 @@
     ],
     "instructions": [
       [
-        "UADDL2"
+        "SABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_n_s64]",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SADDL"
+        "SABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_n_u16]",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SADDL"
+        "UABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_n_u32]",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SADDL"
+        "UABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_n_u64]",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UADDL"
+        "UABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_s16]",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UADDL"
+        "SABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddl_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_s32]",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UADDL"
+        "SABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlv_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_s64]",
     "arguments": [
-      "int16x4_t a"
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -3523,22 +4260,26 @@
     ],
     "instructions": [
       [
-        "SADDLV"
+        "SABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlv_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_u16]",
     "arguments": [
-      "int32x2_t a"
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
@@ -3546,22 +4287,26 @@
     ],
     "instructions": [
       [
-        "SADDLP"
+        "UABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlv_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_u32]",
     "arguments": [
-      "int8x8_t a"
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -3569,22 +4314,26 @@
     ],
     "instructions": [
       [
-        "SADDLV"
+        "UABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlv_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlb[_u64]",
     "arguments": [
-      "uint16x4_t a"
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -3592,22 +4341,26 @@
     ],
     "instructions": [
       [
-        "UADDLV"
+        "UABDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlv_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_n_s16]",
     "arguments": [
-      "uint32x2_t a"
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
@@ -3615,22 +4368,26 @@
     ],
     "instructions": [
       [
-        "UADDLP"
+        "SABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlv_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_n_s32]",
     "arguments": [
-      "uint8x8_t a"
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -3638,22 +4395,26 @@
     ],
     "instructions": [
       [
-        "UADDLV"
+        "SABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlvq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_n_s64]",
     "arguments": [
-      "int16x8_t a"
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -3661,22 +4422,26 @@
     ],
     "instructions": [
       [
-        "SADDLV"
+        "SABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlvq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_n_u16]",
     "arguments": [
-      "int32x4_t a"
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
@@ -3684,22 +4449,26 @@
     ],
     "instructions": [
       [
-        "SADDLV"
+        "UABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlvq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_n_u32]",
     "arguments": [
-      "int8x16_t a"
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -3707,22 +4476,26 @@
     ],
     "instructions": [
       [
-        "SADDLV"
+        "UABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlvq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_n_u64]",
     "arguments": [
-      "uint16x8_t a"
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -3730,22 +4503,26 @@
     ],
     "instructions": [
       [
-        "UADDLV"
+        "UABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlvq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_s16]",
     "arguments": [
-      "uint32x4_t a"
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
@@ -3753,22 +4530,26 @@
     ],
     "instructions": [
       [
-        "UADDLV"
+        "SABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddlvq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_s32]",
     "arguments": [
-      "uint8x16_t a"
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -3776,83 +4557,80 @@
     ],
     "instructions": [
       [
-        "UADDLV"
+        "SABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_s64]",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FADD"
+        "SABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_u16]",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FADD"
+        "UABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_u32]",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -3860,370 +4638,401 @@
     ],
     "instructions": [
       [
-        "FADD"
+        "UABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_p128",
+    "SIMD_ISA": "SVE2",
+    "name": "svabdlt[_u64]",
     "arguments": [
-      "poly128_t a",
-      "poly128_t b"
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "poly128_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "UABDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_f16]_m",
     "arguments": [
-      "poly16x8_t a",
-      "poly16x8_t b"
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "FABS"
+      ],
+      [
+        "MOVPRFX",
+        "FABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_f16]_x",
     "arguments": [
-      "poly64x2_t a",
-      "poly64x2_t b"
+      "svbool_t pg",
+      "svfloat16_t op"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.H|Ztied.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "FABS"
+      ],
+      [
+        "MOVPRFX",
+        "FABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_f16]_z",
     "arguments": [
-      "poly8x16_t a",
-      "poly8x16_t b"
+      "svbool_t pg",
+      "svfloat16_t op"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "MOVPRFX",
+        "FABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_f32]_m",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "FABS"
+      ],
+      [
+        "MOVPRFX",
+        "FABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_f32]_x",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svfloat32_t op"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op": {
+        "register": "Zop.S|Ztied.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "FABS"
+      ],
+      [
+        "MOVPRFX",
+        "FABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_f32]_z",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svbool_t pg",
+      "svfloat32_t op"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op": {
+        "register": "Zop.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "MOVPRFX",
+        "FABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_f64]_m",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "FABS"
+      ],
+      [
+        "MOVPRFX",
+        "FABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_f64]_x",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svfloat64_t op"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op": {
+        "register": "Zop.D|Ztied.D"
       },
-      "b": {
-        "register": "Vm.8H"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "FABS"
+      ],
+      [
+        "MOVPRFX",
+        "FABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_f64]_z",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svfloat64_t op"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op": {
+        "register": "Zop.D"
       },
-      "b": {
-        "register": "Vm.4S"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "MOVPRFX",
+        "FABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s16]_m",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "ABS"
+      ],
+      [
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s16]_x",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.H|Ztied.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ADD"
+        "ABS"
+      ],
+      [
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddv_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s16]_z",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -4231,22 +5040,31 @@
     ],
     "instructions": [
       [
-        "FADDP"
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddv_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s32]_m",
     "arguments": [
-      "int16x4_t a"
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -4254,22 +5072,30 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "ABS"
+      ],
+      [
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddv_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s32]_x",
     "arguments": [
-      "int32x2_t a"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -4277,22 +5103,30 @@
     ],
     "instructions": [
       [
-        "ADDP"
+        "ABS"
+      ],
+      [
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddv_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s32]_z",
     "arguments": [
-      "int8x8_t a"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "int8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -4300,22 +5134,31 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddv_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s64]_m",
     "arguments": [
-      "uint16x4_t a"
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -4323,22 +5166,30 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "ABS"
+      ],
+      [
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddv_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s64]_x",
     "arguments": [
-      "uint32x2_t a"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -4346,22 +5197,30 @@
     ],
     "instructions": [
       [
-        "ADDP"
+        "ABS"
+      ],
+      [
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddv_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s64]_z",
     "arguments": [
-      "uint8x8_t a"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "uint8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -4369,22 +5228,31 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s8]_m",
     "arguments": [
-      "float32x4_t a"
+      "svint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -4392,23 +5260,30 @@
     ],
     "instructions": [
       [
-        "FADDP",
-        "FADDP"
+        "ABS"
+      ],
+      [
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s8]_x",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -4416,22 +5291,30 @@
     ],
     "instructions": [
       [
-        "FADDP"
+        "ABS"
+      ],
+      [
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svabs[_s8]_z",
     "arguments": [
-      "int16x8_t a"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -4439,22 +5322,31 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "MOVPRFX",
+        "ABS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svacge[_f16]",
     "arguments": [
-      "int32x4_t a"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -4462,22 +5354,30 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svacge[_f32]",
     "arguments": [
-      "int64x2_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -4485,22 +5385,30 @@
     ],
     "instructions": [
       [
-        "ADDP"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svacge[_f64]",
     "arguments": [
-      "int8x16_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "int8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -4508,22 +5416,30 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svacge[_n_f16]",
     "arguments": [
-      "uint16x8_t a"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -4531,22 +5447,30 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svacge[_n_f32]",
     "arguments": [
-      "uint32x4_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -4554,22 +5478,30 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svacge[_n_f64]",
     "arguments": [
-      "uint64x2_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -4577,22 +5509,30 @@
     ],
     "instructions": [
       [
-        "ADDP"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddvq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svacgt[_f16]",
     "arguments": [
-      "uint8x16_t a"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -4600,26 +5540,30 @@
     ],
     "instructions": [
       [
-        "ADDV"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_high_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svacgt[_f32]",
     "arguments": [
-      "int32x4_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -4627,26 +5571,30 @@
     ],
     "instructions": [
       [
-        "SADDW2"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_high_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svacgt[_f64]",
     "arguments": [
-      "int64x2_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -4654,26 +5602,30 @@
     ],
     "instructions": [
       [
-        "SADDW2"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_high_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svacgt[_n_f16]",
     "arguments": [
-      "int16x8_t a",
-      "int8x16_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -4681,26 +5633,30 @@
     ],
     "instructions": [
       [
-        "SADDW2"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_high_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svacgt[_n_f32]",
     "arguments": [
-      "uint32x4_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -4708,26 +5664,30 @@
     ],
     "instructions": [
       [
-        "UADDW2"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_high_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svacgt[_n_f64]",
     "arguments": [
-      "uint64x2_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -4735,26 +5695,30 @@
     ],
     "instructions": [
       [
-        "UADDW2"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_high_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svacle[_f16]",
     "arguments": [
-      "uint16x8_t a",
-      "uint8x16_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -4762,1518 +5726,1855 @@
     ],
     "instructions": [
       [
-        "UADDW2"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svacle[_f32]",
     "arguments": [
-      "int32x4_t a",
-      "int16x4_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SADDW"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svacle[_f64]",
     "arguments": [
-      "int64x2_t a",
-      "int32x2_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SADDW"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svacle[_n_f16]",
     "arguments": [
-      "int16x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SADDW"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svacle[_n_f32]",
     "arguments": [
-      "uint32x4_t a",
-      "uint16x4_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UADDW"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svacle[_n_f64]",
     "arguments": [
-      "uint64x2_t a",
-      "uint32x2_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UADDW"
+        "FACGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaddw_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svaclt[_f16]",
     "arguments": [
-      "uint16x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UADDW"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaesdq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svaclt[_f32]",
     "arguments": [
-      "uint8x16_t data",
-      "uint8x16_t key"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "data": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "key": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AESD"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaeseq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svaclt[_f64]",
     "arguments": [
-      "uint8x16_t data",
-      "uint8x16_t key"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "data": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "key": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AESE"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaesimcq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svaclt[_n_f16]",
     "arguments": [
-      "uint8x16_t data"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "data": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AESIMC"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vaesmcq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svaclt[_n_f32]",
     "arguments": [
-      "uint8x16_t data"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "data": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AESMC"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vand_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svaclt[_n_f64]",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "FACGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vand_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_s16]_m",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "SADALP"
+      ],
+      [
+        "MOVPRFX",
+        "SADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vand_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_s16]_x",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "SADALP"
+      ],
+      [
+        "MOVPRFX",
+        "SADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vand_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_s16]_z",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "MOVPRFX",
+        "SADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vand_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_s32]_m",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "SADALP"
+      ],
+      [
+        "MOVPRFX",
+        "SADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vand_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_s32]_x",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "SADALP"
+      ],
+      [
+        "MOVPRFX",
+        "SADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vand_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_s32]_z",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "MOVPRFX",
+        "SADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vand_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_s64]_m",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "SADALP"
+      ],
+      [
+        "MOVPRFX",
+        "SADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vandq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_s64]_x",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "SADALP"
+      ],
+      [
+        "MOVPRFX",
+        "SADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vandq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_s64]_z",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "MOVPRFX",
+        "SADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vandq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_u16]_m",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "UADALP"
+      ],
+      [
+        "MOVPRFX",
+        "UADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vandq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_u16]_x",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "UADALP"
+      ],
+      [
+        "MOVPRFX",
+        "UADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vandq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_u16]_z",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "MOVPRFX",
+        "UADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vandq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_u32]_m",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "UADALP"
+      ],
+      [
+        "MOVPRFX",
+        "UADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vandq_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_u32]_x",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "UADALP"
+      ],
+      [
+        "MOVPRFX",
+        "UADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vandq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_u32]_z",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "AND"
+        "MOVPRFX",
+        "UADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbcaxq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_u64]_m",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b",
-      "int16x8_t c"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.D"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "BCAX"
+        "UADALP"
+      ],
+      [
+        "MOVPRFX",
+        "UADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbcaxq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_u64]_x",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b",
-      "int32x4_t c"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.D"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "BCAX"
+        "UADALP"
+      ],
+      [
+        "MOVPRFX",
+        "UADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbcaxq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svadalp[_u64]_z",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b",
-      "int64x2_t c"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.D"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "BCAX"
+        "MOVPRFX",
+        "UADALP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbcaxq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svadclb[_n_u32]",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b",
-      "int8x16_t c"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       },
-      "b": {},
-      "c": {}
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "BCAX"
+        "ADCLB"
+      ],
+      [
+        "MOVPRFX",
+        "ADCLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbcaxq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svadclb[_n_u64]",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b",
-      "uint16x8_t c"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
       },
-      "b": {},
-      "c": {}
+      "op3": {
+        "register": "Zop3.D[*]"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "BCAX"
+        "ADCLB"
+      ],
+      [
+        "MOVPRFX",
+        "ADCLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbcaxq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svadclb[_u32]",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b",
-      "uint32x4_t c"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       },
-      "b": {},
-      "c": {}
+      "op3": {
+        "register": "Zop3.S"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "BCAX"
+        "ADCLB"
+      ],
+      [
+        "MOVPRFX",
+        "ADCLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbcaxq_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svadclb[_u64]",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b",
-      "uint64x2_t c"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
       },
-      "b": {},
-      "c": {}
+      "op3": {
+        "register": "Zop3.D"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "BCAX"
+        "ADCLB"
+      ],
+      [
+        "MOVPRFX",
+        "ADCLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbcaxq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svadclt[_n_u32]",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b",
-      "uint8x16_t c"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       },
-      "b": {},
-      "c": {}
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "BCAX"
+        "ADCLT"
+      ],
+      [
+        "MOVPRFX",
+        "ADCLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbic_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svadclt[_n_u64]",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "ADCLT"
+      ],
+      [
+        "MOVPRFX",
+        "ADCLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbic_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svadclt[_u32]",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "ADCLT"
+      ],
+      [
+        "MOVPRFX",
+        "ADCLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbic_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svadclt[_u64]",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "ADCLT"
+      ],
+      [
+        "MOVPRFX",
+        "ADCLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbic_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_f16]_m",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbic_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_f16]_x",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbic_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_f16]_z",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbic_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_f32]_m",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbic_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_f32]_x",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbicq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_f32]_z",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbicq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_f64]_m",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbicq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_f64]_x",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbicq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_f64]_z",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbicq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_f16]_m",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Ztied1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbicq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_f16]_x",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbicq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_f16]_z",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbicq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_f32]_m",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Ztied1.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BIC"
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_f32]_x",
     "arguments": [
-      "uint16x4_t a",
-      "float16x4_t b",
-      "float16x4_t c"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_f32]_z",
     "arguments": [
-      "uint32x2_t a",
-      "float32x2_t b",
-      "float32x2_t c"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_f64]_m",
     "arguments": [
-      "uint64x1_t a",
-      "float64x1_t b",
-      "float64x1_t c"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Ztied1.D"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -6281,458 +7582,547 @@
     ],
     "instructions": [
       [
-        "BSL"
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_f64]_x",
     "arguments": [
-      "uint16x4_t a",
-      "poly16x4_t b",
-      "poly16x4_t c"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_f64]_z",
     "arguments": [
-      "poly64x1_t a",
-      "poly64x1_t b",
-      "poly64x1_t c"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s16]_m",
     "arguments": [
-      "uint8x8_t a",
-      "poly8x8_t b",
-      "poly8x8_t c"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.H[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s16]_x",
     "arguments": [
-      "uint16x4_t a",
-      "int16x4_t b",
-      "int16x4_t c"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s16]_z",
     "arguments": [
-      "uint32x2_t a",
-      "int32x2_t b",
-      "int32x2_t c"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.H[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s32]_m",
     "arguments": [
-      "uint64x1_t a",
-      "int64x1_t b",
-      "int64x1_t c"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s32]_x",
     "arguments": [
-      "uint8x8_t a",
-      "int8x8_t b",
-      "int8x8_t c"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s32]_z",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b",
-      "uint16x4_t c"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s64]_m",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b",
-      "uint32x2_t c"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s64]_x",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b",
-      "uint64x1_t c"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbsl_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s64]_z",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b",
-      "uint8x8_t c"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D[*]"
       },
-      "c": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s8]_m",
     "arguments": [
-      "uint16x8_t a",
-      "float16x8_t b",
-      "float16x8_t c"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.B[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s8]_x",
     "arguments": [
-      "uint32x4_t a",
-      "float32x4_t b",
-      "float32x4_t c"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_s8]_z",
     "arguments": [
-      "uint64x2_t a",
-      "float64x2_t b",
-      "float64x2_t c"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.B[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -6740,556 +8130,671 @@
     ],
     "instructions": [
       [
-        "BSL"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u16]_m",
     "arguments": [
-      "uint16x8_t a",
-      "poly16x8_t b",
-      "poly16x8_t c"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.H[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u16]_x",
     "arguments": [
-      "poly64x2_t a",
-      "poly64x2_t b",
-      "poly64x2_t c"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u16]_z",
     "arguments": [
-      "uint8x16_t a",
-      "poly8x16_t b",
-      "poly8x16_t c"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.H[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u32]_m",
     "arguments": [
-      "uint16x8_t a",
-      "int16x8_t b",
-      "int16x8_t c"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.S[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u32]_x",
     "arguments": [
-      "uint32x4_t a",
-      "int32x4_t b",
-      "int32x4_t c"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u32]_z",
     "arguments": [
-      "uint64x2_t a",
-      "int64x2_t b",
-      "int64x2_t c"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.S[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u64]_m",
     "arguments": [
-      "uint8x16_t a",
-      "int8x16_t b",
-      "int8x16_t c"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.D[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u64]_x",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b",
-      "uint16x8_t c"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u64]_z",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b",
-      "uint32x4_t c"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.D[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u8]_m",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b",
-      "uint64x2_t c"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.B[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vbslq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u8]_x",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b",
-      "uint8x16_t c"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
       },
-      "c": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "BSL"
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcadd_rot270_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_n_u8]_z",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H "
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCADD"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcadd_rot270_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s16]_m",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S "
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCADD"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcadd_rot90_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s16]_x",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H "
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCADD"
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcadd_rot90_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s16]_z",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S "
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCADD"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaddq_rot270_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s32]_m",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H "
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCADD"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaddq_rot270_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s32]_x",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S "
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCADD"
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaddq_rot270_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s32]_z",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D "
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -7297,82 +8802,107 @@
     ],
     "instructions": [
       [
-        "FCADD"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaddq_rot90_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s64]_m",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H "
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCADD"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaddq_rot90_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s64]_x",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S "
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCADD"
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaddq_rot90_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s64]_z",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D "
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -7380,83 +8910,107 @@
     ],
     "instructions": [
       [
-        "FCADD"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcage_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s8]_m",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcage_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s8]_x",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcage_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_s8]_z",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -7464,26 +9018,35 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaged_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u16]_m",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -7491,26 +9054,34 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcageh_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u16]_x",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Hm"
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -7518,83 +9089,107 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcageq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u16]_z",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGE"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcageq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u32]_m",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcageq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u32]_x",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -7602,26 +9197,36 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcages_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u32]_z",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Sm"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -7629,83 +9234,107 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcagt_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u64]_m",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcagt_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u64]_x",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcagt_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u64]_z",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -7713,26 +9342,35 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcagtd_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u8]_m",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -7740,26 +9378,34 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcagth_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u8]_x",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Hm"
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -7767,83 +9413,103 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADD"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcagtq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadd[_u8]_z",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGT"
+        "MOVPRFX",
+        "ADD"
+      ],
+      [
+        "MOVPRFX",
+        "ADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcagtq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadda[_f16]",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "float16_t initial",
+      "svfloat16_t op"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "float16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "initial": {
+        "register": "Htied"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGT"
+        "FADDA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcagtq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadda[_f32]",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "float32_t initial",
+      "svfloat32_t op"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "float32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "initial": {
+        "register": "Stied"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -7851,26 +9517,30 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "FADDA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcagts_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadda[_f64]",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svbool_t pg",
+      "float64_t initial",
+      "svfloat64_t op"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "float64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "initial": {
+        "register": "Dtied"
       },
-      "b": {
-        "register": "Sm"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -7878,83 +9548,80 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "FADDA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcale_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_n_s16]",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcale_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_n_s32]",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcale_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_n_s64]",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.D[*]"
       }
     },
     "Architectures": [
@@ -7962,26 +9629,26 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaled_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_n_u16]",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -7989,26 +9656,26 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaleh_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_n_u32]",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Hm"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -8016,83 +9683,80 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaleq_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_n_u64]",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.D[*]"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaleq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_s16]",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaleq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_s32]",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -8100,26 +9764,26 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcales_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_s64]",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Sm"
+      "op2": {
+        "register": "Zop2.D"
       }
     },
     "Architectures": [
@@ -8127,83 +9791,80 @@
     ],
     "instructions": [
       [
-        "FACGE"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcalt_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_u16]",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcalt_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_u32]",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcalt_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnb[_u64]",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.D"
       }
     },
     "Architectures": [
@@ -8211,26 +9872,30 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADDHNB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaltd_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_n_s16]",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svint8_t even",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "even": {
+        "register": "Ztied.B"
       },
-      "b": {
-        "register": "Dm"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -8238,26 +9903,30 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcalth_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_n_s32]",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svint16_t even",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "even": {
+        "register": "Ztied.H"
       },
-      "b": {
-        "register": "Hm"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -8265,83 +9934,92 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaltq_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_n_s64]",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svint32_t even",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "even": {
+        "register": "Ztied.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaltq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_n_u16]",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svuint8_t even",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "even": {
+        "register": "Ztied.B"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcaltq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_n_u32]",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svuint16_t even",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "even": {
+        "register": "Ztied.H"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -8349,26 +10027,30 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcalts_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_n_u64]",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svuint32_t even",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "even": {
+        "register": "Ztied.S"
       },
-      "b": {
-        "register": "Sm"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
       }
     },
     "Architectures": [
@@ -8376,83 +10058,92 @@
     ],
     "instructions": [
       [
-        "FACGT"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_s16]",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svint8_t even",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "even": {
+        "register": "Ztied.B"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_s32]",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svint16_t even",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "even": {
+        "register": "Ztied.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_s64]",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svint32_t even",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "even": {
+        "register": "Ztied.S"
       },
-      "b": {
-        "register": "Dm"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
       }
     },
     "Architectures": [
@@ -8460,141 +10151,146 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_u16]",
     "arguments": [
-      "poly64x1_t a",
-      "poly64x1_t b"
+      "svuint8_t even",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "even": {
+        "register": "Ztied.B"
       },
-      "b": {
-        "register": "Dm"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_u32]",
     "arguments": [
-      "poly8x8_t a",
-      "poly8x8_t b"
+      "svuint16_t even",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "even": {
+        "register": "Ztied.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddhnt[_u64]",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svuint32_t even",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "even": {
+        "register": "Ztied.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDHNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_n_s16]",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_n_s32]",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b"
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -8602,113 +10298,107 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_n_s64]",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_n_u16]",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "UADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_n_u32]",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "UADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_n_u64]",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b"
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -8716,55 +10406,53 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "UADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_s16]",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqd_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_s32]",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -8772,26 +10460,26 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "SADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqd_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_s64]",
     "arguments": [
-      "int64_t a",
-      "int64_t b"
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -8799,26 +10487,26 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqd_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_u16]",
     "arguments": [
-      "uint64_t a",
-      "uint64_t b"
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
@@ -8826,26 +10514,26 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "UADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqh_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_u32]",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Hm"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -8853,83 +10541,80 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "UADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlb[_u64]",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "UADDLB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlbt[_n_s16]",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "SADDLBT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlbt[_n_s32]",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -8937,141 +10622,134 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "SADDLBT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlbt[_n_s64]",
     "arguments": [
-      "poly64x2_t a",
-      "poly64x2_t b"
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLBT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlbt[_s16]",
     "arguments": [
-      "poly8x16_t a",
-      "poly8x16_t b"
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLBT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlbt[_s32]",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLBT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlbt[_s64]",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLBT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_n_s16]",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
@@ -9079,113 +10757,107 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_n_s32]",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_n_s64]",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "SADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_n_u16]",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "UADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_n_u32]",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -9193,55 +10865,53 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "UADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_n_u64]",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "UADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqs_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_s16]",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Sm"
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
@@ -9249,46 +10919,53 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "SADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_s32]",
     "arguments": [
-      "float16x4_t a"
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "SADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_s64]",
     "arguments": [
-      "float32x2_t a"
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -9296,22 +10973,26 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "SADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_u16]",
     "arguments": [
-      "float64x1_t a"
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
@@ -9319,46 +11000,53 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "UADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_u32]",
     "arguments": [
-      "poly64x1_t a"
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "UADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddlt[_u64]",
     "arguments": [
-      "poly8x8_t a"
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -9366,22 +11054,30 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "UADDLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_f16]_m",
     "arguments": [
-      "int16x4_t a"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -9389,22 +11085,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "FADDP"
+      ],
+      [
+        "MOVPRFX",
+        "FADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_f16]_x",
     "arguments": [
-      "int32x2_t a"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -9412,22 +11120,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "FADDP"
+      ],
+      [
+        "MOVPRFX",
+        "FADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_f32]_m",
     "arguments": [
-      "int64x1_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -9435,22 +11155,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "FADDP"
+      ],
+      [
+        "MOVPRFX",
+        "FADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_f32]_x",
     "arguments": [
-      "int8x8_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -9458,22 +11190,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "FADDP"
+      ],
+      [
+        "MOVPRFX",
+        "FADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_f64]_m",
     "arguments": [
-      "uint16x4_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -9481,22 +11225,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "FADDP"
+      ],
+      [
+        "MOVPRFX",
+        "FADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_f64]_x",
     "arguments": [
-      "uint32x2_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -9504,22 +11260,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "FADDP"
+      ],
+      [
+        "MOVPRFX",
+        "FADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_s16]_m",
     "arguments": [
-      "uint64x1_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -9527,22 +11295,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqz_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_s16]_x",
     "arguments": [
-      "uint8x8_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -9550,22 +11330,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzd_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_s32]_m",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -9573,22 +11365,34 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzd_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_s32]_x",
     "arguments": [
-      "int64_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -9596,22 +11400,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzd_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_s64]_m",
     "arguments": [
-      "uint64_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -9619,22 +11435,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzh_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_s64]_x",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -9642,46 +11470,69 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_s8]_m",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_s8]_x",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -9689,22 +11540,34 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_u16]_m",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -9712,46 +11575,69 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_u16]_x",
     "arguments": [
-      "poly64x2_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_u32]_m",
     "arguments": [
-      "poly8x16_t a"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -9759,22 +11645,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_u32]_x",
     "arguments": [
-      "int16x8_t a"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -9782,22 +11680,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_u64]_m",
     "arguments": [
-      "int32x4_t a"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -9805,22 +11715,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_u64]_x",
     "arguments": [
-      "int64x2_t a"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -9828,22 +11750,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_u8]_m",
     "arguments": [
-      "int8x16_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -9851,22 +11785,34 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddp[_u8]_x",
     "arguments": [
-      "uint16x8_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -9874,22 +11820,30 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "ADDP"
+      ],
+      [
+        "MOVPRFX",
+        "ADDP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_f16]",
     "arguments": [
-      "uint32x4_t a"
+      "svbool_t pg",
+      "svfloat16_t op"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "float16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -9897,22 +11851,26 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "FADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_f32]",
     "arguments": [
-      "uint64x2_t a"
+      "svbool_t pg",
+      "svfloat32_t op"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "float32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -9920,22 +11878,26 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "FADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_f64]",
     "arguments": [
-      "uint8x16_t a"
+      "svbool_t pg",
+      "svfloat64_t op"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "float64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -9943,22 +11905,26 @@
     ],
     "instructions": [
       [
-        "CMEQ"
+        "FADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vceqzs_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_s16]",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "int64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -9966,83 +11932,80 @@
     ],
     "instructions": [
       [
-        "FCMEQ"
+        "SADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_s32]",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "int64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op": {
+        "register": "Zop.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "SADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_s64]",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "int64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op": {
+        "register": "Zop.D"
       },
-      "b": {
-        "register": "Vm.2S"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "UADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_s8]",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "int64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.B"
       },
-      "b": {
-        "register": "Dm"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -10050,84 +12013,80 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "SADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_u16]",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "uint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op": {
+        "register": "Zop.H"
       },
-      "b": {
-        "register": "Vm.4H"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "UADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_u32]",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "uint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op": {
+        "register": "Zop.S"
       },
-      "b": {
-        "register": "Vm.2S"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "UADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_u64]",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b"
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "uint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.D"
       },
-      "b": {
-        "register": "Dm"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -10135,113 +12094,107 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "UADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svaddv[_u8]",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "uint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.B"
       },
-      "b": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "UADDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_n_s16]",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svint16_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "SADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_n_s32]",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svint32_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "SADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_n_s64]",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b"
+      "svint64_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -10249,55 +12202,53 @@
     ],
     "instructions": [
       [
-        "CMHS"
+        "SADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcge_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_n_u16]",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svuint16_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "UADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcged_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_n_u32]",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svuint32_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -10305,26 +12256,26 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "UADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcged_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_n_u64]",
     "arguments": [
-      "int64_t a",
-      "int64_t b"
+      "svuint64_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -10332,26 +12283,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "UADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcged_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_s16]",
     "arguments": [
-      "uint64_t a",
-      "uint64_t b"
+      "svint16_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
@@ -10359,26 +12310,26 @@
     ],
     "instructions": [
       [
-        "CMHS"
+        "SADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeh_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_s32]",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svint32_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Hm"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -10386,83 +12337,80 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "SADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_s64]",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svint64_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "SADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_u16]",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svuint16_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "UADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_u32]",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svuint32_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -10470,84 +12418,80 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "UADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwb[_u64]",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svuint64_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "UADDWB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_n_s16]",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svint16_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "SADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_n_s32]",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svint32_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -10555,113 +12499,107 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "SADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_n_s64]",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svint64_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "SADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_n_u16]",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svuint16_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "UADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_n_u32]",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svuint32_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "UADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_n_u64]",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svuint64_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -10669,55 +12607,53 @@
     ],
     "instructions": [
       [
-        "CMHS"
+        "UADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgeq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_s16]",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svint16_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "SADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcges_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_s32]",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svint32_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Sm"
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -10725,46 +12661,53 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "SADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgez_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_s64]",
     "arguments": [
-      "float16x4_t a"
+      "svint64_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "SADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgez_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_u16]",
     "arguments": [
-      "float32x2_t a"
+      "svuint16_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
@@ -10772,22 +12715,26 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "UADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgez_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_u32]",
     "arguments": [
-      "float64x1_t a"
+      "svuint32_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -10795,22 +12742,26 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "UADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgez_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaddwt[_u64]",
     "arguments": [
-      "int16x4_t a"
+      "svuint64_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -10818,22 +12769,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "UADDWT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgez_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svadrb[_u32base]_[s32]offset",
     "arguments": [
-      "int32x2_t a"
+      "svuint32_t bases",
+      "svint32_t offsets"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
       }
     },
     "Architectures": [
@@ -10841,22 +12796,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgez_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svadrb[_u32base]_[u32]offset",
     "arguments": [
-      "int64x1_t a"
+      "svuint32_t bases",
+      "svuint32_t offsets"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
       }
     },
     "Architectures": [
@@ -10864,22 +12823,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgez_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svadrb[_u64base]_[s64]offset",
     "arguments": [
-      "int8x8_t a"
+      "svuint64_t bases",
+      "svint64_t offsets"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
       }
     },
     "Architectures": [
@@ -10887,22 +12850,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezd_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadrb[_u64base]_[u64]offset",
     "arguments": [
-      "float64_t a"
+      "svuint64_t bases",
+      "svuint64_t offsets"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
       }
     },
     "Architectures": [
@@ -10910,22 +12877,26 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezd_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svadrd[_u32base]_[s32]index",
     "arguments": [
-      "int64_t a"
+      "svuint32_t bases",
+      "svint32_t indices"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
       }
     },
     "Architectures": [
@@ -10933,22 +12904,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezh_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadrd[_u32base]_[u32]index",
     "arguments": [
-      "float16_t a"
+      "svuint32_t bases",
+      "svuint32_t indices"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
       }
     },
     "Architectures": [
@@ -10956,46 +12931,53 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadrd[_u64base]_[s64]index",
     "arguments": [
-      "float16x8_t a"
+      "svuint64_t bases",
+      "svint64_t indices"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadrd[_u64base]_[u64]index",
     "arguments": [
-      "float32x4_t a"
+      "svuint64_t bases",
+      "svuint64_t indices"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
       }
     },
     "Architectures": [
@@ -11003,22 +12985,26 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svadrh[_u32base]_[s32]index",
     "arguments": [
-      "float64x2_t a"
+      "svuint32_t bases",
+      "svint32_t indices"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
       }
     },
     "Architectures": [
@@ -11026,22 +13012,26 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svadrh[_u32base]_[u32]index",
     "arguments": [
-      "int16x8_t a"
+      "svuint32_t bases",
+      "svuint32_t indices"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
       }
     },
     "Architectures": [
@@ -11049,22 +13039,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svadrh[_u64base]_[s64]index",
     "arguments": [
-      "int32x4_t a"
+      "svuint64_t bases",
+      "svint64_t indices"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
       }
     },
     "Architectures": [
@@ -11072,22 +13066,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svadrh[_u64base]_[u64]index",
     "arguments": [
-      "int64x2_t a"
+      "svuint64_t bases",
+      "svuint64_t indices"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
       }
     },
     "Architectures": [
@@ -11095,22 +13093,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svadrw[_u32base]_[s32]index",
     "arguments": [
-      "int8x16_t a"
+      "svuint32_t bases",
+      "svint32_t indices"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
       }
     },
     "Architectures": [
@@ -11118,22 +13120,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgezs_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadrw[_u32base]_[u32]index",
     "arguments": [
-      "float32_t a"
+      "svuint32_t bases",
+      "svuint32_t indices"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
       }
     },
     "Architectures": [
@@ -11141,83 +13147,80 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svadrw[_u64base]_[s64]index",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svuint64_t bases",
+      "svint64_t indices"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "bases": {
+        "register": "Zbases.D"
       },
-      "b": {
-        "register": "Vm.4H"
+      "indices": {
+        "register": "Zindices.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svadrw[_u64base]_[u64]index",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svuint64_t bases",
+      "svuint64_t indices"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "bases": {
+        "register": "Zbases.D"
       },
-      "b": {
-        "register": "Vm.2S"
+      "indices": {
+        "register": "Zindices.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "ADR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaesd[_u8]",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
       }
     },
     "Architectures": [
@@ -11225,84 +13228,78 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AESD"
+      ],
+      [
+        "AESD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svaese[_u8]",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "AESE"
+      ],
+      [
+        "AESE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svaesimc[_u8]",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
-      },
-      "b": {
-        "register": "Vm.2S"
+      "op": {
+        "register": "Ztied.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "AESIMC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svaesmc[_u8]",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b"
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
-      },
-      "b": {
-        "register": "Dm"
+      "op": {
+        "register": "Ztied.B"
       }
     },
     "Architectures": [
@@ -11310,113 +13307,135 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "AESMC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_b]_z",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Pop1.B"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s16]_m",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Ztied1.H"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "UXTB"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s16]_x",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s16]_z",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -11424,55 +13443,76 @@
     ],
     "instructions": [
       [
-        "CMHI"
+        "MOVPRFX",
+        "UXTB"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgt_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s32]_m",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "UXTB"
+      ],
+      [
+        "UXTH"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtd_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s32]_x",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -11480,26 +13520,39 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtd_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s32]_z",
     "arguments": [
-      "int64_t a",
-      "int64_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -11507,26 +13560,43 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "MOVPRFX",
+        "UXTB"
+      ],
+      [
+        "MOVPRFX",
+        "UXTH"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtd_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s64]_m",
     "arguments": [
-      "uint64_t a",
-      "uint64_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Ztied1.D"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -11534,26 +13604,39 @@
     ],
     "instructions": [
       [
-        "CMHI"
+        "UXTB"
+      ],
+      [
+        "UXTH"
+      ],
+      [
+        "UXTW"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgth_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s64]_x",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Hm"
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -11561,83 +13644,122 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s64]_z",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "MOVPRFX",
+        "UXTB"
+      ],
+      [
+        "MOVPRFX",
+        "UXTH"
+      ],
+      [
+        "MOVPRFX",
+        "UXTW"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s8]_m",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s8]_x",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -11645,84 +13767,109 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_s8]_z",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u16]_m",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Ztied1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "UXTB"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u16]_x",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -11730,113 +13877,156 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u16]_z",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "MOVPRFX",
+        "UXTB"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u32]_m",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "UXTB"
+      ],
+      [
+        "UXTH"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u32]_x",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u32]_z",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -11844,55 +14034,83 @@
     ],
     "instructions": [
       [
-        "CMHI"
+        "MOVPRFX",
+        "UXTB"
+      ],
+      [
+        "MOVPRFX",
+        "UXTH"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u64]_m",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Ztied1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "UXTB"
+      ],
+      [
+        "UXTH"
+      ],
+      [
+        "UXTW"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgts_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u64]_x",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Sm"
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -11900,46 +14118,87 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtz_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u64]_z",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "MOVPRFX",
+        "UXTB"
+      ],
+      [
+        "MOVPRFX",
+        "UXTH"
+      ],
+      [
+        "MOVPRFX",
+        "UXTW"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtz_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u8]_m",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -11947,22 +14206,34 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtz_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u8]_x",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -11970,22 +14241,39 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtz_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_n_u8]_z",
     "arguments": [
-      "int16x4_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -11993,22 +14281,35 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtz_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s16]_m",
     "arguments": [
-      "int32x2_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -12016,22 +14317,34 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtz_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s16]_x",
     "arguments": [
-      "int64x1_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -12039,22 +14352,36 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtz_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s16]_z",
     "arguments": [
-      "int8x8_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -12062,22 +14389,35 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzd_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s32]_m",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -12085,22 +14425,34 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzd_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s32]_x",
     "arguments": [
-      "int64_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -12108,22 +14460,36 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzh_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s32]_z",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -12131,46 +14497,70 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s64]_m",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s64]_x",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -12178,22 +14568,36 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s64]_z",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -12201,22 +14605,35 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s8]_m",
     "arguments": [
-      "int16x8_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -12224,22 +14641,34 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s8]_x",
     "arguments": [
-      "int32x4_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -12247,22 +14676,36 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_s8]_z",
     "arguments": [
-      "int64x2_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -12270,22 +14713,35 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u16]_m",
     "arguments": [
-      "int8x16_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -12293,22 +14749,34 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcgtzs_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u16]_x",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -12316,83 +14784,107 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u16]_z",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u32]_m",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u32]_x",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -12400,84 +14892,107 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u32]_z",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u64]_m",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u64]_x",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -12485,113 +15000,144 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u64]_z",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u8]_m",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
-      ]
-    ]
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
+      ]
+    ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u8]_x",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "AND"
+      ],
+      [
+        "AND"
+      ],
+      [
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svand[_u8]_z",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -12599,55 +15145,58 @@
     ],
     "instructions": [
       [
-        "CMHS"
+        "MOVPRFX",
+        "AND"
+      ],
+      [
+        "MOVPRFX",
+        "AND"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcle_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svandv[_s16]",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "int16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "ANDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcled_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svandv[_s32]",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "int32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.S"
       },
-      "b": {
-        "register": "Dm"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -12655,26 +15204,26 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ANDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcled_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svandv[_s64]",
     "arguments": [
-      "int64_t a",
-      "int64_t b"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "int64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.D"
       },
-      "b": {
-        "register": "Dm"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -12682,26 +15231,26 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "ANDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcled_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svandv[_s8]",
     "arguments": [
-      "uint64_t a",
-      "uint64_t b"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "int8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.B"
       },
-      "b": {
-        "register": "Dm"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -12709,26 +15258,26 @@
     ],
     "instructions": [
       [
-        "CMHS"
+        "ANDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleh_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svandv[_u16]",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
       "value": "uint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.H"
       },
-      "b": {
-        "register": "Hm"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -12736,83 +15285,80 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ANDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svandv[_u32]",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "uint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op": {
+        "register": "Zop.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ANDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svandv[_u64]",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "uint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op": {
+        "register": "Zop.D"
       },
-      "b": {
-        "register": "Vm.4S"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ANDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svandv[_u8]",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "uint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op": {
+        "register": "Zop.B"
       },
-      "b": {
-        "register": "Vm.2D"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -12820,84 +15366,105 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "ANDV"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s16]_m",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Ztied1.H"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s16]_x",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "ASR"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "ASRR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s16]_z",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -12905,113 +15472,154 @@
     ],
     "instructions": [
       [
-        "CMGE"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASRR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s32]_m",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Ztied1.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGE"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s32]_x",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "ASR"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "ASRR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s32]_z",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASRR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s64]_m",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Ztied1.D"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -13019,55 +15627,74 @@
     ],
     "instructions": [
       [
-        "CMHS"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcleq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s64]_x",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHS"
+        "ASR"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "ASRR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcles_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s64]_z",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Sm"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -13075,46 +15702,73 @@
     ],
     "instructions": [
       [
-        "FCMGE"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASRR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclez_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s8]_m",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLE"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclez_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s8]_x",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -13122,22 +15776,40 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "ASR"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "ASRR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclez_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_n_s8]_z",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -13145,22 +15817,39 @@
     ],
     "instructions": [
       [
-        "FCMLE"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASRR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclez_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s16]_m",
     "arguments": [
-      "int16x4_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -13168,22 +15857,34 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclez_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s16]_x",
     "arguments": [
-      "int32x2_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -13191,22 +15892,37 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "ASR"
+      ],
+      [
+        "ASRR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclez_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s16]_z",
     "arguments": [
-      "int64x1_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -13214,22 +15930,35 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASRR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclez_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s32]_m",
     "arguments": [
-      "int8x8_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -13237,22 +15966,34 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezd_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s32]_x",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -13260,22 +16001,37 @@
     ],
     "instructions": [
       [
-        "FCMLE"
+        "ASR"
+      ],
+      [
+        "ASRR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezd_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s32]_z",
     "arguments": [
-      "int64_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -13283,22 +16039,35 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASRR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezh_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s64]_m",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -13306,46 +16075,72 @@
     ],
     "instructions": [
       [
-        "FCMLE"
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s64]_x",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLE"
+        "ASR"
+      ],
+      [
+        "ASRR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s64]_z",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -13353,22 +16148,35 @@
     ],
     "instructions": [
       [
-        "FCMLE"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASRR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s8]_m",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -13376,22 +16184,34 @@
     ],
     "instructions": [
       [
-        "FCMLE"
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s8]_x",
     "arguments": [
-      "int16x8_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -13399,22 +16219,37 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "ASR"
+      ],
+      [
+        "ASRR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr[_s8]_z",
     "arguments": [
-      "int32x4_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -13422,22 +16257,35 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASRR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_n_s16]_m",
     "arguments": [
-      "int64x2_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -13445,22 +16293,33 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_n_s16]_x",
     "arguments": [
-      "int8x16_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -13468,22 +16327,36 @@
     ],
     "instructions": [
       [
-        "CMLE"
+        "ASR"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclezs_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_n_s16]_z",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -13491,383 +16364,520 @@
     ],
     "instructions": [
       [
-        "FCMLE"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcls_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_n_s32]_m",
     "arguments": [
-      "int16x4_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcls_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_n_s32]_x",
     "arguments": [
-      "int32x2_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "ASR"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcls_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_n_s32]_z",
     "arguments": [
-      "int8x8_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcls_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_n_s8]_m",
     "arguments": [
-      "uint16x4_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcls_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_n_s8]_x",
     "arguments": [
-      "uint32x2_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "ASR"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcls_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_n_s8]_z",
     "arguments": [
-      "uint8x8_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclsq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_s16]_m",
     "arguments": [
-      "int16x8_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclsq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_s16]_x",
     "arguments": [
-      "int32x4_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclsq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_s16]_z",
     "arguments": [
-      "int8x16_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclsq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_s32]_m",
     "arguments": [
-      "uint16x8_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclsq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_s32]_x",
     "arguments": [
-      "uint32x4_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclsq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_s32]_z",
     "arguments": [
-      "uint8x16_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLS"
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_s8]_m",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_s8]_x",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "ASR"
+      ],
+      [
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svasr_wide[_s8]_z",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -13875,84 +16885,104 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "MOVPRFX",
+        "ASR"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s16]_m",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
       },
-      "b": {
-        "register": "Vm.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "ASRD"
+      ],
+      [
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s16]_x",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
       },
-      "b": {
-        "register": "Vm.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "ASRD"
+      ],
+      [
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s16]_z",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
       },
-      "b": {
-        "register": "Dm"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -13960,113 +16990,137 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s32]_m",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
       },
-      "b": {
-        "register": "Vm.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "ASRD"
+      ],
+      [
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s32]_x",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
       },
-      "b": {
-        "register": "Vm.4H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "ASRD"
+      ],
+      [
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s32]_z",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
       },
-      "b": {
-        "register": "Vm.2S"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s64]_m",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 64
       },
-      "b": {
-        "register": "Dm"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -14074,55 +17128,71 @@
     ],
     "instructions": [
       [
-        "CMHI"
+        "ASRD"
+      ],
+      [
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclt_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s64]_x",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 64
       },
-      "b": {
-        "register": "Vm.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "ASRD"
+      ],
+      [
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltd_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s64]_z",
     "arguments": [
-      "float64_t a",
-      "float64_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 64
       },
-      "b": {
-        "register": "Dm"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -14130,26 +17200,32 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltd_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s8]_m",
     "arguments": [
-      "int64_t a",
-      "int64_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
       },
-      "b": {
-        "register": "Dm"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -14157,26 +17233,35 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "ASRD"
+      ],
+      [
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltd_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s8]_x",
     "arguments": [
-      "uint64_t a",
-      "uint64_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
       },
-      "b": {
-        "register": "Dm"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -14184,26 +17269,35 @@
     ],
     "instructions": [
       [
-        "CMHI"
+        "ASRD"
+      ],
+      [
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclth_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svasrd[_n_s8]_z",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t imm2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
       },
-      "b": {
-        "register": "Hm"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -14211,83 +17305,101 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "MOVPRFX",
+        "ASRD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_n_s16]",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_n_s32]",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMGT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_n_s64]",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
@@ -14295,84 +17407,104 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_n_s8]",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_n_u16]",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_n_u32]",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
@@ -14380,113 +17512,139 @@
     ],
     "instructions": [
       [
-        "CMGT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_n_u64]",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMGT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_n_u8]",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_s16]",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_s32]",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -14494,55 +17652,69 @@
     ],
     "instructions": [
       [
-        "CMHI"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_s64]",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CMHI"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclts_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_s8]",
     "arguments": [
-      "float32_t a",
-      "float32_t b"
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "b": {
-        "register": "Sm"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -14550,46 +17722,69 @@
     ],
     "instructions": [
       [
-        "FCMGT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltz_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_u16]",
     "arguments": [
-      "float16x4_t a"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltz_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_u32]",
     "arguments": [
-      "float32x2_t a"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -14597,22 +17792,34 @@
     ],
     "instructions": [
       [
-        "FCMLT"
-      ]
-    ]
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
+      ]
+    ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltz_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_u64]",
     "arguments": [
-      "float64x1_t a"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
@@ -14620,22 +17827,34 @@
     ],
     "instructions": [
       [
-        "FCMLT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltz_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbcax[_u8]",
     "arguments": [
-      "int16x4_t a"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -14643,22 +17862,30 @@
     ],
     "instructions": [
       [
-        "CMLT"
+        "BCAX"
+      ],
+      [
+        "MOVPRFX",
+        "BCAX"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltz_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbdep[_n_u16]",
     "arguments": [
-      "int32x2_t a"
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -14666,22 +17893,26 @@
     ],
     "instructions": [
       [
-        "CMLT"
+        "BDEP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltz_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbdep[_n_u32]",
     "arguments": [
-      "int64x1_t a"
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -14689,22 +17920,26 @@
     ],
     "instructions": [
       [
-        "CMLT"
+        "BDEP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltz_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbdep[_n_u64]",
     "arguments": [
-      "int8x8_t a"
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
       }
     },
     "Architectures": [
@@ -14712,22 +17947,26 @@
     ],
     "instructions": [
       [
-        "CMLT"
+        "BDEP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzd_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbdep[_n_u8]",
     "arguments": [
-      "float64_t a"
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
@@ -14735,22 +17974,26 @@
     ],
     "instructions": [
       [
-        "FCMLT"
+        "BDEP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzd_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbdep[_u16]",
     "arguments": [
-      "int64_t a"
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -14758,22 +18001,26 @@
     ],
     "instructions": [
       [
-        "CMLT"
+        "BDEP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzh_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbdep[_u32]",
     "arguments": [
-      "float16_t a"
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -14781,46 +18028,53 @@
     ],
     "instructions": [
       [
-        "FCMLT"
+        "BDEP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzq_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbdep[_u64]",
     "arguments": [
-      "float16x8_t a"
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLT"
+        "BDEP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbdep[_u8]",
     "arguments": [
-      "float32x4_t a"
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
@@ -14828,22 +18082,26 @@
     ],
     "instructions": [
       [
-        "FCMLT"
+        "BDEP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbext[_n_u16]",
     "arguments": [
-      "float64x2_t a"
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
@@ -14851,22 +18109,26 @@
     ],
     "instructions": [
       [
-        "FCMLT"
+        "BEXT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbext[_n_u32]",
     "arguments": [
-      "int16x8_t a"
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
@@ -14874,22 +18136,26 @@
     ],
     "instructions": [
       [
-        "CMLT"
+        "BEXT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbext[_n_u64]",
     "arguments": [
-      "int32x4_t a"
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
       }
     },
     "Architectures": [
@@ -14897,22 +18163,26 @@
     ],
     "instructions": [
       [
-        "CMLT"
+        "BEXT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbext[_n_u8]",
     "arguments": [
-      "int64x2_t a"
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
@@ -14920,22 +18190,26 @@
     ],
     "instructions": [
       [
-        "CMLT"
+        "BEXT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbext[_u16]",
     "arguments": [
-      "int8x16_t a"
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -14943,22 +18217,26 @@
     ],
     "instructions": [
       [
-        "CMLT"
+        "BEXT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcltzs_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbext[_u32]",
     "arguments": [
-      "float32_t a"
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -14966,1138 +18244,1264 @@
     ],
     "instructions": [
       [
-        "FCMLT"
+        "BEXT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclz_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbext[_u64]",
     "arguments": [
-      "int16x4_t a"
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BEXT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclz_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbext[_u8]",
     "arguments": [
-      "int32x2_t a"
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BEXT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclz_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbgrp[_n_u16]",
     "arguments": [
-      "int8x8_t a"
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BGRP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclz_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbgrp[_n_u32]",
     "arguments": [
-      "uint16x4_t a"
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BGRP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclz_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbgrp[_n_u64]",
     "arguments": [
-      "uint32x2_t a"
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BGRP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclz_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbgrp[_n_u8]",
     "arguments": [
-      "uint8x8_t a"
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BGRP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclzq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbgrp[_u16]",
     "arguments": [
-      "int16x8_t a"
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BGRP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclzq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbgrp[_u32]",
     "arguments": [
-      "int32x4_t a"
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BGRP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclzq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbgrp[_u64]",
     "arguments": [
-      "int8x16_t a"
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BGRP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclzq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbgrp[_u8]",
     "arguments": [
-      "uint16x8_t a"
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BGRP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclzq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_b]_z",
     "arguments": [
-      "uint32x4_t a"
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vclzq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s16]_m",
     "arguments": [
-      "uint8x16_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CLZ"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s16]_x",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "AND"
+      ],
+      [
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s16]_z",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s32]_m",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "r": {
-        "register": "Vd.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s32]_x",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x2_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "r": {
-        "register": "Vd.2S"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "AND"
+      ],
+      [
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s32]_z",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.S"
       },
-      "r": {
-        "register": "Vd.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s64]_m",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "r": {
-        "register": "Vd.2S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot180_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s64]_x",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "AND"
+      ],
+      [
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot180_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s64]_z",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2S"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot180_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s8]_m",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "r": {
-        "register": "Vd.4H"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot180_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s8]_x",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x2_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "r": {
-        "register": "Vd.2S"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "AND"
+      ],
+      [
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot180_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_s8]_z",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B"
       },
-      "r": {
-        "register": "Vd.4H"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot180_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u16]_m",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "r": {
-        "register": "Vd.2S"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot270_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u16]_x",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "AND"
+      ],
+      [
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot270_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u16]_z",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot270_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u32]_m",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "r": {
-        "register": "Vd.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot270_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u32]_x",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x2_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "r": {
-        "register": "Vd.2S"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "AND"
+      ],
+      [
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot270_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u32]_z",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.S"
       },
-      "r": {
-        "register": "Vd.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot270_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u64]_m",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "r": {
-        "register": "Vd.2S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot90_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u64]_x",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "AND"
+      ],
+      [
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot90_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u64]_z",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2S"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot90_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u8]_m",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "r": {
-        "register": "Vd.4H"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot90_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u8]_x",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x2_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "r": {
-        "register": "Vd.2S"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "AND"
+      ],
+      [
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot90_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_n_u8]_z",
     "arguments": [
-      "float16x4_t r",
-      "float16x4_t a",
-      "float16x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B"
       },
-      "r": {
-        "register": "Vd.4H"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmla_rot90_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s16]_m",
     "arguments": [
-      "float32x2_t r",
-      "float32x2_t a",
-      "float32x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "r": {
-        "register": "Vd.2S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s16]_x",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s16]_z",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s32]_m",
     "arguments": [
-      "float64x2_t r",
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -16105,214 +19509,236 @@
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s32]_x",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "r": {
-        "register": "Vd.8H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s32]_z",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x2_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.S"
       },
-      "r": {
-        "register": "Vd.4S"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s64]_m",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "r": {
-        "register": "Vd.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s64]_x",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "r": {
-        "register": "Vd.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot180_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s64]_z",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot180_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s8]_m",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot180_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s8]_x",
     "arguments": [
-      "float64x2_t r",
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2D"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -16320,214 +19746,235 @@
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot180_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_s8]_z",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B"
       },
-      "r": {
-        "register": "Vd.8H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot180_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u16]_m",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x2_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "r": {
-        "register": "Vd.4S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot180_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u16]_x",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "r": {
-        "register": "Vd.8H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot180_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u16]_z",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.H"
       },
-      "r": {
-        "register": "Vd.4S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot270_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u32]_m",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot270_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u32]_x",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot270_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u32]_z",
     "arguments": [
-      "float64x2_t r",
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2D"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -16535,214 +19982,233 @@
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot270_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u64]_m",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "r": {
-        "register": "Vd.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot270_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u64]_x",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x2_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "r": {
-        "register": "Vd.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot270_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u64]_z",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.D"
       },
-      "r": {
-        "register": "Vd.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot270_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u8]_m",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "r": {
-        "register": "Vd.4S"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot90_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u8]_x",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BIC"
+      ],
+      [
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot90_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbic[_u8]_z",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "MOVPRFX",
+        "BIC"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot90_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svbrka[_b]_m",
     "arguments": [
-      "float64x2_t r",
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t inactive",
+      "svbool_t pg",
+      "svbool_t op"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2D"
+      "inactive": {
+        "register": "Ptied.B"
+      },
+      "op": {
+        "register": "Pop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -16750,368 +20216,418 @@
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BRKA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot90_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbrka[_b]_z",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svbool_t op"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op": {
+        "register": "Pop.B"
       },
-      "r": {
-        "register": "Vd.8H"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BRKA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot90_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbrkb[_b]_m",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x2_t b",
-      "const int lane"
+      "svbool_t inactive",
+      "svbool_t pg",
+      "svbool_t op"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "inactive": {
+        "register": "Ptied.B"
       },
-      "r": {
-        "register": "Vd.4S"
+      "op": {
+        "register": "Pop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BRKB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot90_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svbrkb[_b]_z",
     "arguments": [
-      "float16x8_t r",
-      "float16x8_t a",
-      "float16x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svbool_t op"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op": {
+        "register": "Pop.B"
       },
-      "r": {
-        "register": "Vd.8H"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BRKB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcmlaq_rot90_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svbrkn[_b]_z",
     "arguments": [
-      "float32x4_t r",
-      "float32x4_t a",
-      "float32x4_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Pop1.B"
       },
-      "r": {
-        "register": "Vd.4S"
+      "op2": {
+        "register": "Ptied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCMLA"
+        "BRKN"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcnt_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svbrkpa[_b]_z",
     "arguments": [
-      "poly8x8_t a"
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CNT"
+        "BRKPA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcnt_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svbrkpb[_b]_z",
     "arguments": [
-      "int8x8_t a"
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CNT"
+        "BRKPB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcnt_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_n_s16]",
     "arguments": [
-      "uint8x8_t a"
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CNT"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcntq_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_n_s32]",
     "arguments": [
-      "poly8x16_t a"
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CNT"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcntq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_n_s64]",
     "arguments": [
-      "int8x16_t a"
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CNT"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcntq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_n_s8]",
     "arguments": [
-      "uint8x16_t a"
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "CNT"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_n_u16]",
     "arguments": [
-      "float16x4_t low",
-      "float16x4_t high"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "low": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_n_u32]",
     "arguments": [
-      "float32x2_t low",
-      "float32x2_t high"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.2S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "low": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_n_u64]",
     "arguments": [
-      "float64x1_t low",
-      "float64x1_t high"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.1D"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "low": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
@@ -17119,366 +20635,419 @@
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_p16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_n_u8]",
     "arguments": [
-      "poly16x4_t low",
-      "poly16x4_t high"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "low": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_s16]",
     "arguments": [
-      "poly64x1_t low",
-      "poly64x1_t high"
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.1D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "low": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_s32]",
     "arguments": [
-      "poly8x8_t low",
-      "poly8x8_t high"
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "low": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_s64]",
     "arguments": [
-      "int16x4_t low",
-      "int16x4_t high"
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "low": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_s8]",
     "arguments": [
-      "int32x2_t low",
-      "int32x2_t high"
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.2S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "low": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_u16]",
     "arguments": [
-      "int64x1_t low",
-      "int64x1_t high"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.1D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "low": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_u32]",
     "arguments": [
-      "int8x8_t low",
-      "int8x8_t high"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "low": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_u64]",
     "arguments": [
-      "uint16x4_t low",
-      "uint16x4_t high"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "low": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl1n[_u8]",
     "arguments": [
-      "uint32x2_t low",
-      "uint32x2_t high"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.2S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "low": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL1N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL1N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_n_s16]",
     "arguments": [
-      "uint64x1_t low",
-      "uint64x1_t high"
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.1D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "low": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcombine_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_n_s32]",
     "arguments": [
-      "uint8x8_t low",
-      "uint8x8_t high"
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "high": {
-        "register": "Vm.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "low": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP",
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_n_s64]",
     "arguments": [
-      "float32x2_t a",
-      "const int lane1",
-      "float32x2_t b",
-      "const int lane2"
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
-      },
-      "b": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
@@ -17486,36 +21055,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_n_s8]",
     "arguments": [
-      "float64x1_t a",
-      "const int lane1",
-      "float64x1_t b",
-      "const int lane2"
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "UNUSED"
-      },
-      "b": {
-        "register": "Vn.1D"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 0
+      "op2": {
+        "register": "Zop2.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 0
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
@@ -17523,36 +21090,34 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_p16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_n_u16]",
     "arguments": [
-      "poly16x4_t a",
-      "const int lane1",
-      "poly16x4_t b",
-      "const int lane2"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
-      },
-      "b": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
@@ -17560,74 +21125,69 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_n_u32]",
     "arguments": [
-      "poly64x1_t a",
-      "const int lane1",
-      "poly64x1_t b",
-      "const int lane2"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "UNUSED"
-      },
-      "b": {
-        "register": "Vn.1D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 0
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 0
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_n_u64]",
     "arguments": [
-      "poly8x8_t a",
-      "const int lane1",
-      "poly8x8_t b",
-      "const int lane2"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
-      },
-      "b": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
@@ -17635,36 +21195,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_n_u8]",
     "arguments": [
-      "int16x4_t a",
-      "const int lane1",
-      "int16x4_t b",
-      "const int lane2"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
-      },
-      "b": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
@@ -17672,36 +21230,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_s16]",
     "arguments": [
-      "int32x2_t a",
-      "const int lane1",
-      "int32x2_t b",
-      "const int lane2"
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
-      },
-      "b": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -17709,36 +21265,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_s32]",
     "arguments": [
-      "int64x1_t a",
-      "const int lane1",
-      "int64x1_t b",
-      "const int lane2"
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "UNUSED"
-      },
-      "b": {
-        "register": "Vn.1D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 0
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 0
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -17746,36 +21300,34 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_s64]",
     "arguments": [
-      "int8x8_t a",
-      "const int lane1",
-      "int8x8_t b",
-      "const int lane2"
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
-      },
-      "b": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
@@ -17783,36 +21335,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_s8]",
     "arguments": [
-      "uint16x4_t a",
-      "const int lane1",
-      "uint16x4_t b",
-      "const int lane2"
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
-      },
-      "b": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -17820,36 +21370,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_u16]",
     "arguments": [
-      "uint32x2_t a",
-      "const int lane1",
-      "uint32x2_t b",
-      "const int lane2"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
-      },
-      "b": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -17857,36 +21405,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_u32]",
     "arguments": [
-      "uint64x1_t a",
-      "const int lane1",
-      "uint64x1_t b",
-      "const int lane2"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "UNUSED"
-      },
-      "b": {
-        "register": "Vn.1D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 0
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 0
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -17894,36 +21440,34 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_lane_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_u64]",
     "arguments": [
-      "uint8x8_t a",
-      "const int lane1",
-      "uint8x8_t b",
-      "const int lane2"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
-      },
-      "b": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
@@ -17931,36 +21475,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl2n[_u8]",
     "arguments": [
-      "float32x2_t a",
-      "const int lane1",
-      "float32x4_t b",
-      "const int lane2"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
-      },
-      "b": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -17968,36 +21510,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL2N"
+      ],
+      [
+        "MOVPRFX",
+        "BSL2N"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_n_s16]",
     "arguments": [
-      "float64x1_t a",
-      "const int lane1",
-      "float64x2_t b",
-      "const int lane2"
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "UNUSED"
-      },
-      "b": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 0
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
@@ -18005,36 +21545,34 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_p16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_n_s32]",
     "arguments": [
-      "poly16x4_t a",
-      "const int lane1",
-      "poly16x8_t b",
-      "const int lane2"
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.8H"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
@@ -18042,74 +21580,69 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_n_s64]",
     "arguments": [
-      "poly64x1_t a",
-      "const int lane1",
-      "poly64x2_t b",
-      "const int lane2"
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "UNUSED"
-      },
-      "b": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 0
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_n_s8]",
     "arguments": [
-      "poly8x8_t a",
-      "const int lane1",
-      "poly8x16_t b",
-      "const int lane2"
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
-      },
-      "b": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 15
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
@@ -18117,36 +21650,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_n_u16]",
     "arguments": [
-      "int16x4_t a",
-      "const int lane1",
-      "int16x8_t b",
-      "const int lane2"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
-      },
-      "b": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "op3": {
+        "register": "Zop3.H[*]"
       }
     },
     "Architectures": [
@@ -18154,36 +21685,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_n_u32]",
     "arguments": [
-      "int32x2_t a",
-      "const int lane1",
-      "int32x4_t b",
-      "const int lane2"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
-      },
-      "b": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op3": {
+        "register": "Zop3.S[*]"
       }
     },
     "Architectures": [
@@ -18191,36 +21720,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_n_u64]",
     "arguments": [
-      "int64x1_t a",
-      "const int lane1",
-      "int64x2_t b",
-      "const int lane2"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "UNUSED"
-      },
-      "b": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 0
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op3": {
+        "register": "Zop3.D[*]"
       }
     },
     "Architectures": [
@@ -18228,36 +21755,34 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_n_u8]",
     "arguments": [
-      "int8x8_t a",
-      "const int lane1",
-      "int8x16_t b",
-      "const int lane2"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
-      },
-      "b": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 15
+      "op3": {
+        "register": "Zop3.B[*]"
       }
     },
     "Architectures": [
@@ -18265,36 +21790,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_s16]",
     "arguments": [
-      "uint16x4_t a",
-      "const int lane1",
-      "uint16x8_t b",
-      "const int lane2"
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
-      },
-      "b": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -18302,36 +21825,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_s32]",
     "arguments": [
-      "uint32x2_t a",
-      "const int lane1",
-      "uint32x4_t b",
-      "const int lane2"
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
-      },
-      "b": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -18339,36 +21860,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_s64]",
     "arguments": [
-      "uint64x1_t a",
-      "const int lane1",
-      "uint64x2_t b",
-      "const int lane2"
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "UNUSED"
-      },
-      "b": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 0
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
@@ -18376,36 +21895,34 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopy_laneq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_s8]",
     "arguments": [
-      "uint8x8_t a",
-      "const int lane1",
-      "uint8x16_t b",
-      "const int lane2"
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8B"
-      },
-      "b": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 15
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -18413,36 +21930,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_u16]",
     "arguments": [
-      "float32x4_t a",
-      "const int lane1",
-      "float32x2_t b",
-      "const int lane2"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
-      },
-      "b": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -18450,36 +21965,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_u32]",
     "arguments": [
-      "float64x2_t a",
-      "const int lane1",
-      "float64x1_t b",
-      "const int lane2"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
-        "register": "Vn.1D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 0
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -18487,36 +22000,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_p16",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_u64]",
     "arguments": [
-      "poly16x8_t a",
-      "const int lane1",
-      "poly16x4_t b",
-      "const int lane2"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
-      },
-      "b": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
@@ -18524,74 +22035,70 @@
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svbsl[_u8]",
     "arguments": [
-      "poly64x2_t a",
-      "const int lane1",
-      "poly64x1_t b",
-      "const int lane2"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
-        "register": "Vn.1D"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 0
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "BSL"
+      ],
+      [
+        "MOVPRFX",
+        "BSL"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcadd[_f16]_m",
     "arguments": [
-      "poly8x16_t a",
-      "const int lane1",
-      "poly8x8_t b",
-      "const int lane2"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.8B"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 15
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -18599,36 +22106,35 @@
     ],
     "instructions": [
       [
-        "INS"
+        "FCADD"
+      ],
+      [
+        "MOVPRFX",
+        "FCADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcadd[_f16]_x",
     "arguments": [
-      "int16x8_t a",
-      "const int lane1",
-      "int16x4_t b",
-      "const int lane2"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
-      },
-      "b": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -18636,36 +22142,35 @@
     ],
     "instructions": [
       [
-        "INS"
+        "FCADD"
+      ],
+      [
+        "MOVPRFX",
+        "FCADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcadd[_f16]_z",
     "arguments": [
-      "int32x4_t a",
-      "const int lane1",
-      "int32x2_t b",
-      "const int lane2"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vn.2S"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -18673,36 +22178,32 @@
     ],
     "instructions": [
       [
-        "INS"
+        "MOVPRFX",
+        "FCADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcadd[_f32]_m",
     "arguments": [
-      "int64x2_t a",
-      "const int lane1",
-      "int64x1_t b",
-      "const int lane2"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "b": {
-        "register": "Vn.1D"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 0
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -18710,36 +22211,35 @@
     ],
     "instructions": [
       [
-        "INS"
+        "FCADD"
+      ],
+      [
+        "MOVPRFX",
+        "FCADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcadd[_f32]_x",
     "arguments": [
-      "int8x16_t a",
-      "const int lane1",
-      "int8x8_t b",
-      "const int lane2"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
-      },
-      "b": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 15
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -18747,36 +22247,35 @@
     ],
     "instructions": [
       [
-        "INS"
+        "FCADD"
+      ],
+      [
+        "MOVPRFX",
+        "FCADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcadd[_f32]_z",
     "arguments": [
-      "uint16x8_t a",
-      "const int lane1",
-      "uint16x4_t b",
-      "const int lane2"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vn.4H"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -18784,36 +22283,32 @@
     ],
     "instructions": [
       [
-        "INS"
+        "MOVPRFX",
+        "FCADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcadd[_f64]_m",
     "arguments": [
-      "uint32x4_t a",
-      "const int lane1",
-      "uint32x2_t b",
-      "const int lane2"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "b": {
-        "register": "Vn.2S"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -18821,36 +22316,35 @@
     ],
     "instructions": [
       [
-        "INS"
+        "FCADD"
+      ],
+      [
+        "MOVPRFX",
+        "FCADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcadd[_f64]_x",
     "arguments": [
-      "uint64x2_t a",
-      "const int lane1",
-      "uint64x1_t b",
-      "const int lane2"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
-        "register": "Vn.1D"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 0
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -18858,36 +22352,35 @@
     ],
     "instructions": [
       [
-        "INS"
+        "FCADD"
+      ],
+      [
+        "MOVPRFX",
+        "FCADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_lane_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcadd[_f64]_z",
     "arguments": [
-      "uint8x16_t a",
-      "const int lane1",
-      "uint8x8_t b",
-      "const int lane2"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "b": {
-        "register": "Vn.8B"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 15
+      "op2": {
+        "register": "Zop2.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -18895,36 +22388,28 @@
     ],
     "instructions": [
       [
-        "INS"
+        "MOVPRFX",
+        "FCADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svcadd[_s16]",
     "arguments": [
-      "float32x4_t a",
-      "const int lane1",
-      "float32x4_t b",
-      "const int lane2"
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "b": {
-        "register": "Vn.4S"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -18932,36 +22417,31 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CADD"
+      ],
+      [
+        "MOVPRFX",
+        "CADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svcadd[_s32]",
     "arguments": [
-      "float64x2_t a",
-      "const int lane1",
-      "float64x2_t b",
-      "const int lane2"
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -18969,36 +22449,31 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CADD"
+      ],
+      [
+        "MOVPRFX",
+        "CADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_p16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcadd[_s64]",
     "arguments": [
-      "poly16x8_t a",
-      "const int lane1",
-      "poly16x8_t b",
-      "const int lane2"
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
-      },
-      "b": {
-        "register": "Vn.8H"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.D"
       }
     },
     "Architectures": [
@@ -19006,74 +22481,63 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CADD"
+      ],
+      [
+        "MOVPRFX",
+        "CADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svcadd[_s8]",
     "arguments": [
-      "poly64x2_t a",
-      "const int lane1",
-      "poly64x2_t b",
-      "const int lane2"
+      "svint8_t op1",
+      "svint8_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
-        "register": "Vn.2D"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "CADD"
+      ],
+      [
+        "MOVPRFX",
+        "CADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svcadd[_u16]",
     "arguments": [
-      "poly8x16_t a",
-      "const int lane1",
-      "poly8x16_t b",
-      "const int lane2"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
-      },
-      "b": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 15
-      },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 15
+      "op2": {
+        "register": "Zop2.H"
       }
     },
     "Architectures": [
@@ -19081,36 +22545,31 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CADD"
+      ],
+      [
+        "MOVPRFX",
+        "CADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcadd[_u32]",
     "arguments": [
-      "int16x8_t a",
-      "const int lane1",
-      "int16x8_t b",
-      "const int lane2"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
-      },
-      "b": {
-        "register": "Vn.8H"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.S"
       }
     },
     "Architectures": [
@@ -19118,36 +22577,31 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CADD"
+      ],
+      [
+        "MOVPRFX",
+        "CADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svcadd[_u64]",
     "arguments": [
-      "int32x4_t a",
-      "const int lane1",
-      "int32x4_t b",
-      "const int lane2"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
-      },
-      "b": {
-        "register": "Vn.4S"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.D"
       }
     },
     "Architectures": [
@@ -19155,36 +22609,31 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CADD"
+      ],
+      [
+        "MOVPRFX",
+        "CADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svcadd[_u8]",
     "arguments": [
-      "int64x2_t a",
-      "const int lane1",
-      "int64x2_t b",
-      "const int lane2"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
-        "register": "Vn.2D"
-      },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.B"
       }
     },
     "Architectures": [
@@ -19192,36 +22641,35 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CADD"
+      ],
+      [
+        "MOVPRFX",
+        "CADD"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svcdot[_s32]",
     "arguments": [
-      "int8x16_t a",
-      "const int lane1",
-      "int8x16_t b",
-      "const int lane2"
+      "svint32_t op1",
+      "svint8_t op2",
+      "svint8_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
-      },
-      "b": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 15
+      "op2": {
+        "register": "Zop2.B"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 15
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -19229,36 +22677,35 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CDOT"
+      ],
+      [
+        "MOVPRFX",
+        "CDOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_u16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcdot[_s64]",
     "arguments": [
-      "uint16x8_t a",
-      "const int lane1",
-      "uint16x8_t b",
-      "const int lane2"
+      "svint64_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.8H"
-      },
-      "b": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 7
+      "op2": {
+        "register": "Zop2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 7
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -19266,36 +22713,40 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CDOT"
+      ],
+      [
+        "MOVPRFX",
+        "CDOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_u32",
+    "SIMD_ISA": "SVE2",
+    "name": "svcdot_lane[_s32]",
     "arguments": [
-      "uint32x4_t a",
-      "const int lane1",
-      "uint32x4_t b",
-      "const int lane2"
+      "svint32_t op1",
+      "svint8_t op2",
+      "svint8_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4S"
-      },
-      "b": {
-        "register": "Vn.4S"
-      },
-      "lane1": {
+      "imm_index": {
         "minimum": 0,
         "maximum": 3
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -19303,36 +22754,40 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CDOT"
+      ],
+      [
+        "MOVPRFX",
+        "CDOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_u64",
+    "SIMD_ISA": "SVE2",
+    "name": "svcdot_lane[_s64]",
     "arguments": [
-      "uint64x2_t a",
-      "const int lane1",
-      "uint64x2_t b",
-      "const int lane2"
+      "svint64_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
-        "register": "Vn.2D"
-      },
-      "lane1": {
+      "imm_index": {
         "minimum": 0,
         "maximum": 1
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -19340,36 +22795,34 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CDOT"
+      ],
+      [
+        "MOVPRFX",
+        "CDOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcopyq_laneq_u8",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_f16]",
     "arguments": [
-      "uint8x16_t a",
-      "const int lane1",
-      "uint8x16_t b",
-      "const int lane2"
+      "svfloat16_t op",
+      "svfloat16_t min",
+      "svfloat16_t max"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.16B"
-      },
-      "b": {
-        "register": "Vn.16B"
+      "max": {
+        "register": "Zreg3.H"
       },
-      "lane1": {
-        "minimum": 0,
-        "maximum": 15
+      "min": {
+        "register": "Zreg2.H"
       },
-      "lane2": {
-        "minimum": 0,
-        "maximum": 15
+      "op": {
+        "register": "Zreg1.H"
       }
     },
     "Architectures": [
@@ -19377,72 +22830,92 @@
     ],
     "instructions": [
       [
-        "INS"
+        "FCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_f32]",
     "arguments": [
-      "uint64_t a"
+      "svfloat32_t op",
+      "svfloat32_t min",
+      "svfloat32_t max"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.S"
+      },
+      "min": {
+        "register": "Zreg2.S"
+      },
+      "op": {
+        "register": "Zreg1.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "FCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_f64]",
     "arguments": [
-      "uint64_t a"
+      "svfloat64_t op",
+      "svfloat64_t min",
+      "svfloat64_t max"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.D"
+      },
+      "min": {
+        "register": "Zreg2.D"
+      },
+      "op": {
+        "register": "Zreg1.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "FCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_s16]",
     "arguments": [
-      "uint64_t a"
+      "svint16_t op",
+      "svint16_t min",
+      "svint16_t max"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.H"
+      },
+      "min": {
+        "register": "Zreg2.H"
+      },
+      "op": {
+        "register": "Zreg1.H"
       }
     },
     "Architectures": [
@@ -19450,394 +22923,522 @@
     ],
     "instructions": [
       [
-        "INS"
+        "SCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_p16",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_s32]",
     "arguments": [
-      "uint64_t a"
+      "svint32_t op",
+      "svint32_t min",
+      "svint32_t max"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.S"
+      },
+      "min": {
+        "register": "Zreg2.S"
+      },
+      "op": {
+        "register": "Zreg1.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "SCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_p64",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_s64]",
     "arguments": [
-      "uint64_t a"
+      "svint64_t op",
+      "svint64_t min",
+      "svint64_t max"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.D"
+      },
+      "min": {
+        "register": "Zreg2.D"
+      },
+      "op": {
+        "register": "Zreg1.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "SCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_p8",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_s8]",
     "arguments": [
-      "uint64_t a"
+      "svint8_t op",
+      "svint8_t min",
+      "svint8_t max"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.B"
+      },
+      "min": {
+        "register": "Zreg2.B"
+      },
+      "op": {
+        "register": "Zreg1.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "SCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_s16",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_u16]",
     "arguments": [
-      "uint64_t a"
+      "svuint16_t op",
+      "svuint16_t min",
+      "svuint16_t max"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.H"
+      },
+      "min": {
+        "register": "Zreg2.H"
+      },
+      "op": {
+        "register": "Zreg1.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "UCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_s32",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_u32]",
     "arguments": [
-      "uint64_t a"
+      "svuint32_t op",
+      "svuint32_t min",
+      "svuint32_t max"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.S"
+      },
+      "min": {
+        "register": "Zreg2.S"
+      },
+      "op": {
+        "register": "Zreg1.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "UCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_s64",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_u64]",
     "arguments": [
-      "uint64_t a"
+      "svuint64_t op",
+      "svuint64_t min",
+      "svuint64_t max"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.D"
+      },
+      "min": {
+        "register": "Zreg2.D"
+      },
+      "op": {
+        "register": "Zreg1.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "UCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_s8",
+    "SIMD_ISA": "SVE2",
+    "name": "svclamp[_u8]",
     "arguments": [
-      "uint64_t a"
+      "svuint8_t op",
+      "svuint8_t min",
+      "svuint8_t max"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "max": {
+        "register": "Zreg3.B"
+      },
+      "min": {
+        "register": "Zreg2.B"
+      },
+      "op": {
+        "register": "Zreg1.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "UCLAMP"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_f16]",
     "arguments": [
-      "uint64_t a"
+      "svbool_t pg",
+      "svfloat16_t fallback",
+      "svfloat16_t data"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Zfallback.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_f32]",
     "arguments": [
-      "uint64_t a"
+      "svbool_t pg",
+      "svfloat32_t fallback",
+      "svfloat32_t data"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Zfallback.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_f64]",
     "arguments": [
-      "uint64_t a"
+      "svbool_t pg",
+      "svfloat64_t fallback",
+      "svfloat64_t data"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Zfallback.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcreate_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_f16]",
     "arguments": [
-      "uint64_t a"
+      "svbool_t pg",
+      "float16_t fallback",
+      "svfloat16_t data"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "float16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Xn"
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Htied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f16_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_f32]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "float32_t fallback",
+      "svfloat32_t data"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "float32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Stied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTN"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f16_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_f64]",
     "arguments": [
-      "int16x4_t a"
+      "svbool_t pg",
+      "float64_t fallback",
+      "svfloat64_t data"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "float64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Dtied|Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f16_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_s16]",
     "arguments": [
-      "uint16x4_t a"
+      "svbool_t pg",
+      "int16_t fallback",
+      "svint16_t data"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "int16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Htied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_s32]",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "int32_t fallback",
+      "svint32_t data"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "int32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Stied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTL"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f32_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_s64]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "int64_t fallback",
+      "svint64_t data"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "int64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Dtied|Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -19845,72 +23446,101 @@
     ],
     "instructions": [
       [
-        "FCVTN"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f32_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_s8]",
     "arguments": [
-      "int32x2_t a"
+      "svbool_t pg",
+      "int8_t fallback",
+      "svint8_t data"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "int8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.B"
+      },
+      "fallback": {
+        "register": "Btied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f32_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_u16]",
     "arguments": [
-      "uint32x2_t a"
+      "svbool_t pg",
+      "uint16_t fallback",
+      "svuint16_t data"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "uint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Htied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f64_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_u32]",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "uint32_t fallback",
+      "svuint32_t data"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "uint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Stied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -19918,22 +23548,33 @@
     ],
     "instructions": [
       [
-        "FCVTL"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f64_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_u64]",
     "arguments": [
-      "int64x1_t a"
+      "svbool_t pg",
+      "uint64_t fallback",
+      "svuint64_t data"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "uint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Dtied|Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -19941,22 +23582,33 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_f64_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_n_u8]",
     "arguments": [
-      "uint64x1_t a"
+      "svbool_t pg",
+      "uint8_t fallback",
+      "svuint8_t data"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "uint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.B"
+      },
+      "fallback": {
+        "register": "Btied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -19964,26 +23616,33 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLASTA"
+      ],
+      [
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_high_f16_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_s16]",
     "arguments": [
-      "float16x4_t r",
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint16_t fallback",
+      "svint16_t data"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "data": {
+        "register": "Zdata.H"
       },
-      "r": {
-        "register": "Vd.4H"
+      "fallback": {
+        "register": "Zfallback.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -19991,22 +23650,34 @@
     ],
     "instructions": [
       [
-        "FCVTN2"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_high_f32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_s32]",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svint32_t fallback",
+      "svint32_t data"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Zfallback.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -20014,26 +23685,34 @@
     ],
     "instructions": [
       [
-        "FCVTL2"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_high_f32_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_s64]",
     "arguments": [
-      "float32x2_t r",
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint64_t fallback",
+      "svint64_t data"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "data": {
+        "register": "Zdata.D"
       },
-      "r": {
-        "register": "Vd.2S"
+      "fallback": {
+        "register": "Zfallback.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -20041,22 +23720,34 @@
     ],
     "instructions": [
       [
-        "FCVTN2"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_high_f64_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_s8]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint8_t fallback",
+      "svint8_t data"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "data": {
+        "register": "Zdata.B"
+      },
+      "fallback": {
+        "register": "Zfallback.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -20064,145 +23755,174 @@
     ],
     "instructions": [
       [
-        "FCVTL2"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_f16_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_u16]",
     "arguments": [
-      "int16x4_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint16_t fallback",
+      "svuint16_t data"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "fallback": {
+        "register": "Zfallback.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_f16_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_u32]",
     "arguments": [
-      "uint16x4_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint32_t fallback",
+      "svuint32_t data"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "fallback": {
+        "register": "Zfallback.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_f32_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_u64]",
     "arguments": [
-      "int32x2_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint64_t fallback",
+      "svuint64_t data"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "fallback": {
+        "register": "Zfallback.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_f32_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svclasta[_u8]",
     "arguments": [
-      "uint32x2_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint8_t fallback",
+      "svuint8_t data"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.B"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "fallback": {
+        "register": "Zfallback.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLASTA"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_f64_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_f16]",
     "arguments": [
-      "int64x1_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat16_t fallback",
+      "svfloat16_t data"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "fallback": {
+        "register": "Zfallback.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -20210,27 +23930,34 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_f64_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_f32]",
     "arguments": [
-      "uint64x1_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat32_t fallback",
+      "svfloat32_t data"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "fallback": {
+        "register": "Zfallback.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -20238,86 +23965,103 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_f64]",
     "arguments": [
-      "float16x4_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat64_t fallback",
+      "svfloat64_t data"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "fallback": {
+        "register": "Zfallback.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_f16]",
     "arguments": [
-      "float32x2_t a",
-      "const int n"
+      "svbool_t pg",
+      "float16_t fallback",
+      "svfloat16_t data"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "float16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "fallback": {
+        "register": "Htied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_f32]",
     "arguments": [
-      "float64x1_t a",
-      "const int n"
+      "svbool_t pg",
+      "float32_t fallback",
+      "svfloat32_t data"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "float32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "fallback": {
+        "register": "Stied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -20325,86 +24069,101 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_f64]",
     "arguments": [
-      "float16x4_t a",
-      "const int n"
+      "svbool_t pg",
+      "float64_t fallback",
+      "svfloat64_t data"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "float64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "fallback": {
+        "register": "Dtied|Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_s16]",
     "arguments": [
-      "float32x2_t a",
-      "const int n"
+      "svbool_t pg",
+      "int16_t fallback",
+      "svint16_t data"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "int16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "fallback": {
+        "register": "Htied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_n_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_s32]",
     "arguments": [
-      "float64x1_t a",
-      "const int n"
+      "svbool_t pg",
+      "int32_t fallback",
+      "svint32_t data"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "int32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "fallback": {
+        "register": "Stied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -20412,71 +24171,101 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_s64]",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "int64_t fallback",
+      "svint64_t data"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "int64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Dtied|Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_s8]",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "int8_t fallback",
+      "svint8_t data"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "int8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.B"
+      },
+      "fallback": {
+        "register": "Btied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_u16]",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "uint16_t fallback",
+      "svuint16_t data"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "uint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Htied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -20484,71 +24273,101 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_u32]",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "uint32_t fallback",
+      "svuint32_t data"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "uint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Stied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_u64]",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "uint64_t fallback",
+      "svuint64_t data"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "uint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Dtied|Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvt_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_n_u8]",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "uint8_t fallback",
+      "svuint8_t data"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "uint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.B"
+      },
+      "fallback": {
+        "register": "Btied|Wtied"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -20556,70 +24375,103 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CLASTB"
+      ],
+      [
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvta_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_s16]",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "svint16_t fallback",
+      "svint16_t data"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Zfallback.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvta_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_s32]",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svint32_t fallback",
+      "svint32_t data"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Zfallback.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvta_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_s64]",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "svint64_t fallback",
+      "svint64_t data"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Zfallback.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -20627,70 +24479,104 @@
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvta_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_s8]",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "svint8_t fallback",
+      "svint8_t data"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "data": {
+        "register": "Zdata.B"
+      },
+      "fallback": {
+        "register": "Zfallback.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvta_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_u16]",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svuint16_t fallback",
+      "svuint16_t data"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Zfallback.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvta_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_u32]",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "svuint32_t fallback",
+      "svuint32_t data"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Zfallback.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -20698,22 +24584,34 @@
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtad_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_u64]",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svuint64_t fallback",
+      "svuint64_t data"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Zfallback.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -20721,22 +24619,34 @@
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtad_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclastb[_u8]",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svuint8_t fallback",
+      "svuint8_t data"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "data": {
+        "register": "Zdata.B"
+      },
+      "fallback": {
+        "register": "Zfallback.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -20744,22 +24654,34 @@
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "CLASTB"
+      ],
+      [
+        "MOVPRFX",
+        "CLASTB"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtah_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s16]_m",
     "arguments": [
-      "float16_t a"
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -20767,46 +24689,61 @@
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "CLS"
+      ],
+      [
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtah_s32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s16]_x",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "CLS"
+      ],
+      [
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtah_s64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s16]_z",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -20814,22 +24751,31 @@
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtah_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s32]_m",
     "arguments": [
-      "float16_t a"
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -20837,46 +24783,61 @@
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "CLS"
+      ],
+      [
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtah_u32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s32]_x",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "CLS"
+      ],
+      [
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtah_u64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s32]_z",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -20884,70 +24845,93 @@
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtaq_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s64]_m",
     "arguments": [
-      "float16x8_t a"
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "CLS"
+      ],
+      [
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtaq_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s64]_x",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "CLS"
+      ],
+      [
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtaq_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s64]_z",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -20955,70 +24939,93 @@
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtaq_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s8]_m",
     "arguments": [
-      "float16x8_t a"
+      "svuint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "CLS"
+      ],
+      [
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtaq_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s8]_x",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "CLS"
+      ],
+      [
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtaq_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcls[_s8]_z",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -21026,22 +25033,31 @@
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "MOVPRFX",
+        "CLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtas_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s16]_m",
     "arguments": [
-      "float32_t a"
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -21049,22 +25065,30 @@
     ],
     "instructions": [
       [
-        "FCVTAS"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtas_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s16]_x",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -21072,22 +25096,30 @@
     ],
     "instructions": [
       [
-        "FCVTAU"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtd_f64_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s16]_z",
     "arguments": [
-      "int64_t a"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -21095,22 +25127,31 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtd_f64_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s32]_m",
     "arguments": [
-      "uint64_t a"
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -21118,27 +25159,30 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtd_n_f64_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s32]_x",
     "arguments": [
-      "int64_t a",
-      "const int n"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.S|Ztied.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -21146,27 +25190,30 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtd_n_f64_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s32]_z",
     "arguments": [
-      "uint64_t a",
-      "const int n"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -21174,27 +25221,31 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtd_n_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s64]_m",
     "arguments": [
-      "float64_t a",
-      "const int n"
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -21202,27 +25253,30 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtd_n_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s64]_x",
     "arguments": [
-      "float64_t a",
-      "const int n"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.D|Ztied.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -21230,22 +25284,30 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtd_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s64]_z",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -21253,22 +25315,31 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtd_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s8]_m",
     "arguments": [
-      "float64_t a"
+      "svuint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -21276,22 +25347,30 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_f16_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s8]_x",
     "arguments": [
-      "int16_t a"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -21299,46 +25378,62 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_f16_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_s8]_z",
     "arguments": [
-      "int32_t a"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_f16_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u16]_m",
     "arguments": [
-      "int64_t a"
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -21346,22 +25441,30 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_f16_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u16]_x",
     "arguments": [
-      "uint16_t a"
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -21369,46 +25472,62 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_f16_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u16]_z",
     "arguments": [
-      "uint32_t a"
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_f16_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u32]_m",
     "arguments": [
-      "uint64_t a"
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -21416,27 +25535,30 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_f16_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u32]_x",
     "arguments": [
-      "int16_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.S|Ztied.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -21444,56 +25566,62 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_f16_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u32]_z",
     "arguments": [
-      "int32_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_f16_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u64]_m",
     "arguments": [
-      "int64_t a",
-      "const int n"
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -21501,27 +25629,30 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_f16_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u64]_x",
     "arguments": [
-      "uint16_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.D|Ztied.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -21529,56 +25660,62 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_f16_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u64]_z",
     "arguments": [
-      "uint32_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_f16_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u8]_m",
     "arguments": [
-      "uint64_t a",
-      "const int n"
+      "svuint8_t inactive",
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -21586,27 +25723,30 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u8]_x",
     "arguments": [
-      "float16_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.B|Ztied.B"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -21614,56 +25754,67 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CLZ"
+      ],
+      [
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_s32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svclz[_u8]_z",
     "arguments": [
-      "float16_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op": {
+        "register": "Zop.B"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "MOVPRFX",
+        "CLZ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_s64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla[_f16]_m",
     "arguments": [
-      "float16_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -21671,27 +25822,39 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "FCMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla[_f16]_x",
     "arguments": [
-      "float16_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -21699,56 +25862,76 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "FCMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_u32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla[_f16]_z",
     "arguments": [
-      "float16_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_n_u64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla[_f32]_m",
     "arguments": [
-      "float16_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -21756,22 +25939,39 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "FCMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla[_f32]_x",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -21779,46 +25979,76 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "FCMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_s32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla[_f32]_z",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_s64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla[_f64]_m",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -21826,22 +26056,39 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "FCMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla[_f64]_x",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -21849,46 +26096,72 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "FCMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_u32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla[_f64]_z",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svfloat64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvth_u64_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla[_s16]",
     "arguments": [
-      "float16_t a"
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -21896,70 +26169,107 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtm_s16_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla[_s32]",
     "arguments": [
-      "float16x4_t a"
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtm_s32_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla[_s64]",
     "arguments": [
-      "float32x2_t a"
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtm_s64_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla[_s8]",
     "arguments": [
-      "float64x1_t a"
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -21967,70 +26277,107 @@
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtm_u16_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla[_u16]",
     "arguments": [
-      "float16x4_t a"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtm_u32_f32",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla[_u32]",
     "arguments": [
-      "float32x2_t a"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtm_u64_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla[_u64]",
     "arguments": [
-      "float64x1_t a"
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
       }
     },
     "Architectures": [
@@ -22038,22 +26385,35 @@
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmd_s64_f64",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla[_u8]",
     "arguments": [
-      "float64_t a"
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
       }
     },
     "Architectures": [
@@ -22061,22 +26421,40 @@
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmd_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla_lane[_f16]",
     "arguments": [
-      "float64_t a"
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svfloat16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -22084,22 +26462,40 @@
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "FCMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmh_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmla_lane[_f32]",
     "arguments": [
-      "float16_t a"
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svfloat32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -22107,46 +26503,81 @@
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "FCMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FCMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmh_s32_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla_lane[_s16]",
     "arguments": [
-      "float16_t a"
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmh_s64_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla_lane[_s32]",
     "arguments": [
-      "float16_t a"
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
@@ -22154,22 +26585,40 @@
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmh_u16_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla_lane[_u16]",
     "arguments": [
-      "float16_t a"
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
       }
     },
     "Architectures": [
@@ -22177,46 +26626,75 @@
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmh_u32_f16",
+    "SIMD_ISA": "SVE2",
+    "name": "svcmla_lane[_u32]",
     "arguments": [
-      "float16_t a"
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "CMLA"
+      ],
+      [
+        "MOVPRFX",
+        "CMLA"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmh_u64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_f16]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -22224,70 +26702,92 @@
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "FCMEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmq_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_f32]",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "FCMEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmq_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_f64]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "FCMEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmq_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_f16]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -22295,70 +26795,101 @@
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "FCMEQ"
+      ],
+      [
+        "FCMEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmq_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_f32]",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "FCMEQ"
+      ],
+      [
+        "FCMEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmq_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_f64]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "FCMEQ"
+      ],
+      [
+        "FCMEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtmq_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_s16]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -22366,22 +26897,33 @@
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtms_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_s32]",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -22389,22 +26931,33 @@
     ],
     "instructions": [
       [
-        "FCVTMS"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtms_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_s64]",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -22412,70 +26965,101 @@
     ],
     "instructions": [
       [
-        "FCVTMU"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtn_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_s8]",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtn_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_u16]",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtn_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_u32]",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -22483,70 +27067,101 @@
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtn_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_u64]",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtn_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_n_u8]",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtn_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_s16]",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -22554,22 +27169,30 @@
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnd_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_s32]",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -22577,22 +27200,30 @@
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnd_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_s64]",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -22600,22 +27231,30 @@
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnh_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_s8]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -22623,46 +27262,61 @@
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnh_s32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_u16]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnh_s64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_u32]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -22670,22 +27324,30 @@
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnh_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_u64]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -22693,46 +27355,61 @@
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnh_u32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq[_u8]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnh_u64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq_wide[_n_s16]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -22740,70 +27417,101 @@
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnq_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq_wide[_n_s32]",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnq_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq_wide[_n_s8]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
+      ],
+      [
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnq_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq_wide[_s16]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -22811,70 +27519,92 @@
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnq_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq_wide[_s32]",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnq_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpeq_wide[_s8]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "CMPEQ"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtnq_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_f16]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -22882,22 +27612,30 @@
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtns_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_f32]",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -22905,22 +27643,30 @@
     ],
     "instructions": [
       [
-        "FCVTNS"
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtns_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_f64]",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -22928,70 +27674,98 @@
     ],
     "instructions": [
       [
-        "FCVTNU"
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtp_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_f16]",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "FCMGE"
+      ],
+      [
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtp_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_f32]",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "FCMGE"
+      ],
+      [
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtp_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_f64]",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -22999,70 +27773,101 @@
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "FCMGE"
+      ],
+      [
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtp_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_s16]",
     "arguments": [
-      "float16x4_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPGE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtp_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_s32]",
     "arguments": [
-      "float32x2_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPGE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtp_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_s64]",
     "arguments": [
-      "float64x1_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -23070,22 +27875,33 @@
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPGE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtpd_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_s8]",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -23093,22 +27909,33 @@
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "CMPGE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtpd_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_u16]",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -23116,22 +27943,33 @@
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPHS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtph_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_u32]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -23139,46 +27977,67 @@
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "CMPHS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtph_s32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_u64]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "CMPHS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtph_s64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_n_u8]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -23186,22 +28045,33 @@
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "CMPHS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtph_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_s16]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -23209,46 +28079,61 @@
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtph_u32_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_s32]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtph_u64_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_s64]",
     "arguments": [
-      "float16_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -23256,70 +28141,92 @@
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtpq_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_s8]",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtpq_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_u16]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtpq_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_u32]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -23327,70 +28234,92 @@
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtpq_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_u64]",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtpq_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge[_u8]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtpq_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_n_s16]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -23398,22 +28327,33 @@
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPGE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtps_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_n_s32]",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -23421,22 +28361,33 @@
     ],
     "instructions": [
       [
-        "FCVTPS"
+        "CMPGE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtps_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_n_s8]",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -23444,120 +28395,166 @@
     ],
     "instructions": [
       [
-        "FCVTPU"
+        "CMPGE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_f16_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_n_u16]",
     "arguments": [
-      "int16x8_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CMPHS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_f16_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_n_u32]",
     "arguments": [
-      "uint16x8_t a"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CMPHS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_f32_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_n_u8]",
     "arguments": [
-      "int32x4_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CMPHS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_f32_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_s16]",
     "arguments": [
-      "uint32x4_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_f64_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_s32]",
     "arguments": [
-      "int64x2_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -23565,22 +28562,30 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_f64_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_s8]",
     "arguments": [
-      "uint64x2_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -23588,145 +28593,154 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_f16_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_u16]",
     "arguments": [
-      "int16x8_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_f16_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_u32]",
     "arguments": [
-      "uint16x8_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_f32_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpge_wide[_u8]",
     "arguments": [
-      "int32x4_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_f32_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_f16]",
     "arguments": [
-      "uint32x4_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UCVTF"
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_f64_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_f32]",
     "arguments": [
-      "int64x2_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -23734,27 +28748,30 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_f64_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_f64]",
     "arguments": [
-      "uint64x2_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -23762,86 +28779,98 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_f16]",
     "arguments": [
-      "float16x8_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "FCMGT"
+      ],
+      [
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_f32]",
     "arguments": [
-      "float32x4_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "FCMGT"
+      ],
+      [
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_f64]",
     "arguments": [
-      "float64x2_t a",
-      "const int n"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -23849,86 +28878,101 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "FCMGT"
+      ],
+      [
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_s16]",
     "arguments": [
-      "float16x8_t a",
-      "const int n"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 16
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CMPGT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_s32]",
     "arguments": [
-      "float32x4_t a",
-      "const int n"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CMPGT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_n_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_s64]",
     "arguments": [
-      "float64x2_t a",
-      "const int n"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 64
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -23936,71 +28980,101 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CMPGT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_s16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_s8]",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CMPGT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_u16]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CMPHI"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_s64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_u32]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -24008,71 +29082,101 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CMPHI"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_u16_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_u64]",
     "arguments": [
-      "float16x8_t a"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CMPHI"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_n_u8]",
     "arguments": [
-      "float32x4_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CMPHI"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtq_u64_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_s16]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -24080,22 +29184,30 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvts_f32_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_s32]",
     "arguments": [
-      "int32_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -24103,22 +29215,30 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvts_f32_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_s64]",
     "arguments": [
-      "uint32_t a"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -24126,27 +29246,30 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvts_n_f32_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_s8]",
     "arguments": [
-      "int32_t a",
-      "const int n"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -24154,27 +29277,30 @@
     ],
     "instructions": [
       [
-        "SCVTF"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvts_n_f32_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_u16]",
     "arguments": [
-      "uint32_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -24182,27 +29308,30 @@
     ],
     "instructions": [
       [
-        "UCVTF"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvts_n_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_u32]",
     "arguments": [
-      "float32_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -24210,27 +29339,30 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvts_n_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_u64]",
     "arguments": [
-      "float32_t a",
-      "const int n"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "n": {
-        "minimum": 1,
-        "maximum": 32
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -24238,22 +29370,30 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvts_s32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt[_u8]",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -24261,22 +29401,30 @@
     ],
     "instructions": [
       [
-        "FCVTZS"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvts_u32_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_n_s16]",
     "arguments": [
-      "float32_t a"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Sn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -24284,22 +29432,33 @@
     ],
     "instructions": [
       [
-        "FCVTZU"
+        "CMPGT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtx_f32_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_n_s32]",
     "arguments": [
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -24307,26 +29466,33 @@
     ],
     "instructions": [
       [
-        "FCVTXN"
+        "CMPGT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtx_high_f32_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_n_s8]",
     "arguments": [
-      "float32x2_t r",
-      "float64x2_t a"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "r": {
-        "register": "Vd.2S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -24334,22 +29500,33 @@
     ],
     "instructions": [
       [
-        "FCVTXN2"
+        "CMPGT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vcvtxd_f32_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_n_u16]",
     "arguments": [
-      "float64_t a"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -24357,26 +29534,33 @@
     ],
     "instructions": [
       [
-        "FCVTXN"
+        "CMPHI"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdiv_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_n_u32]",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4H"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -24384,26 +29568,33 @@
     ],
     "instructions": [
       [
-        "FDIV"
+        "CMPHI"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdiv_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_n_u8]",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.2S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -24411,26 +29602,33 @@
     ],
     "instructions": [
       [
-        "FDIV"
+        "CMPHI"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdiv_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_s16]",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dn"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Dm"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -24438,54 +29636,61 @@
     ],
     "instructions": [
       [
-        "FDIV"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdivh_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_s32]",
     "arguments": [
-      "float16_t a",
-      "float16_t b"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hn"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Hm"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FDIV"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdivq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_s8]",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -24493,26 +29698,30 @@
     ],
     "instructions": [
       [
-        "FDIV"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdivq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_u16]",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -24520,26 +29729,30 @@
     ],
     "instructions": [
       [
-        "FDIV"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdivq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_u32]",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.2D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -24547,109 +29760,92 @@
     ],
     "instructions": [
       [
-        "FDIV"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdot_lane_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpgt_wide[_u8]",
     "arguments": [
-      "int32x2_t r",
-      "int8x8_t a",
-      "int8x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.4B"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.D"
       },
-      "r": {
-        "register": "Vd.2S"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SDOT"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdot_lane_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_f16]",
     "arguments": [
-      "uint32x2_t r",
-      "uint8x8_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.4B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.H"
       },
-      "r": {
-        "register": "Vd.2S"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UDOT"
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdot_laneq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_f32]",
     "arguments": [
-      "int32x2_t r",
-      "int8x8_t a",
-      "int8x16_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.4B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.S"
       },
-      "r": {
-        "register": "Vd.2S"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -24657,35 +29853,30 @@
     ],
     "instructions": [
       [
-        "SDOT"
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdot_laneq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_f64]",
     "arguments": [
-      "uint32x2_t r",
-      "uint8x8_t a",
-      "uint8x16_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.4B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.D"
       },
-      "r": {
-        "register": "Vd.2S"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -24693,173 +29884,166 @@
     ],
     "instructions": [
       [
-        "UDOT"
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdot_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_f16]",
     "arguments": [
-      "int32x2_t r",
-      "int8x8_t a",
-      "int8x8_t b"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.H[*]"
       },
-      "r": {
-        "register": "Vd.2S"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SDOT"
+        "FCMLE"
+      ],
+      [
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdot_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_f32]",
     "arguments": [
-      "uint32x2_t r",
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op2": {
+        "register": "Zop2.S[*]"
       },
-      "r": {
-        "register": "Vd.2S"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UDOT"
+        "FCMLE"
+      ],
+      [
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdotq_lane_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_f64]",
     "arguments": [
-      "int32x4_t r",
-      "int8x16_t a",
-      "int8x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
-      },
-      "b": {
-        "register": "Vm.4B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.D[*]"
       },
-      "r": {
-        "register": "Vd.4S"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SDOT"
+        "FCMLE"
+      ],
+      [
+        "FCMGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdotq_lane_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_s16]",
     "arguments": [
-      "uint32x4_t r",
-      "uint8x16_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.4B"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op2": {
+        "register": "Zop2.H[*]"
       },
-      "r": {
-        "register": "Vd.4S"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UDOT"
+        "CMPLE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdotq_laneq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_s32]",
     "arguments": [
-      "int32x4_t r",
-      "int8x16_t a",
-      "int8x16_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.S"
       },
-      "b": {
-        "register": "Vm.4B"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.S[*]"
       },
-      "r": {
-        "register": "Vd.4S"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -24867,35 +30051,33 @@
     ],
     "instructions": [
       [
-        "SDOT"
+        "CMPLE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdotq_laneq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_s64]",
     "arguments": [
-      "uint32x4_t r",
-      "uint8x16_t a",
-      "uint8x16_t b",
-      "const int lane"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
-      },
-      "b": {
-        "register": "Vm.4B"
+      "op1": {
+        "register": "Zop1.D"
       },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op2": {
+        "register": "Zop2.D[*]"
       },
-      "r": {
-        "register": "Vd.4S"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -24903,151 +30085,169 @@
     ],
     "instructions": [
       [
-        "UDOT"
+        "CMPLE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdotq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_s8]",
     "arguments": [
-      "int32x4_t r",
-      "int8x16_t a",
-      "int8x16_t b"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.B"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.B[*]"
       },
-      "r": {
-        "register": "Vd.4S"
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SDOT"
+        "CMPLE"
+      ],
+      [
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdotq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_u16]",
     "arguments": [
-      "uint32x4_t r",
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op1": {
+        "register": "Zop1.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op2": {
+        "register": "Zop2.H[*]"
       },
-      "r": {
-        "register": "Vd.4S"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UDOT"
+        "CMPLS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_u32]",
     "arguments": [
-      "float16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_u64]",
     "arguments": [
-      "float32x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_n_u8]",
     "arguments": [
-      "float64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -25055,356 +30255,383 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLS"
+      ],
+      [
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_s16]",
     "arguments": [
-      "poly16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_s32]",
     "arguments": [
-      "poly64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_s64]",
     "arguments": [
-      "poly8x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_s8]",
     "arguments": [
-      "int16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPGE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_u16]",
     "arguments": [
-      "int32x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_u32]",
     "arguments": [
-      "int64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_u64]",
     "arguments": [
-      "int8x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple[_u8]",
     "arguments": [
-      "uint16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPHS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_n_s16]",
     "arguments": [
-      "uint32x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLE"
+      ],
+      [
+        "CMPLE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_n_s32]",
     "arguments": [
-      "uint64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLE"
+      ],
+      [
+        "CMPLE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_lane_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_n_s8]",
     "arguments": [
-      "uint8x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLE"
+      ],
+      [
+        "CMPLE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_n_u16]",
     "arguments": [
-      "float16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -25412,27 +30639,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLS"
+      ],
+      [
+        "CMPLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_n_u32]",
     "arguments": [
-      "float32x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -25440,27 +30673,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLS"
+      ],
+      [
+        "CMPLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_n_u8]",
     "arguments": [
-      "float64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -25468,27 +30707,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLS"
+      ],
+      [
+        "CMPLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_s16]",
     "arguments": [
-      "poly16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -25496,27 +30741,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_s32]",
     "arguments": [
-      "poly64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -25524,27 +30772,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_s8]",
     "arguments": [
-      "poly8x16_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -25552,27 +30803,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_u16]",
     "arguments": [
-      "int16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -25580,27 +30834,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_u32]",
     "arguments": [
-      "int32x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -25608,27 +30865,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmple_wide[_u8]",
     "arguments": [
-      "int64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -25636,27 +30896,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLS"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_f16]",
     "arguments": [
-      "int8x16_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -25664,27 +30927,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_f32]",
     "arguments": [
-      "uint16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -25692,27 +30958,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_f64]",
     "arguments": [
-      "uint32x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -25720,27 +30989,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_f16]",
     "arguments": [
-      "uint64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -25748,27 +31020,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMLT"
+      ],
+      [
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_laneq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_f32]",
     "arguments": [
-      "uint8x16_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -25776,72 +31054,101 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMLT"
+      ],
+      [
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_f64]",
     "arguments": [
-      "float16_t value"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMLT"
+      ],
+      [
+        "FCMGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_s16]",
     "arguments": [
-      "float32_t value"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_s32]",
     "arguments": [
-      "float64_t value"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -25849,301 +31156,392 @@
     ],
     "instructions": [
       [
-        "INS"
+        "CMPLT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_s64]",
     "arguments": [
-      "poly16_t value"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_s8]",
     "arguments": [
-      "poly64_t value"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "CMPLT"
+      ],
+      [
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_u16]",
     "arguments": [
-      "poly8_t value"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLO"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_u32]",
     "arguments": [
-      "int16_t value"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLO"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_u64]",
     "arguments": [
-      "int32_t value"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLO"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_n_u8]",
     "arguments": [
-      "int64_t value"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "CMPLO"
+      ],
+      [
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_s16]",
     "arguments": [
-      "int8_t value"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_s32]",
     "arguments": [
-      "uint16_t value"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_s64]",
     "arguments": [
-      "uint32_t value"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_s8]",
     "arguments": [
-      "uint64_t value"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "INS"
+        "CMPGT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdup_n_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_u16]",
     "arguments": [
-      "uint8_t value"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupb_lane_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_u32]",
     "arguments": [
-      "poly8x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "poly8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -26151,27 +31549,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupb_lane_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_u64]",
     "arguments": [
-      "int8x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -26179,27 +31580,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupb_lane_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt[_u8]",
     "arguments": [
-      "uint8x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "uint8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -26207,27 +31611,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPHI"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupb_laneq_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_n_s16]",
     "arguments": [
-      "poly8x16_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "poly8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -26235,27 +31642,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLT"
+      ],
+      [
+        "CMPLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupb_laneq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_n_s32]",
     "arguments": [
-      "int8x16_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "int8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -26263,27 +31676,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLT"
+      ],
+      [
+        "CMPLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupb_laneq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_n_s8]",
     "arguments": [
-      "uint8x16_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "uint8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -26291,27 +31710,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLT"
+      ],
+      [
+        "CMPLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupd_lane_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_n_u16]",
     "arguments": [
-      "float64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -26319,27 +31744,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLO"
+      ],
+      [
+        "CMPLO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupd_lane_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_n_u32]",
     "arguments": [
-      "int64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -26347,27 +31778,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLO"
+      ],
+      [
+        "CMPLO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupd_lane_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_n_u8]",
     "arguments": [
-      "uint64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -26375,27 +31812,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLO"
+      ],
+      [
+        "CMPLO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupd_laneq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_s16]",
     "arguments": [
-      "float64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -26403,27 +31846,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupd_laneq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_s32]",
     "arguments": [
-      "int64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -26431,27 +31877,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupd_laneq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_s8]",
     "arguments": [
-      "uint64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -26459,27 +31908,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vduph_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_u16]",
     "arguments": [
-      "float16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -26487,27 +31939,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vduph_lane_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_u32]",
     "arguments": [
-      "poly16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "poly16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -26515,27 +31970,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vduph_lane_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmplt_wide[_u8]",
     "arguments": [
-      "int16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -26543,27 +32001,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPLO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vduph_lane_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_f16]",
     "arguments": [
-      "uint16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -26571,27 +32032,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vduph_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_f32]",
     "arguments": [
-      "float16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -26599,27 +32063,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vduph_laneq_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_f64]",
     "arguments": [
-      "poly16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "poly16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -26627,27 +32094,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vduph_laneq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_f16]",
     "arguments": [
-      "int16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -26655,27 +32125,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMNE"
+      ],
+      [
+        "FCMNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vduph_laneq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_f32]",
     "arguments": [
-      "uint16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -26683,87 +32159,101 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMNE"
+      ],
+      [
+        "FCMNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_f64]",
     "arguments": [
-      "float16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMNE"
+      ],
+      [
+        "FCMNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_s16]",
     "arguments": [
-      "float32x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_s32]",
     "arguments": [
-      "float64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -26771,356 +32261,392 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_s64]",
     "arguments": [
-      "poly16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_s8]",
     "arguments": [
-      "poly64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_u16]",
     "arguments": [
-      "poly8x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_u32]",
     "arguments": [
-      "int16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_u64]",
     "arguments": [
-      "int32x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_n_u8]",
     "arguments": [
-      "int64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_s16]",
     "arguments": [
-      "int8x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_s32]",
     "arguments": [
-      "uint16x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.4H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_s64]",
     "arguments": [
-      "uint32x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.2S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_s8]",
     "arguments": [
-      "uint64x1_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.1D"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_lane_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_u16]",
     "arguments": [
-      "uint8x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.8B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_u32]",
     "arguments": [
-      "float16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -27128,27 +32654,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_u64]",
     "arguments": [
-      "float32x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -27156,27 +32685,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne[_u8]",
     "arguments": [
-      "float64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -27184,27 +32716,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne_wide[_n_s16]",
     "arguments": [
-      "poly16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -27212,27 +32747,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne_wide[_n_s32]",
     "arguments": [
-      "poly64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -27240,27 +32781,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne_wide[_n_s8]",
     "arguments": [
-      "poly8x16_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "int64_t op2"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -27268,27 +32815,33 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
+      ],
+      [
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne_wide[_s16]",
     "arguments": [
-      "int16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -27296,27 +32849,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne_wide[_s32]",
     "arguments": [
-      "int32x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -27324,27 +32880,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpne_wide[_s8]",
     "arguments": [
-      "int64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint64_t op2"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.B"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
@@ -27352,27 +32911,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CMPNE"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpuo[_f16]",
     "arguments": [
-      "int8x16_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -27380,27 +32942,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMUO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpuo[_f32]",
     "arguments": [
-      "uint16x8_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.8H"
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -27408,27 +32973,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMUO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpuo[_f64]",
     "arguments": [
-      "uint32x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op1": {
+        "register": "Zop1.D"
       },
-      "vec": {
-        "register": "Vn.4S"
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -27436,27 +33004,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMUO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpuo[_n_f16]",
     "arguments": [
-      "uint64x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op1": {
+        "register": "Zop1.H"
       },
-      "vec": {
-        "register": "Vn.2D"
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -27464,27 +33035,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMUO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_laneq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpuo[_n_f32]",
     "arguments": [
-      "uint8x16_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
+      "op1": {
+        "register": "Zop1.S"
       },
-      "vec": {
-        "register": "Vn.16B"
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -27492,72 +33066,92 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMUO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcmpuo[_n_f64]",
     "arguments": [
-      "float16_t value"
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "svbool_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "FCMUO"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s16]_m",
     "arguments": [
-      "float32_t value"
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s16]_x",
     "arguments": [
-      "float64_t value"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -27565,301 +33159,375 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s16]_z",
     "arguments": [
-      "poly16_t value"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "svint16_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s32]_m",
     "arguments": [
-      "poly64_t value"
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s32]_x",
     "arguments": [
-      "poly8_t value"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s32]_z",
     "arguments": [
-      "int16_t value"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svint32_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s64]_m",
     "arguments": [
-      "int32_t value"
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s64]_x",
     "arguments": [
-      "int64_t value"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s64]_z",
     "arguments": [
-      "int8_t value"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svint64_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s8]_m",
     "arguments": [
-      "uint16_t value"
+      "svint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s8]_x",
     "arguments": [
-      "uint32_t value"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_s8]_z",
     "arguments": [
-      "uint64_t value"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svint8_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdupq_n_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u16]_m",
     "arguments": [
-      "uint8_t value"
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "value": {
-        "register": "rn"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdups_lane_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u16]_x",
     "arguments": [
-      "float32x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op": {
+        "register": "Zop.H|Ztied.H"
       },
-      "vec": {
-        "register": "Vn.2S"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -27867,27 +33535,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdups_lane_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u16]_z",
     "arguments": [
-      "int32x2_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "op": {
+        "register": "Zop.H"
       },
-      "vec": {
-        "register": "Vn.2S"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -27895,27 +33566,31 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdups_lane_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u32]_m",
     "arguments": [
-      "uint32x2_t vec",
-      "const int lane"
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
       },
-      "vec": {
-        "register": "Vn.2S"
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -27923,27 +33598,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdups_laneq_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u32]_x",
     "arguments": [
-      "float32x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op": {
+        "register": "Zop.S|Ztied.S"
       },
-      "vec": {
-        "register": "Vn.4S"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -27951,27 +33629,30 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdups_laneq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u32]_z",
     "arguments": [
-      "int32x4_t vec",
-      "const int lane"
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "op": {
+        "register": "Zop.S"
       },
-      "vec": {
-        "register": "Vn.4S"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
@@ -27979,27 +33660,31 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vdups_laneq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u64]_m",
     "arguments": [
-      "uint32x4_t vec",
-      "const int lane"
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
       },
-      "vec": {
-        "register": "Vn.4S"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
@@ -28007,779 +33692,845 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor3q_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u64]_x",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b",
-      "int16x8_t c"
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.D|Ztied.D"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.D"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "EOR3"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor3q_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u64]_z",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b",
-      "int32x4_t c"
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.D"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.D"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "EOR3"
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor3q_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u8]_m",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b",
-      "int64x2_t c"
+      "svuint8_t inactive",
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
       },
-      "b": {},
-      "c": {}
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "EOR3"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor3q_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u8]_x",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b",
-      "int8x16_t c"
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.B|Ztied.B"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.B"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "EOR3"
+        "CNOT"
+      ],
+      [
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor3q_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnot[_u8]_z",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b",
-      "uint16x8_t c"
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.B"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.B"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "EOR3"
+        "MOVPRFX",
+        "CNOT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor3q_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_f16]_m",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b",
-      "uint32x4_t c"
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.H"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "EOR3"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor3q_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_f16]_x",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b",
-      "uint64x2_t c"
+      "svbool_t pg",
+      "svfloat16_t op"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.H|Ztied.H"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.H"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "EOR3"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor3q_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_f16]_z",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b",
-      "uint8x16_t c"
+      "svbool_t pg",
+      "svfloat16_t op"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.H"
       },
-      "b": {},
-      "c": {}
+      "pg": {
+        "register": "Pg.H"
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "EOR3"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_f32]_m",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_f32]_x",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "svbool_t pg",
+      "svfloat32_t op"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.S|Ztied.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_f32]_z",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b"
+      "svbool_t pg",
+      "svfloat32_t op"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_f64]_m",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_f64]_x",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "svbool_t pg",
+      "svfloat64_t op"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.D|Ztied.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_f64]_z",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "svbool_t pg",
+      "svfloat64_t op"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s16]_m",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b"
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veor_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s16]_x",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.H|Ztied.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veorq_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s16]_z",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "svbool_t pg",
+      "svint16_t op"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.H"
       },
-      "b": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veorq_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s32]_m",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veorq_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s32]_x",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.S|Ztied.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veorq_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s32]_z",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "svbool_t pg",
+      "svint32_t op"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.S"
       },
-      "b": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veorq_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s64]_m",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veorq_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s64]_x",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.D|Ztied.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veorq_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s64]_z",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
+      "svbool_t pg",
+      "svint64_t op"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "op": {
+        "register": "Zop.D"
       },
-      "b": {
-        "register": "Vm.16B"
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "veorq_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s8]_m",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "svuint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
       },
-      "b": {
-        "register": "Vm.16B"
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EOR"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_f16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s8]_x",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b",
-      "const int n"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.B|Ztied.B"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 3
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_f32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_s8]_z",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b",
-      "const int n"
+      "svbool_t pg",
+      "svint8_t op"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.B"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 1
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_f64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u16]_m",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b",
-      "const int n"
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.H"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 0
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
@@ -28787,538 +34538,192238 @@
     ],
     "instructions": [
       [
-        "EXT"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_p16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u16]_x",
     "arguments": [
-      "poly16x4_t a",
-      "poly16x4_t b",
-      "const int n"
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.H|Ztied.H"
       },
-      "b": {
-        "register": "Vm.8B"
-      },
-      "n": {
-        "minimum": 0,
-        "maximum": 3
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_p64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u16]_z",
     "arguments": [
-      "poly64x1_t a",
-      "poly64x1_t b",
-      "const int n"
+      "svbool_t pg",
+      "svuint16_t op"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "svuint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.H"
       },
-      "b": {
-        "register": "Vm.8B"
-      },
-      "n": {
-        "minimum": 0,
-        "maximum": 0
+      "pg": {
+        "register": "Pg.H"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_p8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u32]_m",
     "arguments": [
-      "poly8x8_t a",
-      "poly8x8_t b",
-      "const int n"
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.S"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 7
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_s16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u32]_x",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b",
-      "const int n"
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.S|Ztied.S"
       },
-      "b": {
-        "register": "Vm.8B"
-      },
-      "n": {
-        "minimum": 0,
-        "maximum": 3
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_s32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u32]_z",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b",
-      "const int n"
+      "svbool_t pg",
+      "svuint32_t op"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "svuint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "op": {
+        "register": "Zop.S"
       },
-      "b": {
-        "register": "Vm.8B"
-      },
-      "n": {
-        "minimum": 0,
-        "maximum": 1
+      "pg": {
+        "register": "Pg.S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_s64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u64]_m",
     "arguments": [
-      "int64x1_t a",
-      "int64x1_t b",
-      "const int n"
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.D"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 0
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_s8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u64]_x",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b",
-      "const int n"
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.D|Ztied.D"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 7
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_u16",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u64]_z",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b",
-      "const int n"
+      "svbool_t pg",
+      "svuint64_t op"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "svuint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.D"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 3
+      "pg": {
+        "register": "Pg.D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_u32",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u8]_m",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b",
-      "const int n"
+      "svuint8_t inactive",
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
       },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.B"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 1
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_u64",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u8]_x",
     "arguments": [
-      "uint64x1_t a",
-      "uint64x1_t b",
-      "const int n"
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.B|Ztied.B"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 0
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "CNT"
+      ],
+      [
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vext_u8",
+    "SIMD_ISA": "SVE",
+    "name": "svcnt[_u8]_z",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b",
-      "const int n"
+      "svbool_t pg",
+      "svuint8_t op"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "svuint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
+      "op": {
+        "register": "Zop.B"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 7
+      "pg": {
+        "register": "Pg.B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "MOVPRFX",
+        "CNT"
       ]
     ]
   },
   {
-    "SIMD_ISA": "Neon",
-    "name": "vextq_f16",
-    "arguments": [
-      "float16x8_t a",
-      "float16x8_t b",
-      "const int n"
-    ],
+    "SIMD_ISA": "SVE",
+    "name": "svcntb",
+    "arguments": [],
     "return_type": {
-      "value": "float16x8_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
-      },
-      "b": {
-        "register": "Vm.16B"
-      },
-      "n": {
-        "minimum": 0,
-        "maximum": 7
-      }
+      "value": "uint64_t"
     },
+    "Arguments_Preparation": {},
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "CNTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcntb_pat",
+    "arguments": [
+      "enum svpattern pattern"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcntd",
+    "arguments": [],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcntd_pat",
+    "arguments": [
+      "enum svpattern pattern"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcnth",
+    "arguments": [],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcnth_pat",
+    "arguments": [
+      "enum svpattern pattern"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcntp_b16",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcntp_b32",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcntp_b64",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcntp_b8",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcntw",
+    "arguments": [],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcntw_pat",
+    "arguments": [
+      "enum svpattern pattern"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcompact[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "COMPACT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcompact[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "COMPACT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcompact[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "COMPACT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcompact[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "COMPACT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcompact[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "COMPACT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcompact[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "COMPACT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_f16]",
+    "arguments": [
+      "svfloat16_t x0",
+      "svfloat16_t x1"
+    ],
+    "return_type": {
+      "value": "svfloat16x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_f32]",
+    "arguments": [
+      "svfloat32_t x0",
+      "svfloat32_t x1"
+    ],
+    "return_type": {
+      "value": "svfloat32x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_f64]",
+    "arguments": [
+      "svfloat64_t x0",
+      "svfloat64_t x1"
+    ],
+    "return_type": {
+      "value": "svfloat64x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_s16]",
+    "arguments": [
+      "svint16_t x0",
+      "svint16_t x1"
+    ],
+    "return_type": {
+      "value": "svint16x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_s32]",
+    "arguments": [
+      "svint32_t x0",
+      "svint32_t x1"
+    ],
+    "return_type": {
+      "value": "svint32x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_s64]",
+    "arguments": [
+      "svint64_t x0",
+      "svint64_t x1"
+    ],
+    "return_type": {
+      "value": "svint64x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_s8]",
+    "arguments": [
+      "svint8_t x0",
+      "svint8_t x1"
+    ],
+    "return_type": {
+      "value": "svint8x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_u16]",
+    "arguments": [
+      "svuint16_t x0",
+      "svuint16_t x1"
+    ],
+    "return_type": {
+      "value": "svuint16x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_u32]",
+    "arguments": [
+      "svuint32_t x0",
+      "svuint32_t x1"
+    ],
+    "return_type": {
+      "value": "svuint32x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_u64]",
+    "arguments": [
+      "svuint64_t x0",
+      "svuint64_t x1"
+    ],
+    "return_type": {
+      "value": "svuint64x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate2[_u8]",
+    "arguments": [
+      "svuint8_t x0",
+      "svuint8_t x1"
+    ],
+    "return_type": {
+      "value": "svuint8x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_f16]",
+    "arguments": [
+      "svfloat16_t x0",
+      "svfloat16_t x1",
+      "svfloat16_t x2"
+    ],
+    "return_type": {
+      "value": "svfloat16x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_f32]",
+    "arguments": [
+      "svfloat32_t x0",
+      "svfloat32_t x1",
+      "svfloat32_t x2"
+    ],
+    "return_type": {
+      "value": "svfloat32x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_f64]",
+    "arguments": [
+      "svfloat64_t x0",
+      "svfloat64_t x1",
+      "svfloat64_t x2"
+    ],
+    "return_type": {
+      "value": "svfloat64x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_s16]",
+    "arguments": [
+      "svint16_t x0",
+      "svint16_t x1",
+      "svint16_t x2"
+    ],
+    "return_type": {
+      "value": "svint16x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_s32]",
+    "arguments": [
+      "svint32_t x0",
+      "svint32_t x1",
+      "svint32_t x2"
+    ],
+    "return_type": {
+      "value": "svint32x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_s64]",
+    "arguments": [
+      "svint64_t x0",
+      "svint64_t x1",
+      "svint64_t x2"
+    ],
+    "return_type": {
+      "value": "svint64x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_s8]",
+    "arguments": [
+      "svint8_t x0",
+      "svint8_t x1",
+      "svint8_t x2"
+    ],
+    "return_type": {
+      "value": "svint8x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_u16]",
+    "arguments": [
+      "svuint16_t x0",
+      "svuint16_t x1",
+      "svuint16_t x2"
+    ],
+    "return_type": {
+      "value": "svuint16x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_u32]",
+    "arguments": [
+      "svuint32_t x0",
+      "svuint32_t x1",
+      "svuint32_t x2"
+    ],
+    "return_type": {
+      "value": "svuint32x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_u64]",
+    "arguments": [
+      "svuint64_t x0",
+      "svuint64_t x1",
+      "svuint64_t x2"
+    ],
+    "return_type": {
+      "value": "svuint64x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate3[_u8]",
+    "arguments": [
+      "svuint8_t x0",
+      "svuint8_t x1",
+      "svuint8_t x2"
+    ],
+    "return_type": {
+      "value": "svuint8x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_f16]",
+    "arguments": [
+      "svfloat16_t x0",
+      "svfloat16_t x1",
+      "svfloat16_t x2",
+      "svfloat16_t x3"
+    ],
+    "return_type": {
+      "value": "svfloat16x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_f32]",
+    "arguments": [
+      "svfloat32_t x0",
+      "svfloat32_t x1",
+      "svfloat32_t x2",
+      "svfloat32_t x3"
+    ],
+    "return_type": {
+      "value": "svfloat32x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_f64]",
+    "arguments": [
+      "svfloat64_t x0",
+      "svfloat64_t x1",
+      "svfloat64_t x2",
+      "svfloat64_t x3"
+    ],
+    "return_type": {
+      "value": "svfloat64x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_s16]",
+    "arguments": [
+      "svint16_t x0",
+      "svint16_t x1",
+      "svint16_t x2",
+      "svint16_t x3"
+    ],
+    "return_type": {
+      "value": "svint16x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_s32]",
+    "arguments": [
+      "svint32_t x0",
+      "svint32_t x1",
+      "svint32_t x2",
+      "svint32_t x3"
+    ],
+    "return_type": {
+      "value": "svint32x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_s64]",
+    "arguments": [
+      "svint64_t x0",
+      "svint64_t x1",
+      "svint64_t x2",
+      "svint64_t x3"
+    ],
+    "return_type": {
+      "value": "svint64x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_s8]",
+    "arguments": [
+      "svint8_t x0",
+      "svint8_t x1",
+      "svint8_t x2",
+      "svint8_t x3"
+    ],
+    "return_type": {
+      "value": "svint8x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_u16]",
+    "arguments": [
+      "svuint16_t x0",
+      "svuint16_t x1",
+      "svuint16_t x2",
+      "svuint16_t x3"
+    ],
+    "return_type": {
+      "value": "svuint16x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_u32]",
+    "arguments": [
+      "svuint32_t x0",
+      "svuint32_t x1",
+      "svuint32_t x2",
+      "svuint32_t x3"
+    ],
+    "return_type": {
+      "value": "svuint32x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_u64]",
+    "arguments": [
+      "svuint64_t x0",
+      "svuint64_t x1",
+      "svuint64_t x2",
+      "svuint64_t x3"
+    ],
+    "return_type": {
+      "value": "svuint64x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcreate4[_u8]",
+    "arguments": [
+      "svuint8_t x0",
+      "svuint8_t x1",
+      "svuint8_t x2",
+      "svuint8_t x3"
+    ],
+    "return_type": {
+      "value": "svuint8x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_f32]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_f64]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_s16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_s32]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_s64]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_u16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_u32]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_u64]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f16[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_f16]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_f64]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_s32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_s64]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_u32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_u64]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f32[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_f16]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_f32]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVT"
+      ],
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_s32]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_s64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_u32]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_u64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ],
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_f64[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s16[_f16]_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s16[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s16[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s32[_f16]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s32[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s32[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s32[_f32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s32[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s32[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s32[_f64]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s32[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s32[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s64[_f16]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s64[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s64[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s64[_f32]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s64[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s64[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s64[_f64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s64[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_s64[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u16[_f16]_m",
+    "arguments": [
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u16[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u16[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u32[_f16]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u32[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u32[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u32[_f32]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u32[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u32[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u32[_f64]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u32[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u32[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u64[_f16]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u64[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u64[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u64[_f32]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u64[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u64[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u64[_f64]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u64[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svcvt_u64[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtlt_f32[_f16]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtlt_f32[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtlt_f64[_f32]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.D"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtlt_f64[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtnt_f16[_f32]_m",
+    "arguments": [
+      "svfloat16_t even",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtnt_f16[_f32]_x",
+    "arguments": [
+      "svfloat16_t even",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtnt_f32[_f64]_m",
+    "arguments": [
+      "svfloat32_t even",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtnt_f32[_f64]_x",
+    "arguments": [
+      "svfloat32_t even",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtx_f32[_f64]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTX"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtx_f32[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTX"
+      ],
+      [
+        "MOVPRFX",
+        "FCVTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtx_f32[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FCVTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtxnt_f32[_f64]_m",
+    "arguments": [
+      "svfloat32_t even",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTXNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svcvtxnt_f32[_f64]_x",
+    "arguments": [
+      "svfloat32_t even",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTXNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ],
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIV"
+      ],
+      [
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIV"
+      ],
+      [
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIV"
+      ],
+      [
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIV"
+      ],
+      [
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIV"
+      ],
+      [
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIV"
+      ],
+      [
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIV"
+      ],
+      [
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIV"
+      ],
+      [
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdiv[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIVR"
+      ],
+      [
+        "FDIV"
+      ],
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIVR"
+      ],
+      [
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIVR"
+      ],
+      [
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIVR"
+      ],
+      [
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIVR"
+      ],
+      [
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIVR"
+      ],
+      [
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDIVR"
+      ],
+      [
+        "SDIV"
+      ],
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "SDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIVR"
+      ],
+      [
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDIVR"
+      ],
+      [
+        "UDIV"
+      ],
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdivr[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UDIVR"
+      ],
+      [
+        "MOVPRFX",
+        "UDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svdot[_f32_f16]",
+    "arguments": [
+      "svfloat32_t zda",
+      "svfloat16_t zn",
+      "svfloat16_t zm"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "zda": {
+        "register": "Zreg1.S"
+      },
+      "zm": {
+        "register": "Zreg3.H"
+      },
+      "zn": {
+        "register": "Zreg2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ],
+      [
+        "MOVPRFX",
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ],
+      [
+        "MOVPRFX",
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ],
+      [
+        "MOVPRFX",
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ],
+      [
+        "MOVPRFX",
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ],
+      [
+        "MOVPRFX",
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svdot[_s32_s16]",
+    "arguments": [
+      "svint32_t zda",
+      "svint16_t zn",
+      "svint16_t zm"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "zda": {
+        "register": "Zreg1.S"
+      },
+      "zm": {
+        "register": "Zreg3.H"
+      },
+      "zn": {
+        "register": "Zreg2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ],
+      [
+        "MOVPRFX",
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ],
+      [
+        "MOVPRFX",
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svdot[_u32_u16]",
+    "arguments": [
+      "svuint32_t zda",
+      "svuint16_t zn",
+      "svuint16_t zm"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "zda": {
+        "register": "Zreg1.S"
+      },
+      "zm": {
+        "register": "Zreg3.H"
+      },
+      "zn": {
+        "register": "Zreg2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ],
+      [
+        "MOVPRFX",
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svdot_lane[_f32_f16]",
+    "arguments": [
+      "svfloat32_t zda",
+      "svfloat16_t zn",
+      "svfloat16_t zm",
+      "uint64_t imm_idx"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_idx": {
+        "immediate": "imm1"
+      },
+      "zda": {
+        "register": "Zreg1.S"
+      },
+      "zm": {
+        "register": "Zreg3.H"
+      },
+      "zn": {
+        "register": "Zreg2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint8_t op2",
+      "svint8_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ],
+      [
+        "MOVPRFX",
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svdot_lane[_s32_s16]",
+    "arguments": [
+      "svint32_t zda",
+      "svint16_t zn",
+      "svint16_t zm",
+      "uint64_t imm_idx"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_idx": {
+        "immediate": "imm1"
+      },
+      "zda": {
+        "register": "Zreg1.S"
+      },
+      "zm": {
+        "register": "Zreg3.H"
+      },
+      "zn": {
+        "register": "Zreg2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ],
+      [
+        "MOVPRFX",
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot_lane[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ],
+      [
+        "MOVPRFX",
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svdot_lane[_u32_u16]",
+    "arguments": [
+      "svuint32_t zda",
+      "svuint16_t zn",
+      "svuint16_t zm",
+      "uint64_t imm_idx"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_idx": {
+        "immediate": "imm1"
+      },
+      "zda": {
+        "register": "Zreg1.S"
+      },
+      "zm": {
+        "register": "Zreg3.H"
+      },
+      "zn": {
+        "register": "Zreg2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdot_lane[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ],
+      [
+        "MOVPRFX",
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_b16",
+    "arguments": [
+      "bool op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_b32",
+    "arguments": [
+      "bool op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_b64",
+    "arguments": [
+      "bool op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_b8",
+    "arguments": [
+      "bool op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f16",
+    "arguments": [
+      "float16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Hop|Wop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f16_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "float16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.H"
+      },
+      "op": {
+        "register": "Hop|Wop"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f16_x",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Hop|Wop"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f16_z",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Hop|Wop"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f32",
+    "arguments": [
+      "float32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Sop|Wop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f32_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "float32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Sop|Wop"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f32_x",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Sop|Wop"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f32_z",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Sop|Wop"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f64",
+    "arguments": [
+      "float64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Dop|Xop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f64_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "float64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.D"
+      },
+      "op": {
+        "register": "Dop|Xop"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f64_x",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Dop|Xop"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_f64_z",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Dop|Xop"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s16",
+    "arguments": [
+      "int16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Hop|Wop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s16_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "int16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.H"
+      },
+      "op": {
+        "register": "Hop|Wop"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s16_x",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Hop|Wop"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s16_z",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Hop|Wop"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s32",
+    "arguments": [
+      "int32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Sop|Wop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s32_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "int32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Sop|Wop"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s32_x",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Sop|Wop"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s32_z",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Sop|Wop"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s64",
+    "arguments": [
+      "int64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Dop|Xop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s64_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "int64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.D"
+      },
+      "op": {
+        "register": "Dop|Xop"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s64_x",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Dop|Xop"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s64_z",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Dop|Xop"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s8",
+    "arguments": [
+      "int8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Bop|Wop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s8_m",
+    "arguments": [
+      "svint8_t inactive",
+      "svbool_t pg",
+      "int8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.B"
+      },
+      "op": {
+        "register": "Bop|Wop"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s8_x",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Bop|Wop"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_s8_z",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Bop|Wop"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u16",
+    "arguments": [
+      "uint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Hop|Wop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u16_m",
+    "arguments": [
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "uint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.H"
+      },
+      "op": {
+        "register": "Hop|Wop"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u16_x",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Hop|Wop"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u16_z",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Hop|Wop"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u32",
+    "arguments": [
+      "uint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Sop|Wop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u32_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "uint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Sop|Wop"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u32_x",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Sop|Wop"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u32_z",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Sop|Wop"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u64",
+    "arguments": [
+      "uint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Dop|Xop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u64_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "uint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.D"
+      },
+      "op": {
+        "register": "Dop|Xop"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u64_x",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Dop|Xop"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u64_z",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Dop|Xop"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u8",
+    "arguments": [
+      "uint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Bop|Wop"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u8_m",
+    "arguments": [
+      "svuint8_t inactive",
+      "svbool_t pg",
+      "uint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Ztied.B"
+      },
+      "op": {
+        "register": "Bop|Wop"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "CPY"
+      ],
+      [
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u8_x",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Bop|Wop"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "FCPY"
+      ],
+      [
+        "FDUP"
+      ],
+      [
+        "DUPM"
+      ],
+      [
+        "DUP"
+      ],
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup[_n]_u8_z",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Bop|Wop"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CPY"
+      ],
+      [
+        "DUP",
+        "FCPY"
+      ],
+      [
+        "DUP",
+        "CPY"
+      ],
+      [
+        "MOVPRFX",
+        "CPY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_f16]",
+    "arguments": [
+      "svfloat16_t data",
+      "uint16_t index"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.H"
+      },
+      "index": {
+        "register": "Zindex.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_f32]",
+    "arguments": [
+      "svfloat32_t data",
+      "uint32_t index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index": {
+        "register": "Zindex.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_f64]",
+    "arguments": [
+      "svfloat64_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index": {
+        "register": "Zindex.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_s16]",
+    "arguments": [
+      "svint16_t data",
+      "uint16_t index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.H"
+      },
+      "index": {
+        "register": "Zindex.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_s32]",
+    "arguments": [
+      "svint32_t data",
+      "uint32_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index": {
+        "register": "Zindex.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_s64]",
+    "arguments": [
+      "svint64_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index": {
+        "register": "Zindex.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_s8]",
+    "arguments": [
+      "svint8_t data",
+      "uint8_t index"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.B"
+      },
+      "index": {
+        "register": "Zindex.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_u16]",
+    "arguments": [
+      "svuint16_t data",
+      "uint16_t index"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.H"
+      },
+      "index": {
+        "register": "Zindex.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_u32]",
+    "arguments": [
+      "svuint32_t data",
+      "uint32_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index": {
+        "register": "Zindex.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_u64]",
+    "arguments": [
+      "svuint64_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index": {
+        "register": "Zindex.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdup_lane[_u8]",
+    "arguments": [
+      "svuint8_t data",
+      "uint8_t index"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.B"
+      },
+      "index": {
+        "register": "Zindex.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_b16",
+    "arguments": [
+      "bool x0",
+      "bool x1",
+      "bool x2",
+      "bool x3",
+      "bool x4",
+      "bool x5",
+      "bool x6",
+      "bool x7"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_b32",
+    "arguments": [
+      "bool x0",
+      "bool x1",
+      "bool x2",
+      "bool x3"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_b64",
+    "arguments": [
+      "bool x0",
+      "bool x1"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_b8",
+    "arguments": [
+      "bool x0",
+      "bool x1",
+      "bool x2",
+      "bool x3",
+      "bool x4",
+      "bool x5",
+      "bool x6",
+      "bool x7",
+      "bool x8",
+      "bool x9",
+      "bool x10",
+      "bool x11",
+      "bool x12",
+      "bool x13",
+      "bool x14",
+      "bool x15"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_f16",
+    "arguments": [
+      "float16_t x0",
+      "float16_t x1",
+      "float16_t x2",
+      "float16_t x3",
+      "float16_t x4",
+      "float16_t x5",
+      "float16_t x6",
+      "float16_t x7"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_f32",
+    "arguments": [
+      "float32_t x0",
+      "float32_t x1",
+      "float32_t x2",
+      "float32_t x3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_f64",
+    "arguments": [
+      "float64_t x0",
+      "float64_t x1"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_s16",
+    "arguments": [
+      "int16_t x0",
+      "int16_t x1",
+      "int16_t x2",
+      "int16_t x3",
+      "int16_t x4",
+      "int16_t x5",
+      "int16_t x6",
+      "int16_t x7"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_s32",
+    "arguments": [
+      "int32_t x0",
+      "int32_t x1",
+      "int32_t x2",
+      "int32_t x3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_s64",
+    "arguments": [
+      "int64_t x0",
+      "int64_t x1"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_s8",
+    "arguments": [
+      "int8_t x0",
+      "int8_t x1",
+      "int8_t x2",
+      "int8_t x3",
+      "int8_t x4",
+      "int8_t x5",
+      "int8_t x6",
+      "int8_t x7",
+      "int8_t x8",
+      "int8_t x9",
+      "int8_t x10",
+      "int8_t x11",
+      "int8_t x12",
+      "int8_t x13",
+      "int8_t x14",
+      "int8_t x15"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_u16",
+    "arguments": [
+      "uint16_t x0",
+      "uint16_t x1",
+      "uint16_t x2",
+      "uint16_t x3",
+      "uint16_t x4",
+      "uint16_t x5",
+      "uint16_t x6",
+      "uint16_t x7"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_u32",
+    "arguments": [
+      "uint32_t x0",
+      "uint32_t x1",
+      "uint32_t x2",
+      "uint32_t x3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_u64",
+    "arguments": [
+      "uint64_t x0",
+      "uint64_t x1"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq[_n]_u8",
+    "arguments": [
+      "uint8_t x0",
+      "uint8_t x1",
+      "uint8_t x2",
+      "uint8_t x3",
+      "uint8_t x4",
+      "uint8_t x5",
+      "uint8_t x6",
+      "uint8_t x7",
+      "uint8_t x8",
+      "uint8_t x9",
+      "uint8_t x10",
+      "uint8_t x11",
+      "uint8_t x12",
+      "uint8_t x13",
+      "uint8_t x14",
+      "uint8_t x15"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_f16]",
+    "arguments": [
+      "svfloat16_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_f32]",
+    "arguments": [
+      "svfloat32_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_f64]",
+    "arguments": [
+      "svfloat64_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_s16]",
+    "arguments": [
+      "svint16_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_s32]",
+    "arguments": [
+      "svint32_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_s64]",
+    "arguments": [
+      "svint64_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_s8]",
+    "arguments": [
+      "svint8_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_u16]",
+    "arguments": [
+      "svuint16_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_u32]",
+    "arguments": [
+      "svuint32_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_u64]",
+    "arguments": [
+      "svuint64_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svdupq_lane[_u8]",
+    "arguments": [
+      "svuint8_t data",
+      "uint64_t index"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D|Zdata.Q"
+      },
+      "{2 * index, 2 * index + 1, 2 * index, 2 * index + 1, …}": {
+        "register": "Zindices_d.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ],
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveor3[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "EOR3"
+      ],
+      [
+        "MOVPRFX",
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_b]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ],
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveor[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "EOR"
+      ],
+      [
+        "MOVPRFX",
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_n_s16]",
+    "arguments": [
+      "svint16_t odd",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.H|Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_n_s32]",
+    "arguments": [
+      "svint32_t odd",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.S|Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_n_s64]",
+    "arguments": [
+      "svint64_t odd",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.D|Ztied.D"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_n_s8]",
+    "arguments": [
+      "svint8_t odd",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.B|Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_n_u16]",
+    "arguments": [
+      "svuint16_t odd",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.H|Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_n_u32]",
+    "arguments": [
+      "svuint32_t odd",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.S|Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_n_u64]",
+    "arguments": [
+      "svuint64_t odd",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.D|Ztied.D"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_n_u8]",
+    "arguments": [
+      "svuint8_t odd",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.B|Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_s16]",
+    "arguments": [
+      "svint16_t odd",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.H|Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_s32]",
+    "arguments": [
+      "svint32_t odd",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.S|Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_s64]",
+    "arguments": [
+      "svint64_t odd",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.D|Ztied.D"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_s8]",
+    "arguments": [
+      "svint8_t odd",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.B|Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_u16]",
+    "arguments": [
+      "svuint16_t odd",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.H|Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_u32]",
+    "arguments": [
+      "svuint32_t odd",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.S|Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_u64]",
+    "arguments": [
+      "svuint64_t odd",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.D|Ztied.D"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveorbt[_u8]",
+    "arguments": [
+      "svuint8_t odd",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "odd": {
+        "register": "Zodd.B|Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORBT"
+      ],
+      [
+        "MOVPRFX",
+        "EORBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_n_s16]",
+    "arguments": [
+      "svint16_t even",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.H|Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_n_s32]",
+    "arguments": [
+      "svint32_t even",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.S|Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_n_s64]",
+    "arguments": [
+      "svint64_t even",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.D|Ztied.D"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_n_s8]",
+    "arguments": [
+      "svint8_t even",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.B|Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_n_u16]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.H|Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_n_u32]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.S|Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_n_u64]",
+    "arguments": [
+      "svuint64_t even",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.D|Ztied.D"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_n_u8]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.B|Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_s16]",
+    "arguments": [
+      "svint16_t even",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.H|Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_s32]",
+    "arguments": [
+      "svint32_t even",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.S|Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_s64]",
+    "arguments": [
+      "svint64_t even",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.D|Ztied.D"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_s8]",
+    "arguments": [
+      "svint8_t even",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.B|Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_u16]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.H|Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_u32]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.S|Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_u64]",
+    "arguments": [
+      "svuint64_t even",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.D|Ztied.D"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "sveortb[_u8]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Zeven.B|Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORTB"
+      ],
+      [
+        "MOVPRFX",
+        "EORTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveorv[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveorv[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveorv[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveorv[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveorv[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveorv[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveorv[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "sveorv[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexpa[_f16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FEXPA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexpa[_f32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FEXPA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexpa[_f64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FEXPA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 127
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 63
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 127
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 63
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 255
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 127
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 63
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svext[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 255
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ],
+      [
+        "MOVPRFX",
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_s16]_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTB"
+      ],
+      [
+        "MOVPRFX",
+        "SXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTB"
+      ],
+      [
+        "MOVPRFX",
+        "SXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_s32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTB"
+      ],
+      [
+        "MOVPRFX",
+        "SXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTB"
+      ],
+      [
+        "MOVPRFX",
+        "SXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTB"
+      ],
+      [
+        "MOVPRFX",
+        "SXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTB"
+      ],
+      [
+        "MOVPRFX",
+        "SXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_u16]_m",
+    "arguments": [
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTB"
+      ],
+      [
+        "MOVPRFX",
+        "UXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTB"
+      ],
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_u32]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTB"
+      ],
+      [
+        "MOVPRFX",
+        "UXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTB"
+      ],
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_u64]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTB"
+      ],
+      [
+        "MOVPRFX",
+        "UXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTB"
+      ],
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextb[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UXTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_s32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTH"
+      ],
+      [
+        "MOVPRFX",
+        "SXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTH"
+      ],
+      [
+        "MOVPRFX",
+        "SXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTH"
+      ],
+      [
+        "MOVPRFX",
+        "SXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTH"
+      ],
+      [
+        "MOVPRFX",
+        "SXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_u32]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTH"
+      ],
+      [
+        "MOVPRFX",
+        "UXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTH"
+      ],
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_u64]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTH"
+      ],
+      [
+        "MOVPRFX",
+        "UXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTH"
+      ],
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svexth[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UXTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextw[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTW"
+      ],
+      [
+        "MOVPRFX",
+        "SXTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextw[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SXTW"
+      ],
+      [
+        "MOVPRFX",
+        "SXTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextw[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SXTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextw[_u64]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTW"
+      ],
+      [
+        "MOVPRFX",
+        "UXTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextw[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UXTW"
+      ],
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svextw[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UXTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_f16]",
+    "arguments": [
+      "svfloat16x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_f32]",
+    "arguments": [
+      "svfloat32x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_f64]",
+    "arguments": [
+      "svfloat64x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_s16]",
+    "arguments": [
+      "svint16x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_s32]",
+    "arguments": [
+      "svint32x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_s64]",
+    "arguments": [
+      "svint64x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_s8]",
+    "arguments": [
+      "svint8x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_u16]",
+    "arguments": [
+      "svuint16x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_u32]",
+    "arguments": [
+      "svuint32x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_u64]",
+    "arguments": [
+      "svuint64x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget2[_u8]",
+    "arguments": [
+      "svuint8x2_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_f16]",
+    "arguments": [
+      "svfloat16x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_f32]",
+    "arguments": [
+      "svfloat32x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_f64]",
+    "arguments": [
+      "svfloat64x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_s16]",
+    "arguments": [
+      "svint16x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_s32]",
+    "arguments": [
+      "svint32x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_s64]",
+    "arguments": [
+      "svint64x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_s8]",
+    "arguments": [
+      "svint8x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_u16]",
+    "arguments": [
+      "svuint16x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_u32]",
+    "arguments": [
+      "svuint32x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_u64]",
+    "arguments": [
+      "svuint64x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget3[_u8]",
+    "arguments": [
+      "svuint8x3_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_f16]",
+    "arguments": [
+      "svfloat16x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_f32]",
+    "arguments": [
+      "svfloat32x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_f64]",
+    "arguments": [
+      "svfloat64x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_s16]",
+    "arguments": [
+      "svint16x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_s32]",
+    "arguments": [
+      "svint32x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_s64]",
+    "arguments": [
+      "svint64x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_s8]",
+    "arguments": [
+      "svint8x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_u16]",
+    "arguments": [
+      "svuint16x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_u32]",
+    "arguments": [
+      "svuint32x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_u64]",
+    "arguments": [
+      "svuint64x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svget4[_u8]",
+    "arguments": [
+      "svuint8x4_t tuple",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHADD"
+      ],
+      [
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHADD"
+      ],
+      [
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhadd[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHADD"
+      ],
+      [
+        "MOVPRFX",
+        "UHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhistcnt[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "HISTCNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhistcnt[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "HISTCNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhistcnt[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "HISTCNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhistcnt[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "HISTCNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhistseg[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "HISTSEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhistseg[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "HISTSEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUB"
+      ],
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUB"
+      ],
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsub[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHSUBR"
+      ],
+      [
+        "SHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UHSUBR"
+      ],
+      [
+        "UHSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svhsubr[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UHSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UHSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svindex_s16",
+    "arguments": [
+      "int16_t base",
+      "int16_t step"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Wbase"
+      },
+      "step": {
+        "register": "Wstep"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svindex_s32",
+    "arguments": [
+      "int32_t base",
+      "int32_t step"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Wbase"
+      },
+      "step": {
+        "register": "Wstep"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svindex_s64",
+    "arguments": [
+      "int64_t base",
+      "int64_t step"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "step": {
+        "register": "Xstep"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svindex_s8",
+    "arguments": [
+      "int8_t base",
+      "int8_t step"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Wbase"
+      },
+      "step": {
+        "register": "Wstep"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svindex_u16",
+    "arguments": [
+      "uint16_t base",
+      "uint16_t step"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Wbase"
+      },
+      "step": {
+        "register": "Wstep"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svindex_u32",
+    "arguments": [
+      "uint32_t base",
+      "uint32_t step"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Wbase"
+      },
+      "step": {
+        "register": "Wstep"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svindex_u64",
+    "arguments": [
+      "uint64_t base",
+      "uint64_t step"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "step": {
+        "register": "Xstep"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svindex_u8",
+    "arguments": [
+      "uint8_t base",
+      "uint8_t step"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Wbase"
+      },
+      "step": {
+        "register": "Wstep"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ],
+      [
+        "INDEX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Hop2|Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Sop2|Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Dop2|Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Hop2|Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Sop2|Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Dop2|Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Bop2|Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Hop2|Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Sop2|Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Dop2|Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svinsr[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Bop2|Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INSR"
+      ],
+      [
+        "INSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlasta[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTA"
+      ],
+      [
+        "LASTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlastb[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LASTB"
+      ],
+      [
+        "LASTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u32base]_f32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u32base]_index_f32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u32base]_index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u32base]_index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u32base]_offset_f32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u64base]_f64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u64base]_index_f64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u64base]_offset_f64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s32]index[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s32]index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s32]index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s32]offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u32]index[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u32]index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u32]index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u32]offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_gather_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1D"
+      ],
+      [
+        "LD1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROH"
+      ],
+      [
+        "LD1ROH"
+      ],
+      [
+        "LD1ROH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROW"
+      ],
+      [
+        "LD1ROW"
+      ],
+      [
+        "LD1ROW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROD"
+      ],
+      [
+        "LD1ROD"
+      ],
+      [
+        "LD1ROD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROH"
+      ],
+      [
+        "LD1ROH"
+      ],
+      [
+        "LD1ROH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROW"
+      ],
+      [
+        "LD1ROW"
+      ],
+      [
+        "LD1ROW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROD"
+      ],
+      [
+        "LD1ROD"
+      ],
+      [
+        "LD1ROD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROB"
+      ],
+      [
+        "LD1ROB"
+      ],
+      [
+        "LD1ROB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROH"
+      ],
+      [
+        "LD1ROH"
+      ],
+      [
+        "LD1ROH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROW"
+      ],
+      [
+        "LD1ROW"
+      ],
+      [
+        "LD1ROW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROD"
+      ],
+      [
+        "LD1ROD"
+      ],
+      [
+        "LD1ROD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ro[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1ROB"
+      ],
+      [
+        "LD1ROB"
+      ],
+      [
+        "LD1ROB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQH"
+      ],
+      [
+        "LD1RQH"
+      ],
+      [
+        "LD1RQH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQW"
+      ],
+      [
+        "LD1RQW"
+      ],
+      [
+        "LD1RQW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQD"
+      ],
+      [
+        "LD1RQD"
+      ],
+      [
+        "LD1RQD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQH"
+      ],
+      [
+        "LD1RQH"
+      ],
+      [
+        "LD1RQH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQW"
+      ],
+      [
+        "LD1RQW"
+      ],
+      [
+        "LD1RQW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQD"
+      ],
+      [
+        "LD1RQD"
+      ],
+      [
+        "LD1RQD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQB"
+      ],
+      [
+        "LD1RQB"
+      ],
+      [
+        "LD1RQB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQH"
+      ],
+      [
+        "LD1RQH"
+      ],
+      [
+        "LD1RQH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQW"
+      ],
+      [
+        "LD1RQW"
+      ],
+      [
+        "LD1RQW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQD"
+      ],
+      [
+        "LD1RQD"
+      ],
+      [
+        "LD1RQD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1rq[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1RQB"
+      ],
+      [
+        "LD1RQB"
+      ],
+      [
+        "LD1RQB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather_[s32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather_[s32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_vnum_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_vnum_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sb_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SB"
+      ],
+      [
+        "LD1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u32base]_index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u32base]_index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[s32]index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[s32]index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[s32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[s32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[u32]index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[u32]index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sh_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SH"
+      ],
+      [
+        "LD1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ],
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ],
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ],
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ],
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ],
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ],
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ],
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1sw_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1SW"
+      ],
+      [
+        "LD1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather_[s32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather_[s32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_vnum_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_vnum_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1ub_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1B"
+      ],
+      [
+        "LD1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u32base]_index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u32base]_index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[s32]index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[s32]index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[s32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[s32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[u32]index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[u32]index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uh_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1H"
+      ],
+      [
+        "LD1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld1uw_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD1W"
+      ],
+      [
+        "LD1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat16x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2H"
+      ],
+      [
+        "LD2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat32x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2W"
+      ],
+      [
+        "LD2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat64x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2D"
+      ],
+      [
+        "LD2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint16x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2H"
+      ],
+      [
+        "LD2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2W"
+      ],
+      [
+        "LD2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base"
+    ],
+    "return_type": {
+      "value": "svint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2D"
+      ],
+      [
+        "LD2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint8x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2B"
+      ],
+      [
+        "LD2B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2H"
+      ],
+      [
+        "LD2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2W"
+      ],
+      [
+        "LD2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2D"
+      ],
+      [
+        "LD2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint8x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2B"
+      ],
+      [
+        "LD2B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat16x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2H"
+      ],
+      [
+        "LD2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat32x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2W"
+      ],
+      [
+        "LD2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat64x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2D"
+      ],
+      [
+        "LD2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2H"
+      ],
+      [
+        "LD2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2W"
+      ],
+      [
+        "LD2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2D"
+      ],
+      [
+        "LD2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint8x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2B"
+      ],
+      [
+        "LD2B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2H"
+      ],
+      [
+        "LD2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2W"
+      ],
+      [
+        "LD2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2D"
+      ],
+      [
+        "LD2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld2_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint8x2_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD2B"
+      ],
+      [
+        "LD2B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat16x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3H"
+      ],
+      [
+        "LD3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat32x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3W"
+      ],
+      [
+        "LD3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat64x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3D"
+      ],
+      [
+        "LD3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint16x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3H"
+      ],
+      [
+        "LD3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint32x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3W"
+      ],
+      [
+        "LD3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base"
+    ],
+    "return_type": {
+      "value": "svint64x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3D"
+      ],
+      [
+        "LD3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint8x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3B"
+      ],
+      [
+        "LD3B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3H"
+      ],
+      [
+        "LD3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3W"
+      ],
+      [
+        "LD3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3D"
+      ],
+      [
+        "LD3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint8x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3B"
+      ],
+      [
+        "LD3B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat16x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3H"
+      ],
+      [
+        "LD3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat32x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3W"
+      ],
+      [
+        "LD3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat64x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3D"
+      ],
+      [
+        "LD3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3H"
+      ],
+      [
+        "LD3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3W"
+      ],
+      [
+        "LD3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3D"
+      ],
+      [
+        "LD3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint8x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3B"
+      ],
+      [
+        "LD3B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3H"
+      ],
+      [
+        "LD3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3W"
+      ],
+      [
+        "LD3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3D"
+      ],
+      [
+        "LD3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld3_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint8x3_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD3B"
+      ],
+      [
+        "LD3B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat16x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4H"
+      ],
+      [
+        "LD4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat32x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4W"
+      ],
+      [
+        "LD4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat64x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4D"
+      ],
+      [
+        "LD4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4H"
+      ],
+      [
+        "LD4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4W"
+      ],
+      [
+        "LD4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base"
+    ],
+    "return_type": {
+      "value": "svint64x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4D"
+      ],
+      [
+        "LD4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint8x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4B"
+      ],
+      [
+        "LD4B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4H"
+      ],
+      [
+        "LD4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4W"
+      ],
+      [
+        "LD4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4D"
+      ],
+      [
+        "LD4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint8x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4B"
+      ],
+      [
+        "LD4B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat16x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4H"
+      ],
+      [
+        "LD4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat32x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4W"
+      ],
+      [
+        "LD4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat64x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4D"
+      ],
+      [
+        "LD4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4H"
+      ],
+      [
+        "LD4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4W"
+      ],
+      [
+        "LD4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4D"
+      ],
+      [
+        "LD4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint8x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4B"
+      ],
+      [
+        "LD4B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4H"
+      ],
+      [
+        "LD4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4W"
+      ],
+      [
+        "LD4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4D"
+      ],
+      [
+        "LD4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svld4_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint8x4_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4B"
+      ],
+      [
+        "LD4B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ],
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ],
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ],
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u32base]_f32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u32base]_index_f32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u32base]_index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u32base]_index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u32base]_offset_f32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u64base]_f64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u64base]_index_f64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ],
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ],
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ],
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u64base]_offset_f64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ],
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ],
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ],
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s32]index[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s32]index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s32]index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s32]offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u32]index[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u32]index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u32]index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u32]offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_gather_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather_[s32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather_[s32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ],
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_vnum_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_vnum_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sb_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u32base]_index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u32base]_index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[s32]index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[s32]index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[s32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[s32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[u32]index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[u32]index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ],
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sh_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ],
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ],
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ],
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ],
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ],
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ],
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1sw_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather_[s32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather_[s32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ],
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_vnum_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_vnum_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1ub_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u32base]_index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u32base]_index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[s32]index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[s32]index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[s32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[s32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[u32]index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[u32]index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ],
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uh_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ],
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldff1uw_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDFF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcnth() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ],
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw() * 4": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ],
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 8": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1D"
+      ],
+      [
+        "LDNF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcnth() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ],
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw() * 4": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ],
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 8": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1D"
+      ],
+      [
+        "LDNF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntb()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ],
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcnth() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ],
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw() * 4": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ],
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 8": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1D"
+      ],
+      [
+        "LDNF1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntb()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ],
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_vnum_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcnth()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ],
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ],
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ],
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_vnum_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcnth()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ],
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ],
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sb_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SB"
+      ],
+      [
+        "LDNF1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sh_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sh_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sh_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sh_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sh_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SH"
+      ],
+      [
+        "LDNF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sh_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SH"
+      ],
+      [
+        "LDNF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sh_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SH"
+      ],
+      [
+        "LDNF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sh_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SH"
+      ],
+      [
+        "LDNF1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sw_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sw_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sw_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 4": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SW"
+      ],
+      [
+        "LDNF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1sw_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 4": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1SW"
+      ],
+      [
+        "LDNF1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_vnum_s16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcnth()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ],
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ],
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ],
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_vnum_u16",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcnth()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ],
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ],
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1ub_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd()": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1B"
+      ],
+      [
+        "LDNF1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uh_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uh_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uh_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uh_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uh_vnum_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ],
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uh_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ],
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uh_vnum_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntw() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ],
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uh_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 2": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1H"
+      ],
+      [
+        "LDNF1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uw_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uw_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uw_vnum_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 4": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ],
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnf1uw_vnum_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "base + vnum * svcntd() * 4": {
+        "register": "Xptr"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNF1W"
+      ],
+      [
+        "LDNF1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ],
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ],
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ],
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ],
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ],
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ],
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ],
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ],
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ],
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ],
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ],
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u32base]_f32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u32base]_index_f32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u32base]_index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u32base]_index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u32base]_offset_f32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u64base]_f64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u64base]_index_f64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u64base]_offset_f64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[s64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[s64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[s64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[s64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[u32]offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[u32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[u32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[u64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[u64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[u64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[u64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1_gather_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "const float16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ],
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "const float32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ],
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "const float64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ],
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ],
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ],
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "const int64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ],
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ],
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ],
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ],
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint64_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1D"
+      ],
+      [
+        "LDNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svldnt1_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "int64_t vnum"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ],
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sb_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u32base]_index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u32base]_index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sh_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1sw_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const int32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1SW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1ub_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint8_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u32base]_index_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u32base]_index_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u32base]_offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u32base]_offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u32base]_s32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u32base]_u32",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[u32]offset_s32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[u32]offset_u32",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint32_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uh_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint16_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather[_u64base]_index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather[_u64base]_index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather[_u64base]_offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather[_u64base]_offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather[_u64base]_s64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather[_u64base]_u64",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather_[s64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather_[s64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather_[s64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather_[s64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather_[u64]index_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather_[u64]index_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather_[u64]offset_s64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svldnt1uw_gather_[u64]offset_u64",
+    "arguments": [
+      "svbool_t pg",
+      "const uint32_t *base",
+      "svuint64_t offsets"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlen[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svlogb[_f16]_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FLOGB"
+      ],
+      [
+        "MOVPRFX",
+        "FLOGB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svlogb[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FLOGB"
+      ],
+      [
+        "MOVPRFX",
+        "FLOGB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svlogb[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FLOGB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svlogb[_f32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FLOGB"
+      ],
+      [
+        "MOVPRFX",
+        "FLOGB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svlogb[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FLOGB"
+      ],
+      [
+        "MOVPRFX",
+        "FLOGB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svlogb[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FLOGB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svlogb[_f64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FLOGB"
+      ],
+      [
+        "MOVPRFX",
+        "FLOGB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svlogb[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FLOGB"
+      ],
+      [
+        "MOVPRFX",
+        "FLOGB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svlogb[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FLOGB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSLR"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsl_wide[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "LSRR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSRR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "LSRR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSRR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "LSRR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSRR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "LSRR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSRR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSRR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSRR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSRR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSRR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSRR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSRR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSRR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSRR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSR"
+      ],
+      [
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svlsr_wide[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmad[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmatch[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MATCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmatch[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MATCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmatch[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MATCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmatch[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MATCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ],
+      [
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ],
+      [
+        "MOVPRFX",
+        "FMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAX"
+      ],
+      [
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMAX"
+      ],
+      [
+        "MOVPRFX",
+        "SMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAX"
+      ],
+      [
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmax[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMAX"
+      ],
+      [
+        "MOVPRFX",
+        "UMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ],
+      [
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnm[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxnmp[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxnmp[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxnmp[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxnmp[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxnmp[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxnmp[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnmv[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNMV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnmv[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNMV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxnmv[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXNMV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "FMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "SMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "SMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "SMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "SMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "SMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "SMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "SMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "SMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "UMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "UMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "UMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "UMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "UMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "UMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "UMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmaxp[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXP"
+      ],
+      [
+        "MOVPRFX",
+        "UMAXP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmaxv[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMAXV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ],
+      [
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ],
+      [
+        "MOVPRFX",
+        "FMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMIN"
+      ],
+      [
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMIN"
+      ],
+      [
+        "MOVPRFX",
+        "SMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMIN"
+      ],
+      [
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmin[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMIN"
+      ],
+      [
+        "MOVPRFX",
+        "UMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ],
+      [
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnm[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminnmp[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminnmp[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminnmp[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminnmp[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminnmp[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminnmp[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNMP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINNMP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnmv[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNMV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnmv[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNMV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminnmv[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINNMV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINP"
+      ],
+      [
+        "MOVPRFX",
+        "FMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINP"
+      ],
+      [
+        "MOVPRFX",
+        "SMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINP"
+      ],
+      [
+        "MOVPRFX",
+        "SMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINP"
+      ],
+      [
+        "MOVPRFX",
+        "SMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINP"
+      ],
+      [
+        "MOVPRFX",
+        "SMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINP"
+      ],
+      [
+        "MOVPRFX",
+        "SMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINP"
+      ],
+      [
+        "MOVPRFX",
+        "SMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINP"
+      ],
+      [
+        "MOVPRFX",
+        "SMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINP"
+      ],
+      [
+        "MOVPRFX",
+        "SMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINP"
+      ],
+      [
+        "MOVPRFX",
+        "UMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINP"
+      ],
+      [
+        "MOVPRFX",
+        "UMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINP"
+      ],
+      [
+        "MOVPRFX",
+        "UMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINP"
+      ],
+      [
+        "MOVPRFX",
+        "UMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINP"
+      ],
+      [
+        "MOVPRFX",
+        "UMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINP"
+      ],
+      [
+        "MOVPRFX",
+        "UMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINP"
+      ],
+      [
+        "MOVPRFX",
+        "UMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svminp[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINP"
+      ],
+      [
+        "MOVPRFX",
+        "UMINP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svminv[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMINV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ],
+      [
+        "MOVPRFX",
+        "MAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla_lane[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla_lane[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmla_lane[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmla_lane[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmla_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmla_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmla_lane[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmla_lane[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmla_lane[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLA"
+      ],
+      [
+        "MOVPRFX",
+        "MLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_n_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb_lane[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb_lane[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalb_lane[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "FMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_n_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "FMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt_lane[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "FMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt_lane[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlalt_lane[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls_lane[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls_lane[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmls_lane[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmls_lane[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmls_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmls_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmls_lane[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmls_lane[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmls_lane[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_n_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb_lane[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb_lane[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslb_lane[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "FMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_n_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "FMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt_lane[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "FMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt_lane[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmlslt_lane[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "UMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmmla[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmmla[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FMMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmmla[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMMLA"
+      ],
+      [
+        "MOVPRFX",
+        "SMMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmmla[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMMLA"
+      ],
+      [
+        "MOVPRFX",
+        "UMMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmov[_b]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlb[_s16]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlb[_s32]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlb[_s64]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlb[_u16]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlb[_u32]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlb[_u64]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlt[_s16]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlt[_s32]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlt[_s64]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlt[_u16]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlt[_u32]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmovlt[_u64]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMSB"
+      ],
+      [
+        "FMSB"
+      ],
+      [
+        "FMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]|Ztied3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "op3": {
+        "register": "Zop3.B|Ztied3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MSB"
+      ],
+      [
+        "MSB"
+      ],
+      [
+        "MLS"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmsb[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MSB"
+      ],
+      [
+        "MOVPRFX",
+        "MLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ],
+      [
+        "MOVPRFX",
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ],
+      [
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "MUL"
+      ],
+      [
+        "MOVPRFX",
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul_lane[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul_lane[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmul_lane[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmul_lane[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmul_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmul_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmul_lane[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmul_lane[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmul_lane[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULH"
+      ],
+      [
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SMULH"
+      ],
+      [
+        "MOVPRFX",
+        "SMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULH"
+      ],
+      [
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulh[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UMULH"
+      ],
+      [
+        "MOVPRFX",
+        "UMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_n_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_n_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_n_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb[_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb_lane[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb_lane[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb_lane[_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullb_lane[_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_n_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_n_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_n_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt[_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt_lane[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt_lane[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt_lane[_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svmullt_lane[_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMULX"
+      ],
+      [
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svmulx[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FMULX"
+      ],
+      [
+        "MOVPRFX",
+        "FMULX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnand[_b]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NAND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "svuint16_t op3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnbsl[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NBSL"
+      ],
+      [
+        "MOVPRFX",
+        "NBSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNEG"
+      ],
+      [
+        "MOVPRFX",
+        "FNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNEG"
+      ],
+      [
+        "MOVPRFX",
+        "FNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNEG"
+      ],
+      [
+        "MOVPRFX",
+        "FNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNEG"
+      ],
+      [
+        "MOVPRFX",
+        "FNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNEG"
+      ],
+      [
+        "MOVPRFX",
+        "FNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNEG"
+      ],
+      [
+        "MOVPRFX",
+        "FNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s16]_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NEG"
+      ],
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NEG"
+      ],
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NEG"
+      ],
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NEG"
+      ],
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NEG"
+      ],
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NEG"
+      ],
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s8]_m",
+    "arguments": [
+      "svint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NEG"
+      ],
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NEG"
+      ],
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svneg[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmad[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnmatch[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NMATCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnmatch[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NMATCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnmatch[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NMATCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svnmatch[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NMATCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLA"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmla[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLA"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FNMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMLS"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmls[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H|Ztied3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "svfloat16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S|Ztied3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "svfloat32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D|Ztied3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "svfloat64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]|Ztied3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "float16_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]|Ztied3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "float32_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]|Ztied3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMSB"
+      ],
+      [
+        "FNMLS"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnmsb[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "float64_t op3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMSB"
+      ],
+      [
+        "MOVPRFX",
+        "FNMLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnor[_b]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_b]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s16]_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s8]_m",
+    "arguments": [
+      "svint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u16]_m",
+    "arguments": [
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u32]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u64]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u8]_m",
+    "arguments": [
+      "svuint8_t inactive",
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "NOT"
+      ],
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svnot[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "NOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorn[_b]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_b]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ],
+      [
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorr[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "ORR"
+      ],
+      [
+        "MOVPRFX",
+        "ORR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorv[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorv[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorv[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorv[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorv[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorv[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorv[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svorv[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ORV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svpfalse[_b]",
+    "arguments": [],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PFALSE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svpfirst[_b]",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ptied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PFIRST"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmul[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmul[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb[_n_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb[_n_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb[_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb[_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb_pair[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb_pair[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb_pair[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb_pair[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb_pair[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullb_pair[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt[_n_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt[_n_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt[_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt[_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt_pair[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt_pair[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt_pair[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt_pair[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt_pair[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpmullt_pair[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svpnext_b16",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ptied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PNEXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svpnext_b32",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ptied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PNEXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svpnext_b64",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ptied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PNEXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svpnext_b8",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Ptied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PNEXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb_gather[_u32base]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb_gather[_u32base]_offset",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb_gather[_u64base]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb_gather[_u64base]_offset",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb_gather_[s32]offset",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svint32_t offsets",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb_gather_[s64]offset",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svint64_t offsets",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb_gather_[u32]offset",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svuint32_t offsets",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb_gather_[u64]offset",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svuint64_t offsets",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfb_vnum",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "int64_t vnum",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFB"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ],
+      [
+        "PRFD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd_gather[_u32base]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd_gather[_u32base]_index",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd_gather[_u64base]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd_gather[_u64base]_index",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd_gather_[s32]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svint32_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd_gather_[s64]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svint64_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd_gather_[u32]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svuint32_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd_gather_[u64]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svuint64_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfd_vnum",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "int64_t vnum",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFD"
+      ],
+      [
+        "PRFD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ],
+      [
+        "PRFH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh_gather[_u32base]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh_gather[_u32base]_index",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh_gather[_u64base]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh_gather[_u64base]_index",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh_gather_[s32]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svint32_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh_gather_[s64]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svint64_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh_gather_[u32]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svuint32_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh_gather_[u64]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svuint64_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfh_vnum",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "int64_t vnum",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFH"
+      ],
+      [
+        "PRFH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ],
+      [
+        "PRFW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw_gather[_u32base]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw_gather[_u32base]_index",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw_gather[_u64base]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw_gather[_u64base]_index",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ],
+      [
+        "PRFB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw_gather_[s32]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svint32_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw_gather_[s64]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svint64_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw_gather_[u32]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svuint32_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw_gather_[u64]index",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "svuint64_t indices",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svprfw_vnum",
+    "arguments": [
+      "svbool_t pg",
+      "const void *base",
+      "int64_t vnum",
+      "enum svprfop op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PRFW"
+      ],
+      [
+        "PRFW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpsel_lane_b16",
+    "arguments": [
+      "svbool_t pn",
+      "svbool_t pm",
+      "uint32_t idx"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "idx": {
+        "index": "[Wreg1, imm1]"
+      },
+      "pm": {
+        "register": "Preg3"
+      },
+      "pn": {
+        "register": "Preg2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PSEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpsel_lane_b32",
+    "arguments": [
+      "svbool_t pn",
+      "svbool_t pm",
+      "uint32_t idx"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "idx": {
+        "index": "[Wreg1, imm1]"
+      },
+      "pm": {
+        "register": "Preg3"
+      },
+      "pn": {
+        "register": "Preg2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PSEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpsel_lane_b64",
+    "arguments": [
+      "svbool_t pn",
+      "svbool_t pm",
+      "uint32_t idx"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "idx": {
+        "index": "[Wreg1, imm1]"
+      },
+      "pm": {
+        "register": "Preg3"
+      },
+      "pn": {
+        "register": "Preg2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PSEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svpsel_lane_b8",
+    "arguments": [
+      "svbool_t pn",
+      "svbool_t pm",
+      "uint32_t idx"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "idx": {
+        "index": "[Wreg1, imm1]"
+      },
+      "pm": {
+        "register": "Preg3"
+      },
+      "pn": {
+        "register": "Preg2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PSEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptest_any",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "bool"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptest_first",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "bool"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptest_last",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "bool"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptrue_b16",
+    "arguments": [],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PTRUE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptrue_b32",
+    "arguments": [],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PTRUE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptrue_b64",
+    "arguments": [],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PTRUE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptrue_b8",
+    "arguments": [],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PTRUE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptrue_pat_b16",
+    "arguments": [
+      "enum svpattern pattern"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PTRUE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptrue_pat_b32",
+    "arguments": [
+      "enum svpattern pattern"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PTRUE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptrue_pat_b64",
+    "arguments": [
+      "enum svpattern pattern"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PTRUE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svptrue_pat_b8",
+    "arguments": [
+      "enum svpattern pattern"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PTRUE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s16]_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQABS"
+      ],
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQABS"
+      ],
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQABS"
+      ],
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQABS"
+      ],
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQABS"
+      ],
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQABS"
+      ],
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s8]_m",
+    "arguments": [
+      "svint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQABS"
+      ],
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQABS"
+      ],
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqabs[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqadd[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ],
+      [
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqadd[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQADD"
+      ],
+      [
+        "MOVPRFX",
+        "UQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqcadd[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQCADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqcadd[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQCADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqcadd[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQCADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqcadd[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQCADD"
+      ],
+      [
+        "MOVPRFX",
+        "SQCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqcvtn_s16[_s32_x2]",
+    "arguments": [
+      "svint32x2_t zn"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "zn": {
+        "Z multi-vector": "{ Zreg2.S, Zreg3.S }"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQCVTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqcvtn_u16[_s32_x2]",
+    "arguments": [
+      "svint32x2_t zn"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "zn": {
+        "Z multi-vector": "{ Zreg2.S, Zreg3.S }"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQCVTUN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqcvtn_u16[_u32_x2]",
+    "arguments": [
+      "svuint32x2_t zn"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "zn": {
+        "Z multi-vector": "{ Zreg2.S, Zreg3.S }"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQCVTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecb[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecb[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecb[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecb[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecb_pat[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecb_pat[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecb_pat[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecb_pat[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd[_s64]",
+    "arguments": [
+      "svint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECD"
+      ],
+      [
+        "MOVPRFX",
+        "SQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd[_u64]",
+    "arguments": [
+      "svuint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECD"
+      ],
+      [
+        "MOVPRFX",
+        "UQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd_pat[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd_pat[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd_pat[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd_pat[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd_pat[_s64]",
+    "arguments": [
+      "svint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECD"
+      ],
+      [
+        "MOVPRFX",
+        "SQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecd_pat[_u64]",
+    "arguments": [
+      "svuint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECD"
+      ],
+      [
+        "MOVPRFX",
+        "UQDECD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech[_s16]",
+    "arguments": [
+      "svint16_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECH"
+      ],
+      [
+        "MOVPRFX",
+        "SQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech[_u16]",
+    "arguments": [
+      "svuint16_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECH"
+      ],
+      [
+        "MOVPRFX",
+        "UQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech_pat[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech_pat[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech_pat[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech_pat[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech_pat[_s16]",
+    "arguments": [
+      "svint16_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECH"
+      ],
+      [
+        "MOVPRFX",
+        "SQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdech_pat[_u16]",
+    "arguments": [
+      "svuint16_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECH"
+      ],
+      [
+        "MOVPRFX",
+        "UQDECH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_s32]_b16",
+    "arguments": [
+      "int32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_s32]_b32",
+    "arguments": [
+      "int32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_s32]_b64",
+    "arguments": [
+      "int32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_s32]_b8",
+    "arguments": [
+      "int32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_s64]_b16",
+    "arguments": [
+      "int64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_s64]_b32",
+    "arguments": [
+      "int64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_s64]_b64",
+    "arguments": [
+      "int64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_s64]_b8",
+    "arguments": [
+      "int64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_u32]_b16",
+    "arguments": [
+      "uint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_u32]_b32",
+    "arguments": [
+      "uint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_u32]_b64",
+    "arguments": [
+      "uint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_u32]_b8",
+    "arguments": [
+      "uint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_u64]_b16",
+    "arguments": [
+      "uint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_u64]_b32",
+    "arguments": [
+      "uint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_u64]_b64",
+    "arguments": [
+      "uint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_n_u64]_b8",
+    "arguments": [
+      "uint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_s16]",
+    "arguments": [
+      "svint16_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ],
+      [
+        "MOVPRFX",
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_s32]",
+    "arguments": [
+      "svint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ],
+      [
+        "MOVPRFX",
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_s64]",
+    "arguments": [
+      "svint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECP"
+      ],
+      [
+        "MOVPRFX",
+        "SQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_u16]",
+    "arguments": [
+      "svuint16_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ],
+      [
+        "MOVPRFX",
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_u32]",
+    "arguments": [
+      "svuint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ],
+      [
+        "MOVPRFX",
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecp[_u64]",
+    "arguments": [
+      "svuint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECP"
+      ],
+      [
+        "MOVPRFX",
+        "UQDECP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw[_s32]",
+    "arguments": [
+      "svint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECW"
+      ],
+      [
+        "MOVPRFX",
+        "SQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw[_u32]",
+    "arguments": [
+      "svuint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECW"
+      ],
+      [
+        "MOVPRFX",
+        "UQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw_pat[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw_pat[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw_pat[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw_pat[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw_pat[_s32]",
+    "arguments": [
+      "svint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDECW"
+      ],
+      [
+        "MOVPRFX",
+        "SQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqdecw_pat[_u32]",
+    "arguments": [
+      "svuint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQDECW"
+      ],
+      [
+        "MOVPRFX",
+        "UQDECW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalb[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalb[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalb[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalb_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalb_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalbt[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalbt[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalbt[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalbt[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalbt[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalbt[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalt[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalt[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalt[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalt[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalt[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalt[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalt_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlalt_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLALT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLALT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslb[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslb[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslb[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslb_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslb_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLB"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslbt[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslbt[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslbt[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslbt[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslbt[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslbt[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLBT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslt[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslt[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslt[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslt[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslt[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslt[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslt_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmlslt_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMLSLT"
+      ],
+      [
+        "MOVPRFX",
+        "SQDMLSLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh_lane[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmulh_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullb[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullb[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullb[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullb[_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullb[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullb[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullb_lane[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullb_lane[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullt[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullt[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullt[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullt[_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullt[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullt[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullt_lane[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqdmullt_lane[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQDMULLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincb[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincb[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincb[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincb[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincb_pat[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincb_pat[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincb_pat[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincb_pat[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd[_s64]",
+    "arguments": [
+      "svint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCD"
+      ],
+      [
+        "MOVPRFX",
+        "SQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd[_u64]",
+    "arguments": [
+      "svuint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCD"
+      ],
+      [
+        "MOVPRFX",
+        "UQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd_pat[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd_pat[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd_pat[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd_pat[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd_pat[_s64]",
+    "arguments": [
+      "svint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCD"
+      ],
+      [
+        "MOVPRFX",
+        "SQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincd_pat[_u64]",
+    "arguments": [
+      "svuint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCD"
+      ],
+      [
+        "MOVPRFX",
+        "UQINCD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch[_s16]",
+    "arguments": [
+      "svint16_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCH"
+      ],
+      [
+        "MOVPRFX",
+        "SQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch[_u16]",
+    "arguments": [
+      "svuint16_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCH"
+      ],
+      [
+        "MOVPRFX",
+        "UQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch_pat[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch_pat[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch_pat[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch_pat[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch_pat[_s16]",
+    "arguments": [
+      "svint16_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCH"
+      ],
+      [
+        "MOVPRFX",
+        "SQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqinch_pat[_u16]",
+    "arguments": [
+      "svuint16_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCH"
+      ],
+      [
+        "MOVPRFX",
+        "UQINCH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_s32]_b16",
+    "arguments": [
+      "int32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_s32]_b32",
+    "arguments": [
+      "int32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_s32]_b64",
+    "arguments": [
+      "int32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_s32]_b8",
+    "arguments": [
+      "int32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_s64]_b16",
+    "arguments": [
+      "int64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_s64]_b32",
+    "arguments": [
+      "int64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_s64]_b64",
+    "arguments": [
+      "int64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_s64]_b8",
+    "arguments": [
+      "int64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_u32]_b16",
+    "arguments": [
+      "uint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_u32]_b32",
+    "arguments": [
+      "uint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_u32]_b64",
+    "arguments": [
+      "uint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_u32]_b8",
+    "arguments": [
+      "uint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Wtied"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_u64]_b16",
+    "arguments": [
+      "uint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_u64]_b32",
+    "arguments": [
+      "uint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_u64]_b64",
+    "arguments": [
+      "uint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_n_u64]_b8",
+    "arguments": [
+      "uint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Xtied"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_s16]",
+    "arguments": [
+      "svint16_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ],
+      [
+        "MOVPRFX",
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_s32]",
+    "arguments": [
+      "svint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ],
+      [
+        "MOVPRFX",
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_s64]",
+    "arguments": [
+      "svint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCP"
+      ],
+      [
+        "MOVPRFX",
+        "SQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_u16]",
+    "arguments": [
+      "svuint16_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ],
+      [
+        "MOVPRFX",
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_u32]",
+    "arguments": [
+      "svuint32_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ],
+      [
+        "MOVPRFX",
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincp[_u64]",
+    "arguments": [
+      "svuint64_t op",
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCP"
+      ],
+      [
+        "MOVPRFX",
+        "UQINCP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw[_s32]",
+    "arguments": [
+      "svint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCW"
+      ],
+      [
+        "MOVPRFX",
+        "SQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw[_u32]",
+    "arguments": [
+      "svuint32_t op",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCW"
+      ],
+      [
+        "MOVPRFX",
+        "UQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw_pat[_n_s32]",
+    "arguments": [
+      "int32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw_pat[_n_s64]",
+    "arguments": [
+      "int64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw_pat[_n_u32]",
+    "arguments": [
+      "uint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Wtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw_pat[_n_u64]",
+    "arguments": [
+      "uint64_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Xtied"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw_pat[_s32]",
+    "arguments": [
+      "svint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQINCW"
+      ],
+      [
+        "MOVPRFX",
+        "SQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqincw_pat[_u32]",
+    "arguments": [
+      "svuint32_t op",
+      "enum svpattern pattern",
+      "uint64_t imm_factor"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_factor": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQINCW"
+      ],
+      [
+        "MOVPRFX",
+        "UQINCW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s16]_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQNEG"
+      ],
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQNEG"
+      ],
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQNEG"
+      ],
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQNEG"
+      ],
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQNEG"
+      ],
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQNEG"
+      ],
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s8]_m",
+    "arguments": [
+      "svint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQNEG"
+      ],
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQNEG"
+      ],
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqneg[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQNEG"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdcmlah[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDCMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDCMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdcmlah[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDCMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDCMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdcmlah[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDCMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDCMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdcmlah[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDCMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDCMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdcmlah_lane[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDCMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDCMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdcmlah_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index",
+      "uint64_t imm_rotation"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDCMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDCMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah_lane[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlah_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLAH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLAH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "int16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "int32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "int64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh_lane[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "svint16_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "op3": {
+        "register": "Zop3.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "svint32_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmlsh_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "svint64_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMLSH"
+      ],
+      [
+        "MOVPRFX",
+        "SQRDMLSH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh_lane[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrdmulh_lane[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRDMULH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SQRSHL"
+      ],
+      [
+        "SQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SQRSHL"
+      ],
+      [
+        "SQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SQRSHL"
+      ],
+      [
+        "SQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SQRSHL"
+      ],
+      [
+        "SQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "UQRSHL"
+      ],
+      [
+        "UQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "UQRSHL"
+      ],
+      [
+        "UQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "UQRSHL"
+      ],
+      [
+        "UQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "UQRSHL"
+      ],
+      [
+        "UQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHL"
+      ],
+      [
+        "SQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHL"
+      ],
+      [
+        "SQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHL"
+      ],
+      [
+        "SQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHL"
+      ],
+      [
+        "SQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHL"
+      ],
+      [
+        "UQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHL"
+      ],
+      [
+        "UQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHL"
+      ],
+      [
+        "UQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHL"
+      ],
+      [
+        "UQRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshl[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrn[_n]_s16[_s32_x2]",
+    "arguments": [
+      "svint32x2_t zn",
+      "uint64_t imm"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm": {
+        "immediate": "imm1"
+      },
+      "zn": {
+        "Z multi-vector": "{ Zreg2.S, Zreg3.S }"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrn[_n]_u16[_u32_x2]",
+    "arguments": [
+      "svuint32x2_t zn",
+      "uint64_t imm"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm": {
+        "immediate": "imm1"
+      },
+      "zn": {
+        "Z multi-vector": "{ Zreg2.S, Zreg3.S }"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHRN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnt[_n_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnt[_n_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnt[_n_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnt[_n_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnt[_n_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrnt[_n_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQRSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrun[_n]_u16[_s32_x2]",
+    "arguments": [
+      "svint32x2_t zn",
+      "uint64_t imm"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm": {
+        "immediate": "imm1"
+      },
+      "zn": {
+        "Z multi-vector": "{ Zreg2.S, Zreg3.S }"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRUN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrunb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRUNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrunb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRUNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrunb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRUNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrunt[_n_s16]",
+    "arguments": [
+      "svuint8_t even",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRUNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrunt[_n_s32]",
+    "arguments": [
+      "svuint16_t even",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRUNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqrshrunt[_n_s64]",
+    "arguments": [
+      "svuint32_t even",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQRSHRUNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "SQSHL"
+      ],
+      [
+        "SQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "SQSHL"
+      ],
+      [
+        "SQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "SQSHL"
+      ],
+      [
+        "SQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "ASR"
+      ],
+      [
+        "SQSHL"
+      ],
+      [
+        "SQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "ASR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "UQSHL"
+      ],
+      [
+        "UQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "UQSHL"
+      ],
+      [
+        "UQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "UQSHL"
+      ],
+      [
+        "UQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "LSR"
+      ],
+      [
+        "UQSHL"
+      ],
+      [
+        "UQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "LSR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHL"
+      ],
+      [
+        "SQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "UQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "UQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "UQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHL"
+      ],
+      [
+        "UQSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshl[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSHL"
+      ],
+      [
+        "MOVPRFX",
+        "UQSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHLU"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHLU"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHLU"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHLU"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 63
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHLU"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 63
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHLU"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 63
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHLU"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHLU"
+      ],
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshlu[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSHLU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnt[_n_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnt[_n_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnt[_n_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnt[_n_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnt[_n_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrnt[_n_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrunb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRUNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrunb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRUNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrunb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRUNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrunt[_n_s16]",
+    "arguments": [
+      "svuint8_t even",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRUNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrunt[_n_s32]",
+    "arguments": [
+      "svuint16_t even",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRUNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqshrunt[_n_s64]",
+    "arguments": [
+      "svuint32_t even",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSHRUNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQADD"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svqsub[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsub[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQSUBR"
+      ],
+      [
+        "SQSUB"
+      ],
+      [
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQSUBR"
+      ],
+      [
+        "UQSUB"
+      ],
+      [
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqsubr[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "UQSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "UQSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnb[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnb[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnb[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnb[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQXTNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnb[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQXTNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnb[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQXTNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnt[_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnt[_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnt[_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnt[_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQXTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnt[_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQXTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtnt[_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQXTNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtunb[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTUNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtunb[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTUNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtunb[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTUNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtunt[_s16]",
+    "arguments": [
+      "svuint8_t even",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTUNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtunt[_s32]",
+    "arguments": [
+      "svuint16_t even",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTUNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svqxtunt[_s64]",
+    "arguments": [
+      "svuint32_t even",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQXTUNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnb[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_n_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_n_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_n_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_n_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_n_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_n_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svraddhnt[_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RADDHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrax1[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RAX1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrax1[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RAX1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s16]_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s8]_m",
+    "arguments": [
+      "svint8_t inactive",
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u16]_m",
+    "arguments": [
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u32]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u64]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u8]_m",
+    "arguments": [
+      "svuint8_t inactive",
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.B|Ztied.B"
+      },
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B|Ztied.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RBIT"
+      ],
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrbit[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "RBIT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrdffr",
+    "arguments": [],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RDFFR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrdffr_z",
+    "arguments": [
+      "svbool_t pg"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RDFFR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpe[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpe[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpe[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrecpe[_u32]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URECPE"
+      ],
+      [
+        "MOVPRFX",
+        "URECPE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrecpe[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URECPE"
+      ],
+      [
+        "MOVPRFX",
+        "URECPE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrecpe[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URECPE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecps[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecps[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecps[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpx[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPX"
+      ],
+      [
+        "MOVPRFX",
+        "FRECPX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpx[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPX"
+      ],
+      [
+        "MOVPRFX",
+        "FRECPX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpx[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRECPX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpx[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPX"
+      ],
+      [
+        "MOVPRFX",
+        "FRECPX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpx[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPX"
+      ],
+      [
+        "MOVPRFX",
+        "FRECPX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpx[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRECPX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpx[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPX"
+      ],
+      [
+        "MOVPRFX",
+        "FRECPX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpx[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRECPX"
+      ],
+      [
+        "MOVPRFX",
+        "FRECPX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrecpx[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRECPX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f16[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f32[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_f64[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s16[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s32[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s64[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_s8[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u16[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u32[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u64[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svreinterpret_u8[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_s16]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_s32]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_s64]",
+    "arguments": [
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_s8]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_u16]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_u32]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_u64]",
+    "arguments": [
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev[_u8]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev_b16",
+    "arguments": [
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev_b32",
+    "arguments": [
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev_b64",
+    "arguments": [
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrev_b8",
+    "arguments": [
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_s16]_m",
+    "arguments": [
+      "svint16_t inactive",
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_s32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_u16]_m",
+    "arguments": [
+      "svuint16_t inactive",
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_u32]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_u64]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVB"
+      ],
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevb[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_f16]_m",
+    "arguments": [
+      "svfloat16_t zd",
+      "svbool_t pg",
+      "svfloat16_t zn"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t zn"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t zn"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_f32]_m",
+    "arguments": [
+      "svfloat32_t zd",
+      "svbool_t pg",
+      "svfloat32_t zn"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t zn"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t zn"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_f64]_m",
+    "arguments": [
+      "svfloat64_t zd",
+      "svbool_t pg",
+      "svfloat64_t zn"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t zn"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t zn"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s16]_m",
+    "arguments": [
+      "svint16_t zd",
+      "svbool_t pg",
+      "svint16_t zn"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t zn"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t zn"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s32]_m",
+    "arguments": [
+      "svint32_t zd",
+      "svbool_t pg",
+      "svint32_t zn"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t zn"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t zn"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s64]_m",
+    "arguments": [
+      "svint64_t zd",
+      "svbool_t pg",
+      "svint64_t zn"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t zn"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t zn"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s8]_m",
+    "arguments": [
+      "svint8_t zd",
+      "svbool_t pg",
+      "svint8_t zn"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t zn"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t zn"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u16]_m",
+    "arguments": [
+      "svuint16_t zd",
+      "svbool_t pg",
+      "svuint16_t zn"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t zn"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t zn"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u32]_m",
+    "arguments": [
+      "svuint32_t zd",
+      "svbool_t pg",
+      "svuint32_t zn"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t zn"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t zn"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u64]_m",
+    "arguments": [
+      "svuint64_t zd",
+      "svbool_t pg",
+      "svuint64_t zn"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t zn"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t zn"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u8]_m",
+    "arguments": [
+      "svuint8_t zd",
+      "svbool_t pg",
+      "svuint8_t zn"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zd": {
+        "register": "Zreg1.Q"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t zn"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOV",
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrevd[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t zn"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "pg": {
+        "register": "Preg1"
+      },
+      "zn": {
+        "register": "Zreg2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_s32]_m",
+    "arguments": [
+      "svint32_t inactive",
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVH"
+      ],
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVH"
+      ],
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVH"
+      ],
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVH"
+      ],
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_u32]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVH"
+      ],
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVH"
+      ],
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_u64]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVH"
+      ],
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVH"
+      ],
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevh[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVH"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevw[_s64]_m",
+    "arguments": [
+      "svint64_t inactive",
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVW"
+      ],
+      [
+        "MOVPRFX",
+        "REVW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevw[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVW"
+      ],
+      [
+        "MOVPRFX",
+        "REVW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevw[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevw[_u64]_m",
+    "arguments": [
+      "svuint64_t inactive",
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVW"
+      ],
+      [
+        "MOVPRFX",
+        "REVW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevw[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "REVW"
+      ],
+      [
+        "MOVPRFX",
+        "REVW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrevw[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "REVW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRHADD"
+      ],
+      [
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ],
+      [
+        "MOVPRFX",
+        "SRHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URHADD"
+      ],
+      [
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrhadd[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URHADD"
+      ],
+      [
+        "MOVPRFX",
+        "URHADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinta[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTA"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinta[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTA"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinta[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinta[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTA"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinta[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTA"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinta[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinta[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTA"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinta[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTA"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinta[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinti[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTI"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinti[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTI"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinti[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinti[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTI"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinti[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTI"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinti[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinti[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTI"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinti[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTI"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrinti[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintm[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTM"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintm[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTM"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintm[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintm[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTM"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintm[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTM"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintm[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintm[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTM"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintm[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTM"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintm[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTM"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintn[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTN"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintn[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTN"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintn[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintn[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTN"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintn[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTN"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintn[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintn[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTN"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintn[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTN"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintn[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintp[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTP"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintp[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTP"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintp[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintp[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTP"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintp[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTP"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintp[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintp[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTP"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintp[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTP"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintp[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintx[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTX"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintx[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTX"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintx[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintx[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTX"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintx[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTX"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintx[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintx[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTX"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintx[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTX"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintx[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintz[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTZ"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintz[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTZ"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintz[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintz[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTZ"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintz[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTZ"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintz[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintz[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTZ"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintz[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRINTZ"
+      ],
+      [
+        "MOVPRFX",
+        "FRINTZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrintz[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FRINTZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SRSHL"
+      ],
+      [
+        "SRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SRSHL"
+      ],
+      [
+        "SRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SRSHL"
+      ],
+      [
+        "SRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "SRSHR"
+      ],
+      [
+        "SRSHL"
+      ],
+      [
+        "SRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "URSHL"
+      ],
+      [
+        "URSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "URSHL"
+      ],
+      [
+        "URSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "URSHL"
+      ],
+      [
+        "URSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LSL"
+      ],
+      [
+        "URSHR"
+      ],
+      [
+        "URSHL"
+      ],
+      [
+        "URSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "LSL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHL"
+      ],
+      [
+        "SRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHL"
+      ],
+      [
+        "SRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHL"
+      ],
+      [
+        "SRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHL"
+      ],
+      [
+        "SRSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRSHL"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHL"
+      ],
+      [
+        "URSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHL"
+      ],
+      [
+        "URSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHL"
+      ],
+      [
+        "URSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHL"
+      ],
+      [
+        "URSHLR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshl[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URSHL"
+      ],
+      [
+        "MOVPRFX",
+        "URSHLR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSHR"
+      ],
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SRSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSHR"
+      ],
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshr[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URSHR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnt[_n_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnt[_n_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnt[_n_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnt[_n_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnt[_n_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrshrnt[_n_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrsqrte[_f16]",
+    "arguments": [
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRSQRTE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrsqrte[_f32]",
+    "arguments": [
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRSQRTE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrsqrte[_f64]",
+    "arguments": [
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRSQRTE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsqrte[_u32]_m",
+    "arguments": [
+      "svuint32_t inactive",
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSQRTE"
+      ],
+      [
+        "MOVPRFX",
+        "URSQRTE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsqrte[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSQRTE"
+      ],
+      [
+        "MOVPRFX",
+        "URSQRTE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsqrte[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "URSQRTE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrsqrts[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRSQRTS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrsqrts[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRSQRTS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svrsqrts[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FRSQRTS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsra[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSRA"
+      ],
+      [
+        "MOVPRFX",
+        "SRSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsra[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSRA"
+      ],
+      [
+        "MOVPRFX",
+        "SRSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsra[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSRA"
+      ],
+      [
+        "MOVPRFX",
+        "SRSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsra[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRSRA"
+      ],
+      [
+        "MOVPRFX",
+        "SRSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsra[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSRA"
+      ],
+      [
+        "MOVPRFX",
+        "URSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsra[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSRA"
+      ],
+      [
+        "MOVPRFX",
+        "URSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsra[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSRA"
+      ],
+      [
+        "MOVPRFX",
+        "URSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsra[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "URSRA"
+      ],
+      [
+        "MOVPRFX",
+        "URSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnb[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_n_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_n_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_n_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_n_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_n_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_n_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svrsubhnt[_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "RSUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsbclb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SBCLB"
+      ],
+      [
+        "MOVPRFX",
+        "SBCLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsbclb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SBCLB"
+      ],
+      [
+        "MOVPRFX",
+        "SBCLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsbclb[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SBCLB"
+      ],
+      [
+        "MOVPRFX",
+        "SBCLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsbclb[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SBCLB"
+      ],
+      [
+        "MOVPRFX",
+        "SBCLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsbclt[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SBCLT"
+      ],
+      [
+        "MOVPRFX",
+        "SBCLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsbclt[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SBCLT"
+      ],
+      [
+        "MOVPRFX",
+        "SBCLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsbclt[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "svuint32_t op3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "op3": {
+        "register": "Zop3.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SBCLT"
+      ],
+      [
+        "MOVPRFX",
+        "SBCLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsbclt[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "svuint64_t op3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "op3": {
+        "register": "Zop3.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SBCLT"
+      ],
+      [
+        "MOVPRFX",
+        "SBCLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ],
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svscale[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_b]",
+    "arguments": [
+      "svbool_t pg",
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsel[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_f16]",
+    "arguments": [
+      "svfloat16x2_t tuple",
+      "uint64_t imm_index",
+      "svfloat16_t x"
+    ],
+    "return_type": {
+      "value": "svfloat16x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_f32]",
+    "arguments": [
+      "svfloat32x2_t tuple",
+      "uint64_t imm_index",
+      "svfloat32_t x"
+    ],
+    "return_type": {
+      "value": "svfloat32x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_f64]",
+    "arguments": [
+      "svfloat64x2_t tuple",
+      "uint64_t imm_index",
+      "svfloat64_t x"
+    ],
+    "return_type": {
+      "value": "svfloat64x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_s16]",
+    "arguments": [
+      "svint16x2_t tuple",
+      "uint64_t imm_index",
+      "svint16_t x"
+    ],
+    "return_type": {
+      "value": "svint16x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_s32]",
+    "arguments": [
+      "svint32x2_t tuple",
+      "uint64_t imm_index",
+      "svint32_t x"
+    ],
+    "return_type": {
+      "value": "svint32x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_s64]",
+    "arguments": [
+      "svint64x2_t tuple",
+      "uint64_t imm_index",
+      "svint64_t x"
+    ],
+    "return_type": {
+      "value": "svint64x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_s8]",
+    "arguments": [
+      "svint8x2_t tuple",
+      "uint64_t imm_index",
+      "svint8_t x"
+    ],
+    "return_type": {
+      "value": "svint8x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_u16]",
+    "arguments": [
+      "svuint16x2_t tuple",
+      "uint64_t imm_index",
+      "svuint16_t x"
+    ],
+    "return_type": {
+      "value": "svuint16x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_u32]",
+    "arguments": [
+      "svuint32x2_t tuple",
+      "uint64_t imm_index",
+      "svuint32_t x"
+    ],
+    "return_type": {
+      "value": "svuint32x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_u64]",
+    "arguments": [
+      "svuint64x2_t tuple",
+      "uint64_t imm_index",
+      "svuint64_t x"
+    ],
+    "return_type": {
+      "value": "svuint64x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset2[_u8]",
+    "arguments": [
+      "svuint8x2_t tuple",
+      "uint64_t imm_index",
+      "svuint8_t x"
+    ],
+    "return_type": {
+      "value": "svuint8x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_f16]",
+    "arguments": [
+      "svfloat16x3_t tuple",
+      "uint64_t imm_index",
+      "svfloat16_t x"
+    ],
+    "return_type": {
+      "value": "svfloat16x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_f32]",
+    "arguments": [
+      "svfloat32x3_t tuple",
+      "uint64_t imm_index",
+      "svfloat32_t x"
+    ],
+    "return_type": {
+      "value": "svfloat32x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_f64]",
+    "arguments": [
+      "svfloat64x3_t tuple",
+      "uint64_t imm_index",
+      "svfloat64_t x"
+    ],
+    "return_type": {
+      "value": "svfloat64x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_s16]",
+    "arguments": [
+      "svint16x3_t tuple",
+      "uint64_t imm_index",
+      "svint16_t x"
+    ],
+    "return_type": {
+      "value": "svint16x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_s32]",
+    "arguments": [
+      "svint32x3_t tuple",
+      "uint64_t imm_index",
+      "svint32_t x"
+    ],
+    "return_type": {
+      "value": "svint32x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_s64]",
+    "arguments": [
+      "svint64x3_t tuple",
+      "uint64_t imm_index",
+      "svint64_t x"
+    ],
+    "return_type": {
+      "value": "svint64x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_s8]",
+    "arguments": [
+      "svint8x3_t tuple",
+      "uint64_t imm_index",
+      "svint8_t x"
+    ],
+    "return_type": {
+      "value": "svint8x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_u16]",
+    "arguments": [
+      "svuint16x3_t tuple",
+      "uint64_t imm_index",
+      "svuint16_t x"
+    ],
+    "return_type": {
+      "value": "svuint16x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_u32]",
+    "arguments": [
+      "svuint32x3_t tuple",
+      "uint64_t imm_index",
+      "svuint32_t x"
+    ],
+    "return_type": {
+      "value": "svuint32x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_u64]",
+    "arguments": [
+      "svuint64x3_t tuple",
+      "uint64_t imm_index",
+      "svuint64_t x"
+    ],
+    "return_type": {
+      "value": "svuint64x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset3[_u8]",
+    "arguments": [
+      "svuint8x3_t tuple",
+      "uint64_t imm_index",
+      "svuint8_t x"
+    ],
+    "return_type": {
+      "value": "svuint8x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_f16]",
+    "arguments": [
+      "svfloat16x4_t tuple",
+      "uint64_t imm_index",
+      "svfloat16_t x"
+    ],
+    "return_type": {
+      "value": "svfloat16x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_f32]",
+    "arguments": [
+      "svfloat32x4_t tuple",
+      "uint64_t imm_index",
+      "svfloat32_t x"
+    ],
+    "return_type": {
+      "value": "svfloat32x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_f64]",
+    "arguments": [
+      "svfloat64x4_t tuple",
+      "uint64_t imm_index",
+      "svfloat64_t x"
+    ],
+    "return_type": {
+      "value": "svfloat64x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_s16]",
+    "arguments": [
+      "svint16x4_t tuple",
+      "uint64_t imm_index",
+      "svint16_t x"
+    ],
+    "return_type": {
+      "value": "svint16x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_s32]",
+    "arguments": [
+      "svint32x4_t tuple",
+      "uint64_t imm_index",
+      "svint32_t x"
+    ],
+    "return_type": {
+      "value": "svint32x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_s64]",
+    "arguments": [
+      "svint64x4_t tuple",
+      "uint64_t imm_index",
+      "svint64_t x"
+    ],
+    "return_type": {
+      "value": "svint64x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_s8]",
+    "arguments": [
+      "svint8x4_t tuple",
+      "uint64_t imm_index",
+      "svint8_t x"
+    ],
+    "return_type": {
+      "value": "svint8x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_u16]",
+    "arguments": [
+      "svuint16x4_t tuple",
+      "uint64_t imm_index",
+      "svuint16_t x"
+    ],
+    "return_type": {
+      "value": "svuint16x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_u32]",
+    "arguments": [
+      "svuint32x4_t tuple",
+      "uint64_t imm_index",
+      "svuint32_t x"
+    ],
+    "return_type": {
+      "value": "svuint32x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_u64]",
+    "arguments": [
+      "svuint64x4_t tuple",
+      "uint64_t imm_index",
+      "svuint64_t x"
+    ],
+    "return_type": {
+      "value": "svuint64x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svset4[_u8]",
+    "arguments": [
+      "svuint8x4_t tuple",
+      "uint64_t imm_index",
+      "svuint8_t x"
+    ],
+    "return_type": {
+      "value": "svuint8x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsetffr",
+    "arguments": [],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {},
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SETFFR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllb[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllb[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllb[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllb[_n_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllb[_n_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllb[_n_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllt[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllt[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllt[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllt[_n_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllt[_n_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshllt[_n_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USHLLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnt[_n_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnt[_n_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnt[_n_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnt[_n_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnt[_n_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svshrnt[_n_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "uint64_t imm2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "imm2": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SHRNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsli[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SLI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsli[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SLI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsli[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 63
+      },
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SLI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsli[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SLI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsli[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SLI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsli[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 31
+      },
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SLI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsli[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 63
+      },
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SLI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsli[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SLI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsm4e[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SM4E"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsm4ekey[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SM4EKEY"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsplice[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SPLICE"
+      ],
+      [
+        "MOVPRFX",
+        "SPLICE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UQADD"
+      ],
+      [
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USQADD"
+      ],
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsqadd[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "USQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsqrt[_f16]_m",
+    "arguments": [
+      "svfloat16_t inactive",
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.H|Ztied.H"
+      },
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSQRT"
+      ],
+      [
+        "MOVPRFX",
+        "FSQRT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsqrt[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H|Ztied.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSQRT"
+      ],
+      [
+        "MOVPRFX",
+        "FSQRT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsqrt[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSQRT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsqrt[_f32]_m",
+    "arguments": [
+      "svfloat32_t inactive",
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.S|Ztied.S"
+      },
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSQRT"
+      ],
+      [
+        "MOVPRFX",
+        "FSQRT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsqrt[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S|Ztied.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSQRT"
+      ],
+      [
+        "MOVPRFX",
+        "FSQRT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsqrt[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSQRT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsqrt[_f64]_m",
+    "arguments": [
+      "svfloat64_t inactive",
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "inactive": {
+        "register": "Zinactive.D|Ztied.D"
+      },
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSQRT"
+      ],
+      [
+        "MOVPRFX",
+        "FSQRT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsqrt[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D|Ztied.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSQRT"
+      ],
+      [
+        "MOVPRFX",
+        "FSQRT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsqrt[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSQRT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsra[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSRA"
+      ],
+      [
+        "MOVPRFX",
+        "SSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsra[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSRA"
+      ],
+      [
+        "MOVPRFX",
+        "SSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsra[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSRA"
+      ],
+      [
+        "MOVPRFX",
+        "SSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsra[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSRA"
+      ],
+      [
+        "MOVPRFX",
+        "SSRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsra[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USRA"
+      ],
+      [
+        "MOVPRFX",
+        "USRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsra[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USRA"
+      ],
+      [
+        "MOVPRFX",
+        "USRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsra[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USRA"
+      ],
+      [
+        "MOVPRFX",
+        "USRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsra[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USRA"
+      ],
+      [
+        "MOVPRFX",
+        "USRA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsri[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsri[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsri[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsri[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsri[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsri[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsri[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsri[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SRI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "svfloat16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint8_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint8_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u32base]_index[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u32base]_index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u32base]_index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u32base]_offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u32base]_offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u32base]_offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u32base_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u32base_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u32base_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u64base]_index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u64base]_index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u64base]_index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u64base]_offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u64base]_offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u64base]_offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u64base_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u64base_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter[_u64base_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s32]index[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svint32_t indices",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s32]index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint32_t indices",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s32]index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svint32_t indices",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s32]offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svint32_t offsets",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint32_t offsets",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svint32_t offsets",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svint64_t indices",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svint64_t offsets",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u32]index[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svuint32_t indices",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u32]index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svuint32_t indices",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u32]index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint32_t indices",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u32]offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svuint32_t offsets",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svuint32_t offsets",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint32_t offsets",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svuint64_t indices",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svuint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svuint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svuint64_t offsets",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svuint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_scatter_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svuint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "int64_t vnum",
+      "svfloat16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "int64_t vnum",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "int64_t vnum",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "int64_t vnum",
+      "svint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "int64_t vnum",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "int64_t vnum",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "int64_t vnum",
+      "svint8_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "int64_t vnum",
+      "svuint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "int64_t vnum",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "int64_t vnum",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1D"
+      ],
+      [
+        "ST1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "int64_t vnum",
+      "svuint8_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter[_u32base]_offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter[_u32base]_offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter[_u32base_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter[_u32base_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter[_u64base]_offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter[_u64base]_offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter[_u64base_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter[_u64base_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter_[s32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint32_t offsets",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter_[s32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svint32_t offsets",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter_[u32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svuint32_t offsets",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter_[u32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint32_t offsets",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svuint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_scatter_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "int64_t vnum",
+      "svint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "int64_t vnum",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "int64_t vnum",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "int64_t vnum",
+      "svuint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "int64_t vnum",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1b_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "int64_t vnum",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1B"
+      ],
+      [
+        "ST1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u32base]_index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u32base]_index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u32base]_offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u32base]_offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u32base_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u32base_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u64base]_index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u64base]_index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u64base]_offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u64base]_offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u64base_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter[_u64base_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[s32]index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint32_t indices",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[s32]index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svint32_t indices",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[s32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint32_t offsets",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[s32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svint32_t offsets",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[s64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[s64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[u32]index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svuint32_t indices",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[u32]index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint32_t indices",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[u32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svuint32_t offsets",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[u32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint32_t offsets",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[u64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svuint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[u64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svuint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_scatter_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "int64_t vnum",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "int64_t vnum",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "int64_t vnum",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1h_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "int64_t vnum",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1H"
+      ],
+      [
+        "ST1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter[_u64base]_index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter[_u64base]_index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter[_u64base]_offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter[_u64base]_offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter[_u64base_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter[_u64base_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter_[s64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter_[s64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter_[u64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svuint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter_[u64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svuint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_scatter_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "int64_t vnum",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst1w_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "int64_t vnum",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1W"
+      ],
+      [
+        "ST1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "svfloat16x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H, Zdata1.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2H"
+      ],
+      [
+        "ST2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svfloat32x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S, Zdata1.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2W"
+      ],
+      [
+        "ST2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svfloat64x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D, Zdata1.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2D"
+      ],
+      [
+        "ST2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint16x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H, Zdata1.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2H"
+      ],
+      [
+        "ST2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint32x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S, Zdata1.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2W"
+      ],
+      [
+        "ST2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svint64x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D, Zdata1.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2D"
+      ],
+      [
+        "ST2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint8x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B, Zdata1.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2B"
+      ],
+      [
+        "ST2B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint16x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H, Zdata1.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2H"
+      ],
+      [
+        "ST2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint32x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S, Zdata1.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2W"
+      ],
+      [
+        "ST2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svuint64x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D, Zdata1.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2D"
+      ],
+      [
+        "ST2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint8x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B, Zdata1.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2B"
+      ],
+      [
+        "ST2B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "int64_t vnum",
+      "svfloat16x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H, Zdata1.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2H"
+      ],
+      [
+        "ST2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "int64_t vnum",
+      "svfloat32x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S, Zdata1.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2W"
+      ],
+      [
+        "ST2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "int64_t vnum",
+      "svfloat64x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D, Zdata1.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2D"
+      ],
+      [
+        "ST2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "int64_t vnum",
+      "svint16x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H, Zdata1.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2H"
+      ],
+      [
+        "ST2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "int64_t vnum",
+      "svint32x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S, Zdata1.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2W"
+      ],
+      [
+        "ST2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "int64_t vnum",
+      "svint64x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D, Zdata1.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2D"
+      ],
+      [
+        "ST2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "int64_t vnum",
+      "svint8x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B, Zdata1.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2B"
+      ],
+      [
+        "ST2B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "int64_t vnum",
+      "svuint16x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H, Zdata1.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2H"
+      ],
+      [
+        "ST2H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "int64_t vnum",
+      "svuint32x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S, Zdata1.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2W"
+      ],
+      [
+        "ST2W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "int64_t vnum",
+      "svuint64x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D, Zdata1.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2D"
+      ],
+      [
+        "ST2D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst2_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "int64_t vnum",
+      "svuint8x2_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B, Zdata1.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST2B"
+      ],
+      [
+        "ST2B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "svfloat16x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata2.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3H"
+      ],
+      [
+        "ST3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svfloat32x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata2.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3W"
+      ],
+      [
+        "ST3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svfloat64x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata2.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3D"
+      ],
+      [
+        "ST3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint16x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata2.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3H"
+      ],
+      [
+        "ST3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint32x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata2.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3W"
+      ],
+      [
+        "ST3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svint64x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata2.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3D"
+      ],
+      [
+        "ST3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint8x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B - Zdata2.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3B"
+      ],
+      [
+        "ST3B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint16x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata2.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3H"
+      ],
+      [
+        "ST3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint32x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata2.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3W"
+      ],
+      [
+        "ST3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svuint64x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata2.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3D"
+      ],
+      [
+        "ST3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint8x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B - Zdata2.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3B"
+      ],
+      [
+        "ST3B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "int64_t vnum",
+      "svfloat16x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata2.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3H"
+      ],
+      [
+        "ST3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "int64_t vnum",
+      "svfloat32x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata2.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3W"
+      ],
+      [
+        "ST3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "int64_t vnum",
+      "svfloat64x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata2.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3D"
+      ],
+      [
+        "ST3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "int64_t vnum",
+      "svint16x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata2.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3H"
+      ],
+      [
+        "ST3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "int64_t vnum",
+      "svint32x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata2.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3W"
+      ],
+      [
+        "ST3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "int64_t vnum",
+      "svint64x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata2.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3D"
+      ],
+      [
+        "ST3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "int64_t vnum",
+      "svint8x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B - Zdata2.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3B"
+      ],
+      [
+        "ST3B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "int64_t vnum",
+      "svuint16x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata2.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3H"
+      ],
+      [
+        "ST3H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "int64_t vnum",
+      "svuint32x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata2.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3W"
+      ],
+      [
+        "ST3W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "int64_t vnum",
+      "svuint64x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata2.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3D"
+      ],
+      [
+        "ST3D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst3_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "int64_t vnum",
+      "svuint8x3_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B - Zdata2.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST3B"
+      ],
+      [
+        "ST3B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "svfloat16x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata3.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4H"
+      ],
+      [
+        "ST4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svfloat32x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata3.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4W"
+      ],
+      [
+        "ST4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svfloat64x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata3.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4D"
+      ],
+      [
+        "ST4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint16x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata3.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4H"
+      ],
+      [
+        "ST4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint32x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata3.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4W"
+      ],
+      [
+        "ST4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svint64x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata3.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4D"
+      ],
+      [
+        "ST4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint8x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B - Zdata3.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4B"
+      ],
+      [
+        "ST4B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint16x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata3.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4H"
+      ],
+      [
+        "ST4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint32x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata3.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4W"
+      ],
+      [
+        "ST4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svuint64x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata3.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4D"
+      ],
+      [
+        "ST4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint8x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B - Zdata3.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4B"
+      ],
+      [
+        "ST4B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "int64_t vnum",
+      "svfloat16x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata3.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4H"
+      ],
+      [
+        "ST4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "int64_t vnum",
+      "svfloat32x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata3.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4W"
+      ],
+      [
+        "ST4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "int64_t vnum",
+      "svfloat64x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata3.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4D"
+      ],
+      [
+        "ST4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "int64_t vnum",
+      "svint16x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata3.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4H"
+      ],
+      [
+        "ST4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "int64_t vnum",
+      "svint32x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata3.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4W"
+      ],
+      [
+        "ST4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "int64_t vnum",
+      "svint64x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata3.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4D"
+      ],
+      [
+        "ST4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "int64_t vnum",
+      "svint8x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B - Zdata3.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4B"
+      ],
+      [
+        "ST4B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "int64_t vnum",
+      "svuint16x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.H - Zdata3.H}"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4H"
+      ],
+      [
+        "ST4H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "int64_t vnum",
+      "svuint32x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.S - Zdata3.S}"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4W"
+      ],
+      [
+        "ST4W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "int64_t vnum",
+      "svuint64x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.D - Zdata3.D}"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4D"
+      ],
+      [
+        "ST4D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svst4_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "int64_t vnum",
+      "svuint8x4_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "{Zdata0.B - Zdata3.B}"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST4B"
+      ],
+      [
+        "ST4B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "svfloat16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ],
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ],
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ],
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ],
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ],
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ],
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint8_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ],
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ],
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ],
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ],
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint8_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ],
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u32base]_index[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u32base]_index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u32base]_index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u32base]_offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u32base]_offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u32base]_offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u32base_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u32base_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u32base_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u64base]_index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u64base]_index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u64base]_index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 8": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u64base]_offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u64base]_offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u64base]_offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u64base_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u64base_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter[_u64base_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[s64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svint64_t indices",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[s64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[s64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[s64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svint64_t offsets",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[u32]offset[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "svuint32_t offsets",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[u32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svuint32_t offsets",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[u32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint32_t offsets",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[u64]index[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svuint64_t indices",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[u64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svuint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[u64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svuint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 8": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[u64]offset[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "svuint64_t offsets",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "svuint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1_scatter_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "svuint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_f16]",
+    "arguments": [
+      "svbool_t pg",
+      "float16_t *base",
+      "int64_t vnum",
+      "svfloat16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ],
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_f32]",
+    "arguments": [
+      "svbool_t pg",
+      "float32_t *base",
+      "int64_t vnum",
+      "svfloat32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ],
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_f64]",
+    "arguments": [
+      "svbool_t pg",
+      "float64_t *base",
+      "int64_t vnum",
+      "svfloat64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ],
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_s16]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "int64_t vnum",
+      "svint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ],
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "int64_t vnum",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ],
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int64_t *base",
+      "int64_t vnum",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ],
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_s8]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "int64_t vnum",
+      "svint8_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ],
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_u16]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "int64_t vnum",
+      "svuint16_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      },
+      "vnum * svcnth()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ],
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "int64_t vnum",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      },
+      "vnum * svcntw()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ],
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint64_t *base",
+      "int64_t vnum",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      },
+      "vnum * svcntd()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1D"
+      ],
+      [
+        "STNT1D"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svstnt1_vnum[_u8]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "int64_t vnum",
+      "svuint8_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      },
+      "vnum * svcntb()": {
+        "register": "Xindex"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ],
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter[_u32base]_offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter[_u32base]_offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter[_u32base_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter[_u32base_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter[_u64base]_offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter[_u64base]_offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter[_u64base_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter[_u64base_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter_[u32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svuint32_t offsets",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter_[u32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint32_t offsets",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int8_t *base",
+      "svuint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1b_scatter_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint8_t *base",
+      "svuint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1B"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u32base]_index[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u32base]_index[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t index",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u32base]_offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u32base]_offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "int64_t offset",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u32base_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u32base_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t bases",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.S"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u64base]_index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u64base]_index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 2": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u64base]_offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u64base]_offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u64base_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter[_u64base_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[s64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[s64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[u32]offset[_s32]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svuint32_t offsets",
+      "svint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[u32]offset[_u32]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint32_t offsets",
+      "svuint32_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.S"
+      },
+      "offsets": {
+        "register": "Zoffsets.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[u64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svuint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[u64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 2": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int16_t *base",
+      "svuint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1h_scatter_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint16_t *base",
+      "svuint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1H"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter[_u64base]_index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter[_u64base]_index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t index",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "index * 4": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter[_u64base]_offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter[_u64base]_offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "int64_t offset",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offset": {
+        "register": "Xoffset"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter[_u64base_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter[_u64base_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t bases",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "bases": {
+        "register": "Zbases.D"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter_[s64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter_[s64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter_[s64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter_[s64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter_[u64]index[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svuint64_t indices",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter_[u64]index[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint64_t indices",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices * 4": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter_[u64]offset[_s64]",
+    "arguments": [
+      "svbool_t pg",
+      "int32_t *base",
+      "svuint64_t offsets",
+      "svint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svstnt1w_scatter_[u64]offset[_u64]",
+    "arguments": [
+      "svbool_t pg",
+      "uint32_t *base",
+      "svuint64_t offsets",
+      "svuint64_t data"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "base": {
+        "register": "Xbase"
+      },
+      "data": {
+        "register": "Zdata.D"
+      },
+      "offsets": {
+        "register": "Zoffsets.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STNT1W"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUB"
+      ],
+      [
+        "FADD"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FADD"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "ADD"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUB"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsub[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUB"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnb[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_n_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_n_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_n_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_n_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_n_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_n_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_s16]",
+    "arguments": [
+      "svint8_t even",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_s32]",
+    "arguments": [
+      "svint16_t even",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_s64]",
+    "arguments": [
+      "svint32_t even",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_u16]",
+    "arguments": [
+      "svuint8_t even",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.B"
+      },
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_u32]",
+    "arguments": [
+      "svuint16_t even",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.H"
+      },
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubhnt[_u64]",
+    "arguments": [
+      "svuint32_t even",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "even": {
+        "register": "Ztied.S"
+      },
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBHNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_n_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_n_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_n_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublb[_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublbt[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublbt[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublbt[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublbt[_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublbt[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublbt[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLBT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_n_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_n_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_n_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_u16]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_u32]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsublt[_u64]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubltb[_n_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubltb[_n_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubltb[_n_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubltb[_s16]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubltb[_s32]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubltb[_s64]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBLTB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_f16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_f16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_f16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat16_t op1",
+      "float16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_f32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_f32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_f32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat32_t op1",
+      "float32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_f64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_f64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUBR"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "FSUB"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_f64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svfloat64_t op1",
+      "float64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUBR"
+      ],
+      [
+        "MOVPRFX",
+        "FSUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]|Ztied2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]|Ztied2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]|Ztied2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]|Ztied2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_n_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUBR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUBR"
+      ],
+      [
+        "SUB"
+      ],
+      [
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsubr[_u8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUBR"
+      ],
+      [
+        "MOVPRFX",
+        "SUB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwb[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWB"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "int8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "int16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SSUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svsubwt[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USUBWT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsudot[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint8_t op2",
+      "uint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USDOT"
+      ],
+      [
+        "MOVPRFX",
+        "USDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsudot[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint8_t op2",
+      "svuint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USDOT"
+      ],
+      [
+        "MOVPRFX",
+        "USDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svsudot_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint8_t op2",
+      "svuint8_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUDOT"
+      ],
+      [
+        "MOVPRFX",
+        "SUDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_f16]",
+    "arguments": [
+      "svfloat16x2_t data",
+      "svuint16_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.H, Zdata1.H}"
+      },
+      "indices": {
+        "register": "Zindices.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_f32]",
+    "arguments": [
+      "svfloat32x2_t data",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.S, Zdata1.S}"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_f64]",
+    "arguments": [
+      "svfloat64x2_t data",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.D, Zdata1.D}"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_s16]",
+    "arguments": [
+      "svint16x2_t data",
+      "svuint16_t indices"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.H, Zdata1.H}"
+      },
+      "indices": {
+        "register": "Zindices.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_s32]",
+    "arguments": [
+      "svint32x2_t data",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.S, Zdata1.S}"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_s64]",
+    "arguments": [
+      "svint64x2_t data",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.D, Zdata1.D}"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_s8]",
+    "arguments": [
+      "svint8x2_t data",
+      "svuint8_t indices"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.B, Zdata1.B}"
+      },
+      "indices": {
+        "register": "Zindices.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_u16]",
+    "arguments": [
+      "svuint16x2_t data",
+      "svuint16_t indices"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.H, Zdata1.H}"
+      },
+      "indices": {
+        "register": "Zindices.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_u32]",
+    "arguments": [
+      "svuint32x2_t data",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.S, Zdata1.S}"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_u64]",
+    "arguments": [
+      "svuint64x2_t data",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.D, Zdata1.D}"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbl2[_u8]",
+    "arguments": [
+      "svuint8x2_t data",
+      "svuint8_t indices"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "{Zdata0.B, Zdata1.B}"
+      },
+      "indices": {
+        "register": "Zindices.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_f16]",
+    "arguments": [
+      "svfloat16_t data",
+      "svuint16_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.H"
+      },
+      "indices": {
+        "register": "Zindices.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_f32]",
+    "arguments": [
+      "svfloat32_t data",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_f64]",
+    "arguments": [
+      "svfloat64_t data",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_s16]",
+    "arguments": [
+      "svint16_t data",
+      "svuint16_t indices"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.H"
+      },
+      "indices": {
+        "register": "Zindices.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_s32]",
+    "arguments": [
+      "svint32_t data",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_s64]",
+    "arguments": [
+      "svint64_t data",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_s8]",
+    "arguments": [
+      "svint8_t data",
+      "svuint8_t indices"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.B"
+      },
+      "indices": {
+        "register": "Zindices.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_u16]",
+    "arguments": [
+      "svuint16_t data",
+      "svuint16_t indices"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.H"
+      },
+      "indices": {
+        "register": "Zindices.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_u32]",
+    "arguments": [
+      "svuint32_t data",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_u64]",
+    "arguments": [
+      "svuint64_t data",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtbl[_u8]",
+    "arguments": [
+      "svuint8_t data",
+      "svuint8_t indices"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.B"
+      },
+      "indices": {
+        "register": "Zindices.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_f16]",
+    "arguments": [
+      "svfloat16_t fallback",
+      "svfloat16_t data",
+      "svuint16_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Ztied.H"
+      },
+      "indices": {
+        "register": "Zindices.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_f32]",
+    "arguments": [
+      "svfloat32_t fallback",
+      "svfloat32_t data",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Ztied.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_f64]",
+    "arguments": [
+      "svfloat64_t fallback",
+      "svfloat64_t data",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Ztied.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_s16]",
+    "arguments": [
+      "svint16_t fallback",
+      "svint16_t data",
+      "svuint16_t indices"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Ztied.H"
+      },
+      "indices": {
+        "register": "Zindices.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_s32]",
+    "arguments": [
+      "svint32_t fallback",
+      "svint32_t data",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Ztied.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_s64]",
+    "arguments": [
+      "svint64_t fallback",
+      "svint64_t data",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Ztied.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_s8]",
+    "arguments": [
+      "svint8_t fallback",
+      "svint8_t data",
+      "svuint8_t indices"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.B"
+      },
+      "fallback": {
+        "register": "Ztied.B"
+      },
+      "indices": {
+        "register": "Zindices.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_u16]",
+    "arguments": [
+      "svuint16_t fallback",
+      "svuint16_t data",
+      "svuint16_t indices"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.H"
+      },
+      "fallback": {
+        "register": "Ztied.H"
+      },
+      "indices": {
+        "register": "Zindices.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_u32]",
+    "arguments": [
+      "svuint32_t fallback",
+      "svuint32_t data",
+      "svuint32_t indices"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.S"
+      },
+      "fallback": {
+        "register": "Ztied.S"
+      },
+      "indices": {
+        "register": "Zindices.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_u64]",
+    "arguments": [
+      "svuint64_t fallback",
+      "svuint64_t data",
+      "svuint64_t indices"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.D"
+      },
+      "fallback": {
+        "register": "Ztied.D"
+      },
+      "indices": {
+        "register": "Zindices.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svtbx[_u8]",
+    "arguments": [
+      "svuint8_t fallback",
+      "svuint8_t data",
+      "svuint8_t indices"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Zdata.B"
+      },
+      "fallback": {
+        "register": "Ztied.B"
+      },
+      "indices": {
+        "register": "Zindices.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TBX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtmad[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FTMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FTMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtmad[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FTMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FTMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtmad[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FTMAD"
+      ],
+      [
+        "MOVPRFX",
+        "FTMAD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1_b16",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.H"
+      },
+      "op2": {
+        "register": "Pop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1_b32",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.S"
+      },
+      "op2": {
+        "register": "Pop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1_b64",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.D"
+      },
+      "op2": {
+        "register": "Pop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1_b8",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn1q[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2_b16",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.H"
+      },
+      "op2": {
+        "register": "Pop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2_b32",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.S"
+      },
+      "op2": {
+        "register": "Pop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2_b64",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.D"
+      },
+      "op2": {
+        "register": "Pop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2_b8",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtrn2q[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "TRN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtsmul[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FTSMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtsmul[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FTSMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtsmul[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FTSMUL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtssel[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FTSSEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtssel[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FTSSEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svtssel[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FTSSEL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_f16",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat16x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_f32",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat32x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_f64",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat64x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_s16",
+    "arguments": [],
+    "return_type": {
+      "value": "svint16x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_s32",
+    "arguments": [],
+    "return_type": {
+      "value": "svint32x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_s64",
+    "arguments": [],
+    "return_type": {
+      "value": "svint64x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_s8",
+    "arguments": [],
+    "return_type": {
+      "value": "svint8x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_u16",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint16x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_u32",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint32x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_u64",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint64x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef2_u8",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint8x2_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_f16",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat16x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_f32",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat32x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_f64",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat64x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_s16",
+    "arguments": [],
+    "return_type": {
+      "value": "svint16x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_s32",
+    "arguments": [],
+    "return_type": {
+      "value": "svint32x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_s64",
+    "arguments": [],
+    "return_type": {
+      "value": "svint64x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_s8",
+    "arguments": [],
+    "return_type": {
+      "value": "svint8x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_u16",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint16x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_u32",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint32x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_u64",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint64x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef3_u8",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint8x3_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_f16",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat16x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_f32",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat32x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_f64",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat64x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_s16",
+    "arguments": [],
+    "return_type": {
+      "value": "svint16x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_s32",
+    "arguments": [],
+    "return_type": {
+      "value": "svint32x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_s64",
+    "arguments": [],
+    "return_type": {
+      "value": "svint64x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_s8",
+    "arguments": [],
+    "return_type": {
+      "value": "svint8x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_u16",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint16x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_u32",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint32x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_u64",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint64x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef4_u8",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint8x4_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_f16",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_f32",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_f64",
+    "arguments": [],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_s16",
+    "arguments": [],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_s32",
+    "arguments": [],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_s64",
+    "arguments": [],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_s8",
+    "arguments": [],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_u16",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_u32",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_u64",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svundef_u8",
+    "arguments": [],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Architectures": [
+      "A64"
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpkhi[_b]",
+    "arguments": [
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PUNPKHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpkhi[_s16]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUNPKHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpkhi[_s32]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUNPKHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpkhi[_s64]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUNPKHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpkhi[_u16]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UUNPKHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpkhi[_u32]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UUNPKHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpkhi[_u64]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UUNPKHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpklo[_b]",
+    "arguments": [
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "PUNPKLO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpklo[_s16]",
+    "arguments": [
+      "svint8_t op"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUNPKLO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpklo[_s32]",
+    "arguments": [
+      "svint16_t op"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUNPKLO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpklo[_s64]",
+    "arguments": [
+      "svint32_t op"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUNPKLO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpklo[_u16]",
+    "arguments": [
+      "svuint8_t op"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UUNPKLO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpklo[_u32]",
+    "arguments": [
+      "svuint16_t op"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UUNPKLO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svunpklo[_u64]",
+    "arguments": [
+      "svuint32_t op"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Zop.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UUNPKLO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "uint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H[*]"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S[*]"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D[*]"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SQADD"
+      ],
+      [
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_n_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "uint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B[*]"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s16]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s16]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s16]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      },
+      "pg": {
+        "register": "Pg.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s32]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s32]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s32]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      },
+      "pg": {
+        "register": "Pg.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s64]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s64]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s64]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      },
+      "pg": {
+        "register": "Pg.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s8]_m",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s8]_x",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SUQADD"
+      ],
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svuqadd[_s8]_z",
+    "arguments": [
+      "svbool_t pg",
+      "svint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "pg": {
+        "register": "Pg.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "MOVPRFX",
+        "SUQADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svusdot[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svuint8_t op2",
+      "int8_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B[*]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USDOT"
+      ],
+      [
+        "MOVPRFX",
+        "USDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svusdot[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svuint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USDOT"
+      ],
+      [
+        "MOVPRFX",
+        "USDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svusdot_lane[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svuint8_t op2",
+      "svint8_t op3",
+      "uint64_t imm_index"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm_index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USDOT"
+      ],
+      [
+        "MOVPRFX",
+        "USDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svusmmla[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svuint8_t op2",
+      "svint8_t op3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      },
+      "op3": {
+        "register": "Zop3.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "USMMLA"
+      ],
+      [
+        "MOVPRFX",
+        "USMMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1_b16",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.H"
+      },
+      "op2": {
+        "register": "Pop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1_b32",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.S"
+      },
+      "op2": {
+        "register": "Pop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1_b64",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.D"
+      },
+      "op2": {
+        "register": "Pop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1_b8",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp1q[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2_b16",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.H"
+      },
+      "op2": {
+        "register": "Pop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2_b32",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.S"
+      },
+      "op2": {
+        "register": "Pop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2_b64",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.D"
+      },
+      "op2": {
+        "register": "Pop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2_b8",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svuzp2q[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UZP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b16[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b16[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b16[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b16[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b32[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b32[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b32[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b32[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b64[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b64[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b64[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b64[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b8[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b8[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b8[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilege_b8[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b16[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b16[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b16[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b16[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b32[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b32[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b32[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b32[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b64[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b64[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b64[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b64[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b8[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b8[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b8[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilegt_b8[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b16[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b16[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b16[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b16[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b32[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b32[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b32[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b32[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b64[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b64[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b64[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b64[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b8[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b8[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b8[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilele_b8[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b16[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b16[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b16[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b16[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b32[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b32[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b32[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b32[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b64[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b64[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b64[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b64[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b8[_s32]",
+    "arguments": [
+      "int32_t op1",
+      "int32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b8[_s64]",
+    "arguments": [
+      "int64_t op1",
+      "int64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b8[_u32]",
+    "arguments": [
+      "uint32_t op1",
+      "uint32_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Wop1"
+      },
+      "op2": {
+        "register": "Wop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwhilelt_b8[_u64]",
+    "arguments": [
+      "uint64_t op1",
+      "uint64_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILELO"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_f16]",
+    "arguments": [
+      "const float16_t *op1",
+      "const float16_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_f32]",
+    "arguments": [
+      "const float32_t *op1",
+      "const float32_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_f64]",
+    "arguments": [
+      "const float64_t *op1",
+      "const float64_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_s16]",
+    "arguments": [
+      "const int16_t *op1",
+      "const int16_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_s32]",
+    "arguments": [
+      "const int32_t *op1",
+      "const int32_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_s64]",
+    "arguments": [
+      "const int64_t *op1",
+      "const int64_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_s8]",
+    "arguments": [
+      "const int8_t *op1",
+      "const int8_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_u16]",
+    "arguments": [
+      "const uint16_t *op1",
+      "const uint16_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_u32]",
+    "arguments": [
+      "const uint32_t *op1",
+      "const uint32_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_u64]",
+    "arguments": [
+      "const uint64_t *op1",
+      "const uint64_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilerw[_u8]",
+    "arguments": [
+      "const uint8_t *op1",
+      "const uint8_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILERW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_f16]",
+    "arguments": [
+      "const float16_t *op1",
+      "const float16_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_f32]",
+    "arguments": [
+      "const float32_t *op1",
+      "const float32_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_f64]",
+    "arguments": [
+      "const float64_t *op1",
+      "const float64_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_s16]",
+    "arguments": [
+      "const int16_t *op1",
+      "const int16_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_s32]",
+    "arguments": [
+      "const int32_t *op1",
+      "const int32_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_s64]",
+    "arguments": [
+      "const int64_t *op1",
+      "const int64_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_s8]",
+    "arguments": [
+      "const int8_t *op1",
+      "const int8_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_u16]",
+    "arguments": [
+      "const uint16_t *op1",
+      "const uint16_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_u32]",
+    "arguments": [
+      "const uint32_t *op1",
+      "const uint32_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_u64]",
+    "arguments": [
+      "const uint64_t *op1",
+      "const uint64_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svwhilewr[_u8]",
+    "arguments": [
+      "const uint8_t *op1",
+      "const uint8_t *op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Xop1"
+      },
+      "op2": {
+        "register": "Xop2"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WHILEWR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svwrffr",
+    "arguments": [
+      "svbool_t op"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "op": {
+        "register": "Pop.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "WRFFR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svxar[_n_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "XAR"
+      ],
+      [
+        "XAR"
+      ],
+      [
+        "MOVPRFX",
+        "XAR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svxar[_n_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "XAR"
+      ],
+      [
+        "XAR"
+      ],
+      [
+        "MOVPRFX",
+        "XAR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svxar[_n_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "XAR"
+      ],
+      [
+        "XAR"
+      ],
+      [
+        "MOVPRFX",
+        "XAR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svxar[_n_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "XAR"
+      ],
+      [
+        "XAR"
+      ],
+      [
+        "MOVPRFX",
+        "XAR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svxar[_n_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 16
+      },
+      "op1": {
+        "register": "Zop1.H|Ztied1.H"
+      },
+      "op2": {
+        "register": "Zop2.H|Ztied2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "XAR"
+      ],
+      [
+        "XAR"
+      ],
+      [
+        "MOVPRFX",
+        "XAR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svxar[_n_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 32
+      },
+      "op1": {
+        "register": "Zop1.S|Ztied1.S"
+      },
+      "op2": {
+        "register": "Zop2.S|Ztied2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "XAR"
+      ],
+      [
+        "XAR"
+      ],
+      [
+        "MOVPRFX",
+        "XAR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svxar[_n_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 64
+      },
+      "op1": {
+        "register": "Zop1.D|Ztied1.D"
+      },
+      "op2": {
+        "register": "Zop2.D|Ztied2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "XAR"
+      ],
+      [
+        "XAR"
+      ],
+      [
+        "MOVPRFX",
+        "XAR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE2",
+    "name": "svxar[_n_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2",
+      "uint64_t imm3"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "imm3": {
+        "minimum": 1,
+        "maximum": 8
+      },
+      "op1": {
+        "register": "Zop1.B|Ztied1.B"
+      },
+      "op2": {
+        "register": "Zop2.B|Ztied2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "XAR"
+      ],
+      [
+        "XAR"
+      ],
+      [
+        "MOVPRFX",
+        "XAR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1_b16",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.H"
+      },
+      "op2": {
+        "register": "Pop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1_b32",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.S"
+      },
+      "op2": {
+        "register": "Pop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1_b64",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.D"
+      },
+      "op2": {
+        "register": "Pop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1_b8",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip1q[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.H"
+      },
+      "op2": {
+        "register": "Zop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.S"
+      },
+      "op2": {
+        "register": "Zop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.D"
+      },
+      "op2": {
+        "register": "Zop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.B"
+      },
+      "op2": {
+        "register": "Zop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2_b16",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.H"
+      },
+      "op2": {
+        "register": "Pop2.H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2_b32",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.S"
+      },
+      "op2": {
+        "register": "Pop2.S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2_b64",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.D"
+      },
+      "op2": {
+        "register": "Pop2.D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2_b8",
+    "arguments": [
+      "svbool_t op1",
+      "svbool_t op2"
+    ],
+    "return_type": {
+      "value": "svbool_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Pop1.B"
+      },
+      "op2": {
+        "register": "Pop2.B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_f16]",
+    "arguments": [
+      "svfloat16_t op1",
+      "svfloat16_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_f32]",
+    "arguments": [
+      "svfloat32_t op1",
+      "svfloat32_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_f64]",
+    "arguments": [
+      "svfloat64_t op1",
+      "svfloat64_t op2"
+    ],
+    "return_type": {
+      "value": "svfloat64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_s16]",
+    "arguments": [
+      "svint16_t op1",
+      "svint16_t op2"
+    ],
+    "return_type": {
+      "value": "svint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_s32]",
+    "arguments": [
+      "svint32_t op1",
+      "svint32_t op2"
+    ],
+    "return_type": {
+      "value": "svint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_s64]",
+    "arguments": [
+      "svint64_t op1",
+      "svint64_t op2"
+    ],
+    "return_type": {
+      "value": "svint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_s8]",
+    "arguments": [
+      "svint8_t op1",
+      "svint8_t op2"
+    ],
+    "return_type": {
+      "value": "svint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_u16]",
+    "arguments": [
+      "svuint16_t op1",
+      "svuint16_t op2"
+    ],
+    "return_type": {
+      "value": "svuint16_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_u32]",
+    "arguments": [
+      "svuint32_t op1",
+      "svuint32_t op2"
+    ],
+    "return_type": {
+      "value": "svuint32_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_u64]",
+    "arguments": [
+      "svuint64_t op1",
+      "svuint64_t op2"
+    ],
+    "return_type": {
+      "value": "svuint64_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "SVE",
+    "name": "svzip2q[_u8]",
+    "arguments": [
+      "svuint8_t op1",
+      "svuint8_t op2"
+    ],
+    "return_type": {
+      "value": "svuint8_t"
+    },
+    "Arguments_Preparation": {
+      "op1": {
+        "register": "Zop1.Q"
+      },
+      "op2": {
+        "register": "Zop2.Q"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ZIP2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaba_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b",
+      "int16x4_t c"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "c": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaba_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b",
+      "int32x2_t c"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "c": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaba_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b",
+      "int8x8_t c"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaba_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b",
+      "uint16x4_t c"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "c": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaba_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b",
+      "uint32x2_t c"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "c": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaba_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b",
+      "uint8x8_t c"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_high_s16",
+    "arguments": [
+      "int32x4_t a",
+      "int16x8_t b",
+      "int16x8_t c"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "c": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_high_s32",
+    "arguments": [
+      "int64x2_t a",
+      "int32x4_t b",
+      "int32x4_t c"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_high_s8",
+    "arguments": [
+      "int16x8_t a",
+      "int8x16_t b",
+      "int8x16_t c"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_high_u16",
+    "arguments": [
+      "uint32x4_t a",
+      "uint16x8_t b",
+      "uint16x8_t c"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "c": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_high_u32",
+    "arguments": [
+      "uint64x2_t a",
+      "uint32x4_t b",
+      "uint32x4_t c"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_high_u8",
+    "arguments": [
+      "uint16x8_t a",
+      "uint8x16_t b",
+      "uint8x16_t c"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_s16",
+    "arguments": [
+      "int32x4_t a",
+      "int16x4_t b",
+      "int16x4_t c"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "c": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABAL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_s32",
+    "arguments": [
+      "int64x2_t a",
+      "int32x2_t b",
+      "int32x2_t c"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "c": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABAL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_s8",
+    "arguments": [
+      "int16x8_t a",
+      "int8x8_t b",
+      "int8x8_t c"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABAL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_u16",
+    "arguments": [
+      "uint32x4_t a",
+      "uint16x4_t b",
+      "uint16x4_t c"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "c": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABAL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_u32",
+    "arguments": [
+      "uint64x2_t a",
+      "uint32x2_t b",
+      "uint32x2_t c"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "c": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABAL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabal_u8",
+    "arguments": [
+      "uint16x8_t a",
+      "uint8x8_t b",
+      "uint8x8_t c"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABAL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabaq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b",
+      "int16x8_t c"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "c": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabaq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b",
+      "int32x4_t c"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabaq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b",
+      "int8x16_t c"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabaq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b",
+      "uint16x8_t c"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "c": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabaq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b",
+      "uint32x4_t c"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabaq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b",
+      "uint8x16_t c"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabd_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabd_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabd_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabd_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabd_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabd_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabd_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabd_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabd_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdd_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdh_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_high_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_high_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_high_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_high_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_high_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_high_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdl_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabdq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabds_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabs_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabs_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabs_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabs_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabs_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabs_s64",
+    "arguments": [
+      "int64x1_t a"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabs_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabsd_s64",
+    "arguments": [
+      "int64_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabsh_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabsq_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabsq_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabsq_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabsq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabsq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabsq_s64",
+    "arguments": [
+      "int64x2_t a"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vabsq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ABS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_p16",
+    "arguments": [
+      "poly16x4_t a",
+      "poly16x4_t b"
+    ],
+    "return_type": {
+      "value": "poly16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_p64",
+    "arguments": [
+      "poly64x1_t a",
+      "poly64x1_t b"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_p8",
+    "arguments": [
+      "poly8x8_t a",
+      "poly8x8_t b"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vadd_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddd_s64",
+    "arguments": [
+      "int64_t a",
+      "int64_t b"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddd_u64",
+    "arguments": [
+      "uint64_t a",
+      "uint64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddh_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_high_s16",
+    "arguments": [
+      "int8x8_t r",
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      },
+      "r": {
+        "register": "Vd.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_high_s32",
+    "arguments": [
+      "int16x4_t r",
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_high_s64",
+    "arguments": [
+      "int32x2_t r",
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_high_u16",
+    "arguments": [
+      "uint8x8_t r",
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      },
+      "r": {
+        "register": "Vd.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_high_u32",
+    "arguments": [
+      "uint16x4_t r",
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_high_u64",
+    "arguments": [
+      "uint32x2_t r",
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddhn_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDHN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_high_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_high_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_high_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_high_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_high_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_high_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddl_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlv_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlv_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDLP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlv_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlv_u16",
+    "arguments": [
+      "uint16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlv_u32",
+    "arguments": [
+      "uint32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDLP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlv_u8",
+    "arguments": [
+      "uint8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlvq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlvq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlvq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlvq_u16",
+    "arguments": [
+      "uint16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlvq_u32",
+    "arguments": [
+      "uint32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddlvq_u8",
+    "arguments": [
+      "uint8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDLV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_p128",
+    "arguments": [
+      "poly128_t a",
+      "poly128_t b"
+    ],
+    "return_type": {
+      "value": "poly128_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_p16",
+    "arguments": [
+      "poly16x8_t a",
+      "poly16x8_t b"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_p64",
+    "arguments": [
+      "poly64x2_t a",
+      "poly64x2_t b"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_p8",
+    "arguments": [
+      "poly8x16_t a",
+      "poly8x16_t b"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddv_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADDP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddv_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddv_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddv_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddv_u16",
+    "arguments": [
+      "uint16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddv_u32",
+    "arguments": [
+      "uint32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddv_u8",
+    "arguments": [
+      "uint8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADDP",
+        "FADDP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FADDP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_s64",
+    "arguments": [
+      "int64x2_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_u16",
+    "arguments": [
+      "uint16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_u32",
+    "arguments": [
+      "uint32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_u64",
+    "arguments": [
+      "uint64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddvq_u8",
+    "arguments": [
+      "uint8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ADDV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_high_s16",
+    "arguments": [
+      "int32x4_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDW2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_high_s32",
+    "arguments": [
+      "int64x2_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDW2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_high_s8",
+    "arguments": [
+      "int16x8_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDW2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_high_u16",
+    "arguments": [
+      "uint32x4_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDW2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_high_u32",
+    "arguments": [
+      "uint64x2_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDW2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_high_u8",
+    "arguments": [
+      "uint16x8_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDW2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_s16",
+    "arguments": [
+      "int32x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_s32",
+    "arguments": [
+      "int64x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_s8",
+    "arguments": [
+      "int16x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SADDW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_u16",
+    "arguments": [
+      "uint32x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_u32",
+    "arguments": [
+      "uint64x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaddw_u8",
+    "arguments": [
+      "uint16x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UADDW"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaesdq_u8",
+    "arguments": [
+      "uint8x16_t data",
+      "uint8x16_t key"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Vd.16B"
+      },
+      "key": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AESD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaeseq_u8",
+    "arguments": [
+      "uint8x16_t data",
+      "uint8x16_t key"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Vd.16B"
+      },
+      "key": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AESE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaesimcq_u8",
+    "arguments": [
+      "uint8x16_t data"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AESIMC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaesmcq_u8",
+    "arguments": [
+      "uint8x16_t data"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "data": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AESMC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vamax_f16",
+    "arguments": [
+      "float16x4_t vn",
+      "float16x4_t vm"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.4H"
+      },
+      "vn": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vamax_f32",
+    "arguments": [
+      "float32x2_t vn",
+      "float32x2_t vm"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.2S"
+      },
+      "vn": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vamaxq_f16",
+    "arguments": [
+      "float16x8_t vn",
+      "float16x8_t vm"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.8H"
+      },
+      "vn": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vamaxq_f32",
+    "arguments": [
+      "float32x4_t vn",
+      "float32x4_t vm"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.4S"
+      },
+      "vn": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vamaxq_f64",
+    "arguments": [
+      "float64x2_t vn",
+      "float64x2_t vm"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.2D"
+      },
+      "vn": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vamin_f16",
+    "arguments": [
+      "float16x4_t vn",
+      "float16x4_t vm"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.4H"
+      },
+      "vn": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vamin_f32",
+    "arguments": [
+      "float32x2_t vn",
+      "float32x2_t vm"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.2S"
+      },
+      "vn": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaminq_f16",
+    "arguments": [
+      "float16x8_t vn",
+      "float16x8_t vm"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.8H"
+      },
+      "vn": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaminq_f32",
+    "arguments": [
+      "float32x4_t vn",
+      "float32x4_t vm"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.4S"
+      },
+      "vn": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vaminq_f64",
+    "arguments": [
+      "float64x2_t vn",
+      "float64x2_t vm"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.2D"
+      },
+      "vn": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FAMIN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vand_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vand_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vand_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vand_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vand_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vand_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vand_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vand_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vandq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vandq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vandq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vandq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vandq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vandq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vandq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vandq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "AND"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbcaxq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b",
+      "int16x8_t c"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BCAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbcaxq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b",
+      "int32x4_t c"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BCAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbcaxq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b",
+      "int64x2_t c"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BCAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbcaxq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b",
+      "int8x16_t c"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BCAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbcaxq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b",
+      "uint16x8_t c"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BCAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbcaxq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b",
+      "uint32x4_t c"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BCAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbcaxq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b",
+      "uint64x2_t c"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BCAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbcaxq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b",
+      "uint8x16_t c"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BCAX"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbic_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbic_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbic_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbic_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbic_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbic_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbic_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbic_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbicq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbicq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbicq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbicq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbicq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbicq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbicq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbicq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BIC"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_f16",
+    "arguments": [
+      "uint16x4_t a",
+      "float16x4_t b",
+      "float16x4_t c"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_f32",
+    "arguments": [
+      "uint32x2_t a",
+      "float32x2_t b",
+      "float32x2_t c"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_f64",
+    "arguments": [
+      "uint64x1_t a",
+      "float64x1_t b",
+      "float64x1_t c"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_p16",
+    "arguments": [
+      "uint16x4_t a",
+      "poly16x4_t b",
+      "poly16x4_t c"
+    ],
+    "return_type": {
+      "value": "poly16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_p64",
+    "arguments": [
+      "poly64x1_t a",
+      "poly64x1_t b",
+      "poly64x1_t c"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_p8",
+    "arguments": [
+      "uint8x8_t a",
+      "poly8x8_t b",
+      "poly8x8_t c"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_s16",
+    "arguments": [
+      "uint16x4_t a",
+      "int16x4_t b",
+      "int16x4_t c"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_s32",
+    "arguments": [
+      "uint32x2_t a",
+      "int32x2_t b",
+      "int32x2_t c"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_s64",
+    "arguments": [
+      "uint64x1_t a",
+      "int64x1_t b",
+      "int64x1_t c"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_s8",
+    "arguments": [
+      "uint8x8_t a",
+      "int8x8_t b",
+      "int8x8_t c"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b",
+      "uint16x4_t c"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b",
+      "uint32x2_t c"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b",
+      "uint64x1_t c"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbsl_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b",
+      "uint8x8_t c"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_f16",
+    "arguments": [
+      "uint16x8_t a",
+      "float16x8_t b",
+      "float16x8_t c"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_f32",
+    "arguments": [
+      "uint32x4_t a",
+      "float32x4_t b",
+      "float32x4_t c"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_f64",
+    "arguments": [
+      "uint64x2_t a",
+      "float64x2_t b",
+      "float64x2_t c"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_p16",
+    "arguments": [
+      "uint16x8_t a",
+      "poly16x8_t b",
+      "poly16x8_t c"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_p64",
+    "arguments": [
+      "poly64x2_t a",
+      "poly64x2_t b",
+      "poly64x2_t c"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_p8",
+    "arguments": [
+      "uint8x16_t a",
+      "poly8x16_t b",
+      "poly8x16_t c"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_s16",
+    "arguments": [
+      "uint16x8_t a",
+      "int16x8_t b",
+      "int16x8_t c"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_s32",
+    "arguments": [
+      "uint32x4_t a",
+      "int32x4_t b",
+      "int32x4_t c"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_s64",
+    "arguments": [
+      "uint64x2_t a",
+      "int64x2_t b",
+      "int64x2_t c"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_s8",
+    "arguments": [
+      "uint8x16_t a",
+      "int8x16_t b",
+      "int8x16_t c"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b",
+      "uint16x8_t c"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b",
+      "uint32x4_t c"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b",
+      "uint64x2_t c"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vbslq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b",
+      "uint8x16_t c"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "c": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "BSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcadd_rot270_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcadd_rot270_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcadd_rot90_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcadd_rot90_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaddq_rot270_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaddq_rot270_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaddq_rot270_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaddq_rot90_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaddq_rot90_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaddq_rot90_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcage_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcage_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcage_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaged_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcageh_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcageq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcageq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcageq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcages_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcagt_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcagt_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcagt_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcagtd_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcagth_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcagtq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcagtq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcagtq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcagts_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcale_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcale_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcale_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaled_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaleh_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaleq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaleq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaleq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcales_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcalt_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcalt_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcalt_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaltd_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcalth_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaltq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaltq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcaltq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcalts_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FACGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_p64",
+    "arguments": [
+      "poly64x1_t a",
+      "poly64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_p8",
+    "arguments": [
+      "poly8x8_t a",
+      "poly8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceq_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqd_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqd_s64",
+    "arguments": [
+      "int64_t a",
+      "int64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqd_u64",
+    "arguments": [
+      "uint64_t a",
+      "uint64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqh_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_p64",
+    "arguments": [
+      "poly64x2_t a",
+      "poly64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_p8",
+    "arguments": [
+      "poly8x16_t a",
+      "poly8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqs_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_p64",
+    "arguments": [
+      "poly64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_p8",
+    "arguments": [
+      "poly8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_s64",
+    "arguments": [
+      "int64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_u16",
+    "arguments": [
+      "uint16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_u32",
+    "arguments": [
+      "uint32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_u64",
+    "arguments": [
+      "uint64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqz_u8",
+    "arguments": [
+      "uint8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzd_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzd_s64",
+    "arguments": [
+      "int64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzd_u64",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzh_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_p64",
+    "arguments": [
+      "poly64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_p8",
+    "arguments": [
+      "poly8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_s64",
+    "arguments": [
+      "int64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_u16",
+    "arguments": [
+      "uint16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_u32",
+    "arguments": [
+      "uint32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_u64",
+    "arguments": [
+      "uint64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzq_u8",
+    "arguments": [
+      "uint8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vceqzs_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMEQ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcge_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcged_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcged_s64",
+    "arguments": [
+      "int64_t a",
+      "int64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcged_u64",
+    "arguments": [
+      "uint64_t a",
+      "uint64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeh_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgeq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcges_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgez_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgez_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgez_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgez_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgez_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgez_s64",
+    "arguments": [
+      "int64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgez_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezd_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezd_s64",
+    "arguments": [
+      "int64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezh_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezq_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezq_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezq_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezq_s64",
+    "arguments": [
+      "int64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgezs_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgt_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtd_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtd_s64",
+    "arguments": [
+      "int64_t a",
+      "int64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtd_u64",
+    "arguments": [
+      "uint64_t a",
+      "uint64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgth_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgts_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtz_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtz_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtz_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtz_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtz_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtz_s64",
+    "arguments": [
+      "int64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtz_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzd_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzd_s64",
+    "arguments": [
+      "int64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzh_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzq_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzq_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzq_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzq_s64",
+    "arguments": [
+      "int64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcgtzs_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcle_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcled_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcled_s64",
+    "arguments": [
+      "int64_t a",
+      "int64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcled_u64",
+    "arguments": [
+      "uint64_t a",
+      "uint64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleh_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcleq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcles_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclez_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclez_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclez_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclez_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclez_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclez_s64",
+    "arguments": [
+      "int64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclez_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezd_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezd_s64",
+    "arguments": [
+      "int64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezh_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezq_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezq_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezq_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezq_s64",
+    "arguments": [
+      "int64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclezs_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcls_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcls_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcls_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcls_u16",
+    "arguments": [
+      "uint16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcls_u32",
+    "arguments": [
+      "uint32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcls_u8",
+    "arguments": [
+      "uint8x8_t a"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclsq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclsq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclsq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclsq_u16",
+    "arguments": [
+      "uint16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclsq_u32",
+    "arguments": [
+      "uint32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclsq_u8",
+    "arguments": [
+      "uint8x16_t a"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclt_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltd_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltd_s64",
+    "arguments": [
+      "int64_t a",
+      "int64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltd_u64",
+    "arguments": [
+      "uint64_t a",
+      "uint64_t b"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclth_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMHI"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclts_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "b": {
+        "register": "Sm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMGT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltz_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltz_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltz_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltz_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltz_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltz_s64",
+    "arguments": [
+      "int64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltz_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzd_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzd_s64",
+    "arguments": [
+      "int64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzh_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzq_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzq_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzq_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzq_s64",
+    "arguments": [
+      "int64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcltzs_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclz_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclz_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclz_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclz_u16",
+    "arguments": [
+      "uint16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclz_u32",
+    "arguments": [
+      "uint32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclz_u8",
+    "arguments": [
+      "uint8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclzq_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclzq_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclzq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclzq_u16",
+    "arguments": [
+      "uint16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclzq_u32",
+    "arguments": [
+      "uint32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vclzq_u8",
+    "arguments": [
+      "uint8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CLZ"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_lane_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_lane_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x2_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_laneq_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_laneq_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        ""
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot180_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot180_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot180_lane_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot180_lane_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x2_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot180_laneq_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot180_laneq_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        ""
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot270_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot270_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot270_lane_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot270_lane_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x2_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot270_laneq_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot270_laneq_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        ""
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot90_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot90_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot90_lane_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot90_lane_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x2_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot90_laneq_f16",
+    "arguments": [
+      "float16x4_t r",
+      "float16x4_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmla_rot90_laneq_f32",
+    "arguments": [
+      "float32x2_t r",
+      "float32x2_t a",
+      "float32x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        ""
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_f64",
+    "arguments": [
+      "float64x2_t r",
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      },
+      "r": {
+        "register": "Vd.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_lane_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_lane_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x2_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_laneq_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_laneq_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot180_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot180_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot180_f64",
+    "arguments": [
+      "float64x2_t r",
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      },
+      "r": {
+        "register": "Vd.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot180_lane_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot180_lane_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x2_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot180_laneq_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot180_laneq_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot270_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot270_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot270_f64",
+    "arguments": [
+      "float64x2_t r",
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      },
+      "r": {
+        "register": "Vd.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot270_lane_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot270_lane_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x2_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot270_laneq_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot270_laneq_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot90_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot90_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot90_f64",
+    "arguments": [
+      "float64x2_t r",
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      },
+      "r": {
+        "register": "Vd.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot90_lane_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot90_lane_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x2_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot90_laneq_f16",
+    "arguments": [
+      "float16x8_t r",
+      "float16x8_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcmlaq_rot90_laneq_f32",
+    "arguments": [
+      "float32x4_t r",
+      "float32x4_t a",
+      "float32x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcnt_p8",
+    "arguments": [
+      "poly8x8_t a"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcnt_s8",
+    "arguments": [
+      "int8x8_t a"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcnt_u8",
+    "arguments": [
+      "uint8x8_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcntq_p8",
+    "arguments": [
+      "poly8x16_t a"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcntq_s8",
+    "arguments": [
+      "int8x16_t a"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcntq_u8",
+    "arguments": [
+      "uint8x16_t a"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "CNT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_f16",
+    "arguments": [
+      "float16x4_t low",
+      "float16x4_t high"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.4H"
+      },
+      "low": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_f32",
+    "arguments": [
+      "float32x2_t low",
+      "float32x2_t high"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.2S"
+      },
+      "low": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_f64",
+    "arguments": [
+      "float64x1_t low",
+      "float64x1_t high"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.1D"
+      },
+      "low": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_p16",
+    "arguments": [
+      "poly16x4_t low",
+      "poly16x4_t high"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.4H"
+      },
+      "low": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_p64",
+    "arguments": [
+      "poly64x1_t low",
+      "poly64x1_t high"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.1D"
+      },
+      "low": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_p8",
+    "arguments": [
+      "poly8x8_t low",
+      "poly8x8_t high"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.8B"
+      },
+      "low": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_s16",
+    "arguments": [
+      "int16x4_t low",
+      "int16x4_t high"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.4H"
+      },
+      "low": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_s32",
+    "arguments": [
+      "int32x2_t low",
+      "int32x2_t high"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.2S"
+      },
+      "low": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_s64",
+    "arguments": [
+      "int64x1_t low",
+      "int64x1_t high"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.1D"
+      },
+      "low": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_s8",
+    "arguments": [
+      "int8x8_t low",
+      "int8x8_t high"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.8B"
+      },
+      "low": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_u16",
+    "arguments": [
+      "uint16x4_t low",
+      "uint16x4_t high"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.4H"
+      },
+      "low": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_u32",
+    "arguments": [
+      "uint32x2_t low",
+      "uint32x2_t high"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.2S"
+      },
+      "low": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_u64",
+    "arguments": [
+      "uint64x1_t low",
+      "uint64x1_t high"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.1D"
+      },
+      "low": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcombine_u8",
+    "arguments": [
+      "uint8x8_t low",
+      "uint8x8_t high"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "high": {
+        "register": "Vm.8B"
+      },
+      "low": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP",
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_f32",
+    "arguments": [
+      "float32x2_t a",
+      "const int lane1",
+      "float32x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_f64",
+    "arguments": [
+      "float64x1_t a",
+      "const int lane1",
+      "float64x1_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "UNUSED"
+      },
+      "b": {
+        "register": "Vn.1D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_p16",
+    "arguments": [
+      "poly16x4_t a",
+      "const int lane1",
+      "poly16x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_p64",
+    "arguments": [
+      "poly64x1_t a",
+      "const int lane1",
+      "poly64x1_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "UNUSED"
+      },
+      "b": {
+        "register": "Vn.1D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_p8",
+    "arguments": [
+      "poly8x8_t a",
+      "const int lane1",
+      "poly8x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_s16",
+    "arguments": [
+      "int16x4_t a",
+      "const int lane1",
+      "int16x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_s32",
+    "arguments": [
+      "int32x2_t a",
+      "const int lane1",
+      "int32x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_s64",
+    "arguments": [
+      "int64x1_t a",
+      "const int lane1",
+      "int64x1_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "UNUSED"
+      },
+      "b": {
+        "register": "Vn.1D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_s8",
+    "arguments": [
+      "int8x8_t a",
+      "const int lane1",
+      "int8x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "const int lane1",
+      "uint16x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "const int lane1",
+      "uint32x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "const int lane1",
+      "uint64x1_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "UNUSED"
+      },
+      "b": {
+        "register": "Vn.1D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_lane_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "const int lane1",
+      "uint8x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_f32",
+    "arguments": [
+      "float32x2_t a",
+      "const int lane1",
+      "float32x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_f64",
+    "arguments": [
+      "float64x1_t a",
+      "const int lane1",
+      "float64x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "UNUSED"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_p16",
+    "arguments": [
+      "poly16x4_t a",
+      "const int lane1",
+      "poly16x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_p64",
+    "arguments": [
+      "poly64x1_t a",
+      "const int lane1",
+      "poly64x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "UNUSED"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_p8",
+    "arguments": [
+      "poly8x8_t a",
+      "const int lane1",
+      "poly8x16_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 15
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_s16",
+    "arguments": [
+      "int16x4_t a",
+      "const int lane1",
+      "int16x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_s32",
+    "arguments": [
+      "int32x2_t a",
+      "const int lane1",
+      "int32x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_s64",
+    "arguments": [
+      "int64x1_t a",
+      "const int lane1",
+      "int64x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "UNUSED"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_s8",
+    "arguments": [
+      "int8x8_t a",
+      "const int lane1",
+      "int8x16_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 15
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "const int lane1",
+      "uint16x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "const int lane1",
+      "uint32x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "const int lane1",
+      "uint64x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "UNUSED"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopy_laneq_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "const int lane1",
+      "uint8x16_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 15
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_f32",
+    "arguments": [
+      "float32x4_t a",
+      "const int lane1",
+      "float32x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_f64",
+    "arguments": [
+      "float64x2_t a",
+      "const int lane1",
+      "float64x1_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.1D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_p16",
+    "arguments": [
+      "poly16x8_t a",
+      "const int lane1",
+      "poly16x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_p64",
+    "arguments": [
+      "poly64x2_t a",
+      "const int lane1",
+      "poly64x1_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.1D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_p8",
+    "arguments": [
+      "poly8x16_t a",
+      "const int lane1",
+      "poly8x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_s16",
+    "arguments": [
+      "int16x8_t a",
+      "const int lane1",
+      "int16x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_s32",
+    "arguments": [
+      "int32x4_t a",
+      "const int lane1",
+      "int32x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_s64",
+    "arguments": [
+      "int64x2_t a",
+      "const int lane1",
+      "int64x1_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.1D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_s8",
+    "arguments": [
+      "int8x16_t a",
+      "const int lane1",
+      "int8x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "const int lane1",
+      "uint16x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "const int lane1",
+      "uint32x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "const int lane1",
+      "uint64x1_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.1D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_lane_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "const int lane1",
+      "uint8x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.8B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "const int lane1",
+      "float32x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "const int lane1",
+      "float64x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_p16",
+    "arguments": [
+      "poly16x8_t a",
+      "const int lane1",
+      "poly16x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_p64",
+    "arguments": [
+      "poly64x2_t a",
+      "const int lane1",
+      "poly64x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_p8",
+    "arguments": [
+      "poly8x16_t a",
+      "const int lane1",
+      "poly8x16_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 15
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "const int lane1",
+      "int16x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "const int lane1",
+      "int32x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "const int lane1",
+      "int64x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "const int lane1",
+      "int8x16_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 15
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "const int lane1",
+      "uint16x8_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "const int lane1",
+      "uint32x4_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "const int lane1",
+      "uint64x2_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcopyq_laneq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "const int lane1",
+      "uint8x16_t b",
+      "const int lane2"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.16B"
+      },
+      "b": {
+        "register": "Vn.16B"
+      },
+      "lane1": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "lane2": {
+        "minimum": 0,
+        "maximum": 15
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_f16",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_f32",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_f64",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_p16",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "poly16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_p64",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_p8",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_s16",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_s32",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_s64",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_s8",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_u16",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_u32",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_u64",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcreate_u8",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f16_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f16_s16",
+    "arguments": [
+      "int16x4_t a"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f16_u16",
+    "arguments": [
+      "uint16x4_t a"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f32_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f32_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f32_s32",
+    "arguments": [
+      "int32x2_t a"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f32_u32",
+    "arguments": [
+      "uint32x2_t a"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f64_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f64_s64",
+    "arguments": [
+      "int64x1_t a"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_f64_u64",
+    "arguments": [
+      "uint64x1_t a"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_high_f16_f32",
+    "arguments": [
+      "float16x4_t r",
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "r": {
+        "register": "Vd.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_high_f32_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_high_f32_f64",
+    "arguments": [
+      "float32x2_t r",
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_high_f64_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_f16_s16",
+    "arguments": [
+      "int16x4_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_f16_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_f32_s32",
+    "arguments": [
+      "int32x2_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_f32_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_f64_s64",
+    "arguments": [
+      "int64x1_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_f64_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_s16_f16",
+    "arguments": [
+      "float16x4_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_s32_f32",
+    "arguments": [
+      "float32x2_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_s64_f64",
+    "arguments": [
+      "float64x1_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_u16_f16",
+    "arguments": [
+      "float16x4_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_u32_f32",
+    "arguments": [
+      "float32x2_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_n_u64_f64",
+    "arguments": [
+      "float64x1_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_s16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_s32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_s64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_u16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_u32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvt_u64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvta_s16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvta_s32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvta_s64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvta_u16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvta_u32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvta_u64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtad_s32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtad_s64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtad_u32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtad_u64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtah_s16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtah_s32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtah_s64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtah_u16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtah_u32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtah_u64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtaq_s16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtaq_s32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtaq_s64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtaq_u16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtaq_u32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtaq_u64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtas_s32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtas_s64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtas_u32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtas_u64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTAU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_f64_s64",
+    "arguments": [
+      "int64_t a"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_f64_u64",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_n_f64_s64",
+    "arguments": [
+      "int64_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_n_f64_u64",
+    "arguments": [
+      "uint64_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_n_s64_f64",
+    "arguments": [
+      "float64_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_n_u64_f64",
+    "arguments": [
+      "float64_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_s32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_s64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_u32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtd_u64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_f16_s16",
+    "arguments": [
+      "int16_t a"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_f16_s32",
+    "arguments": [
+      "int32_t a"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_f16_s64",
+    "arguments": [
+      "int64_t a"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_f16_u16",
+    "arguments": [
+      "uint16_t a"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_f16_u32",
+    "arguments": [
+      "uint32_t a"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_f16_u64",
+    "arguments": [
+      "uint64_t a"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_f16_s16",
+    "arguments": [
+      "int16_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_f16_s32",
+    "arguments": [
+      "int32_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_f16_s64",
+    "arguments": [
+      "int64_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_f16_u16",
+    "arguments": [
+      "uint16_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_f16_u32",
+    "arguments": [
+      "uint32_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_f16_u64",
+    "arguments": [
+      "uint64_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_s16_f16",
+    "arguments": [
+      "float16_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_s32_f16",
+    "arguments": [
+      "float16_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_s64_f16",
+    "arguments": [
+      "float16_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_u16_f16",
+    "arguments": [
+      "float16_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_u32_f16",
+    "arguments": [
+      "float16_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_n_u64_f16",
+    "arguments": [
+      "float16_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_s16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_s32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_s64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_u16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_u32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvth_u64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtm_s16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtm_s32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtm_s64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtm_u16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtm_u32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtm_u64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmd_s32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmd_s64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmd_u32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmd_u64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmh_s16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmh_s32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmh_s64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmh_u16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmh_u32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmh_u64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmq_s16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmq_s32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmq_s64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmq_u16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmq_u32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtmq_u64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtms_s32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtms_s64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtms_u32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtms_u64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTMU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtn_s16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtn_s32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtn_s64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtn_u16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtn_u32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtn_u64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnd_s32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnd_s64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnd_u32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnd_u64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnh_s16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnh_s32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnh_s64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnh_u16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnh_u32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnh_u64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnq_s16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnq_s32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnq_s64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnq_u16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnq_u32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtnq_u64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtns_s32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtns_s64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtns_u32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtns_u64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTNU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtp_s16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtp_s32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtp_s64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtp_u16_f16",
+    "arguments": [
+      "float16x4_t a"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtp_u32_f32",
+    "arguments": [
+      "float32x2_t a"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtp_u64_f64",
+    "arguments": [
+      "float64x1_t a"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpd_s32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpd_s64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpd_u32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpd_u64_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtph_s16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtph_s32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtph_s64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtph_u16_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtph_u32_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtph_u64_f16",
+    "arguments": [
+      "float16_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpq_s16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpq_s32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpq_s64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpq_u16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpq_u32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtpq_u64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtps_s32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtps_s64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtps_u32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtps_u64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTPU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_f16_s16",
+    "arguments": [
+      "int16x8_t a"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_f16_u16",
+    "arguments": [
+      "uint16x8_t a"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_f32_s32",
+    "arguments": [
+      "int32x4_t a"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_f32_u32",
+    "arguments": [
+      "uint32x4_t a"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_f64_s64",
+    "arguments": [
+      "int64x2_t a"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_f64_u64",
+    "arguments": [
+      "uint64x2_t a"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_f16_s16",
+    "arguments": [
+      "int16x8_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_f16_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_f32_s32",
+    "arguments": [
+      "int32x4_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_f32_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_f64_s64",
+    "arguments": [
+      "int64x2_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_f64_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_s16_f16",
+    "arguments": [
+      "float16x8_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_s32_f32",
+    "arguments": [
+      "float32x4_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_s64_f64",
+    "arguments": [
+      "float64x2_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_u16_f16",
+    "arguments": [
+      "float16x8_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 16
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_u32_f32",
+    "arguments": [
+      "float32x4_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_n_u64_f64",
+    "arguments": [
+      "float64x2_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 64
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_s16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_s32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_s64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_u16_f16",
+    "arguments": [
+      "float16x8_t a"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_u32_f32",
+    "arguments": [
+      "float32x4_t a"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtq_u64_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_f32_s32",
+    "arguments": [
+      "int32_t a"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_f32_u32",
+    "arguments": [
+      "uint32_t a"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_n_f32_s32",
+    "arguments": [
+      "int32_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_n_f32_u32",
+    "arguments": [
+      "uint32_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UCVTF"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_n_s32_f32",
+    "arguments": [
+      "float32_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_n_u32_f32",
+    "arguments": [
+      "float32_t a",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      },
+      "n": {
+        "minimum": 1,
+        "maximum": 32
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_s32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_s64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_u32_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvts_u64_f32",
+    "arguments": [
+      "float32_t a"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTZU"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtx_f32_f64",
+    "arguments": [
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTXN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtx_high_f32_f64",
+    "arguments": [
+      "float32x2_t r",
+      "float64x2_t a"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTXN2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vcvtxd_f32_f64",
+    "arguments": [
+      "float64_t a"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FCVTXN"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdiv_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdiv_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdiv_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dn"
+      },
+      "b": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdivh_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hn"
+      },
+      "b": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdivq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdivq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdivq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.2D"
+      },
+      "b": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FDIV"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdot_lane_s32",
+    "arguments": [
+      "int32x2_t r",
+      "int8x8_t a",
+      "int8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.4B"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdot_lane_u32",
+    "arguments": [
+      "uint32x2_t r",
+      "uint8x8_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.4B"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdot_laneq_s32",
+    "arguments": [
+      "int32x2_t r",
+      "int8x8_t a",
+      "int8x16_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.4B"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdot_laneq_u32",
+    "arguments": [
+      "uint32x2_t r",
+      "uint8x8_t a",
+      "uint8x16_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.4B"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdot_s32",
+    "arguments": [
+      "int32x2_t r",
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdot_u32",
+    "arguments": [
+      "uint32x2_t r",
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdotq_lane_s32",
+    "arguments": [
+      "int32x4_t r",
+      "int8x16_t a",
+      "int8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.4B"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdotq_lane_u32",
+    "arguments": [
+      "uint32x4_t r",
+      "uint8x16_t a",
+      "uint8x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.4B"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdotq_laneq_s32",
+    "arguments": [
+      "int32x4_t r",
+      "int8x16_t a",
+      "int8x16_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.4B"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdotq_laneq_u32",
+    "arguments": [
+      "uint32x4_t r",
+      "uint8x16_t a",
+      "uint8x16_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.4B"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdotq_s32",
+    "arguments": [
+      "int32x4_t r",
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "SDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdotq_u32",
+    "arguments": [
+      "uint32x4_t r",
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "UDOT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_f16",
+    "arguments": [
+      "float16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_f32",
+    "arguments": [
+      "float32x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_f64",
+    "arguments": [
+      "float64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_p16",
+    "arguments": [
+      "poly16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly16x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_p64",
+    "arguments": [
+      "poly64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_p8",
+    "arguments": [
+      "poly8x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_s16",
+    "arguments": [
+      "int16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_s32",
+    "arguments": [
+      "int32x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_s64",
+    "arguments": [
+      "int64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_s8",
+    "arguments": [
+      "int8x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_u16",
+    "arguments": [
+      "uint16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_u32",
+    "arguments": [
+      "uint32x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_u64",
+    "arguments": [
+      "uint64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_lane_u8",
+    "arguments": [
+      "uint8x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_f16",
+    "arguments": [
+      "float16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_f32",
+    "arguments": [
+      "float32x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_f64",
+    "arguments": [
+      "float64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_p16",
+    "arguments": [
+      "poly16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly16x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_p64",
+    "arguments": [
+      "poly64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_p8",
+    "arguments": [
+      "poly8x16_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "vec": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_s16",
+    "arguments": [
+      "int16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_s32",
+    "arguments": [
+      "int32x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_s64",
+    "arguments": [
+      "int64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_s8",
+    "arguments": [
+      "int8x16_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "vec": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_u16",
+    "arguments": [
+      "uint16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_u32",
+    "arguments": [
+      "uint32x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_u64",
+    "arguments": [
+      "uint64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_laneq_u8",
+    "arguments": [
+      "uint8x16_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "vec": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_f16",
+    "arguments": [
+      "float16_t value"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_f32",
+    "arguments": [
+      "float32_t value"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_f64",
+    "arguments": [
+      "float64_t value"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_p16",
+    "arguments": [
+      "poly16_t value"
+    ],
+    "return_type": {
+      "value": "poly16x4_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_p64",
+    "arguments": [
+      "poly64_t value"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_p8",
+    "arguments": [
+      "poly8_t value"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_s16",
+    "arguments": [
+      "int16_t value"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_s32",
+    "arguments": [
+      "int32_t value"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_s64",
+    "arguments": [
+      "int64_t value"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_s8",
+    "arguments": [
+      "int8_t value"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_u16",
+    "arguments": [
+      "uint16_t value"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_u32",
+    "arguments": [
+      "uint32_t value"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_u64",
+    "arguments": [
+      "uint64_t value"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "INS"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdup_n_u8",
+    "arguments": [
+      "uint8_t value"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupb_lane_p8",
+    "arguments": [
+      "poly8x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupb_lane_s8",
+    "arguments": [
+      "int8x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupb_lane_u8",
+    "arguments": [
+      "uint8x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupb_laneq_p8",
+    "arguments": [
+      "poly8x16_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "vec": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupb_laneq_s8",
+    "arguments": [
+      "int8x16_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "vec": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupb_laneq_u8",
+    "arguments": [
+      "uint8x16_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "vec": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupd_lane_f64",
+    "arguments": [
+      "float64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupd_lane_s64",
+    "arguments": [
+      "int64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupd_lane_u64",
+    "arguments": [
+      "uint64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupd_laneq_f64",
+    "arguments": [
+      "float64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupd_laneq_s64",
+    "arguments": [
+      "int64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int64_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupd_laneq_u64",
+    "arguments": [
+      "uint64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint64_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vduph_lane_f16",
+    "arguments": [
+      "float16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vduph_lane_p16",
+    "arguments": [
+      "poly16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vduph_lane_s16",
+    "arguments": [
+      "int16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vduph_lane_u16",
+    "arguments": [
+      "uint16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vduph_laneq_f16",
+    "arguments": [
+      "float16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vduph_laneq_p16",
+    "arguments": [
+      "poly16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vduph_laneq_s16",
+    "arguments": [
+      "int16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vduph_laneq_u16",
+    "arguments": [
+      "uint16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_f16",
+    "arguments": [
+      "float16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_f32",
+    "arguments": [
+      "float32x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_f64",
+    "arguments": [
+      "float64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_p16",
+    "arguments": [
+      "poly16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_p64",
+    "arguments": [
+      "poly64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_p8",
+    "arguments": [
+      "poly8x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_s16",
+    "arguments": [
+      "int16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_s32",
+    "arguments": [
+      "int32x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_s64",
+    "arguments": [
+      "int64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_s8",
+    "arguments": [
+      "int8x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_u16",
+    "arguments": [
+      "uint16x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_u32",
+    "arguments": [
+      "uint32x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_u64",
+    "arguments": [
+      "uint64x1_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vec": {
+        "register": "Vn.1D"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_lane_u8",
+    "arguments": [
+      "uint8x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_f16",
+    "arguments": [
+      "float16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_f32",
+    "arguments": [
+      "float32x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_f64",
+    "arguments": [
+      "float64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_p16",
+    "arguments": [
+      "poly16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_p64",
+    "arguments": [
+      "poly64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_p8",
+    "arguments": [
+      "poly8x16_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "vec": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_s16",
+    "arguments": [
+      "int16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_s32",
+    "arguments": [
+      "int32x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_s64",
+    "arguments": [
+      "int64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_s8",
+    "arguments": [
+      "int8x16_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "vec": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_u16",
+    "arguments": [
+      "uint16x8_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vec": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_u32",
+    "arguments": [
+      "uint32x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_u64",
+    "arguments": [
+      "uint64x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_laneq_u8",
+    "arguments": [
+      "uint8x16_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "vec": {
+        "register": "Vn.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_f16",
+    "arguments": [
+      "float16_t value"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_f32",
+    "arguments": [
+      "float32_t value"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_f64",
+    "arguments": [
+      "float64_t value"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_p16",
+    "arguments": [
+      "poly16_t value"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_p64",
+    "arguments": [
+      "poly64_t value"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_p8",
+    "arguments": [
+      "poly8_t value"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_s16",
+    "arguments": [
+      "int16_t value"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_s32",
+    "arguments": [
+      "int32_t value"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_s64",
+    "arguments": [
+      "int64_t value"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_s8",
+    "arguments": [
+      "int8_t value"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_u16",
+    "arguments": [
+      "uint16_t value"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_u32",
+    "arguments": [
+      "uint32_t value"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_u64",
+    "arguments": [
+      "uint64_t value"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdupq_n_u8",
+    "arguments": [
+      "uint8_t value"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "value": {
+        "register": "rn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdups_lane_f32",
+    "arguments": [
+      "float32x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdups_lane_s32",
+    "arguments": [
+      "int32x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdups_lane_u32",
+    "arguments": [
+      "uint32x2_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vec": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdups_laneq_f32",
+    "arguments": [
+      "float32x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdups_laneq_s32",
+    "arguments": [
+      "int32x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int32_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vdups_laneq_u32",
+    "arguments": [
+      "uint32x4_t vec",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint32_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vec": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "DUP"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor3q_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b",
+      "int16x8_t c"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor3q_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b",
+      "int32x4_t c"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor3q_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b",
+      "int64x2_t c"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor3q_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b",
+      "int8x16_t c"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor3q_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b",
+      "uint16x8_t c"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor3q_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b",
+      "uint32x4_t c"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor3q_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b",
+      "uint64x2_t c"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor3q_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b",
+      "uint8x16_t c"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "c": {
+        "register": "Va.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR3"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veor_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veorq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veorq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veorq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veorq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veorq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veorq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veorq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "veorq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EOR"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_p16",
+    "arguments": [
+      "poly16x4_t a",
+      "poly16x4_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "poly16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_p64",
+    "arguments": [
+      "poly64x1_t a",
+      "poly64x1_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_p8",
+    "arguments": [
+      "poly8x8_t a",
+      "poly8x8_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "poly8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_s16",
+    "arguments": [
+      "int16x4_t a",
+      "int16x4_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_s32",
+    "arguments": [
+      "int32x2_t a",
+      "int32x2_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_s64",
+    "arguments": [
+      "int64x1_t a",
+      "int64x1_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_s8",
+    "arguments": [
+      "int8x8_t a",
+      "int8x8_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_u16",
+    "arguments": [
+      "uint16x4_t a",
+      "uint16x4_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_u32",
+    "arguments": [
+      "uint32x2_t a",
+      "uint32x2_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_u64",
+    "arguments": [
+      "uint64x1_t a",
+      "uint64x1_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 0
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vext_u8",
+    "arguments": [
+      "uint8x8_t a",
+      "uint8x8_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint8x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_p16",
+    "arguments": [
+      "poly16x8_t a",
+      "poly16x8_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "poly16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_p64",
+    "arguments": [
+      "poly64x2_t a",
+      "poly64x2_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_p8",
+    "arguments": [
+      "poly8x16_t a",
+      "poly8x16_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "poly8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 15
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_s16",
+    "arguments": [
+      "int16x8_t a",
+      "int16x8_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_s32",
+    "arguments": [
+      "int32x4_t a",
+      "int32x4_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_s64",
+    "arguments": [
+      "int64x2_t a",
+      "int64x2_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_s8",
+    "arguments": [
+      "int8x16_t a",
+      "int8x16_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "int8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 15
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_u16",
+    "arguments": [
+      "uint16x8_t a",
+      "uint16x8_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 7
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_u32",
+    "arguments": [
+      "uint32x4_t a",
+      "uint32x4_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 3
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_u64",
+    "arguments": [
+      "uint64x2_t a",
+      "uint64x2_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 1
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vextq_u8",
+    "arguments": [
+      "uint8x16_t a",
+      "uint8x16_t b",
+      "const int n"
+    ],
+    "return_type": {
+      "value": "uint8x16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
+      },
+      "n": {
+        "minimum": 0,
+        "maximum": 15
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "EXT"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b",
+      "float16x4_t c"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "c": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b",
+      "float32x2_t c"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "c": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b",
+      "float64x1_t c"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Da"
+      },
+      "b": {
+        "register": "Dn"
+      },
+      "c": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_lane_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b",
+      "float16x4_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_lane_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b",
+      "float32x2_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_lane_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b",
+      "float64x1_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dd"
+      },
+      "b": {
+        "register": "Dn"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "v": {
+        "register": "Vm.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_laneq_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b",
+      "float16x8_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "v": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_laneq_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b",
+      "float32x4_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_laneq_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b",
+      "float64x2_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dd"
+      },
+      "b": {
+        "register": "Dn"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_n_f16",
+    "arguments": [
+      "float16x4_t a",
+      "float16x4_t b",
+      "float16_t n"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vn.4H"
+      },
+      "n": {
+        "register": "Vm.H[0]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_n_f32",
+    "arguments": [
+      "float32x2_t a",
+      "float32x2_t b",
+      "float32_t n"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2S"
+      },
+      "b": {
+        "register": "Vn.2S"
+      },
+      "n": {
+        "register": "Vm.S[0]"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfma_n_f64",
+    "arguments": [
+      "float64x1_t a",
+      "float64x1_t b",
+      "float64_t n"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Da"
+      },
+      "b": {
+        "register": "Dn"
+      },
+      "n": {
+        "register": "Dm"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmad_lane_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b",
+      "float64x1_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dd"
+      },
+      "b": {
+        "register": "Dn"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "v": {
+        "register": "Vm.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmad_laneq_f64",
+    "arguments": [
+      "float64_t a",
+      "float64_t b",
+      "float64x2_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Dd"
+      },
+      "b": {
+        "register": "Dn"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmah_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b",
+      "float16_t c"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Ha"
+      },
+      "b": {
+        "register": "Hn"
+      },
+      "c": {
+        "register": "Hm"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMADD"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmah_lane_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b",
+      "float16x4_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hd"
+      },
+      "b": {
+        "register": "Hn"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmah_laneq_f16",
+    "arguments": [
+      "float16_t a",
+      "float16_t b",
+      "float16x8_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Hd"
+      },
+      "b": {
+        "register": "Hn"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "v": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b",
+      "float16x8_t c"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "c": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b",
+      "float32x4_t c"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b",
+      "float64x2_t c"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "c": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_lane_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b",
+      "float16x4_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
+        "register": "Vm.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_lane_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b",
+      "float32x2_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_lane_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b",
+      "float64x1_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "v": {
+        "register": "Vm.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_laneq_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b",
+      "float16x8_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "v": {
+        "register": "Vm.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_laneq_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b",
+      "float32x4_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_laneq_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b",
+      "float64x2_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
+        "register": "Vm.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_n_f16",
+    "arguments": [
+      "float16x8_t a",
+      "float16x8_t b",
+      "float16_t n"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.8H"
+      },
+      "b": {
+        "register": "Vn.8H"
+      },
+      "n": {
+        "register": "Vm.H[0]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_n_f32",
+    "arguments": [
+      "float32x4_t a",
+      "float32x4_t b",
+      "float32_t n"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4S"
+      },
+      "b": {
+        "register": "Vn.4S"
+      },
+      "n": {
+        "register": "Vm.S[0]"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmaq_n_f64",
+    "arguments": [
+      "float64x2_t a",
+      "float64x2_t b",
+      "float64_t n"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2D"
+      },
+      "b": {
+        "register": "Vn.2D"
+      },
+      "n": {
+        "register": "Vm.D[0]"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmas_lane_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b",
+      "float32x2_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sd"
+      },
+      "b": {
+        "register": "Sn"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
+        "register": "Vm.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmas_laneq_f32",
+    "arguments": [
+      "float32_t a",
+      "float32_t b",
+      "float32x4_t v",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Sd"
+      },
+      "b": {
+        "register": "Sn"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
+        "register": "Vm.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLA"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlal_high_f16",
+    "arguments": [
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2H"
+      },
+      "b": {
+        "register": "Vd.2H"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlal_lane_high_f16",
+    "arguments": [
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlal_lane_low_f16",
+    "arguments": [
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLAL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlal_laneq_high_f16",
+    "arguments": [
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlal_laneq_low_f16",
+    "arguments": [
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLAL"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_f32",
+    "name": "vfmlal_low_f16",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b",
-      "const int n"
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x4_t b"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.2H"
+      },
+      "b": {
+        "register": "Vd.2H"
+      },
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLAL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlalq_high_f16",
+    "arguments": [
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x8_t b"
     ],
     "return_type": {
       "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.4H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vd.4H"
       },
-      "n": {
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLAL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlalq_lane_high_f16",
+    "arguments": [
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
         "minimum": 0,
         "maximum": 3
+      },
+      "r": {
+        "register": "Vd.4S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLAL2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_f64",
+    "name": "vfmlalq_lane_low_f16",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b",
-      "const int n"
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x4_t b",
+      "const int lane"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.4H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.H"
       },
-      "n": {
+      "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.4S"
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLAL"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_p16",
+    "name": "vfmlalq_laneq_high_f16",
     "arguments": [
-      "poly16x8_t a",
-      "poly16x8_t b",
-      "const int n"
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x8_t b",
+      "const int lane"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.4H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.H"
       },
-      "n": {
+      "lane": {
         "minimum": 0,
         "maximum": 7
+      },
+      "r": {
+        "register": "Vd.4S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLAL2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_p64",
+    "name": "vfmlalq_laneq_low_f16",
     "arguments": [
-      "poly64x2_t a",
-      "poly64x2_t b",
-      "const int n"
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x8_t b",
+      "const int lane"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.4H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.H"
       },
-      "n": {
+      "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 7
+      },
+      "r": {
+        "register": "Vd.4S"
       }
     },
     "Architectures": [
@@ -29327,319 +226778,469 @@
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLAL"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_p8",
+    "name": "vfmlalq_low_f16",
     "arguments": [
-      "poly8x16_t a",
-      "poly8x16_t b",
-      "const int n"
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x8_t b"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.4H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vd.4H"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 15
+      "r": {
+        "register": "Vd.4S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLAL"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_s16",
+    "name": "vfmlsl_high_f16",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b",
-      "const int n"
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x4_t b"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.2H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vd.2H"
       },
-      "n": {
-        "minimum": 0,
-        "maximum": 7
+      "r": {
+        "register": "Vd.2S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLSL2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_s32",
+    "name": "vfmlsl_lane_high_f16",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b",
-      "const int n"
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x4_t b",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.2H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.H"
       },
-      "n": {
+      "lane": {
         "minimum": 0,
         "maximum": 3
+      },
+      "r": {
+        "register": "Vd.2S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLSL2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_s64",
+    "name": "vfmlsl_lane_low_f16",
     "arguments": [
-      "int64x2_t a",
-      "int64x2_t b",
-      "const int n"
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x4_t b",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.2H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.H"
       },
-      "n": {
+      "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.2S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLSL"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_s8",
+    "name": "vfmlsl_laneq_high_f16",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b",
-      "const int n"
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x8_t b",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.2H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.H"
       },
-      "n": {
+      "lane": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 7
+      },
+      "r": {
+        "register": "Vd.2S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLSL2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_u16",
+    "name": "vfmlsl_laneq_low_f16",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b",
-      "const int n"
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x8_t b",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.2H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.H"
       },
-      "n": {
+      "lane": {
         "minimum": 0,
         "maximum": 7
+      },
+      "r": {
+        "register": "Vd.2S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLSL"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_u32",
+    "name": "vfmlsl_low_f16",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b",
-      "const int n"
+      "float32x2_t r",
+      "float16x4_t a",
+      "float16x4_t b"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.2H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vd.2H"
       },
-      "n": {
+      "r": {
+        "register": "Vd.2S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlslq_high_f16",
+    "arguments": [
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vd.4H"
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSL2"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlslq_lane_high_f16",
+    "arguments": [
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x4_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
         "minimum": 0,
         "maximum": 3
+      },
+      "r": {
+        "register": "Vd.4S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLSL2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_u64",
+    "name": "vfmlslq_lane_low_f16",
     "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b",
-      "const int n"
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x4_t b",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.4H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.H"
       },
-      "n": {
+      "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
+      },
+      "r": {
+        "register": "Vd.4S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLSL"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vextq_u8",
+    "name": "vfmlslq_laneq_high_f16",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b",
-      "const int n"
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x8_t b",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vd.4H"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.H"
       },
-      "n": {
+      "lane": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 7
+      },
+      "r": {
+        "register": "Vd.4S"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "EXT"
+        "FMLSL2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_f16",
+    "name": "vfmlslq_laneq_low_f16",
+    "arguments": [
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x8_t b",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vm.H"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfmlslq_low_f16",
+    "arguments": [
+      "float32x4_t r",
+      "float16x8_t a",
+      "float16x8_t b"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "a": {
+        "register": "Vd.4H"
+      },
+      "b": {
+        "register": "Vd.4H"
+      },
+      "r": {
+        "register": "Vd.4S"
+      }
+    },
+    "Architectures": [
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FMLSL"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vfms_f16",
     "arguments": [
       "float16x4_t a",
       "float16x4_t b",
@@ -29665,13 +227266,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_f32",
+    "name": "vfms_f32",
     "arguments": [
       "float32x2_t a",
       "float32x2_t b",
@@ -29698,13 +227299,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_f64",
+    "name": "vfms_f64",
     "arguments": [
       "float64x1_t a",
       "float64x1_t b",
@@ -29729,13 +227330,13 @@
     ],
     "instructions": [
       [
-        "FMADD"
+        "FMSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_lane_f16",
+    "name": "vfms_lane_f16",
     "arguments": [
       "float16x4_t a",
       "float16x4_t b",
@@ -29765,13 +227366,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_lane_f32",
+    "name": "vfms_lane_f32",
     "arguments": [
       "float32x2_t a",
       "float32x2_t b",
@@ -29801,13 +227402,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_lane_f64",
+    "name": "vfms_lane_f64",
     "arguments": [
       "float64x1_t a",
       "float64x1_t b",
@@ -29837,13 +227438,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_laneq_f16",
+    "name": "vfms_laneq_f16",
     "arguments": [
       "float16x4_t a",
       "float16x4_t b",
@@ -29873,13 +227474,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_laneq_f32",
+    "name": "vfms_laneq_f32",
     "arguments": [
       "float32x2_t a",
       "float32x2_t b",
@@ -29909,13 +227510,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_laneq_f64",
+    "name": "vfms_laneq_f64",
     "arguments": [
       "float64x1_t a",
       "float64x1_t b",
@@ -29945,13 +227546,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_n_f16",
+    "name": "vfms_n_f16",
     "arguments": [
       "float16x4_t a",
       "float16x4_t b",
@@ -29962,7 +227563,7 @@
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.4H "
+        "register": "Vd.4H"
       },
       "b": {
         "register": "Vn.4H"
@@ -29976,13 +227577,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_n_f32",
+    "name": "vfms_n_f32",
     "arguments": [
       "float32x2_t a",
       "float32x2_t b",
@@ -30003,19 +227604,17 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfma_n_f64",
+    "name": "vfms_n_f64",
     "arguments": [
       "float64x1_t a",
       "float64x1_t b",
@@ -30040,13 +227639,13 @@
     ],
     "instructions": [
       [
-        "FMADD"
+        "FMSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmad_lane_f64",
+    "name": "vfmsd_lane_f64",
     "arguments": [
       "float64_t a",
       "float64_t b",
@@ -30076,13 +227675,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmad_laneq_f64",
+    "name": "vfmsd_laneq_f64",
     "arguments": [
       "float64_t a",
       "float64_t b",
@@ -30112,13 +227711,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmah_f16",
+    "name": "vfmsh_f16",
     "arguments": [
       "float16_t a",
       "float16_t b",
@@ -30144,13 +227743,13 @@
     ],
     "instructions": [
       [
-        "FMADD"
+        "FMSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmah_lane_f16",
+    "name": "vfmsh_lane_f16",
     "arguments": [
       "float16_t a",
       "float16_t b",
@@ -30180,13 +227779,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmah_laneq_f16",
+    "name": "vfmsh_laneq_f16",
     "arguments": [
       "float16_t a",
       "float16_t b",
@@ -30216,13 +227815,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_f16",
+    "name": "vfmsq_f16",
     "arguments": [
       "float16x8_t a",
       "float16x8_t b",
@@ -30248,13 +227847,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_f32",
+    "name": "vfmsq_f32",
     "arguments": [
       "float32x4_t a",
       "float32x4_t b",
@@ -30281,13 +227880,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_f64",
+    "name": "vfmsq_f64",
     "arguments": [
       "float64x2_t a",
       "float64x2_t b",
@@ -30312,13 +227911,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_lane_f16",
+    "name": "vfmsq_lane_f16",
     "arguments": [
       "float16x8_t a",
       "float16x8_t b",
@@ -30348,13 +227947,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_lane_f32",
+    "name": "vfmsq_lane_f32",
     "arguments": [
       "float32x4_t a",
       "float32x4_t b",
@@ -30384,13 +227983,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_lane_f64",
+    "name": "vfmsq_lane_f64",
     "arguments": [
       "float64x2_t a",
       "float64x2_t b",
@@ -30420,13 +228019,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_laneq_f16",
+    "name": "vfmsq_laneq_f16",
     "arguments": [
       "float16x8_t a",
       "float16x8_t b",
@@ -30456,13 +228055,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_laneq_f32",
+    "name": "vfmsq_laneq_f32",
     "arguments": [
       "float32x4_t a",
       "float32x4_t b",
@@ -30492,13 +228091,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_laneq_f64",
+    "name": "vfmsq_laneq_f64",
     "arguments": [
       "float64x2_t a",
       "float64x2_t b",
@@ -30528,13 +228127,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_n_f16",
+    "name": "vfmsq_n_f16",
     "arguments": [
       "float16x8_t a",
       "float16x8_t b",
@@ -30545,7 +228144,7 @@
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.8H "
+        "register": "Vd.8H"
       },
       "b": {
         "register": "Vn.8H"
@@ -30559,13 +228158,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_n_f32",
+    "name": "vfmsq_n_f32",
     "arguments": [
       "float32x4_t a",
       "float32x4_t b",
@@ -30586,19 +228185,17 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmaq_n_f64",
+    "name": "vfmsq_n_f64",
     "arguments": [
       "float64x2_t a",
       "float64x2_t b",
@@ -30623,13 +228220,13 @@
     ],
     "instructions": [
       [
-        "FMLA"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmas_lane_f32",
+    "name": "vfmss_lane_f32",
     "arguments": [
       "float32_t a",
       "float32_t b",
@@ -30645,612 +228242,170 @@
       },
       "b": {
         "register": "Sn"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "v": {
-        "register": "Vm.2S"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLA"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmas_laneq_f32",
-    "arguments": [
-      "float32_t a",
-      "float32_t b",
-      "float32x4_t v",
-      "const int lane"
-    ],
-    "return_type": {
-      "value": "float32_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Sd"
-      },
-      "b": {
-        "register": "Sn"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "v": {
-        "register": "Vm.4S"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLA"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlal_high_f16",
-    "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x4_t b"
-    ],
-    "return_type": {
-      "value": "float32x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlal_lane_high_f16",
-    "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x4_t b",
-      "const int lane"
-    ],
-    "return_type": {
-      "value": "float32x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.2S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlal_lane_low_f16",
-    "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x4_t b",
-      "const int lane"
-    ],
-    "return_type": {
-      "value": "float32x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.2S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlal_laneq_high_f16",
-    "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x8_t b",
-      "const int lane"
-    ],
-    "return_type": {
-      "value": "float32x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "r": {
-        "register": "Vd.2S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlal_laneq_low_f16",
-    "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x8_t b",
-      "const int lane"
-    ],
-    "return_type": {
-      "value": "float32x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "r": {
-        "register": "Vd.2S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlal_low_f16",
-    "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x4_t b"
-    ],
-    "return_type": {
-      "value": "float32x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlalq_high_f16",
-    "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x8_t b"
-    ],
-    "return_type": {
-      "value": "float32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlalq_lane_high_f16",
-    "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x4_t b",
-      "const int lane"
-    ],
-    "return_type": {
-      "value": "float32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.4S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlalq_lane_low_f16",
-    "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x4_t b",
-      "const int lane"
-    ],
-    "return_type": {
-      "value": "float32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.4S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlalq_laneq_high_f16",
-    "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x8_t b",
-      "const int lane"
-    ],
-    "return_type": {
-      "value": "float32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "r": {
-        "register": "Vd.4S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlalq_laneq_low_f16",
-    "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x8_t b",
-      "const int lane"
-    ],
-    "return_type": {
-      "value": "float32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "r": {
-        "register": "Vd.4S"
-      }
-    },
-    "Architectures": [
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "FMLAL"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vfmlalq_low_f16",
-    "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x8_t b"
-    ],
-    "return_type": {
-      "value": "float32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4S"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLAL"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlsl_high_f16",
+    "name": "vfmss_laneq_f32",
     "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x4_t b"
+      "float32_t a",
+      "float32_t b",
+      "float32x4_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "float32_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2S"
+      "a": {
+        "register": "Sd"
+      },
+      "b": {
+        "register": "Sn"
+      },
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL2"
+        "FMLS"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlsl_lane_high_f16",
+    "name": "vget_high_f16",
     "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x4_t b",
-      "const int lane"
+      "float16x8_t a"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "float16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.2S"
+      "a": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL2"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlsl_lane_low_f16",
+    "name": "vget_high_f32",
     "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x4_t b",
-      "const int lane"
+      "float32x4_t a"
     ],
     "return_type": {
       "value": "float32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.2S"
+      "a": {
+        "register": "Vn.4S"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlsl_laneq_high_f16",
+    "name": "vget_high_f64",
     "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x8_t b",
-      "const int lane"
+      "float64x2_t a"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "float64x1_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "r": {
-        "register": "Vd.2S"
+      "a": {
+        "register": "Vn.2D"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL2"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlsl_laneq_low_f16",
+    "name": "vget_high_p16",
     "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x8_t b",
-      "const int lane"
+      "poly16x8_t a"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "poly16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "r": {
-        "register": "Vd.2S"
+      "a": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlsl_low_f16",
+    "name": "vget_high_p64",
     "arguments": [
-      "float32x2_t r",
-      "float16x4_t a",
-      "float16x4_t b"
+      "poly64x2_t a"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "poly64x1_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.2S"
+      "a": {
+        "register": "Vn.2D"
       }
     },
     "Architectures": [
@@ -31259,250 +228414,197 @@
     ],
     "instructions": [
       [
-        "FMLSL"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlslq_high_f16",
+    "name": "vget_high_p8",
     "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x8_t b"
+      "poly8x16_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4S"
+      "a": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL2"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlslq_lane_high_f16",
+    "name": "vget_high_s16",
     "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x4_t b",
-      "const int lane"
+      "int16x8_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "int16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.4S"
+      "a": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL2"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlslq_lane_low_f16",
+    "name": "vget_high_s32",
     "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x4_t b",
-      "const int lane"
+      "int32x4_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "int32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.4S"
+      "a": {
+        "register": "Vn.4S"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlslq_laneq_high_f16",
+    "name": "vget_high_s64",
     "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x8_t b",
-      "const int lane"
+      "int64x2_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "int64x1_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "r": {
-        "register": "Vd.4S"
+      "a": {
+        "register": "Vn.2D"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL2"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlslq_laneq_low_f16",
+    "name": "vget_high_s8",
     "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x8_t b",
-      "const int lane"
+      "int8x16_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "r": {
-        "register": "Vd.4S"
+      "a": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmlslq_low_f16",
+    "name": "vget_high_u16",
     "arguments": [
-      "float32x4_t r",
-      "float16x8_t a",
-      "float16x8_t b"
+      "uint16x8_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "uint16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
-      "r": {
-        "register": "Vd.4S"
+      "a": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLSL"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_f16",
+    "name": "vget_high_u32",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b",
-      "float16x4_t c"
+      "uint32x4_t a"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "uint32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.4H"
-      },
-      "b": {
-        "register": "Vn.4H"
-      },
-      "c": {
-        "register": "Vm.4H"
+        "register": "Vn.4S"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_f32",
+    "name": "vget_high_u64",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b",
-      "float32x2_t c"
+      "uint64x2_t a"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "uint64x1_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.2S"
-      },
-      "b": {
-        "register": "Vn.2S"
-      },
-      "c": {
-        "register": "Vm.2S"
+        "register": "Vn.2D"
       }
     },
     "Architectures": [
@@ -31512,138 +228614,112 @@
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_f64",
+    "name": "vget_high_u8",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b",
-      "float64x1_t c"
+      "uint8x16_t a"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Da"
-      },
-      "b": {
-        "register": "Dn"
-      },
-      "c": {
-        "register": "Dm"
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMSUB"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_lane_f16",
+    "name": "vget_lane_f16",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b",
       "float16x4_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "float16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
-      },
-      "b": {
-        "register": "Vn.4H"
-      },
       "lane": {
         "minimum": 0,
         "maximum": 3
       },
       "v": {
-        "register": "Vm.4H"
+        "register": "Vn.4H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_lane_f32",
+    "name": "vget_lane_f32",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b",
       "float32x2_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "float32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
-      },
-      "b": {
-        "register": "Vn.2S"
-      },
       "lane": {
         "minimum": 0,
         "maximum": 1
       },
       "v": {
-        "register": "Vm.2S"
+        "register": "Vn.2S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_lane_f64",
+    "name": "vget_lane_f64",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b",
       "float64x1_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "float64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dd"
-      },
-      "b": {
-        "register": "Dn"
-      },
       "lane": {
         "minimum": 0,
         "maximum": 0
       },
       "v": {
-        "register": "Vm.1D"
+        "register": "Vn.1D"
       }
     },
     "Architectures": [
@@ -31651,439 +228727,376 @@
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_laneq_f16",
+    "name": "vget_lane_p16",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b",
-      "float16x8_t v",
+      "poly16x4_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "poly16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H"
-      },
-      "b": {
-        "register": "Vn.4H"
-      },
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
       "v": {
-        "register": "Vm.8H"
+        "register": "Vn.4H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_laneq_f32",
+    "name": "vget_lane_p64",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b",
-      "float32x4_t v",
+      "poly64x1_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "poly64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
-      },
-      "b": {
-        "register": "Vn.2S"
-      },
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 0
       },
       "v": {
-        "register": "Vm.4S"
+        "register": "Vn.1D"
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_laneq_f64",
+    "name": "vget_lane_p8",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b",
-      "float64x2_t v",
+      "poly8x8_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "poly8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dd"
-      },
-      "b": {
-        "register": "Dn"
-      },
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 7
       },
       "v": {
-        "register": "Vm.2D"
+        "register": "Vn.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_n_f16",
+    "name": "vget_lane_s16",
     "arguments": [
-      "float16x4_t a",
-      "float16x4_t b",
-      "float16_t n"
+      "int16x4_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "int16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.4H "
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
       },
-      "b": {
+      "v": {
         "register": "Vn.4H"
-      },
-      "n": {
-        "register": "Vm.H[0]"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "SMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_n_f32",
+    "name": "vget_lane_s32",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b",
-      "float32_t n"
+      "int32x2_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "int32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vd.2S"
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
       },
-      "b": {
+      "v": {
         "register": "Vn.2S"
-      },
-      "n": {
-        "register": "Vm.S[0]"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "SMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfms_n_f64",
+    "name": "vget_lane_s64",
     "arguments": [
-      "float64x1_t a",
-      "float64x1_t b",
-      "float64_t n"
+      "int64x1_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "int64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Da"
-      },
-      "b": {
-        "register": "Dn"
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
       },
-      "n": {
-        "register": "Dm"
+      "v": {
+        "register": "Vn.1D"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMSUB"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsd_lane_f64",
+    "name": "vget_lane_s8",
     "arguments": [
-      "float64_t a",
-      "float64_t b",
-      "float64x1_t v",
+      "int8x8_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "int8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dd"
-      },
-      "b": {
-        "register": "Dn"
-      },
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 7
       },
       "v": {
-        "register": "Vm.1D"
+        "register": "Vn.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "SMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsd_laneq_f64",
+    "name": "vget_lane_u16",
     "arguments": [
-      "float64_t a",
-      "float64_t b",
-      "float64x2_t v",
+      "uint16x4_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "uint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Dd"
-      },
-      "b": {
-        "register": "Dn"
-      },
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "v": {
-        "register": "Vm.2D"
+        "register": "Vn.4H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsh_f16",
+    "name": "vget_lane_u32",
     "arguments": [
-      "float16_t a",
-      "float16_t b",
-      "float16_t c"
+      "uint32x2_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "uint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Ha"
-      },
-      "b": {
-        "register": "Hn"
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
       },
-      "c": {
-        "register": "Hm"
+      "v": {
+        "register": "Vn.2S"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMSUB"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsh_lane_f16",
+    "name": "vget_lane_u64",
     "arguments": [
-      "float16_t a",
-      "float16_t b",
-      "float16x4_t v",
+      "uint64x1_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "uint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hd"
-      },
-      "b": {
-        "register": "Hn"
-      },
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 0
       },
       "v": {
-        "register": "Vm.4H"
+        "register": "Vn.1D"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsh_laneq_f16",
+    "name": "vget_lane_u8",
     "arguments": [
-      "float16_t a",
-      "float16_t b",
-      "float16x8_t v",
+      "uint8x8_t v",
       "const int lane"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "uint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Hd"
-      },
-      "b": {
-        "register": "Hn"
-      },
       "lane": {
         "minimum": 0,
         "maximum": 7
       },
       "v": {
-        "register": "Vm.8H"
+        "register": "Vn.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_f16",
+    "name": "vget_low_f16",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b",
-      "float16x8_t c"
+      "float16x8_t a"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "float16x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.8H"
-      },
-      "b": {
         "register": "Vn.8H"
-      },
-      "c": {
-        "register": "Vm.8H"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_f32",
+    "name": "vget_low_f32",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b",
-      "float32x4_t c"
+      "float32x4_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.4S"
-      },
-      "b": {
         "register": "Vn.4S"
-      },
-      "c": {
-        "register": "Vm.4S"
       }
     },
     "Architectures": [
@@ -32093,30 +229106,22 @@
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_f64",
+    "name": "vget_low_f64",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b",
-      "float64x2_t c"
+      "float64x2_t a"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "float64x1_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
         "register": "Vn.2D"
-      },
-      "c": {
-        "register": "Vm.2D"
       }
     },
     "Architectures": [
@@ -32124,402 +229129,300 @@
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_lane_f16",
+    "name": "vget_low_p16",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b",
-      "float16x4_t v",
-      "const int lane"
+      "poly16x8_t a"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "poly16x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.8H"
-      },
-      "b": {
         "register": "Vn.8H"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "v": {
-        "register": "Vm.4H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_lane_f32",
+    "name": "vget_low_p64",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b",
-      "float32x2_t v",
-      "const int lane"
+      "poly64x2_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "poly64x1_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.4S"
-      },
-      "b": {
-        "register": "Vn.4S"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "v": {
-        "register": "Vm.2S"
+        "register": "Vn.2D"
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_lane_f64",
+    "name": "vget_low_p8",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b",
-      "float64x1_t v",
-      "const int lane"
+      "poly8x16_t a"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
-        "register": "Vn.2D"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
-      },
-      "v": {
-        "register": "Vm.1D"
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_laneq_f16",
+    "name": "vget_low_s16",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b",
-      "float16x8_t v",
-      "const int lane"
+      "int16x8_t a"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "int16x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.8H"
-      },
-      "b": {
         "register": "Vn.8H"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "v": {
-        "register": "Vm.8H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_laneq_f32",
+    "name": "vget_low_s32",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b",
-      "float32x4_t v",
-      "const int lane"
+      "int32x4_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "int32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.4S"
-      },
-      "b": {
         "register": "Vn.4S"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "v": {
-        "register": "Vm.4S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_laneq_f64",
+    "name": "vget_low_s64",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b",
-      "float64x2_t v",
-      "const int lane"
+      "int64x2_t a"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "int64x1_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
         "register": "Vn.2D"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "v": {
-        "register": "Vm.2D"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_n_f16",
+    "name": "vget_low_s8",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b",
-      "float16_t n"
+      "int8x16_t a"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "int8x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.8H "
-      },
-      "b": {
-        "register": "Vn.8H"
-      },
-      "n": {
-        "register": "Vm.H[0]"
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_n_f32",
+    "name": "vget_low_u16",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b",
-      "float32_t n"
+      "uint16x8_t a"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "uint16x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.4S"
-      },
-      "b": {
-        "register": "Vn.4S"
-      },
-      "n": {
-        "register": "Vm.S[0]"
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmsq_n_f64",
+    "name": "vget_low_u32",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b",
-      "float64_t n"
+      "uint32x4_t a"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "uint32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vd.2D"
-      },
-      "b": {
-        "register": "Vn.2D"
-      },
-      "n": {
-        "register": "Vm.D[0]"
+        "register": "Vn.4S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmss_lane_f32",
+    "name": "vget_low_u64",
     "arguments": [
-      "float32_t a",
-      "float32_t b",
-      "float32x2_t v",
-      "const int lane"
+      "uint64x2_t a"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "uint64x1_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Sd"
-      },
-      "b": {
-        "register": "Sn"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "v": {
-        "register": "Vm.2S"
+        "register": "Vn.2D"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vfmss_laneq_f32",
+    "name": "vget_low_u8",
     "arguments": [
-      "float32_t a",
-      "float32_t b",
-      "float32x4_t v",
-      "const int lane"
+      "uint8x16_t a"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Sd"
-      },
-      "b": {
-        "register": "Sn"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "v": {
-        "register": "Vm.4S"
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "FMLS"
+        "DUP"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_f16",
+    "name": "vgetq_lane_f16",
     "arguments": [
-      "float16x8_t a"
+      "float16x8_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "float16_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "v": {
         "register": "Vn.8H"
       }
     },
@@ -32536,15 +229439,20 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_f32",
+    "name": "vgetq_lane_f32",
     "arguments": [
-      "float32x4_t a"
+      "float32x4_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "float32_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
         "register": "Vn.4S"
       }
     },
@@ -32561,15 +229469,20 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_f64",
+    "name": "vgetq_lane_f64",
     "arguments": [
-      "float64x2_t a"
+      "float64x2_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "float64_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
         "register": "Vn.2D"
       }
     },
@@ -32584,15 +229497,20 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_p16",
+    "name": "vgetq_lane_p16",
     "arguments": [
-      "poly16x8_t a"
+      "poly16x8_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "poly16_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "v": {
         "register": "Vn.8H"
       }
     },
@@ -32603,21 +229521,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_p64",
+    "name": "vgetq_lane_p64",
     "arguments": [
-      "poly64x2_t a"
+      "poly64x2_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "poly64_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
         "register": "Vn.2D"
       }
     },
@@ -32627,21 +229550,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_p8",
+    "name": "vgetq_lane_p8",
     "arguments": [
-      "poly8x16_t a"
+      "poly8x16_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "poly8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "v": {
         "register": "Vn.16B"
       }
     },
@@ -32652,21 +229580,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_s16",
+    "name": "vgetq_lane_s16",
     "arguments": [
-      "int16x8_t a"
+      "int16x8_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "int16_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "v": {
         "register": "Vn.8H"
       }
     },
@@ -32677,21 +229610,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "SMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_s32",
+    "name": "vgetq_lane_s32",
     "arguments": [
-      "int32x4_t a"
+      "int32x4_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "int32_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
         "register": "Vn.4S"
       }
     },
@@ -32702,21 +229640,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "SMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_s64",
+    "name": "vgetq_lane_s64",
     "arguments": [
-      "int64x2_t a"
+      "int64x2_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "int64_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
         "register": "Vn.2D"
       }
     },
@@ -32727,21 +229670,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_s8",
+    "name": "vgetq_lane_s8",
     "arguments": [
-      "int8x16_t a"
+      "int8x16_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "int8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "v": {
         "register": "Vn.16B"
       }
     },
@@ -32752,21 +229700,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "SMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_u16",
+    "name": "vgetq_lane_u16",
     "arguments": [
-      "uint16x8_t a"
+      "uint16x8_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "uint16_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "v": {
         "register": "Vn.8H"
       }
     },
@@ -32777,21 +229730,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_u32",
+    "name": "vgetq_lane_u32",
     "arguments": [
-      "uint32x4_t a"
+      "uint32x4_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "uint32_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "v": {
         "register": "Vn.4S"
       }
     },
@@ -32802,21 +229760,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_u64",
+    "name": "vgetq_lane_u64",
     "arguments": [
-      "uint64x2_t a"
+      "uint64x2_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "uint64_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "v": {
         "register": "Vn.2D"
       }
     },
@@ -32827,21 +229790,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_high_u8",
+    "name": "vgetq_lane_u8",
     "arguments": [
-      "uint8x16_t a"
+      "uint8x16_t v",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "uint8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
+      "v": {
         "register": "Vn.16B"
       }
     },
@@ -32852,27 +229820,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UMOV"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_f16",
+    "name": "vhadd_s16",
     "arguments": [
-      "float16x4_t v",
-      "const int lane"
+      "int16x4_t a",
+      "int16x4_t b"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "int16x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "v": {
+      "a": {
         "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
@@ -32882,27 +229849,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "SHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_f32",
+    "name": "vhadd_s32",
     "arguments": [
-      "float32x2_t v",
-      "const int lane"
+      "int32x2_t a",
+      "int32x2_t b"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "int32x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "v": {
+      "a": {
         "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
@@ -32912,55 +229878,55 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "SHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_f64",
+    "name": "vhadd_s8",
     "arguments": [
-      "float64x1_t v",
-      "const int lane"
+      "int8x8_t a",
+      "int8x8_t b"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "a": {
+        "register": "Vn.8B"
       },
-      "v": {
-        "register": "Vn.1D"
+      "b": {
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "SHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_p16",
+    "name": "vhadd_u16",
     "arguments": [
-      "poly16x4_t v",
-      "const int lane"
+      "uint16x4_t a",
+      "uint16x4_t b"
     ],
     "return_type": {
-      "value": "poly16_t"
+      "value": "uint16x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "v": {
+      "a": {
         "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
@@ -32970,56 +229936,55 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "UHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_p64",
+    "name": "vhadd_u32",
     "arguments": [
-      "poly64x1_t v",
-      "const int lane"
+      "uint32x2_t a",
+      "uint32x2_t b"
     ],
     "return_type": {
-      "value": "poly64_t"
+      "value": "uint32x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "a": {
+        "register": "Vn.2S"
       },
-      "v": {
-        "register": "Vn.1D"
+      "b": {
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UMOV"
+        "UHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_p8",
+    "name": "vhadd_u8",
     "arguments": [
-      "poly8x8_t v",
-      "const int lane"
+      "uint8x8_t a",
+      "uint8x8_t b"
     ],
     "return_type": {
-      "value": "poly8_t"
+      "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "v": {
+      "a": {
         "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
@@ -33029,27 +229994,26 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "UHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_s16",
+    "name": "vhaddq_s16",
     "arguments": [
-      "int16x4_t v",
-      "const int lane"
+      "int16x8_t a",
+      "int16x8_t b"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "a": {
+        "register": "Vn.8H"
       },
-      "v": {
-        "register": "Vn.4H"
+      "b": {
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
@@ -33059,27 +230023,26 @@
     ],
     "instructions": [
       [
-        "SMOV"
+        "SHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_s32",
+    "name": "vhaddq_s32",
     "arguments": [
-      "int32x2_t v",
-      "const int lane"
+      "int32x4_t a",
+      "int32x4_t b"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "int32x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "a": {
+        "register": "Vn.4S"
       },
-      "v": {
-        "register": "Vn.2S"
+      "b": {
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
@@ -33089,27 +230052,26 @@
     ],
     "instructions": [
       [
-        "SMOV"
+        "SHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_s64",
+    "name": "vhaddq_s8",
     "arguments": [
-      "int64x1_t v",
-      "const int lane"
+      "int8x16_t a",
+      "int8x16_t b"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "a": {
+        "register": "Vn.16B"
       },
-      "v": {
-        "register": "Vn.1D"
+      "b": {
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
@@ -33119,27 +230081,26 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "SHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_s8",
+    "name": "vhaddq_u16",
     "arguments": [
-      "int8x8_t v",
-      "const int lane"
+      "uint16x8_t a",
+      "uint16x8_t b"
     ],
     "return_type": {
-      "value": "int8_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "a": {
+        "register": "Vn.8H"
       },
-      "v": {
-        "register": "Vn.8B"
+      "b": {
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
@@ -33149,27 +230110,26 @@
     ],
     "instructions": [
       [
-        "SMOV"
+        "UHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_u16",
+    "name": "vhaddq_u32",
     "arguments": [
-      "uint16x4_t v",
-      "const int lane"
+      "uint32x4_t a",
+      "uint32x4_t b"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "uint32x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
+      "a": {
+        "register": "Vn.4S"
       },
-      "v": {
-        "register": "Vn.4H"
+      "b": {
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
@@ -33179,27 +230139,26 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "UHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_u32",
+    "name": "vhaddq_u8",
     "arguments": [
-      "uint32x2_t v",
-      "const int lane"
+      "uint8x16_t a",
+      "uint8x16_t b"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
+      "a": {
+        "register": "Vn.16B"
       },
-      "v": {
-        "register": "Vn.2S"
+      "b": {
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
@@ -33209,27 +230168,26 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "UHADD"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_u64",
+    "name": "vhsub_s16",
     "arguments": [
-      "uint64x1_t v",
-      "const int lane"
+      "int16x4_t a",
+      "int16x4_t b"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "int16x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
+      "a": {
+        "register": "Vn.4H"
       },
-      "v": {
-        "register": "Vn.1D"
+      "b": {
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
@@ -33239,27 +230197,26 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "SHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_lane_u8",
+    "name": "vhsub_s32",
     "arguments": [
-      "uint8x8_t v",
-      "const int lane"
+      "int32x2_t a",
+      "int32x2_t b"
     ],
     "return_type": {
-      "value": "uint8_t"
+      "value": "int32x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
+      "a": {
+        "register": "Vn.2S"
       },
-      "v": {
-        "register": "Vn.8B"
+      "b": {
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
@@ -33269,22 +230226,26 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "SHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_f16",
+    "name": "vhsub_s8",
     "arguments": [
-      "float16x8_t a"
+      "int8x8_t a",
+      "int8x8_t b"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "int8x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
@@ -33294,22 +230255,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "SHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_f32",
+    "name": "vhsub_u16",
     "arguments": [
-      "float32x4_t a"
+      "uint16x4_t a",
+      "uint16x4_t b"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "uint16x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.4S"
+        "register": "Vn.4H"
+      },
+      "b": {
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
@@ -33319,45 +230284,55 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_f64",
+    "name": "vhsub_u32",
     "arguments": [
-      "float64x2_t a"
+      "uint32x2_t a",
+      "uint32x2_t b"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "uint32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2D"
+        "register": "Vn.2S"
+      },
+      "b": {
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "UHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_p16",
+    "name": "vhsub_u8",
     "arguments": [
-      "poly16x8_t a"
+      "uint8x8_t a",
+      "uint8x8_t b"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.8B"
+      },
+      "b": {
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
@@ -33367,46 +230342,55 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_p64",
+    "name": "vhsubq_s16",
     "arguments": [
-      "poly64x2_t a"
+      "int16x8_t a",
+      "int16x8_t b"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2D"
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "SHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_p8",
+    "name": "vhsubq_s32",
     "arguments": [
-      "poly8x16_t a"
+      "int32x4_t a",
+      "int32x4_t b"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "int32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
@@ -33416,22 +230400,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "SHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_s16",
+    "name": "vhsubq_s8",
     "arguments": [
-      "int16x8_t a"
+      "int8x16_t a",
+      "int8x16_t b"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
@@ -33441,22 +230429,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "SHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_s32",
+    "name": "vhsubq_u16",
     "arguments": [
-      "int32x4_t a"
+      "uint16x8_t a",
+      "uint16x8_t b"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.4S"
+        "register": "Vn.8H"
+      },
+      "b": {
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
@@ -33466,22 +230458,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_s64",
+    "name": "vhsubq_u32",
     "arguments": [
-      "int64x2_t a"
+      "uint32x4_t a",
+      "uint32x4_t b"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "uint32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2D"
+        "register": "Vn.4S"
+      },
+      "b": {
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
@@ -33491,22 +230487,26 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_s8",
+    "name": "vhsubq_u8",
     "arguments": [
-      "int8x16_t a"
+      "uint8x16_t a",
+      "uint8x16_t b"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
       "a": {
         "register": "Vn.16B"
+      },
+      "b": {
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
@@ -33516,22 +230516,22 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "UHSUB"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_u16",
+    "name": "vld1_dup_f16",
     "arguments": [
-      "uint16x8_t a"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "float16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33541,22 +230541,22 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_u32",
+    "name": "vld1_dup_f32",
     "arguments": [
-      "uint32x4_t a"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33566,47 +230566,45 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_u64",
+    "name": "vld1_dup_f64",
     "arguments": [
-      "uint64x2_t a"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "float64x1_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vget_low_u8",
+    "name": "vld1_dup_p16",
     "arguments": [
-      "uint8x16_t a"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "poly16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33616,57 +230614,46 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_f16",
+    "name": "vld1_dup_p64",
     "arguments": [
-      "float16x8_t v",
-      "const int lane"
+      "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "float16_t"
+      "value": "poly64x1_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "v": {
-        "register": "Vn.8H"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_f32",
+    "name": "vld1_dup_p8",
     "arguments": [
-      "float32x4_t v",
-      "const int lane"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "float32_t"
+      "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "v": {
-        "register": "Vn.4S"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33676,55 +230663,47 @@
     ],
     "instructions": [
       [
-        "DUP"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_f64",
+    "name": "vld1_dup_s16",
     "arguments": [
-      "float64x2_t v",
-      "const int lane"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "float64_t"
+      "value": "int16x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "v": {
-        "register": "Vn.2D"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "DUP"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_p16",
+    "name": "vld1_dup_s32",
     "arguments": [
-      "poly16x8_t v",
-      "const int lane"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16_t"
+      "value": "int32x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "v": {
-        "register": "Vn.8H"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33734,56 +230713,47 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_p64",
+    "name": "vld1_dup_s64",
     "arguments": [
-      "poly64x2_t v",
-      "const int lane"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64_t"
+      "value": "int64x1_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "v": {
-        "register": "Vn.2D"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UMOV"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_p8",
+    "name": "vld1_dup_s8",
     "arguments": [
-      "poly8x16_t v",
-      "const int lane"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8_t"
+      "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
-      },
-      "v": {
-        "register": "Vn.16B"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33793,27 +230763,22 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_s16",
+    "name": "vld1_dup_u16",
     "arguments": [
-      "int16x8_t v",
-      "const int lane"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16_t"
+      "value": "uint16x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "v": {
-        "register": "Vn.8H"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33823,27 +230788,22 @@
     ],
     "instructions": [
       [
-        "SMOV"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_s32",
+    "name": "vld1_dup_u32",
     "arguments": [
-      "int32x4_t v",
-      "const int lane"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32_t"
+      "value": "uint32x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "v": {
-        "register": "Vn.4S"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33853,27 +230813,22 @@
     ],
     "instructions": [
       [
-        "SMOV"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_s64",
+    "name": "vld1_dup_u64",
     "arguments": [
-      "int64x2_t v",
-      "const int lane"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64_t"
+      "value": "uint64x1_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "v": {
-        "register": "Vn.2D"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33883,27 +230838,22 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_s8",
+    "name": "vld1_dup_u8",
     "arguments": [
-      "int8x16_t v",
-      "const int lane"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8_t"
+      "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
-      },
-      "v": {
-        "register": "Vn.16B"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33913,27 +230863,22 @@
     ],
     "instructions": [
       [
-        "SMOV"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_u16",
+    "name": "vld1_f16",
     "arguments": [
-      "uint16x8_t v",
-      "const int lane"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16_t"
+      "value": "float16x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
-      "v": {
-        "register": "Vn.8H"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33943,27 +230888,22 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_u32",
+    "name": "vld1_f16_x2",
     "arguments": [
-      "uint32x4_t v",
-      "const int lane"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32_t"
+      "value": "float16x4x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "v": {
-        "register": "Vn.4S"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -33973,27 +230913,22 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_u64",
+    "name": "vld1_f16_x3",
     "arguments": [
-      "uint64x2_t v",
-      "const int lane"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64_t"
+      "value": "float16x4x3_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "v": {
-        "register": "Vn.2D"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -34003,27 +230938,22 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vgetq_lane_u8",
+    "name": "vld1_f16_x4",
     "arguments": [
-      "uint8x16_t v",
-      "const int lane"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8_t"
+      "value": "float16x4x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
-      },
-      "v": {
-        "register": "Vn.16B"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -34033,26 +230963,22 @@
     ],
     "instructions": [
       [
-        "UMOV"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhadd_s16",
+    "name": "vld1_f32",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
-      },
-      "b": {
-        "register": "Vm.4H"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -34062,26 +230988,22 @@
     ],
     "instructions": [
       [
-        "SHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhadd_s32",
+    "name": "vld1_f32_x2",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "float32x2x2_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
-      },
-      "b": {
-        "register": "Vm.2S"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -34091,26 +231013,22 @@
     ],
     "instructions": [
       [
-        "SHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhadd_s8",
+    "name": "vld1_f32_x3",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "float32x2x3_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -34120,26 +231038,22 @@
     ],
     "instructions": [
       [
-        "SHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhadd_u16",
+    "name": "vld1_f32_x4",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "float32x2x4_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
-      },
-      "b": {
-        "register": "Vm.4H"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -34149,142 +231063,123 @@
     ],
     "instructions": [
       [
-        "UHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhadd_u32",
+    "name": "vld1_f64",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "float64x1_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
-      },
-      "b": {
-        "register": "Vm.2S"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhadd_u8",
+    "name": "vld1_f64_x2",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "float64x1x2_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhaddq_s16",
+    "name": "vld1_f64_x3",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "float64x1x3_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
-      },
-      "b": {
-        "register": "Vm.8H"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhaddq_s32",
+    "name": "vld1_f64_x4",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "float64x1x4_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
-      },
-      "b": {
-        "register": "Vm.4S"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhaddq_s8",
+    "name": "vld1_lane_f16",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "float16_t const * ptr",
+      "float16x4_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "float16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
       },
-      "b": {
-        "register": "Vm.16B"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.4H"
       }
     },
     "Architectures": [
@@ -34294,26 +231189,31 @@
     ],
     "instructions": [
       [
-        "SHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhaddq_u16",
+    "name": "vld1_lane_f32",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "float32_t const * ptr",
+      "float32x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
       },
-      "b": {
-        "register": "Vm.8H"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2S"
       }
     },
     "Architectures": [
@@ -34323,55 +231223,63 @@
     ],
     "instructions": [
       [
-        "UHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhaddq_u32",
+    "name": "vld1_lane_f64",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "float64_t const * ptr",
+      "float64x1_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "float64x1_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
       },
-      "b": {
-        "register": "Vm.4S"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.1D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "UHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhaddq_u8",
+    "name": "vld1_lane_p16",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "poly16_t const * ptr",
+      "poly16x4_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "poly16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
       },
-      "b": {
-        "register": "Vm.16B"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.4H"
       }
     },
     "Architectures": [
@@ -34381,55 +231289,64 @@
     ],
     "instructions": [
       [
-        "UHADD"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsub_s16",
+    "name": "vld1_lane_p64",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "poly64_t const * ptr",
+      "poly64x1_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "poly64x1_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
       },
-      "b": {
-        "register": "Vm.4H"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.1D"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "SHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsub_s32",
+    "name": "vld1_lane_p8",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "poly8_t const * ptr",
+      "poly8x8_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
       },
-      "b": {
-        "register": "Vm.2S"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.8B"
       }
     },
     "Architectures": [
@@ -34439,26 +231356,31 @@
     ],
     "instructions": [
       [
-        "SHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsub_s8",
+    "name": "vld1_lane_s16",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "int16_t const * ptr",
+      "int16x4_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "int16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
       },
-      "b": {
-        "register": "Vm.8B"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.4H"
       }
     },
     "Architectures": [
@@ -34468,26 +231390,31 @@
     ],
     "instructions": [
       [
-        "SHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsub_u16",
+    "name": "vld1_lane_s32",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "int32_t const * ptr",
+      "int32x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "int32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
       },
-      "b": {
-        "register": "Vm.4H"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2S"
       }
     },
     "Architectures": [
@@ -34497,26 +231424,31 @@
     ],
     "instructions": [
       [
-        "UHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsub_u32",
+    "name": "vld1_lane_s64",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "int64_t const * ptr",
+      "int64x1_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "int64x1_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
       },
-      "b": {
-        "register": "Vm.2S"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.1D"
       }
     },
     "Architectures": [
@@ -34526,26 +231458,31 @@
     ],
     "instructions": [
       [
-        "UHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsub_u8",
+    "name": "vld1_lane_s8",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "int8_t const * ptr",
+      "int8x8_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
       },
-      "b": {
-        "register": "Vm.8B"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.8B"
       }
     },
     "Architectures": [
@@ -34555,26 +231492,31 @@
     ],
     "instructions": [
       [
-        "UHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsubq_s16",
+    "name": "vld1_lane_u16",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "uint16_t const * ptr",
+      "uint16x4_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "uint16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
       },
-      "b": {
-        "register": "Vm.8H"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.4H"
       }
     },
     "Architectures": [
@@ -34584,26 +231526,31 @@
     ],
     "instructions": [
       [
-        "SHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsubq_s32",
+    "name": "vld1_lane_u32",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "uint32_t const * ptr",
+      "uint32x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "uint32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
       },
-      "b": {
-        "register": "Vm.4S"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2S"
       }
     },
     "Architectures": [
@@ -34613,26 +231560,31 @@
     ],
     "instructions": [
       [
-        "SHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsubq_s8",
+    "name": "vld1_lane_u64",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "uint64_t const * ptr",
+      "uint64x1_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "uint64x1_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
       },
-      "b": {
-        "register": "Vm.16B"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.1D"
       }
     },
     "Architectures": [
@@ -34642,26 +231594,31 @@
     ],
     "instructions": [
       [
-        "SHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsubq_u16",
+    "name": "vld1_lane_u8",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "uint8_t const * ptr",
+      "uint8x8_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
       },
-      "b": {
-        "register": "Vm.8H"
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.8B"
       }
     },
     "Architectures": [
@@ -34671,26 +231628,22 @@
     ],
     "instructions": [
       [
-        "UHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsubq_u32",
+    "name": "vld1_p16",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "poly16x4_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
-      },
-      "b": {
-        "register": "Vm.4S"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -34700,26 +231653,22 @@
     ],
     "instructions": [
       [
-        "UHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vhsubq_u8",
+    "name": "vld1_p16_x2",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "poly16x4x2_t"
     },
     "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
-      },
-      "b": {
-        "register": "Vm.16B"
+      "ptr": {
+        "register": "Xn"
       }
     },
     "Architectures": [
@@ -34729,18 +231678,18 @@
     ],
     "instructions": [
       [
-        "UHSUB"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_f16",
+    "name": "vld1_p16_x3",
     "arguments": [
-      "float16_t const * ptr"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "poly16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34754,18 +231703,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_f32",
+    "name": "vld1_p16_x4",
     "arguments": [
-      "float32_t const * ptr"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "poly16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34779,18 +231728,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_f64",
+    "name": "vld1_p64",
     "arguments": [
-      "float64_t const * ptr"
+      "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "poly64x1_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34798,6 +231747,7 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -34808,12 +231758,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_p16",
+    "name": "vld1_p64_x2",
     "arguments": [
-      "poly16_t const * ptr"
+      "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "poly64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34821,24 +231771,23 @@
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_p64",
+    "name": "vld1_p64_x3",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "poly64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34857,12 +231806,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_p8",
+    "name": "vld1_p64_x4",
     "arguments": [
-      "poly8_t const * ptr"
+      "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "poly64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34870,24 +231819,23 @@
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_s16",
+    "name": "vld1_p8",
     "arguments": [
-      "int16_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34901,18 +231849,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_s32",
+    "name": "vld1_p8_x2",
     "arguments": [
-      "int32_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "poly8x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34926,18 +231874,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_s64",
+    "name": "vld1_p8_x3",
     "arguments": [
-      "int64_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "poly8x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34957,12 +231905,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_s8",
+    "name": "vld1_p8_x4",
     "arguments": [
-      "int8_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "poly8x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -34976,18 +231924,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_u16",
+    "name": "vld1_s16",
     "arguments": [
-      "uint16_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "int16x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35001,18 +231949,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_u32",
+    "name": "vld1_s16_x2",
     "arguments": [
-      "uint32_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "int16x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35026,18 +231974,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_u64",
+    "name": "vld1_s16_x3",
     "arguments": [
-      "uint64_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "int16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35057,12 +232005,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_dup_u8",
+    "name": "vld1_s16_x4",
     "arguments": [
-      "uint8_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "int16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35076,18 +232024,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f16",
+    "name": "vld1_s32",
     "arguments": [
-      "float16_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "int32x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35107,12 +232055,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f16_x2",
+    "name": "vld1_s32_x2",
     "arguments": [
-      "float16_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4x2_t"
+      "value": "int32x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35132,12 +232080,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f16_x3",
+    "name": "vld1_s32_x3",
     "arguments": [
-      "float16_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4x3_t"
+      "value": "int32x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35157,12 +232105,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f16_x4",
+    "name": "vld1_s32_x4",
     "arguments": [
-      "float16_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4x4_t"
+      "value": "int32x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35182,12 +232130,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f32",
+    "name": "vld1_s64",
     "arguments": [
-      "float32_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "int64x1_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35207,12 +232155,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f32_x2",
+    "name": "vld1_s64_x2",
     "arguments": [
-      "float32_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2x2_t"
+      "value": "int64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35232,12 +232180,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f32_x3",
+    "name": "vld1_s64_x3",
     "arguments": [
-      "float32_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2x3_t"
+      "value": "int64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35257,12 +232205,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f32_x4",
+    "name": "vld1_s64_x4",
     "arguments": [
-      "float32_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2x4_t"
+      "value": "int64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35282,12 +232230,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f64",
+    "name": "vld1_s8",
     "arguments": [
-      "float64_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "int8x8_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35295,6 +232243,8 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -35305,12 +232255,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f64_x2",
+    "name": "vld1_s8_x2",
     "arguments": [
-      "float64_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1x2_t"
+      "value": "int8x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35318,6 +232268,8 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -35328,12 +232280,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f64_x3",
+    "name": "vld1_s8_x3",
     "arguments": [
-      "float64_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1x3_t"
+      "value": "int8x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35341,6 +232293,8 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -35351,12 +232305,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_f64_x4",
+    "name": "vld1_s8_x4",
     "arguments": [
-      "float64_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1x4_t"
+      "value": "int8x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35364,6 +232318,8 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -35374,25 +232330,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_f16",
+    "name": "vld1_u16",
     "arguments": [
-      "float16_t const * ptr",
-      "float16x4_t src",
-      "const int lane"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4_t"
+      "value": "uint16x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.4H"
       }
     },
     "Architectures": [
@@ -35408,25 +232355,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_f32",
+    "name": "vld1_u16_x2",
     "arguments": [
-      "float32_t const * ptr",
-      "float32x2_t src",
-      "const int lane"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "uint16x4x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.2S"
       }
     },
     "Architectures": [
@@ -35442,28 +232380,21 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_f64",
+    "name": "vld1_u16_x3",
     "arguments": [
-      "float64_t const * ptr",
-      "float64x1_t src",
-      "const int lane"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1_t"
+      "value": "uint16x4x3_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.1D"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -35474,25 +232405,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_p16",
+    "name": "vld1_u16_x4",
     "arguments": [
-      "poly16_t const * ptr",
-      "poly16x4_t src",
-      "const int lane"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "uint16x4x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.4H"
       }
     },
     "Architectures": [
@@ -35508,28 +232430,20 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_p64",
+    "name": "vld1_u32",
     "arguments": [
-      "poly64_t const * ptr",
-      "poly64x1_t src",
-      "const int lane"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "uint32x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.1D"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
@@ -35541,25 +232455,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_p8",
+    "name": "vld1_u32_x2",
     "arguments": [
-      "poly8_t const * ptr",
-      "poly8x8_t src",
-      "const int lane"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "uint32x2x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.8B"
       }
     },
     "Architectures": [
@@ -35575,25 +232480,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_s16",
+    "name": "vld1_u32_x3",
     "arguments": [
-      "int16_t const * ptr",
-      "int16x4_t src",
-      "const int lane"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "uint32x2x3_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.4H"
       }
     },
     "Architectures": [
@@ -35609,25 +232505,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_s32",
+    "name": "vld1_u32_x4",
     "arguments": [
-      "int32_t const * ptr",
-      "int32x2_t src",
-      "const int lane"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "uint32x2x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.2S"
       }
     },
     "Architectures": [
@@ -35643,25 +232530,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_s64",
+    "name": "vld1_u64",
     "arguments": [
-      "int64_t const * ptr",
-      "int64x1_t src",
-      "const int lane"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "uint64x1_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.1D"
       }
     },
     "Architectures": [
@@ -35677,25 +232555,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_s8",
+    "name": "vld1_u64_x2",
     "arguments": [
-      "int8_t const * ptr",
-      "int8x8_t src",
-      "const int lane"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "uint64x1x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.8B"
       }
     },
     "Architectures": [
@@ -35711,25 +232580,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_u16",
+    "name": "vld1_u64_x3",
     "arguments": [
-      "uint16_t const * ptr",
-      "uint16x4_t src",
-      "const int lane"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "uint64x1x3_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.4H"
       }
     },
     "Architectures": [
@@ -35745,25 +232605,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_u32",
+    "name": "vld1_u64_x4",
     "arguments": [
-      "uint32_t const * ptr",
-      "uint32x2_t src",
-      "const int lane"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "uint64x1x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.2S"
       }
     },
     "Architectures": [
@@ -35779,25 +232630,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_u64",
+    "name": "vld1_u8",
     "arguments": [
-      "uint64_t const * ptr",
-      "uint64x1_t src",
-      "const int lane"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 0
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.1D"
       }
     },
     "Architectures": [
@@ -35813,25 +232655,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_lane_u8",
+    "name": "vld1_u8_x2",
     "arguments": [
-      "uint8_t const * ptr",
-      "uint8x8_t src",
-      "const int lane"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "uint8x8x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.8B"
       }
     },
     "Architectures": [
@@ -35847,12 +232680,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p16",
+    "name": "vld1_u8_x3",
     "arguments": [
-      "poly16_t const * ptr"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4_t"
+      "value": "uint8x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35872,12 +232705,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p16_x2",
+    "name": "vld1_u8_x4",
     "arguments": [
-      "poly16_t const * ptr"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4x2_t"
+      "value": "uint8x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35897,12 +232730,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p16_x3",
+    "name": "vld1q_dup_f16",
     "arguments": [
-      "poly16_t const * ptr"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4x3_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35916,18 +232749,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p16_x4",
+    "name": "vld1q_dup_f32",
     "arguments": [
-      "poly16_t const * ptr"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4x4_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35941,18 +232774,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p64",
+    "name": "vld1q_dup_f64",
     "arguments": [
-      "poly64_t const * ptr"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1_t"
+      "value": "float64x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35960,23 +232793,22 @@
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p64_x2",
+    "name": "vld1q_dup_p16",
     "arguments": [
-      "poly64_t const * ptr"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1x2_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -35984,23 +232816,24 @@
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p64_x3",
+    "name": "vld1q_dup_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1x3_t"
+      "value": "poly64x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36013,18 +232846,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p64_x4",
+    "name": "vld1q_dup_p8",
     "arguments": [
-      "poly64_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1x4_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36032,23 +232865,24 @@
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p8",
+    "name": "vld1q_dup_s16",
     "arguments": [
-      "poly8_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36062,18 +232896,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p8_x2",
+    "name": "vld1q_dup_s32",
     "arguments": [
-      "poly8_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8x2_t"
+      "value": "int32x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36087,18 +232921,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p8_x3",
+    "name": "vld1q_dup_s64",
     "arguments": [
-      "poly8_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8x3_t"
+      "value": "int64x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36112,18 +232946,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_p8_x4",
+    "name": "vld1q_dup_s8",
     "arguments": [
-      "poly8_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8x4_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36137,18 +232971,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s16",
+    "name": "vld1q_dup_u16",
     "arguments": [
-      "int16_t const * ptr"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36162,18 +232996,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s16_x2",
+    "name": "vld1q_dup_u32",
     "arguments": [
-      "int16_t const * ptr"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4x2_t"
+      "value": "uint32x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36187,18 +233021,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s16_x3",
+    "name": "vld1q_dup_u64",
     "arguments": [
-      "int16_t const * ptr"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4x3_t"
+      "value": "uint64x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36212,18 +233046,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s16_x4",
+    "name": "vld1q_dup_u8",
     "arguments": [
-      "int16_t const * ptr"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4x4_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36237,18 +233071,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD1R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s32",
+    "name": "vld1q_f16",
     "arguments": [
-      "int32_t const * ptr"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36268,12 +233102,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s32_x2",
+    "name": "vld1q_f16_x2",
     "arguments": [
-      "int32_t const * ptr"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2x2_t"
+      "value": "float16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36293,12 +233127,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s32_x3",
+    "name": "vld1q_f16_x3",
     "arguments": [
-      "int32_t const * ptr"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2x3_t"
+      "value": "float16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36318,12 +233152,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s32_x4",
+    "name": "vld1q_f16_x4",
     "arguments": [
-      "int32_t const * ptr"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2x4_t"
+      "value": "float16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36343,12 +233177,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s64",
+    "name": "vld1q_f32",
     "arguments": [
-      "int64_t const * ptr"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36368,12 +233202,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s64_x2",
+    "name": "vld1q_f32_x2",
     "arguments": [
-      "int64_t const * ptr"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1x2_t"
+      "value": "float32x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36393,12 +233227,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s64_x3",
+    "name": "vld1q_f32_x3",
     "arguments": [
-      "int64_t const * ptr"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1x3_t"
+      "value": "float32x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36418,12 +233252,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s64_x4",
+    "name": "vld1q_f32_x4",
     "arguments": [
-      "int64_t const * ptr"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1x4_t"
+      "value": "float32x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36443,12 +233277,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s8",
+    "name": "vld1q_f64",
     "arguments": [
-      "int8_t const * ptr"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8_t"
+      "value": "float64x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36456,8 +233290,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -36468,12 +233300,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s8_x2",
+    "name": "vld1q_f64_x2",
     "arguments": [
-      "int8_t const * ptr"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8x2_t"
+      "value": "float64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36481,8 +233313,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -36493,12 +233323,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s8_x3",
+    "name": "vld1q_f64_x3",
     "arguments": [
-      "int8_t const * ptr"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8x3_t"
+      "value": "float64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36506,8 +233336,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -36518,12 +233346,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_s8_x4",
+    "name": "vld1q_f64_x4",
     "arguments": [
-      "int8_t const * ptr"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8x4_t"
+      "value": "float64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36531,8 +233359,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -36543,16 +233369,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u16",
+    "name": "vld1q_lane_f16",
     "arguments": [
-      "uint16_t const * ptr"
+      "float16_t const * ptr",
+      "float16x8_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x4_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.8H"
       }
     },
     "Architectures": [
@@ -36568,16 +233403,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u16_x2",
+    "name": "vld1q_lane_f32",
     "arguments": [
-      "uint16_t const * ptr"
+      "float32_t const * ptr",
+      "float32x4_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x4x2_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.4S"
       }
     },
     "Architectures": [
@@ -36593,21 +233437,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u16_x3",
+    "name": "vld1q_lane_f64",
     "arguments": [
-      "uint16_t const * ptr"
+      "float64_t const * ptr",
+      "float64x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x4x3_t"
+      "value": "float64x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -36618,16 +233469,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u16_x4",
+    "name": "vld1q_lane_p16",
     "arguments": [
-      "uint16_t const * ptr"
+      "poly16_t const * ptr",
+      "poly16x8_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x4x4_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.8H"
       }
     },
     "Architectures": [
@@ -36643,20 +233503,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u32",
+    "name": "vld1q_lane_p64",
     "arguments": [
-      "uint32_t const * ptr"
+      "poly64_t const * ptr",
+      "poly64x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint32x2_t"
+      "value": "poly64x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2D"
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
@@ -36668,16 +233536,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u32_x2",
+    "name": "vld1q_lane_p8",
     "arguments": [
-      "uint32_t const * ptr"
+      "poly8_t const * ptr",
+      "poly8x16_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint32x2x2_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.16B"
       }
     },
     "Architectures": [
@@ -36693,16 +233570,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u32_x3",
+    "name": "vld1q_lane_s16",
     "arguments": [
-      "uint32_t const * ptr"
+      "int16_t const * ptr",
+      "int16x8_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint32x2x3_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.8H"
       }
     },
     "Architectures": [
@@ -36718,16 +233604,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u32_x4",
+    "name": "vld1q_lane_s32",
     "arguments": [
-      "uint32_t const * ptr"
+      "int32_t const * ptr",
+      "int32x4_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint32x2x4_t"
+      "value": "int32x4_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.4S"
       }
     },
     "Architectures": [
@@ -36743,16 +233638,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u64",
+    "name": "vld1q_lane_s64",
     "arguments": [
-      "uint64_t const * ptr"
+      "int64_t const * ptr",
+      "int64x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint64x1_t"
+      "value": "int64x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2D"
       }
     },
     "Architectures": [
@@ -36768,16 +233672,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u64_x2",
+    "name": "vld1q_lane_s8",
     "arguments": [
-      "uint64_t const * ptr"
+      "int8_t const * ptr",
+      "int8x16_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint64x1x2_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.16B"
       }
     },
     "Architectures": [
@@ -36793,16 +233706,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u64_x3",
+    "name": "vld1q_lane_u16",
     "arguments": [
-      "uint64_t const * ptr"
+      "uint16_t const * ptr",
+      "uint16x8_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint64x1x3_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.8H"
       }
     },
     "Architectures": [
@@ -36818,16 +233740,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u64_x4",
+    "name": "vld1q_lane_u32",
     "arguments": [
-      "uint64_t const * ptr"
+      "uint32_t const * ptr",
+      "uint32x4_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint64x1x4_t"
+      "value": "uint32x4_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.4S"
       }
     },
     "Architectures": [
@@ -36843,16 +233774,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u8",
+    "name": "vld1q_lane_u64",
     "arguments": [
-      "uint8_t const * ptr"
+      "uint64_t const * ptr",
+      "uint64x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint8x8_t"
+      "value": "uint64x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2D"
       }
     },
     "Architectures": [
@@ -36868,16 +233808,25 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u8_x2",
+    "name": "vld1q_lane_u8",
     "arguments": [
-      "uint8_t const * ptr"
+      "uint8_t const * ptr",
+      "uint8x16_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint8x8x2_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 15
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.16B"
       }
     },
     "Architectures": [
@@ -36893,12 +233842,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u8_x3",
+    "name": "vld1q_p16",
     "arguments": [
-      "uint8_t const * ptr"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8x3_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36918,12 +233867,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1_u8_x4",
+    "name": "vld1q_p16_x2",
     "arguments": [
-      "uint8_t const * ptr"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8x4_t"
+      "value": "poly16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36943,12 +233892,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_f16",
+    "name": "vld1q_p16_x3",
     "arguments": [
-      "float16_t const * ptr"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "poly16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36962,18 +233911,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_f32",
+    "name": "vld1q_p16_x4",
     "arguments": [
-      "float32_t const * ptr"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "poly16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -36987,18 +233936,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_f64",
+    "name": "vld1q_p64",
     "arguments": [
-      "float64_t const * ptr"
+      "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "poly64x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37006,22 +233955,23 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_p16",
+    "name": "vld1q_p64_x2",
     "arguments": [
-      "poly16_t const * ptr"
+      "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "poly64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37029,24 +233979,23 @@
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_p64",
+    "name": "vld1q_p64_x3",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "poly64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37059,18 +234008,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_p8",
+    "name": "vld1q_p64_x4",
     "arguments": [
-      "poly8_t const * ptr"
+      "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "poly64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37078,24 +234027,23 @@
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_s16",
+    "name": "vld1q_p8",
     "arguments": [
-      "int16_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37109,18 +234057,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_s32",
+    "name": "vld1q_p8_x2",
     "arguments": [
-      "int32_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "poly8x16x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37134,18 +234082,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_s64",
+    "name": "vld1q_p8_x3",
     "arguments": [
-      "int64_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "poly8x16x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37159,18 +234107,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_s8",
+    "name": "vld1q_p8_x4",
     "arguments": [
-      "int8_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "poly8x16x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37184,18 +234132,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_u16",
+    "name": "vld1q_s16",
     "arguments": [
-      "uint16_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37209,18 +234157,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_u32",
+    "name": "vld1q_s16_x2",
     "arguments": [
-      "uint32_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "int16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37234,18 +234182,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_u64",
+    "name": "vld1q_s16_x3",
     "arguments": [
-      "uint64_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "int16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37259,18 +234207,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_dup_u8",
+    "name": "vld1q_s16_x4",
     "arguments": [
-      "uint8_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "int16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37284,18 +234232,18 @@
     ],
     "instructions": [
       [
-        "LD1R"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f16",
+    "name": "vld1q_s32",
     "arguments": [
-      "float16_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "int32x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37315,12 +234263,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f16_x2",
+    "name": "vld1q_s32_x2",
     "arguments": [
-      "float16_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8x2_t"
+      "value": "int32x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37340,12 +234288,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f16_x3",
+    "name": "vld1q_s32_x3",
     "arguments": [
-      "float16_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8x3_t"
+      "value": "int32x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37365,12 +234313,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f16_x4",
+    "name": "vld1q_s32_x4",
     "arguments": [
-      "float16_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8x4_t"
+      "value": "int32x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37390,12 +234338,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f32",
+    "name": "vld1q_s64",
     "arguments": [
-      "float32_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "int64x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37415,12 +234363,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f32_x2",
+    "name": "vld1q_s64_x2",
     "arguments": [
-      "float32_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4x2_t"
+      "value": "int64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37440,12 +234388,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f32_x3",
+    "name": "vld1q_s64_x3",
     "arguments": [
-      "float32_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4x3_t"
+      "value": "int64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37465,12 +234413,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f32_x4",
+    "name": "vld1q_s64_x4",
     "arguments": [
-      "float32_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4x4_t"
+      "value": "int64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37490,12 +234438,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f64",
+    "name": "vld1q_s8",
     "arguments": [
-      "float64_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37503,6 +234451,8 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -37513,12 +234463,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f64_x2",
+    "name": "vld1q_s8_x2",
     "arguments": [
-      "float64_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2x2_t"
+      "value": "int8x16x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37526,6 +234476,8 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -37536,12 +234488,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f64_x3",
+    "name": "vld1q_s8_x3",
     "arguments": [
-      "float64_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2x3_t"
+      "value": "int8x16x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37549,6 +234501,8 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -37559,12 +234513,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_f64_x4",
+    "name": "vld1q_s8_x4",
     "arguments": [
-      "float64_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2x4_t"
+      "value": "int8x16x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -37572,6 +234526,8 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -37582,25 +234538,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_f16",
+    "name": "vld1q_u16",
     "arguments": [
-      "float16_t const * ptr",
-      "float16x8_t src",
-      "const int lane"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.8H"
       }
     },
     "Architectures": [
@@ -37616,25 +234563,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_f32",
+    "name": "vld1q_u16_x2",
     "arguments": [
-      "float32_t const * ptr",
-      "float32x4_t src",
-      "const int lane"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "uint16x8x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.4S"
       }
     },
     "Architectures": [
@@ -37650,28 +234588,21 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_f64",
+    "name": "vld1q_u16_x3",
     "arguments": [
-      "float64_t const * ptr",
-      "float64x2_t src",
-      "const int lane"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "uint16x8x3_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.2D"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -37682,25 +234613,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_p16",
+    "name": "vld1q_u16_x4",
     "arguments": [
-      "poly16_t const * ptr",
-      "poly16x8_t src",
-      "const int lane"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "uint16x8x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.8H"
       }
     },
     "Architectures": [
@@ -37716,28 +234638,20 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_p64",
+    "name": "vld1q_u32",
     "arguments": [
-      "poly64_t const * ptr",
-      "poly64x2_t src",
-      "const int lane"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "uint32x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.2D"
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
@@ -37749,25 +234663,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_p8",
+    "name": "vld1q_u32_x2",
     "arguments": [
-      "poly8_t const * ptr",
-      "poly8x16_t src",
-      "const int lane"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "uint32x4x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.16B"
       }
     },
     "Architectures": [
@@ -37783,25 +234688,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_s16",
+    "name": "vld1q_u32_x3",
     "arguments": [
-      "int16_t const * ptr",
-      "int16x8_t src",
-      "const int lane"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "uint32x4x3_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.8H"
       }
     },
     "Architectures": [
@@ -37817,25 +234713,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_s32",
+    "name": "vld1q_u32_x4",
     "arguments": [
-      "int32_t const * ptr",
-      "int32x4_t src",
-      "const int lane"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "uint32x4x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.4S"
       }
     },
     "Architectures": [
@@ -37851,25 +234738,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_s64",
+    "name": "vld1q_u64",
     "arguments": [
-      "int64_t const * ptr",
-      "int64x2_t src",
-      "const int lane"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "uint64x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.2D"
       }
     },
     "Architectures": [
@@ -37885,25 +234763,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_s8",
+    "name": "vld1q_u64_x2",
     "arguments": [
-      "int8_t const * ptr",
-      "int8x16_t src",
-      "const int lane"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "uint64x2x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.16B"
       }
     },
     "Architectures": [
@@ -37919,25 +234788,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_u16",
+    "name": "vld1q_u64_x3",
     "arguments": [
-      "uint16_t const * ptr",
-      "uint16x8_t src",
-      "const int lane"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "uint64x2x3_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 7
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.8H"
       }
     },
     "Architectures": [
@@ -37953,25 +234813,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_u32",
+    "name": "vld1q_u64_x4",
     "arguments": [
-      "uint32_t const * ptr",
-      "uint32x4_t src",
-      "const int lane"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "uint64x2x4_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.4S"
       }
     },
     "Architectures": [
@@ -37987,25 +234838,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_u64",
+    "name": "vld1q_u8",
     "arguments": [
-      "uint64_t const * ptr",
-      "uint64x2_t src",
-      "const int lane"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.2D"
       }
     },
     "Architectures": [
@@ -38021,25 +234863,16 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_lane_u8",
+    "name": "vld1q_u8_x2",
     "arguments": [
-      "uint8_t const * ptr",
-      "uint8x16_t src",
-      "const int lane"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "uint8x16x2_t"
     },
     "Arguments_Preparation": {
-      "lane": {
-        "minimum": 0,
-        "maximum": 15
-      },
       "ptr": {
         "register": "Xn"
-      },
-      "src": {
-        "register": "Vt.16B"
       }
     },
     "Architectures": [
@@ -38055,12 +234888,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p16",
+    "name": "vld1q_u8_x3",
     "arguments": [
-      "poly16_t const * ptr"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "uint8x16x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38080,12 +234913,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p16_x2",
+    "name": "vld1q_u8_x4",
     "arguments": [
-      "poly16_t const * ptr"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8x2_t"
+      "value": "uint8x16x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38105,12 +234938,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p16_x3",
+    "name": "vld2_dup_f16",
     "arguments": [
-      "poly16_t const * ptr"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8x3_t"
+      "value": "float16x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38124,18 +234957,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p16_x4",
+    "name": "vld2_dup_f32",
     "arguments": [
-      "poly16_t const * ptr"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8x4_t"
+      "value": "float32x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38149,18 +234982,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p64",
+    "name": "vld2_dup_f64",
     "arguments": [
-      "poly64_t const * ptr"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2_t"
+      "value": "float64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38168,23 +235001,22 @@
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p64_x2",
+    "name": "vld2_dup_p16",
     "arguments": [
-      "poly64_t const * ptr"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2x2_t"
+      "value": "poly16x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38192,23 +235024,24 @@
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p64_x3",
+    "name": "vld2_dup_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2x3_t"
+      "value": "poly64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38221,18 +235054,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p64_x4",
+    "name": "vld2_dup_p8",
     "arguments": [
-      "poly64_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2x4_t"
+      "value": "poly8x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38240,23 +235073,24 @@
       }
     },
     "Architectures": [
+      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p8",
+    "name": "vld2_dup_s16",
     "arguments": [
-      "poly8_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "int16x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38270,18 +235104,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p8_x2",
+    "name": "vld2_dup_s32",
     "arguments": [
-      "poly8_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16x2_t"
+      "value": "int32x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38295,18 +235129,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p8_x3",
+    "name": "vld2_dup_s64",
     "arguments": [
-      "poly8_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16x3_t"
+      "value": "int64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38320,18 +235154,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_p8_x4",
+    "name": "vld2_dup_s8",
     "arguments": [
-      "poly8_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16x4_t"
+      "value": "int8x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38345,18 +235179,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s16",
+    "name": "vld2_dup_u16",
     "arguments": [
-      "int16_t const * ptr"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "uint16x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38370,18 +235204,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s16_x2",
+    "name": "vld2_dup_u32",
     "arguments": [
-      "int16_t const * ptr"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8x2_t"
+      "value": "uint32x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38395,18 +235229,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s16_x3",
+    "name": "vld2_dup_u64",
     "arguments": [
-      "int16_t const * ptr"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8x3_t"
+      "value": "uint64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38420,18 +235254,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s16_x4",
+    "name": "vld2_dup_u8",
     "arguments": [
-      "int16_t const * ptr"
+      "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8x4_t"
+      "value": "uint8x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38445,18 +235279,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s32",
+    "name": "vld2_f16",
     "arguments": [
-      "int32_t const * ptr"
+      "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4_t"
+      "value": "float16x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38470,18 +235304,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s32_x2",
+    "name": "vld2_f32",
     "arguments": [
-      "int32_t const * ptr"
+      "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4x2_t"
+      "value": "float32x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38495,18 +235329,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s32_x3",
+    "name": "vld2_f64",
     "arguments": [
-      "int32_t const * ptr"
+      "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4x3_t"
+      "value": "float64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38514,8 +235348,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -38526,16 +235358,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s32_x4",
+    "name": "vld2_lane_f16",
     "arguments": [
-      "int32_t const * ptr"
+      "float16_t const * ptr",
+      "float16x4x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int32x4x4_t"
+      "value": "float16x4x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
       }
     },
     "Architectures": [
@@ -38545,22 +235389,34 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s64",
+    "name": "vld2_lane_f32",
     "arguments": [
-      "int64_t const * ptr"
+      "float32_t const * ptr",
+      "float32x2x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int64x2_t"
+      "value": "float32x2x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.2S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2S"
       }
     },
     "Architectures": [
@@ -38570,47 +235426,69 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s64_x2",
+    "name": "vld2_lane_f64",
     "arguments": [
-      "int64_t const * ptr"
+      "float64_t const * ptr",
+      "float64x1x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int64x2x2_t"
+      "value": "float64x1x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s64_x3",
+    "name": "vld2_lane_p16",
     "arguments": [
-      "int64_t const * ptr"
+      "poly16_t const * ptr",
+      "poly16x4x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int64x2x3_t"
+      "value": "poly16x4x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
       }
     },
     "Architectures": [
@@ -38620,47 +235498,69 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s64_x4",
+    "name": "vld2_lane_p64",
     "arguments": [
-      "int64_t const * ptr"
+      "poly64_t const * ptr",
+      "poly64x1x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int64x2x4_t"
+      "value": "poly64x1x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s8",
+    "name": "vld2_lane_p8",
     "arguments": [
-      "int8_t const * ptr"
+      "poly8_t const * ptr",
+      "poly8x8x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "poly8x8x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.8B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8B"
       }
     },
     "Architectures": [
@@ -38670,22 +235570,34 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s8_x2",
+    "name": "vld2_lane_s16",
     "arguments": [
-      "int8_t const * ptr"
+      "int16_t const * ptr",
+      "int16x4x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int8x16x2_t"
+      "value": "int16x4x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
       }
     },
     "Architectures": [
@@ -38695,22 +235607,34 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s8_x3",
+    "name": "vld2_lane_s32",
     "arguments": [
-      "int8_t const * ptr"
+      "int32_t const * ptr",
+      "int32x2x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int8x16x3_t"
+      "value": "int32x2x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.2S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2S"
       }
     },
     "Architectures": [
@@ -38720,47 +235644,69 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_s8_x4",
+    "name": "vld2_lane_s64",
     "arguments": [
-      "int8_t const * ptr"
+      "int64_t const * ptr",
+      "int64x1x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "int8x16x4_t"
+      "value": "int64x1x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u16",
+    "name": "vld2_lane_s8",
     "arguments": [
-      "uint16_t const * ptr"
+      "int8_t const * ptr",
+      "int8x8x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "int8x8x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.8B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8B"
       }
     },
     "Architectures": [
@@ -38770,22 +235716,34 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u16_x2",
+    "name": "vld2_lane_u16",
     "arguments": [
-      "uint16_t const * ptr"
+      "uint16_t const * ptr",
+      "uint16x4x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x8x2_t"
+      "value": "uint16x4x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 3
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
       }
     },
     "Architectures": [
@@ -38795,22 +235753,34 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u16_x3",
+    "name": "vld2_lane_u32",
     "arguments": [
-      "uint16_t const * ptr"
+      "uint32_t const * ptr",
+      "uint32x2x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x8x3_t"
+      "value": "uint32x2x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.2S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2S"
       }
     },
     "Architectures": [
@@ -38820,47 +235790,69 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u16_x4",
+    "name": "vld2_lane_u64",
     "arguments": [
-      "uint16_t const * ptr"
+      "uint64_t const * ptr",
+      "uint64x1x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint16x8x4_t"
+      "value": "uint64x1x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u32",
+    "name": "vld2_lane_u8",
     "arguments": [
-      "uint32_t const * ptr"
+      "uint8_t const * ptr",
+      "uint8x8x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint32x4_t"
+      "value": "uint8x8x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 7
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src.val[0]": {
+        "register": "Vt.8B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8B"
       }
     },
     "Architectures": [
@@ -38870,18 +235862,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u32_x2",
+    "name": "vld2_p16",
     "arguments": [
-      "uint32_t const * ptr"
+      "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4x2_t"
+      "value": "poly16x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38895,18 +235887,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u32_x3",
+    "name": "vld2_p64",
     "arguments": [
-      "uint32_t const * ptr"
+      "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4x3_t"
+      "value": "poly64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38914,7 +235906,6 @@
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
@@ -38926,12 +235917,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u32_x4",
+    "name": "vld2_p8",
     "arguments": [
-      "uint32_t const * ptr"
+      "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4x4_t"
+      "value": "poly8x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38945,18 +235936,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u64",
+    "name": "vld2_s16",
     "arguments": [
-      "uint64_t const * ptr"
+      "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2_t"
+      "value": "int16x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38970,18 +235961,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u64_x2",
+    "name": "vld2_s32",
     "arguments": [
-      "uint64_t const * ptr"
+      "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2x2_t"
+      "value": "int32x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -38995,18 +235986,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u64_x3",
+    "name": "vld2_s64",
     "arguments": [
-      "uint64_t const * ptr"
+      "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2x3_t"
+      "value": "int64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39026,12 +236017,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u64_x4",
+    "name": "vld2_s8",
     "arguments": [
-      "uint64_t const * ptr"
+      "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2x4_t"
+      "value": "int8x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39045,18 +236036,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u8",
+    "name": "vld2_u16",
     "arguments": [
-      "uint8_t const * ptr"
+      "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "uint16x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39070,18 +236061,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u8_x2",
+    "name": "vld2_u32",
     "arguments": [
-      "uint8_t const * ptr"
+      "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16x2_t"
+      "value": "uint32x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39095,18 +236086,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u8_x3",
+    "name": "vld2_u64",
     "arguments": [
-      "uint8_t const * ptr"
+      "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16x3_t"
+      "value": "uint64x1x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39126,12 +236117,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld1q_u8_x4",
+    "name": "vld2_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16x4_t"
+      "value": "uint8x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39145,18 +236136,18 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_f16",
+    "name": "vld2q_dup_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4x2_t"
+      "value": "float16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39176,12 +236167,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_f32",
+    "name": "vld2q_dup_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2x2_t"
+      "value": "float32x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39201,12 +236192,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_f64",
+    "name": "vld2q_dup_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1x2_t"
+      "value": "float64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39224,12 +236215,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_p16",
+    "name": "vld2q_dup_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4x2_t"
+      "value": "poly16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39249,12 +236240,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_p64",
+    "name": "vld2q_dup_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1x2_t"
+      "value": "poly64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39262,7 +236253,6 @@
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -39273,12 +236263,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_p8",
+    "name": "vld2q_dup_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8x2_t"
+      "value": "poly8x16x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39298,12 +236288,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_s16",
+    "name": "vld2q_dup_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4x2_t"
+      "value": "int16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39323,12 +236313,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_s32",
+    "name": "vld2q_dup_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2x2_t"
+      "value": "int32x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39348,12 +236338,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_s64",
+    "name": "vld2q_dup_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1x2_t"
+      "value": "int64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39361,8 +236351,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -39373,12 +236361,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_s8",
+    "name": "vld2q_dup_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8x2_t"
+      "value": "int8x16x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39398,12 +236386,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_u16",
+    "name": "vld2q_dup_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4x2_t"
+      "value": "uint16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39423,12 +236411,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_u32",
+    "name": "vld2q_dup_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2x2_t"
+      "value": "uint32x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39448,12 +236436,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_u64",
+    "name": "vld2q_dup_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x1x2_t"
+      "value": "uint64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39461,8 +236449,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -39473,12 +236459,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_dup_u8",
+    "name": "vld2q_dup_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8x2_t"
+      "value": "uint8x16x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39498,12 +236484,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_f16",
+    "name": "vld2q_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4x2_t"
+      "value": "float16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39523,12 +236509,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_f32",
+    "name": "vld2q_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2x2_t"
+      "value": "float32x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39548,12 +236534,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_f64",
+    "name": "vld2q_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1x2_t"
+      "value": "float64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -39565,31 +236551,34 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_f16",
+    "name": "vld2q_lane_f16",
     "arguments": [
       "float16_t const * ptr",
-      "float16x4x2_t src",
+      "float16x8x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float16x4x2_t"
+      "value": "float16x8x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
       }
     },
     "Architectures": [
@@ -39605,25 +236594,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_f32",
+    "name": "vld2q_lane_f32",
     "arguments": [
       "float32_t const * ptr",
-      "float32x2x2_t src",
+      "float32x4x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float32x2x2_t"
+      "value": "float32x4x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.2S"
+      "src.val[0]": {
+        "register": "Vt.4S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4S"
       }
     },
     "Architectures": [
@@ -39639,25 +236631,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_f64",
+    "name": "vld2q_lane_f64",
     "arguments": [
       "float64_t const * ptr",
-      "float64x1x2_t src",
+      "float64x2x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float64x1x2_t"
+      "value": "float64x2x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
       }
     },
     "Architectures": [
@@ -39671,25 +236666,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_p16",
+    "name": "vld2q_lane_p16",
     "arguments": [
       "poly16_t const * ptr",
-      "poly16x4x2_t src",
+      "poly16x8x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly16x4x2_t"
+      "value": "poly16x8x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
       }
     },
     "Architectures": [
@@ -39705,25 +236703,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_p64",
+    "name": "vld2q_lane_p64",
     "arguments": [
       "poly64_t const * ptr",
-      "poly64x1x2_t src",
+      "poly64x2x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly64x1x2_t"
+      "value": "poly64x2x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
       }
     },
     "Architectures": [
@@ -39737,30 +236738,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_p8",
+    "name": "vld2q_lane_p8",
     "arguments": [
       "poly8_t const * ptr",
-      "poly8x8x2_t src",
+      "poly8x16x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly8x8x2_t"
+      "value": "poly8x16x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 15
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.8B"
+      "src.val[0]": {
+        "register": "Vt.16B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -39771,25 +236773,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_s16",
+    "name": "vld2q_lane_s16",
     "arguments": [
       "int16_t const * ptr",
-      "int16x4x2_t src",
+      "int16x8x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int16x4x2_t"
+      "value": "int16x8x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
       }
     },
     "Architectures": [
@@ -39805,25 +236810,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_s32",
+    "name": "vld2q_lane_s32",
     "arguments": [
       "int32_t const * ptr",
-      "int32x2x2_t src",
+      "int32x4x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int32x2x2_t"
+      "value": "int32x4x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.2S"
+      "src.val[0]": {
+        "register": "Vt.4S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4S"
       }
     },
     "Architectures": [
@@ -39839,25 +236847,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_s64",
+    "name": "vld2q_lane_s64",
     "arguments": [
       "int64_t const * ptr",
-      "int64x1x2_t src",
+      "int64x2x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int64x1x2_t"
+      "value": "int64x2x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
       }
     },
     "Architectures": [
@@ -39871,30 +236882,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_s8",
+    "name": "vld2q_lane_s8",
     "arguments": [
       "int8_t const * ptr",
-      "int8x8x2_t src",
+      "int8x16x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int8x8x2_t"
+      "value": "int8x16x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 15
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.8B"
+      "src.val[0]": {
+        "register": "Vt.16B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -39905,25 +236917,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_u16",
+    "name": "vld2q_lane_u16",
     "arguments": [
       "uint16_t const * ptr",
-      "uint16x4x2_t src",
+      "uint16x8x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint16x4x2_t"
+      "value": "uint16x8x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
       }
     },
     "Architectures": [
@@ -39939,25 +236954,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_u32",
+    "name": "vld2q_lane_u32",
     "arguments": [
       "uint32_t const * ptr",
-      "uint32x2x2_t src",
+      "uint32x4x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint32x2x2_t"
+      "value": "uint32x4x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.2S"
+      "src.val[0]": {
+        "register": "Vt.4S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4S"
       }
     },
     "Architectures": [
@@ -39973,25 +236991,28 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_u64",
+    "name": "vld2q_lane_u64",
     "arguments": [
       "uint64_t const * ptr",
-      "uint64x1x2_t src",
+      "uint64x2x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint64x1x2_t"
+      "value": "uint64x2x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
       }
     },
     "Architectures": [
@@ -40005,30 +237026,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_lane_u8",
+    "name": "vld2q_lane_u8",
     "arguments": [
       "uint8_t const * ptr",
-      "uint8x8x2_t src",
+      "uint8x16x2_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint8x8x2_t"
+      "value": "uint8x16x2_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 15
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.8B"
+      "src.val[0]": {
+        "register": "Vt.16B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -40039,12 +237061,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_p16",
+    "name": "vld2q_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4x2_t"
+      "value": "poly16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40064,12 +237086,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_p64",
+    "name": "vld2q_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1x2_t"
+      "value": "poly64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40077,23 +237099,22 @@
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_p8",
+    "name": "vld2q_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8x2_t"
+      "value": "poly8x16x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40113,12 +237134,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_s16",
+    "name": "vld2q_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4x2_t"
+      "value": "int16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40138,12 +237159,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_s32",
+    "name": "vld2q_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2x2_t"
+      "value": "int32x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40163,12 +237184,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_s64",
+    "name": "vld2q_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1x2_t"
+      "value": "int64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40176,24 +237197,22 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_s8",
+    "name": "vld2q_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8x2_t"
+      "value": "int8x16x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40213,12 +237232,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_u16",
+    "name": "vld2q_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4x2_t"
+      "value": "uint16x8x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40238,12 +237257,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_u32",
+    "name": "vld2q_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2x2_t"
+      "value": "uint32x4x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40263,12 +237282,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_u64",
+    "name": "vld2q_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x1x2_t"
+      "value": "uint64x2x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40276,24 +237295,22 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2_u8",
+    "name": "vld2q_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8x2_t"
+      "value": "uint8x16x2_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40313,12 +237330,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_f16",
+    "name": "vld3_dup_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8x2_t"
+      "value": "float16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40332,18 +237349,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_f32",
+    "name": "vld3_dup_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4x2_t"
+      "value": "float32x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40357,18 +237374,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_f64",
+    "name": "vld3_dup_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2x2_t"
+      "value": "float64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40380,18 +237397,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_p16",
+    "name": "vld3_dup_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8x2_t"
+      "value": "poly16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40405,18 +237422,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_p64",
+    "name": "vld3_dup_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2x2_t"
+      "value": "poly64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40424,22 +237441,23 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_p8",
+    "name": "vld3_dup_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16x2_t"
+      "value": "poly8x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40453,18 +237471,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_s16",
+    "name": "vld3_dup_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8x2_t"
+      "value": "int16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40478,18 +237496,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_s32",
+    "name": "vld3_dup_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4x2_t"
+      "value": "int32x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40503,18 +237521,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_s64",
+    "name": "vld3_dup_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x2x2_t"
+      "value": "int64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40522,22 +237540,24 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_s8",
+    "name": "vld3_dup_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x16x2_t"
+      "value": "int8x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40551,18 +237571,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_u16",
+    "name": "vld3_dup_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x8x2_t"
+      "value": "uint16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40576,18 +237596,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_u32",
+    "name": "vld3_dup_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4x2_t"
+      "value": "uint32x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40601,18 +237621,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_u64",
+    "name": "vld3_dup_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2x2_t"
+      "value": "uint64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40620,22 +237640,24 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_dup_u8",
+    "name": "vld3_dup_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16x2_t"
+      "value": "uint8x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40649,18 +237671,18 @@
     ],
     "instructions": [
       [
-        "LD2R"
+        "LD3R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_f16",
+    "name": "vld3_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8x2_t"
+      "value": "float16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40674,18 +237696,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_f32",
+    "name": "vld3_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4x2_t"
+      "value": "float32x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40699,18 +237721,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_f64",
+    "name": "vld3_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2x2_t"
+      "value": "float64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -40722,31 +237744,37 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_f16",
+    "name": "vld3_lane_f16",
     "arguments": [
       "float16_t const * ptr",
-      "float16x8x2_t src",
+      "float16x4x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float16x8x2_t"
+      "value": "float16x4x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.8H"
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4H"
       }
     },
     "Architectures": [
@@ -40756,31 +237784,37 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_f32",
+    "name": "vld3_lane_f32",
     "arguments": [
       "float32_t const * ptr",
-      "float32x4x2_t src",
+      "float32x2x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float32x4x2_t"
+      "value": "float32x2x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.4S"
+      "src.val[0]": {
+        "register": "Vt.2S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2S"
       }
     },
     "Architectures": [
@@ -40790,31 +237824,37 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_f64",
+    "name": "vld3_lane_f64",
     "arguments": [
       "float64_t const * ptr",
-      "float64x2x2_t src",
+      "float64x1x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float64x2x2_t"
+      "value": "float64x1x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 0
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.2D"
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.1D"
       }
     },
     "Architectures": [
@@ -40822,31 +237862,37 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_p16",
+    "name": "vld3_lane_p16",
     "arguments": [
       "poly16_t const * ptr",
-      "poly16x8x2_t src",
+      "poly16x4x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly16x8x2_t"
+      "value": "poly16x4x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.8H"
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4H"
       }
     },
     "Architectures": [
@@ -40856,31 +237902,37 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_p64",
+    "name": "vld3_lane_p64",
     "arguments": [
       "poly64_t const * ptr",
-      "poly64x2x2_t src",
+      "poly64x1x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly64x2x2_t"
+      "value": "poly64x1x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 0
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.2D"
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.1D"
       }
     },
     "Architectures": [
@@ -40888,63 +237940,77 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_p8",
+    "name": "vld3_lane_p8",
     "arguments": [
       "poly8_t const * ptr",
-      "poly8x16x2_t src",
+      "poly8x8x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly8x16x2_t"
+      "value": "poly8x8x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.16B"
+      "src.val[0]": {
+        "register": "Vt.8B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_s16",
+    "name": "vld3_lane_s16",
     "arguments": [
       "int16_t const * ptr",
-      "int16x8x2_t src",
+      "int16x4x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int16x8x2_t"
+      "value": "int16x4x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.8H"
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4H"
       }
     },
     "Architectures": [
@@ -40954,31 +238020,37 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_s32",
+    "name": "vld3_lane_s32",
     "arguments": [
       "int32_t const * ptr",
-      "int32x4x2_t src",
+      "int32x2x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int32x4x2_t"
+      "value": "int32x2x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.4S"
+      "src.val[0]": {
+        "register": "Vt.2S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2S"
       }
     },
     "Architectures": [
@@ -40988,31 +238060,37 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_s64",
+    "name": "vld3_lane_s64",
     "arguments": [
       "int64_t const * ptr",
-      "int64x2x2_t src",
+      "int64x1x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int64x2x2_t"
+      "value": "int64x1x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 0
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.2D"
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.1D"
       }
     },
     "Architectures": [
@@ -41020,63 +238098,77 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_s8",
+    "name": "vld3_lane_s8",
     "arguments": [
       "int8_t const * ptr",
-      "int8x16x2_t src",
+      "int8x8x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int8x16x2_t"
+      "value": "int8x8x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.16B"
+      "src.val[0]": {
+        "register": "Vt.8B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_u16",
+    "name": "vld3_lane_u16",
     "arguments": [
       "uint16_t const * ptr",
-      "uint16x8x2_t src",
+      "uint16x4x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint16x8x2_t"
+      "value": "uint16x4x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.8H"
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4H"
       }
     },
     "Architectures": [
@@ -41086,31 +238178,37 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_u32",
+    "name": "vld3_lane_u32",
     "arguments": [
       "uint32_t const * ptr",
-      "uint32x4x2_t src",
+      "uint32x2x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint32x4x2_t"
+      "value": "uint32x2x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.4S"
+      "src.val[0]": {
+        "register": "Vt.2S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2S"
       }
     },
     "Architectures": [
@@ -41120,31 +238218,37 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_u64",
+    "name": "vld3_lane_u64",
     "arguments": [
       "uint64_t const * ptr",
-      "uint64x2x2_t src",
+      "uint64x1x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint64x2x2_t"
+      "value": "uint64x1x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 0
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.2D"
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.1D"
       }
     },
     "Architectures": [
@@ -41152,50 +238256,58 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_lane_u8",
+    "name": "vld3_lane_u8",
     "arguments": [
       "uint8_t const * ptr",
-      "uint8x16x2_t src",
+      "uint8x8x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint8x16x2_t"
+      "value": "uint8x8x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt2.16B"
+      "src.val[0]": {
+        "register": "Vt.8B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_p16",
+    "name": "vld3_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8x2_t"
+      "value": "poly16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41209,18 +238321,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_p64",
+    "name": "vld3_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2x2_t"
+      "value": "poly64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41228,22 +238340,23 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD2"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_p8",
+    "name": "vld3_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16x2_t"
+      "value": "poly8x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41257,18 +238370,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_s16",
+    "name": "vld3_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8x2_t"
+      "value": "int16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41282,18 +238395,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_s32",
+    "name": "vld3_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4x2_t"
+      "value": "int32x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41307,18 +238420,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_s64",
+    "name": "vld3_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x2x2_t"
+      "value": "int64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41326,22 +238439,24 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD2"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_s8",
+    "name": "vld3_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x16x2_t"
+      "value": "int8x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41355,18 +238470,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_u16",
+    "name": "vld3_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x8x2_t"
+      "value": "uint16x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41380,18 +238495,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_u32",
+    "name": "vld3_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4x2_t"
+      "value": "uint32x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41405,18 +238520,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_u64",
+    "name": "vld3_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2x2_t"
+      "value": "uint64x1x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41424,22 +238539,24 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD2"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld2q_u8",
+    "name": "vld3_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16x2_t"
+      "value": "uint8x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41453,18 +238570,18 @@
     ],
     "instructions": [
       [
-        "LD2"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_f16",
+    "name": "vld3q_dup_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4x3_t"
+      "value": "float16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41484,12 +238601,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_f32",
+    "name": "vld3q_dup_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2x3_t"
+      "value": "float32x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41509,12 +238626,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_f64",
+    "name": "vld3q_dup_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1x3_t"
+      "value": "float64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41532,12 +238649,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_p16",
+    "name": "vld3q_dup_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4x3_t"
+      "value": "poly16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41557,12 +238674,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_p64",
+    "name": "vld3q_dup_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1x3_t"
+      "value": "poly64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41570,7 +238687,6 @@
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -41581,12 +238697,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_p8",
+    "name": "vld3q_dup_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8x3_t"
+      "value": "poly8x16x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41606,12 +238722,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_s16",
+    "name": "vld3q_dup_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4x3_t"
+      "value": "int16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41631,12 +238747,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_s32",
+    "name": "vld3q_dup_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2x3_t"
+      "value": "int32x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41656,12 +238772,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_s64",
+    "name": "vld3q_dup_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1x3_t"
+      "value": "int64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41669,8 +238785,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -41681,12 +238795,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_s8",
+    "name": "vld3q_dup_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8x3_t"
+      "value": "int8x16x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41706,12 +238820,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_u16",
+    "name": "vld3q_dup_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4x3_t"
+      "value": "uint16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41731,12 +238845,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_u32",
+    "name": "vld3q_dup_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2x3_t"
+      "value": "uint32x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41756,12 +238870,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_u64",
+    "name": "vld3q_dup_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x1x3_t"
+      "value": "uint64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41769,8 +238883,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -41781,12 +238893,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_dup_u8",
+    "name": "vld3q_dup_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8x3_t"
+      "value": "uint8x16x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41806,12 +238918,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_f16",
+    "name": "vld3q_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4x3_t"
+      "value": "float16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41831,12 +238943,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_f32",
+    "name": "vld3q_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2x3_t"
+      "value": "float32x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41856,12 +238968,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_f64",
+    "name": "vld3q_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1x3_t"
+      "value": "float64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -41873,31 +238985,37 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_f16",
+    "name": "vld3q_lane_f16",
     "arguments": [
       "float16_t const * ptr",
-      "float16x4x3_t src",
+      "float16x8x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float16x4x3_t"
+      "value": "float16x8x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8H"
       }
     },
     "Architectures": [
@@ -41913,25 +239031,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_f32",
+    "name": "vld3q_lane_f32",
     "arguments": [
       "float32_t const * ptr",
-      "float32x2x3_t src",
+      "float32x4x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float32x2x3_t"
+      "value": "float32x4x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.2S"
+      "src.val[0]": {
+        "register": "Vt.4S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4S"
       }
     },
     "Architectures": [
@@ -41947,25 +239071,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_f64",
+    "name": "vld3q_lane_f64",
     "arguments": [
       "float64_t const * ptr",
-      "float64x1x3_t src",
+      "float64x2x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float64x1x3_t"
+      "value": "float64x2x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2D"
       }
     },
     "Architectures": [
@@ -41979,25 +239109,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_p16",
+    "name": "vld3q_lane_p16",
     "arguments": [
       "poly16_t const * ptr",
-      "poly16x4x3_t src",
+      "poly16x8x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly16x4x3_t"
+      "value": "poly16x8x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8H"
       }
     },
     "Architectures": [
@@ -42013,25 +239149,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_p64",
+    "name": "vld3q_lane_p64",
     "arguments": [
       "poly64_t const * ptr",
-      "poly64x1x3_t src",
+      "poly64x2x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly64x1x3_t"
+      "value": "poly64x2x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2D"
       }
     },
     "Architectures": [
@@ -42045,30 +239187,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_p8",
+    "name": "vld3q_lane_p8",
     "arguments": [
       "poly8_t const * ptr",
-      "poly8x8x3_t src",
+      "poly8x16x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly8x8x3_t"
+      "value": "poly8x16x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 15
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.8B"
+      "src.val[0]": {
+        "register": "Vt.16B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -42079,25 +239225,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_s16",
+    "name": "vld3q_lane_s16",
     "arguments": [
       "int16_t const * ptr",
-      "int16x4x3_t src",
+      "int16x8x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int16x4x3_t"
+      "value": "int16x8x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8H"
       }
     },
     "Architectures": [
@@ -42113,25 +239265,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_s32",
+    "name": "vld3q_lane_s32",
     "arguments": [
       "int32_t const * ptr",
-      "int32x2x3_t src",
+      "int32x4x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int32x2x3_t"
+      "value": "int32x4x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.2S"
+      "src.val[0]": {
+        "register": "Vt.4S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4S"
       }
     },
     "Architectures": [
@@ -42147,25 +239305,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_s64",
+    "name": "vld3q_lane_s64",
     "arguments": [
       "int64_t const * ptr",
-      "int64x1x3_t src",
+      "int64x2x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int64x1x3_t"
+      "value": "int64x2x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2D"
       }
     },
     "Architectures": [
@@ -42179,30 +239343,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_s8",
+    "name": "vld3q_lane_s8",
     "arguments": [
       "int8_t const * ptr",
-      "int8x8x3_t src",
+      "int8x16x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int8x8x3_t"
+      "value": "int8x16x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 15
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.8B"
+      "src.val[0]": {
+        "register": "Vt.16B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -42213,25 +239381,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_u16",
+    "name": "vld3q_lane_u16",
     "arguments": [
       "uint16_t const * ptr",
-      "uint16x4x3_t src",
+      "uint16x8x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint16x4x3_t"
+      "value": "uint16x8x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8H"
       }
     },
     "Architectures": [
@@ -42247,25 +239421,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_u32",
+    "name": "vld3q_lane_u32",
     "arguments": [
       "uint32_t const * ptr",
-      "uint32x2x3_t src",
+      "uint32x4x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint32x2x3_t"
+      "value": "uint32x4x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.2S"
+      "src.val[0]": {
+        "register": "Vt.4S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4S"
       }
     },
     "Architectures": [
@@ -42281,25 +239461,31 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_u64",
+    "name": "vld3q_lane_u64",
     "arguments": [
       "uint64_t const * ptr",
-      "uint64x1x3_t src",
+      "uint64x2x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint64x1x3_t"
+      "value": "uint64x2x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2D"
       }
     },
     "Architectures": [
@@ -42313,30 +239499,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_lane_u8",
+    "name": "vld3q_lane_u8",
     "arguments": [
       "uint8_t const * ptr",
-      "uint8x8x3_t src",
+      "uint8x16x3_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint8x8x3_t"
+      "value": "uint8x16x3_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 15
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.8B"
+      "src.val[0]": {
+        "register": "Vt.16B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -42347,12 +239537,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_p16",
+    "name": "vld3q_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4x3_t"
+      "value": "poly16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42372,12 +239562,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_p64",
+    "name": "vld3q_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1x3_t"
+      "value": "poly64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42385,23 +239575,22 @@
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_p8",
+    "name": "vld3q_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8x3_t"
+      "value": "poly8x16x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42421,12 +239610,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_s16",
+    "name": "vld3q_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4x3_t"
+      "value": "int16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42446,12 +239635,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_s32",
+    "name": "vld3q_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2x3_t"
+      "value": "int32x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42471,12 +239660,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_s64",
+    "name": "vld3q_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1x3_t"
+      "value": "int64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42484,24 +239673,22 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_s8",
+    "name": "vld3q_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8x3_t"
+      "value": "int8x16x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42521,12 +239708,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_u16",
+    "name": "vld3q_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4x3_t"
+      "value": "uint16x8x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42546,12 +239733,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_u32",
+    "name": "vld3q_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2x3_t"
+      "value": "uint32x4x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42571,12 +239758,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_u64",
+    "name": "vld3q_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x1x3_t"
+      "value": "uint64x2x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42584,24 +239771,22 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD3"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3_u8",
+    "name": "vld3q_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8x3_t"
+      "value": "uint8x16x3_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42621,12 +239806,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_f16",
+    "name": "vld4_dup_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8x3_t"
+      "value": "float16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42640,18 +239825,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_f32",
+    "name": "vld4_dup_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4x3_t"
+      "value": "float32x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42665,18 +239850,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_f64",
+    "name": "vld4_dup_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2x3_t"
+      "value": "float64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42688,18 +239873,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_p16",
+    "name": "vld4_dup_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8x3_t"
+      "value": "poly16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42713,18 +239898,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_p64",
+    "name": "vld4_dup_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2x3_t"
+      "value": "poly64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42732,22 +239917,23 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_p8",
+    "name": "vld4_dup_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16x3_t"
+      "value": "poly8x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42761,18 +239947,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_s16",
+    "name": "vld4_dup_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8x3_t"
+      "value": "int16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42786,18 +239972,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_s32",
+    "name": "vld4_dup_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4x3_t"
+      "value": "int32x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42811,18 +239997,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_s64",
+    "name": "vld4_dup_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x2x3_t"
+      "value": "int64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42830,22 +240016,24 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_s8",
+    "name": "vld4_dup_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x16x3_t"
+      "value": "int8x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42859,18 +240047,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_u16",
+    "name": "vld4_dup_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x8x3_t"
+      "value": "uint16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42884,18 +240072,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_u32",
+    "name": "vld4_dup_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4x3_t"
+      "value": "uint32x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42909,18 +240097,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_u64",
+    "name": "vld4_dup_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2x3_t"
+      "value": "uint64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42928,22 +240116,24 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_dup_u8",
+    "name": "vld4_dup_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16x3_t"
+      "value": "uint8x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42957,18 +240147,18 @@
     ],
     "instructions": [
       [
-        "LD3R"
+        "LD4R"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_f16",
+    "name": "vld4_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8x3_t"
+      "value": "float16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -42982,18 +240172,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_f32",
+    "name": "vld4_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x4x3_t"
+      "value": "float32x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43007,18 +240197,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_f64",
+    "name": "vld4_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x2x3_t"
+      "value": "float64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43030,31 +240220,40 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_f16",
+    "name": "vld4_lane_f16",
     "arguments": [
       "float16_t const * ptr",
-      "float16x8x3_t src",
+      "float16x4x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float16x8x3_t"
+      "value": "float16x4x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.8H"
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "src.val[3]": {
+        "register": "Vt4.4H"
       }
     },
     "Architectures": [
@@ -43064,31 +240263,40 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_f32",
+    "name": "vld4_lane_f32",
     "arguments": [
       "float32_t const * ptr",
-      "float32x4x3_t src",
+      "float32x2x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float32x4x3_t"
+      "value": "float32x2x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.4S"
+      "src.val[0]": {
+        "register": "Vt.2S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "src.val[3]": {
+        "register": "Vt4.2S"
       }
     },
     "Architectures": [
@@ -43098,31 +240306,40 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_f64",
+    "name": "vld4_lane_f64",
     "arguments": [
       "float64_t const * ptr",
-      "float64x2x3_t src",
+      "float64x1x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float64x2x3_t"
+      "value": "float64x1x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 0
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.2D"
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "src.val[3]": {
+        "register": "Vt4.1D"
       }
     },
     "Architectures": [
@@ -43130,31 +240347,40 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_p16",
+    "name": "vld4_lane_p16",
     "arguments": [
       "poly16_t const * ptr",
-      "poly16x8x3_t src",
+      "poly16x4x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly16x8x3_t"
+      "value": "poly16x4x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.8H"
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "src.val[3]": {
+        "register": "Vt4.4H"
       }
     },
     "Architectures": [
@@ -43164,31 +240390,40 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_p64",
+    "name": "vld4_lane_p64",
     "arguments": [
       "poly64_t const * ptr",
-      "poly64x2x3_t src",
+      "poly64x1x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly64x2x3_t"
+      "value": "poly64x1x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 0
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.2D"
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "src.val[3]": {
+        "register": "Vt4.1D"
       }
     },
     "Architectures": [
@@ -43196,63 +240431,83 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_p8",
+    "name": "vld4_lane_p8",
     "arguments": [
       "poly8_t const * ptr",
-      "poly8x16x3_t src",
+      "poly8x8x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly8x16x3_t"
+      "value": "poly8x8x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.16B"
+      "src.val[0]": {
+        "register": "Vt.8B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "src.val[3]": {
+        "register": "Vt4.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_s16",
+    "name": "vld4_lane_s16",
     "arguments": [
       "int16_t const * ptr",
-      "int16x8x3_t src",
+      "int16x4x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int16x8x3_t"
+      "value": "int16x4x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.8H"
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "src.val[3]": {
+        "register": "Vt4.4H"
       }
     },
     "Architectures": [
@@ -43262,31 +240517,40 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_s32",
+    "name": "vld4_lane_s32",
     "arguments": [
       "int32_t const * ptr",
-      "int32x4x3_t src",
+      "int32x2x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int32x4x3_t"
+      "value": "int32x2x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.4S"
+      "src.val[0]": {
+        "register": "Vt.2S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "src.val[3]": {
+        "register": "Vt4.2S"
       }
     },
     "Architectures": [
@@ -43296,31 +240560,40 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_s64",
+    "name": "vld4_lane_s64",
     "arguments": [
       "int64_t const * ptr",
-      "int64x2x3_t src",
+      "int64x1x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int64x2x3_t"
+      "value": "int64x1x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 0
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.2D"
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "src.val[3]": {
+        "register": "Vt4.1D"
       }
     },
     "Architectures": [
@@ -43328,63 +240601,83 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_s8",
+    "name": "vld4_lane_s8",
     "arguments": [
       "int8_t const * ptr",
-      "int8x16x3_t src",
+      "int8x8x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int8x16x3_t"
+      "value": "int8x8x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.16B"
+      "src.val[0]": {
+        "register": "Vt.8B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "src.val[3]": {
+        "register": "Vt4.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_u16",
+    "name": "vld4_lane_u16",
     "arguments": [
       "uint16_t const * ptr",
-      "uint16x8x3_t src",
+      "uint16x4x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint16x8x3_t"
+      "value": "uint16x4x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.8H"
+      "src.val[0]": {
+        "register": "Vt.4H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "src.val[3]": {
+        "register": "Vt4.4H"
       }
     },
     "Architectures": [
@@ -43394,31 +240687,40 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_u32",
+    "name": "vld4_lane_u32",
     "arguments": [
       "uint32_t const * ptr",
-      "uint32x4x3_t src",
+      "uint32x2x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint32x4x3_t"
+      "value": "uint32x2x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.4S"
+      "src.val[0]": {
+        "register": "Vt.2S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "src.val[3]": {
+        "register": "Vt4.2S"
       }
     },
     "Architectures": [
@@ -43428,31 +240730,40 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_u64",
+    "name": "vld4_lane_u64",
     "arguments": [
       "uint64_t const * ptr",
-      "uint64x2x3_t src",
+      "uint64x1x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint64x2x3_t"
+      "value": "uint64x1x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 0
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.2D"
+      "src.val[0]": {
+        "register": "Vt.1D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "src.val[3]": {
+        "register": "Vt4.1D"
       }
     },
     "Architectures": [
@@ -43460,50 +240771,61 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_lane_u8",
+    "name": "vld4_lane_u8",
     "arguments": [
       "uint8_t const * ptr",
-      "uint8x16x3_t src",
+      "uint8x8x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint8x16x3_t"
+      "value": "uint8x8x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt3.16B"
+      "src.val[0]": {
+        "register": "Vt.8B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "src.val[3]": {
+        "register": "Vt4.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_p16",
+    "name": "vld4_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x8x3_t"
+      "value": "poly16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43517,18 +240839,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_p64",
+    "name": "vld4_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x2x3_t"
+      "value": "poly64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43536,22 +240858,23 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD3"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_p8",
+    "name": "vld4_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x16x3_t"
+      "value": "poly8x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43565,18 +240888,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_s16",
+    "name": "vld4_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x8x3_t"
+      "value": "int16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43590,18 +240913,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_s32",
+    "name": "vld4_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x4x3_t"
+      "value": "int32x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43615,18 +240938,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_s64",
+    "name": "vld4_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x2x3_t"
+      "value": "int64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43634,22 +240957,24 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD3"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_s8",
+    "name": "vld4_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x16x3_t"
+      "value": "int8x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43663,18 +240988,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_u16",
+    "name": "vld4_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x8x3_t"
+      "value": "uint16x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43688,18 +241013,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_u32",
+    "name": "vld4_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x4x3_t"
+      "value": "uint32x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43713,18 +241038,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_u64",
+    "name": "vld4_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x2x3_t"
+      "value": "uint64x1x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43732,22 +241057,24 @@
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD3"
+        "LD1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld3q_u8",
+    "name": "vld4_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x16x3_t"
+      "value": "uint8x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43761,18 +241088,18 @@
     ],
     "instructions": [
       [
-        "LD3"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_f16",
+    "name": "vld4q_dup_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4x4_t"
+      "value": "float16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43792,12 +241119,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_f32",
+    "name": "vld4q_dup_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2x4_t"
+      "value": "float32x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43817,12 +241144,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_f64",
+    "name": "vld4q_dup_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1x4_t"
+      "value": "float64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43840,12 +241167,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_p16",
+    "name": "vld4q_dup_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4x4_t"
+      "value": "poly16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43865,12 +241192,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_p64",
+    "name": "vld4q_dup_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1x4_t"
+      "value": "poly64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43878,7 +241205,6 @@
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -43889,12 +241215,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_p8",
+    "name": "vld4q_dup_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8x4_t"
+      "value": "poly8x16x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43914,12 +241240,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_s16",
+    "name": "vld4q_dup_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4x4_t"
+      "value": "int16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43939,12 +241265,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_s32",
+    "name": "vld4q_dup_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2x4_t"
+      "value": "int32x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43964,12 +241290,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_s64",
+    "name": "vld4q_dup_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1x4_t"
+      "value": "int64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -43977,8 +241303,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -43989,12 +241313,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_s8",
+    "name": "vld4q_dup_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8x4_t"
+      "value": "int8x16x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44014,12 +241338,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_u16",
+    "name": "vld4q_dup_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4x4_t"
+      "value": "uint16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44039,12 +241363,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_u32",
+    "name": "vld4q_dup_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2x4_t"
+      "value": "uint32x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44064,12 +241388,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_u64",
+    "name": "vld4q_dup_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x1x4_t"
+      "value": "uint64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44077,8 +241401,6 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -44089,12 +241411,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_dup_u8",
+    "name": "vld4q_dup_u8",
     "arguments": [
       "uint8_t const * ptr"
     ],
     "return_type": {
-      "value": "uint8x8x4_t"
+      "value": "uint8x16x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44114,12 +241436,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_f16",
+    "name": "vld4q_f16",
     "arguments": [
       "float16_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x4x4_t"
+      "value": "float16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44139,12 +241461,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_f32",
+    "name": "vld4q_f32",
     "arguments": [
       "float32_t const * ptr"
     ],
     "return_type": {
-      "value": "float32x2x4_t"
+      "value": "float32x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44164,12 +241486,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_f64",
+    "name": "vld4q_f64",
     "arguments": [
       "float64_t const * ptr"
     ],
     "return_type": {
-      "value": "float64x1x4_t"
+      "value": "float64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44181,31 +241503,40 @@
     ],
     "instructions": [
       [
-        "LD1"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_f16",
+    "name": "vld4q_lane_f16",
     "arguments": [
       "float16_t const * ptr",
-      "float16x4x4_t src",
+      "float16x8x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float16x4x4_t"
+      "value": "float16x8x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "src.val[3]": {
+        "register": "Vt4.8H"
       }
     },
     "Architectures": [
@@ -44221,25 +241552,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_f32",
+    "name": "vld4q_lane_f32",
     "arguments": [
       "float32_t const * ptr",
-      "float32x2x4_t src",
+      "float32x4x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float32x2x4_t"
+      "value": "float32x4x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.2S"
+      "src.val[0]": {
+        "register": "Vt.4S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "src.val[3]": {
+        "register": "Vt4.4S"
       }
     },
     "Architectures": [
@@ -44255,25 +241595,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_f64",
+    "name": "vld4q_lane_f64",
     "arguments": [
       "float64_t const * ptr",
-      "float64x1x4_t src",
+      "float64x2x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "float64x1x4_t"
+      "value": "float64x2x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "src.val[3]": {
+        "register": "Vt4.2D"
       }
     },
     "Architectures": [
@@ -44287,25 +241636,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_p16",
+    "name": "vld4q_lane_p16",
     "arguments": [
       "poly16_t const * ptr",
-      "poly16x4x4_t src",
+      "poly16x8x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly16x4x4_t"
+      "value": "poly16x8x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "src.val[3]": {
+        "register": "Vt4.8H"
       }
     },
     "Architectures": [
@@ -44321,25 +241679,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_p64",
+    "name": "vld4q_lane_p64",
     "arguments": [
       "poly64_t const * ptr",
-      "poly64x1x4_t src",
+      "poly64x2x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly64x1x4_t"
+      "value": "poly64x2x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "src.val[3]": {
+        "register": "Vt4.2D"
       }
     },
     "Architectures": [
@@ -44353,30 +241720,37 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_p8",
+    "name": "vld4q_lane_p8",
     "arguments": [
       "poly8_t const * ptr",
-      "poly8x8x4_t src",
+      "poly8x16x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "poly8x8x4_t"
+      "value": "poly8x16x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 15
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.8B"
+      "src.val[0]": {
+        "register": "Vt.16B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "src.val[3]": {
+        "register": "Vt4.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -44387,25 +241761,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_s16",
+    "name": "vld4q_lane_s16",
     "arguments": [
       "int16_t const * ptr",
-      "int16x4x4_t src",
+      "int16x8x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int16x4x4_t"
+      "value": "int16x8x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "src.val[3]": {
+        "register": "Vt4.8H"
       }
     },
     "Architectures": [
@@ -44421,25 +241804,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_s32",
+    "name": "vld4q_lane_s32",
     "arguments": [
       "int32_t const * ptr",
-      "int32x2x4_t src",
+      "int32x4x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int32x2x4_t"
+      "value": "int32x4x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.2S"
+      "src.val[0]": {
+        "register": "Vt.4S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "src.val[3]": {
+        "register": "Vt4.4S"
       }
     },
     "Architectures": [
@@ -44455,25 +241847,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_s64",
+    "name": "vld4q_lane_s64",
     "arguments": [
       "int64_t const * ptr",
-      "int64x1x4_t src",
+      "int64x2x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int64x1x4_t"
+      "value": "int64x2x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "src.val[3]": {
+        "register": "Vt4.2D"
       }
     },
     "Architectures": [
@@ -44487,30 +241888,37 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_s8",
+    "name": "vld4q_lane_s8",
     "arguments": [
       "int8_t const * ptr",
-      "int8x8x4_t src",
+      "int8x16x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "int8x8x4_t"
+      "value": "int8x16x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 15
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.8B"
+      "src.val[0]": {
+        "register": "Vt.16B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "src.val[3]": {
+        "register": "Vt4.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -44521,25 +241929,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_u16",
+    "name": "vld4q_lane_u16",
     "arguments": [
       "uint16_t const * ptr",
-      "uint16x4x4_t src",
+      "uint16x8x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint16x4x4_t"
+      "value": "uint16x8x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 3
+        "maximum": 7
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.4H"
+      "src.val[0]": {
+        "register": "Vt.8H"
+      },
+      "src.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "src.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "src.val[3]": {
+        "register": "Vt4.8H"
       }
     },
     "Architectures": [
@@ -44555,25 +241972,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_u32",
+    "name": "vld4q_lane_u32",
     "arguments": [
       "uint32_t const * ptr",
-      "uint32x2x4_t src",
+      "uint32x4x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint32x2x4_t"
+      "value": "uint32x4x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 3
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.2S"
+      "src.val[0]": {
+        "register": "Vt.4S"
+      },
+      "src.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "src.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "src.val[3]": {
+        "register": "Vt4.4S"
       }
     },
     "Architectures": [
@@ -44589,25 +242015,34 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_u64",
+    "name": "vld4q_lane_u64",
     "arguments": [
       "uint64_t const * ptr",
-      "uint64x1x4_t src",
+      "uint64x2x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint64x1x4_t"
+      "value": "uint64x2x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 0
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.1D"
+      "src.val[0]": {
+        "register": "Vt.2D"
+      },
+      "src.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "src.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "src.val[3]": {
+        "register": "Vt4.2D"
       }
     },
     "Architectures": [
@@ -44621,30 +242056,37 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_lane_u8",
+    "name": "vld4q_lane_u8",
     "arguments": [
       "uint8_t const * ptr",
-      "uint8x8x4_t src",
+      "uint8x16x4_t src",
       "const int lane"
     ],
     "return_type": {
-      "value": "uint8x8x4_t"
+      "value": "uint8x16x4_t"
     },
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 15
       },
       "ptr": {
         "register": "Xn"
       },
-      "src": {
-        "register": "Vt4.8B"
+      "src.val[0]": {
+        "register": "Vt.16B"
+      },
+      "src.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "src.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "src.val[3]": {
+        "register": "Vt4.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
@@ -44655,12 +242097,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_p16",
+    "name": "vld4q_p16",
     "arguments": [
       "poly16_t const * ptr"
     ],
     "return_type": {
-      "value": "poly16x4x4_t"
+      "value": "poly16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44680,12 +242122,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_p64",
+    "name": "vld4q_p64",
     "arguments": [
       "poly64_t const * ptr"
     ],
     "return_type": {
-      "value": "poly64x1x4_t"
+      "value": "poly64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44693,23 +242135,22 @@
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_p8",
+    "name": "vld4q_p8",
     "arguments": [
       "poly8_t const * ptr"
     ],
     "return_type": {
-      "value": "poly8x8x4_t"
+      "value": "poly8x16x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44729,12 +242170,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_s16",
+    "name": "vld4q_s16",
     "arguments": [
       "int16_t const * ptr"
     ],
     "return_type": {
-      "value": "int16x4x4_t"
+      "value": "int16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44754,12 +242195,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_s32",
+    "name": "vld4q_s32",
     "arguments": [
       "int32_t const * ptr"
     ],
     "return_type": {
-      "value": "int32x2x4_t"
+      "value": "int32x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44779,12 +242220,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_s64",
+    "name": "vld4q_s64",
     "arguments": [
       "int64_t const * ptr"
     ],
     "return_type": {
-      "value": "int64x1x4_t"
+      "value": "int64x2x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44792,24 +242233,22 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LD4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_s8",
+    "name": "vld4q_s8",
     "arguments": [
       "int8_t const * ptr"
     ],
     "return_type": {
-      "value": "int8x8x4_t"
+      "value": "int8x16x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44829,12 +242268,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_u16",
+    "name": "vld4q_u16",
     "arguments": [
       "uint16_t const * ptr"
     ],
     "return_type": {
-      "value": "uint16x4x4_t"
+      "value": "uint16x8x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44854,12 +242293,12 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_u32",
+    "name": "vld4q_u32",
     "arguments": [
       "uint32_t const * ptr"
     ],
     "return_type": {
-      "value": "uint32x2x4_t"
+      "value": "uint32x4x4_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44879,62 +242318,316 @@
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_u64",
+    "name": "vld4q_u64",
     "arguments": [
       "uint64_t const * ptr"
     ],
     "return_type": {
-      "value": "uint64x1x4_t"
+      "value": "uint64x2x4_t"
+    },
+    "Arguments_Preparation": {
+      "ptr": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vld4q_u8",
+    "arguments": [
+      "uint8_t const * ptr"
+    ],
+    "return_type": {
+      "value": "uint8x16x4_t"
+    },
+    "Arguments_Preparation": {
+      "ptr": {
+        "register": "Xn"
+      }
+    },
+    "Architectures": [
+      "v7",
+      "A32",
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LD4"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vldap1_lane_f64",
+    "arguments": [
+      "float64_t const * ptr",
+      "float64x1_t src",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDAP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vldap1_lane_p64",
+    "arguments": [
+      "poly64_t const * ptr",
+      "poly64x1_t src",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDAP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vldap1_lane_s64",
+    "arguments": [
+      "int64_t const * ptr",
+      "int64x1_t src",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDAP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vldap1_lane_u64",
+    "arguments": [
+      "uint64_t const * ptr",
+      "uint64x1_t src",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "uint64x1_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDAP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vldap1q_lane_f64",
+    "arguments": [
+      "float64_t const * ptr",
+      "float64x2_t src",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDAP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vldap1q_lane_p64",
+    "arguments": [
+      "poly64_t const * ptr",
+      "poly64x2_t src",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "poly64x2_t"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "LDAP1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vldap1q_lane_s64",
+    "arguments": [
+      "int64_t const * ptr",
+      "int64x2_t src",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "int64x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD1"
+        "LDAP1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4_u8",
+    "name": "vldap1q_lane_u64",
     "arguments": [
-      "uint8_t const * ptr"
+      "uint64_t const * ptr",
+      "uint64x2_t src",
+      "const int lane"
     ],
     "return_type": {
-      "value": "uint8x8x4_t"
+      "value": "uint64x2_t"
     },
     "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
       "ptr": {
         "register": "Xn"
+      },
+      "src": {
+        "register": "Vt.2D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LDAP1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_f16",
+    "name": "vldrq_p128",
     "arguments": [
-      "float16_t const * ptr"
+      "poly128_t const * ptr"
     ],
     "return_type": {
-      "value": "float16x8x4_t"
+      "value": "poly128_t"
     },
     "Arguments_Preparation": {
       "ptr": {
@@ -44942,53 +242635,68 @@
       }
     },
     "Architectures": [
-      "v7",
       "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LDR"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_f32",
+    "name": "vluti2_lane_f16",
     "arguments": [
-      "float32_t const * ptr"
+      "float16x4_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "float32x4x4_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_f64",
+    "name": "vluti2_lane_p16",
     "arguments": [
-      "float64_t const * ptr"
+      "poly16x4_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "float64x2x4_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
@@ -44996,47 +242704,63 @@
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_p16",
+    "name": "vluti2_lane_p8",
     "arguments": [
-      "poly16_t const * ptr"
+      "poly8x8_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly16x8x4_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_p64",
+    "name": "vluti2_lane_s16",
     "arguments": [
-      "poly64_t const * ptr"
+      "int16x4_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly64x2x4_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
@@ -45044,97 +242768,127 @@
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_p8",
+    "name": "vluti2_lane_s8",
     "arguments": [
-      "poly8_t const * ptr"
+      "int8x8_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly8x16x4_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_s16",
+    "name": "vluti2_lane_u16",
     "arguments": [
-      "int16_t const * ptr"
+      "uint16x4_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int16x8x4_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_s32",
+    "name": "vluti2_lane_u8",
     "arguments": [
-      "int32_t const * ptr"
+      "uint8x8_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int32x4x4_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_s64",
+    "name": "vluti2_laneq_f16",
     "arguments": [
-      "int64_t const * ptr"
+      "float16x4_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int64x2x4_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
@@ -45142,97 +242896,127 @@
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_s8",
+    "name": "vluti2_laneq_p16",
     "arguments": [
-      "int8_t const * ptr"
+      "poly16x4_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int8x16x4_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_u16",
+    "name": "vluti2_laneq_p8",
     "arguments": [
-      "uint16_t const * ptr"
+      "poly8x8_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint16x8x4_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_u32",
+    "name": "vluti2_laneq_s16",
     "arguments": [
-      "uint32_t const * ptr"
+      "int16x4_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint32x4x4_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_u64",
+    "name": "vluti2_laneq_s8",
     "arguments": [
-      "uint64_t const * ptr"
+      "int8x8_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint64x2x4_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
@@ -45240,97 +243024,127 @@
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_dup_u8",
+    "name": "vluti2_laneq_u16",
     "arguments": [
-      "uint8_t const * ptr"
+      "uint16x4_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint8x16x4_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 7
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4R"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_f16",
+    "name": "vluti2_laneq_u8",
     "arguments": [
-      "float16_t const * ptr"
+      "uint8x8_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "float16x8x4_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_f32",
+    "name": "vluti2q_lane_f16",
     "arguments": [
-      "float32_t const * ptr"
+      "float16x8_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "float32x4x4_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_f64",
+    "name": "vluti2q_lane_p16",
     "arguments": [
-      "float64_t const * ptr"
+      "poly16x8_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "float64x2x4_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
@@ -45338,99 +243152,95 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_f16",
+    "name": "vluti2q_lane_p8",
     "arguments": [
-      "float16_t const * ptr",
-      "float16x8x4_t src",
-      "const int lane"
+      "poly8x16_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "float16x8x4_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 1
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.8H"
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_f32",
+    "name": "vluti2q_lane_s16",
     "arguments": [
-      "float32_t const * ptr",
-      "float32x4x4_t src",
-      "const int lane"
+      "int16x8_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "float32x4x4_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
         "maximum": 3
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.4S"
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_f64",
+    "name": "vluti2q_lane_s8",
     "arguments": [
-      "float64_t const * ptr",
-      "float64x2x4_t src",
-      "const int lane"
+      "int8x16_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "float64x2x4_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
         "maximum": 1
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.2D"
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
@@ -45438,65 +243248,63 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_p16",
+    "name": "vluti2q_lane_u16",
     "arguments": [
-      "poly16_t const * ptr",
-      "poly16x8x4_t src",
-      "const int lane"
+      "uint16x8_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly16x8x4_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
-        "maximum": 7
+        "maximum": 3
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.8H"
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_p64",
+    "name": "vluti2q_lane_u8",
     "arguments": [
-      "poly64_t const * ptr",
-      "poly64x2x4_t src",
-      "const int lane"
+      "uint8x16_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly64x2x4_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
         "maximum": 1
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.2D"
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
@@ -45504,31 +243312,31 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_p8",
+    "name": "vluti2q_laneq_f16",
     "arguments": [
-      "poly8_t const * ptr",
-      "poly8x16x4_t src",
-      "const int lane"
+      "float16x8_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly8x16x4_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 7
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.16B"
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
@@ -45536,99 +243344,95 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_s16",
+    "name": "vluti2q_laneq_p16",
     "arguments": [
-      "int16_t const * ptr",
-      "int16x8x4_t src",
-      "const int lane"
+      "poly16x8_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int16x8x4_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
         "maximum": 7
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.8H"
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_s32",
+    "name": "vluti2q_laneq_p8",
     "arguments": [
-      "int32_t const * ptr",
-      "int32x4x4_t src",
-      "const int lane"
+      "poly8x16_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int32x4x4_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
         "maximum": 3
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.4S"
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_s64",
+    "name": "vluti2q_laneq_s16",
     "arguments": [
-      "int64_t const * ptr",
-      "int64x2x4_t src",
-      "const int lane"
+      "int16x8_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int64x2x4_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
-        "maximum": 1
+        "maximum": 7
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.2D"
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
@@ -45636,31 +243440,31 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_s8",
+    "name": "vluti2q_laneq_s8",
     "arguments": [
-      "int8_t const * ptr",
-      "int8x16x4_t src",
-      "const int lane"
+      "int8x16_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int8x16x4_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 3
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.16B"
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
@@ -45668,99 +243472,98 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_u16",
+    "name": "vluti2q_laneq_u16",
     "arguments": [
-      "uint16_t const * ptr",
-      "uint16x8x4_t src",
-      "const int lane"
+      "uint16x8_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint16x8x4_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
         "maximum": 7
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.8H"
+      "vn": {
+        "register": "Vn.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_u32",
+    "name": "vluti2q_laneq_u8",
     "arguments": [
-      "uint32_t const * ptr",
-      "uint32x4x4_t src",
-      "const int lane"
+      "uint8x16_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint32x4x4_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
         "maximum": 3
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.4S"
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_u64",
+    "name": "vluti4q_lane_f16_x2",
     "arguments": [
-      "uint64_t const * ptr",
-      "uint64x2x4_t src",
-      "const int lane"
+      "float16x8x2_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint64x2x4_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
         "maximum": 1
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.2D"
+      "vn.val[0]": {
+        "register": "Vn1.8H"
+      },
+      "vn.val[1]": {
+        "register": "Vn2.8H"
       }
     },
     "Architectures": [
@@ -45768,31 +243571,34 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_lane_u8",
+    "name": "vluti4q_lane_p16_x2",
     "arguments": [
-      "uint8_t const * ptr",
-      "uint8x16x4_t src",
-      "const int lane"
+      "poly16x8x2_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint8x16x4_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
-      "lane": {
+      "index": {
         "minimum": 0,
-        "maximum": 15
+        "maximum": 1
       },
-      "ptr": {
-        "register": "Xn"
+      "vm": {
+        "register": "Vm"
       },
-      "src": {
-        "register": "Vt4.16B"
+      "vn.val[0]": {
+        "register": "Vn1.8H"
+      },
+      "vn.val[1]": {
+        "register": "Vn2.8H"
       }
     },
     "Architectures": [
@@ -45800,47 +243606,66 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_p16",
+    "name": "vluti4q_lane_p8",
     "arguments": [
-      "poly16_t const * ptr"
+      "poly8x16_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly16x8x4_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_p64",
+    "name": "vluti4q_lane_s16_x2",
     "arguments": [
-      "poly64_t const * ptr"
+      "int16x8x2_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly64x2x4_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn.val[0]": {
+        "register": "Vn1.8H"
+      },
+      "vn.val[1]": {
+        "register": "Vn2.8H"
       }
     },
     "Architectures": [
@@ -45848,97 +243673,133 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_p8",
+    "name": "vluti4q_lane_s8",
     "arguments": [
-      "poly8_t const * ptr"
+      "int8x16_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly8x16x4_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_s16",
+    "name": "vluti4q_lane_u16_x2",
     "arguments": [
-      "int16_t const * ptr"
+      "uint16x8x2_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int16x8x4_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn.val[0]": {
+        "register": "Vn1.8H"
+      },
+      "vn.val[1]": {
+        "register": "Vn2.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_s32",
+    "name": "vluti4q_lane_u8",
     "arguments": [
-      "int32_t const * ptr"
+      "uint8x16_t vn",
+      "uint8x8_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int32x4x4_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_s64",
+    "name": "vluti4q_laneq_f16_x2",
     "arguments": [
-      "int64_t const * ptr"
+      "float16x8x2_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int64x2x4_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn.val[0]": {
+        "register": "Vn1.8H"
+      },
+      "vn.val[1]": {
+        "register": "Vn2.8H"
       }
     },
     "Architectures": [
@@ -45946,97 +243807,133 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_s8",
+    "name": "vluti4q_laneq_p16_x2",
     "arguments": [
-      "int8_t const * ptr"
+      "poly16x8x2_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "int8x16x4_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn.val[0]": {
+        "register": "Vn1.8H"
+      },
+      "vn.val[1]": {
+        "register": "Vn2.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_u16",
+    "name": "vluti4q_laneq_p8",
     "arguments": [
-      "uint16_t const * ptr"
+      "poly8x16_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint16x8x4_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_u32",
+    "name": "vluti4q_laneq_s16_x2",
     "arguments": [
-      "uint32_t const * ptr"
+      "int16x8x2_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint32x4x4_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn.val[0]": {
+        "register": "Vn1.8H"
+      },
+      "vn.val[1]": {
+        "register": "Vn2.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_u64",
+    "name": "vluti4q_laneq_s8",
     "arguments": [
-      "uint64_t const * ptr"
+      "int8x16_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint64x2x4_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
@@ -46044,56 +243941,74 @@
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vld4q_u8",
+    "name": "vluti4q_laneq_u16_x2",
     "arguments": [
-      "uint8_t const * ptr"
+      "uint16x8x2_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "uint8x16x4_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 3
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn.val[0]": {
+        "register": "Vn1.8H"
+      },
+      "vn.val[1]": {
+        "register": "Vn2.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LD4"
+        "LUTI4"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vldrq_p128",
+    "name": "vluti4q_laneq_u8",
     "arguments": [
-      "poly128_t const * ptr"
+      "uint8x16_t vn",
+      "uint8x16_t vm",
+      "const int index"
     ],
     "return_type": {
-      "value": "poly128_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
-      "ptr": {
-        "register": "Xn"
+      "index": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "vm": {
+        "register": "Vm"
+      },
+      "vn": {
+        "register": "Vn.16B"
       }
     },
     "Architectures": [
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LDR"
+        "LUTI4"
       ]
     ]
   },
@@ -47029,7 +244944,7 @@
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2S"
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
@@ -47098,7 +245013,7 @@
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2S"
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
@@ -48272,7 +246187,7 @@
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2S"
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
@@ -48341,7 +246256,7 @@
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2S"
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
@@ -48612,7 +246527,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -48643,7 +246558,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -48660,13 +246575,10 @@
       "value": "float32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
       "lane": {
         "minimum": 0,
         "maximum": 1
-      },
-      "v": {}
+      }
     },
     "Architectures": [
       "v7",
@@ -48675,7 +246587,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -48844,20 +246756,17 @@
       "value": "float32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
       "lane": {
         "minimum": 0,
         "maximum": 3
-      },
-      "v": {}
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -49034,7 +246943,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -50621,7 +248530,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -50652,7 +248561,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -50669,13 +248578,10 @@
       "value": "float32x4_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
       "lane": {
         "minimum": 0,
         "maximum": 1
-      },
-      "v": {}
+      }
     },
     "Architectures": [
       "v7",
@@ -50684,7 +248590,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -50853,20 +248759,17 @@
       "value": "float32x4_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
       "lane": {
         "minimum": 0,
         "maximum": 3
-      },
-      "v": {}
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -51043,7 +248946,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -51406,7 +249309,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -51437,7 +249340,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -51454,13 +249357,10 @@
       "value": "float32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
       "lane": {
         "minimum": 0,
         "maximum": 1
-      },
-      "v": {}
+      }
     },
     "Architectures": [
       "v7",
@@ -51469,7 +249369,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -51638,20 +249538,17 @@
       "value": "float32x2_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
       "lane": {
         "minimum": 0,
         "maximum": 3
-      },
-      "v": {}
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -51828,7 +249725,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -53415,7 +251312,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -53446,7 +251343,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -53463,13 +251360,10 @@
       "value": "float32x4_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
       "lane": {
         "minimum": 0,
         "maximum": 1
-      },
-      "v": {}
+      }
     },
     "Architectures": [
       "v7",
@@ -53478,7 +251372,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -53647,20 +251541,17 @@
       "value": "float32x4_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {},
       "lane": {
         "minimum": 0,
         "maximum": 3
-      },
-      "v": {}
+      }
     },
     "Architectures": [
       "A64"
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -53837,7 +251728,7 @@
     ],
     "instructions": [
       [
-        "RESULT[I]"
+        "result"
       ]
     ]
   },
@@ -75594,8 +273485,11 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -75621,8 +273515,11 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -75648,8 +273545,11 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -75675,8 +273575,11 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -75702,8 +273605,11 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -75729,8 +273635,11 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -75756,8 +273665,14 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -75783,8 +273698,14 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -75810,8 +273731,14 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -75837,8 +273764,14 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -75864,8 +273797,14 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -75891,8 +273830,14 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -75918,8 +273863,17 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -75945,8 +273899,17 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -75972,8 +273935,17 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -75999,8 +273971,17 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -76026,8 +274007,17 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -76053,8 +274043,17 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -76270,8 +274269,11 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -76301,8 +274303,11 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -76332,8 +274337,11 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -76363,8 +274371,11 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -76394,8 +274405,11 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -76425,8 +274439,11 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
       }
     },
     "Architectures": [
@@ -76456,8 +274473,14 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -76487,8 +274510,14 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -76518,8 +274547,14 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -76549,8 +274584,14 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -76580,8 +274621,14 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -76611,8 +274658,14 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
       }
     },
     "Architectures": [
@@ -76642,8 +274695,17 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -76673,8 +274735,17 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -76704,8 +274775,17 @@
       "idx": {
         "register": "Vm.8B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -76735,8 +274815,17 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -76766,8 +274855,17 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -76797,8 +274895,17 @@
       "idx": {
         "register": "Vm.16B"
       },
-      "t": {
+      "t.val[0]": {
         "register": "Vn.16B"
+      },
+      "t.val[1]": {
+        "register": "Vn+1.16B"
+      },
+      "t.val[2]": {
+        "register": "Vn+2.16B"
+      },
+      "t.val[3]": {
+        "register": "Vn+3.16B"
       }
     },
     "Architectures": [
@@ -77184,7 +275291,9 @@
       "a": {
         "register": "Vn.2D"
       },
-      "b": {}
+      "b": {
+        "register": "Vm.2D"
+      }
     },
     "Architectures": [
       "A64"
@@ -93148,6 +291257,141 @@
       ]
     ]
   },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vscale_f16",
+    "arguments": [
+      "float16x4_t vn",
+      "int16x4_t vm"
+    ],
+    "return_type": {
+      "value": "float16x4_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.4H"
+      },
+      "vn": {
+        "register": "Vn.4H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vscale_f32",
+    "arguments": [
+      "float32x2_t vn",
+      "int32x2_t vm"
+    ],
+    "return_type": {
+      "value": "float32x2_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.2S"
+      },
+      "vn": {
+        "register": "Vn.2S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vscaleq_f16",
+    "arguments": [
+      "float16x8_t vn",
+      "int16x8_t vm"
+    ],
+    "return_type": {
+      "value": "float16x8_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.8H"
+      },
+      "vn": {
+        "register": "Vn.8H"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vscaleq_f32",
+    "arguments": [
+      "float32x4_t vn",
+      "int32x4_t vm"
+    ],
+    "return_type": {
+      "value": "float32x4_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.4S"
+      },
+      "vn": {
+        "register": "Vn.4S"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vscaleq_f64",
+    "arguments": [
+      "float64x2_t vn",
+      "int64x2_t vm"
+    ],
+    "return_type": {
+      "value": "float64x2_t"
+    },
+    "Arguments_Preparation": {
+      "vm": {
+        "register": "Vm.2D"
+      },
+      "vn": {
+        "register": "Vn.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "FSCALE"
+      ]
+    ]
+  },
   {
     "SIMD_ISA": "Neon",
     "name": "vset_lane_f16",
@@ -94410,7 +292654,9 @@
       "value": "uint64x2_t"
     },
     "Arguments_Preparation": {
-      "hash_ab": {},
+      "hash_ab": {
+        "register": "Vm.2D"
+      },
       "hash_c_": {
         "register": "Qn"
       },
@@ -94445,7 +292691,9 @@
       "hash_gf": {
         "register": "Qn"
       },
-      "kwh_kwh2": {}
+      "kwh_kwh2": {
+        "register": "Vm.2D"
+      }
     },
     "Architectures": [
       "A64"
@@ -94501,7 +292749,9 @@
       "w14_15": {
         "register": "Vn.2D"
       },
-      "w9_10": {}
+      "w9_10": {
+        "register": "Vm.2D"
+      }
     },
     "Architectures": [
       "A64"
@@ -97647,8 +295897,12 @@
       "a": {
         "register": "Vd.4S"
       },
-      "b": {},
-      "c": {}
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      }
     },
     "Architectures": [
       "A64"
@@ -97674,8 +295928,12 @@
       "a": {
         "register": "Vd.4S"
       },
-      "b": {},
-      "c": {}
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      }
     },
     "Architectures": [
       "A64"
@@ -97701,8 +295959,12 @@
       "a": {
         "register": "Vn.4S"
       },
-      "b": {},
-      "c": {}
+      "b": {
+        "register": "Vm.4S"
+      },
+      "c": {
+        "register": "Va.4S"
+      }
     },
     "Architectures": [
       "A64"
@@ -97729,8 +295991,12 @@
       "a": {
         "register": "Vd.4S"
       },
-      "b": {},
-      "c": {},
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      },
       "imm2": {
         "minimum": 0,
         "maximum": 3
@@ -97761,8 +296027,12 @@
       "a": {
         "register": "Vd.4S"
       },
-      "b": {},
-      "c": {},
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      },
       "imm2": {
         "minimum": 0,
         "maximum": 3
@@ -97793,8 +296063,12 @@
       "a": {
         "register": "Vd.4S"
       },
-      "b": {},
-      "c": {},
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      },
       "imm2": {
         "minimum": 0,
         "maximum": 3
@@ -97825,8 +296099,12 @@
       "a": {
         "register": "Vd.4S"
       },
-      "b": {},
-      "c": {},
+      "b": {
+        "register": "Vn.4S"
+      },
+      "c": {
+        "register": "Vm.4S"
+      },
       "imm2": {
         "minimum": 0,
         "maximum": 3
@@ -97855,7 +296133,9 @@
       "a": {
         "register": "Vn.4S"
       },
-      "b": {}
+      "b": {
+        "register": "Vm.4S"
+      }
     },
     "Architectures": [
       "A64"
@@ -97880,7 +296160,9 @@
       "a": {
         "register": "Vd.4S"
       },
-      "b": {}
+      "b": {
+        "register": "Vn.4S"
+      }
     },
     "Architectures": [
       "A64"
@@ -99838,7 +298120,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -99867,7 +298152,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -99896,7 +298187,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -99954,7 +298254,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
         "register": "Vt2.2S"
       }
     },
@@ -99983,7 +298286,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
         "register": "Vt3.2S"
       }
     },
@@ -100012,7 +298321,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "val.val[3]": {
         "register": "Vt4.2S"
       }
     },
@@ -100068,7 +298386,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -100095,7 +298416,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -100122,7 +298449,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -100608,6 +298944,42 @@
       ]
     ]
   },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vst1_mf8_x4",
+    "arguments": [
+      "int8_t * ptr",
+      "int8x8x4_t val"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "ptr": {
+        "register": "Xn"
+      },
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
+        "register": "Vt4.8B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1"
+      ]
+    ]
+  },
   {
     "SIMD_ISA": "Neon",
     "name": "vst1_p16",
@@ -100651,7 +299023,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -100680,7 +299055,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -100709,7 +299090,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -100766,7 +299156,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -100794,7 +299187,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -100822,7 +299221,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -100879,7 +299287,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
         "register": "Vt2.8B"
       }
     },
@@ -100908,7 +299319,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
         "register": "Vt3.8B"
       }
     },
@@ -100937,7 +299354,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
         "register": "Vt4.8B"
       }
     },
@@ -100995,7 +299421,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -101024,7 +299453,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -101053,7 +299488,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -101111,7 +299555,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
         "register": "Vt2.2S"
       }
     },
@@ -101140,7 +299587,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
         "register": "Vt3.2S"
       }
     },
@@ -101169,7 +299622,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "val.val[3]": {
         "register": "Vt4.2S"
       }
     },
@@ -101227,7 +299689,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -101256,7 +299721,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -101285,7 +299756,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -101343,7 +299823,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
         "register": "Vt2.8B"
       }
     },
@@ -101372,7 +299855,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
         "register": "Vt3.8B"
       }
     },
@@ -101401,7 +299890,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
         "register": "Vt4.8B"
       }
     },
@@ -101459,7 +299957,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -101488,7 +299989,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -101517,7 +300024,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -101575,7 +300091,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
         "register": "Vt2.2S"
       }
     },
@@ -101604,7 +300123,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
         "register": "Vt3.2S"
       }
     },
@@ -101633,7 +300158,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "val.val[3]": {
         "register": "Vt4.2S"
       }
     },
@@ -101691,7 +300225,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -101720,7 +300257,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -101749,7 +300292,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -101807,7 +300359,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
         "register": "Vt2.8B"
       }
     },
@@ -101836,7 +300391,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
         "register": "Vt3.8B"
       }
     },
@@ -101865,7 +300426,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
         "register": "Vt4.8B"
       }
     },
@@ -101923,7 +300493,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -101952,7 +300525,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -101981,7 +300560,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -102039,7 +300627,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
         "register": "Vt2.4S"
       }
     },
@@ -102068,7 +300659,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
         "register": "Vt3.4S"
       }
     },
@@ -102097,7 +300694,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "val.val[3]": {
         "register": "Vt4.4S"
       }
     },
@@ -102153,7 +300759,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -102180,7 +300789,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -102207,7 +300822,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -102693,6 +301317,42 @@
       ]
     ]
   },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vst1q_mf8_x4",
+    "arguments": [
+      "int8_t * ptr",
+      "int8x16x4_t val"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "ptr": {
+        "register": "Xn"
+      },
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
+        "register": "Vt4.16B"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "ST1"
+      ]
+    ]
+  },
   {
     "SIMD_ISA": "Neon",
     "name": "vst1q_p16",
@@ -102736,7 +301396,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -102765,7 +301428,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -102794,7 +301463,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -102851,7 +301529,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -102879,7 +301560,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -102908,7 +301595,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -102965,7 +301661,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
         "register": "Vt2.16B"
       }
     },
@@ -102994,7 +301693,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
         "register": "Vt3.16B"
       }
     },
@@ -103023,7 +301728,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
         "register": "Vt4.16B"
       }
     },
@@ -103081,7 +301795,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -103110,7 +301827,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -103139,7 +301862,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -103197,7 +301929,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
         "register": "Vt2.4S"
       }
     },
@@ -103226,7 +301961,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
         "register": "Vt3.4S"
       }
     },
@@ -103255,7 +301996,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "val.val[3]": {
         "register": "Vt4.4S"
       }
     },
@@ -103313,7 +302063,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -103342,7 +302095,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -103371,7 +302130,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -103429,7 +302197,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
         "register": "Vt2.16B"
       }
     },
@@ -103458,7 +302229,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
         "register": "Vt3.16B"
       }
     },
@@ -103487,7 +302264,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
         "register": "Vt4.16B"
       }
     },
@@ -103545,7 +302331,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -103574,7 +302363,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -103603,7 +302398,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -103661,7 +302465,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
         "register": "Vt2.4S"
       }
     },
@@ -103690,7 +302497,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
         "register": "Vt3.4S"
       }
     },
@@ -103719,7 +302532,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "val.val[3]": {
         "register": "Vt4.4S"
       }
     },
@@ -103777,7 +302599,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -103806,7 +302631,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -103835,7 +302666,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -103893,7 +302733,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
         "register": "Vt2.16B"
       }
     },
@@ -103922,7 +302765,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
         "register": "Vt3.16B"
       }
     },
@@ -103951,7 +302800,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
         "register": "Vt4.16B"
       }
     },
@@ -103980,7 +302838,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -104009,7 +302870,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
         "register": "Vt2.2S"
       }
     },
@@ -104038,7 +302902,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -104070,7 +302937,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -104104,7 +302974,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
         "register": "Vt2.2S"
       }
     },
@@ -104138,7 +303011,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -104170,7 +303046,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -104204,7 +303083,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -104236,7 +303118,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
         "register": "Vt2.8B"
       }
     },
@@ -104270,7 +303155,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -104304,7 +303192,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
         "register": "Vt2.2S"
       }
     },
@@ -104338,7 +303229,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -104370,7 +303264,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
         "register": "Vt2.8B"
       }
     },
@@ -104404,7 +303301,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -104438,7 +303338,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
         "register": "Vt2.2S"
       }
     },
@@ -104472,7 +303375,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -104504,7 +303410,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
         "register": "Vt2.8B"
       }
     },
@@ -104533,7 +303442,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -104562,7 +303474,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -104590,7 +303505,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
         "register": "Vt2.8B"
       }
     },
@@ -104619,7 +303537,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -104648,7 +303569,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
         "register": "Vt2.2S"
       }
     },
@@ -104677,7 +303601,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -104706,7 +303633,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
         "register": "Vt2.8B"
       }
     },
@@ -104735,7 +303665,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
         "register": "Vt2.4H"
       }
     },
@@ -104764,7 +303697,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
         "register": "Vt2.2S"
       }
     },
@@ -104793,7 +303729,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
         "register": "Vt2.1D"
       }
     },
@@ -104822,7 +303761,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
         "register": "Vt2.8B"
       }
     },
@@ -104851,7 +303793,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -104880,7 +303825,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
         "register": "Vt2.4S"
       }
     },
@@ -104909,7 +303857,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -104941,7 +303892,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -104975,7 +303929,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
         "register": "Vt2.4S"
       }
     },
@@ -105004,12 +303961,15 @@
     "Arguments_Preparation": {
       "lane": {
         "minimum": 0,
-        "maximum": 2
+        "maximum": 1
       },
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -105041,7 +304001,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -105075,7 +304038,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -105107,7 +304073,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
         "register": "Vt2.16B"
       }
     },
@@ -105139,7 +304108,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -105173,7 +304145,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
         "register": "Vt2.4S"
       }
     },
@@ -105207,7 +304182,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -105239,7 +304217,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
         "register": "Vt2.16B"
       }
     },
@@ -105271,7 +304252,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -105305,7 +304289,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
         "register": "Vt2.4S"
       }
     },
@@ -105339,7 +304326,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -105371,7 +304361,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
         "register": "Vt2.16B"
       }
     },
@@ -105398,7 +304391,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -105427,7 +304423,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -105454,7 +304453,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
         "register": "Vt2.16B"
       }
     },
@@ -105483,7 +304485,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -105512,7 +304517,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
         "register": "Vt2.4S"
       }
     },
@@ -105541,7 +304549,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -105568,7 +304579,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
         "register": "Vt2.16B"
       }
     },
@@ -105597,7 +304611,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
         "register": "Vt2.8H"
       }
     },
@@ -105626,7 +304643,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
         "register": "Vt2.4S"
       }
     },
@@ -105655,7 +304675,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
         "register": "Vt2.2D"
       }
     },
@@ -105682,7 +304705,10 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
         "register": "Vt2.16B"
       }
     },
@@ -105711,7 +304737,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -105740,7 +304772,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
         "register": "Vt3.2S"
       }
     },
@@ -105769,7 +304807,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -105801,7 +304845,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -105835,7 +304885,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
         "register": "Vt3.2S"
       }
     },
@@ -105869,7 +304925,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -105901,7 +304963,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -105935,7 +305003,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -105967,7 +305041,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
         "register": "Vt3.8B"
       }
     },
@@ -106001,7 +305081,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -106035,7 +305121,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
         "register": "Vt3.2S"
       }
     },
@@ -106069,7 +305161,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -106101,7 +305199,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
         "register": "Vt3.8B"
       }
     },
@@ -106135,7 +305239,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -106169,7 +305279,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
         "register": "Vt3.2S"
       }
     },
@@ -106203,7 +305319,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -106235,7 +305357,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
         "register": "Vt3.8B"
       }
     },
@@ -106264,7 +305392,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -106293,7 +305427,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -106321,7 +305461,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
         "register": "Vt3.8B"
       }
     },
@@ -106350,7 +305496,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -106379,7 +305531,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
         "register": "Vt3.2S"
       }
     },
@@ -106408,7 +305566,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -106437,7 +305601,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
         "register": "Vt3.8B"
       }
     },
@@ -106466,7 +305636,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
         "register": "Vt3.4H"
       }
     },
@@ -106495,7 +305671,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
         "register": "Vt3.2S"
       }
     },
@@ -106524,7 +305706,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
         "register": "Vt3.1D"
       }
     },
@@ -106553,7 +305741,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
         "register": "Vt3.8B"
       }
     },
@@ -106582,7 +305776,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -106611,7 +305811,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
         "register": "Vt3.4S"
       }
     },
@@ -106640,7 +305846,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -106672,7 +305884,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -106706,7 +305924,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
         "register": "Vt3.4S"
       }
     },
@@ -106740,7 +305964,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -106772,7 +306002,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -106806,7 +306042,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -106838,7 +306080,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
         "register": "Vt3.16B"
       }
     },
@@ -106872,7 +306120,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -106906,7 +306160,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
         "register": "Vt3.4S"
       }
     },
@@ -106940,7 +306200,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -106972,7 +306238,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
         "register": "Vt3.16B"
       }
     },
@@ -107006,7 +306278,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -107040,7 +306318,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
         "register": "Vt3.4S"
       }
     },
@@ -107074,7 +306358,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -107106,7 +306396,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
         "register": "Vt3.16B"
       }
     },
@@ -107135,7 +306431,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -107164,7 +306466,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -107191,7 +306499,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
         "register": "Vt3.16B"
       }
     },
@@ -107220,7 +306534,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -107249,7 +306569,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
         "register": "Vt3.4S"
       }
     },
@@ -107278,7 +306604,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -107305,7 +306637,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
         "register": "Vt3.16B"
       }
     },
@@ -107334,7 +306672,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
         "register": "Vt3.8H"
       }
     },
@@ -107363,7 +306707,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
         "register": "Vt3.4S"
       }
     },
@@ -107392,7 +306742,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
         "register": "Vt3.2D"
       }
     },
@@ -107419,7 +306775,13 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
         "register": "Vt3.16B"
       }
     },
@@ -107448,7 +306810,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -107477,7 +306848,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "val.val[3]": {
         "register": "Vt4.2S"
       }
     },
@@ -107506,7 +306886,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -107538,7 +306927,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -107572,7 +306970,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "val.val[3]": {
         "register": "Vt4.2S"
       }
     },
@@ -107606,7 +307013,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -107638,7 +307054,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -107672,7 +307097,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -107704,7 +307138,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
         "register": "Vt4.8B"
       }
     },
@@ -107738,7 +307181,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -107772,7 +307224,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "val.val[3]": {
         "register": "Vt4.2S"
       }
     },
@@ -107806,7 +307267,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -107838,7 +307308,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
         "register": "Vt4.8B"
       }
     },
@@ -107872,7 +307351,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -107906,7 +307394,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "val.val[3]": {
         "register": "Vt4.2S"
       }
     },
@@ -107940,7 +307437,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -107972,7 +307478,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
         "register": "Vt4.8B"
       }
     },
@@ -108001,7 +307516,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -108030,7 +307554,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -108058,7 +307591,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
         "register": "Vt4.8B"
       }
     },
@@ -108087,7 +307629,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -108116,7 +307667,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "val.val[3]": {
         "register": "Vt4.2S"
       }
     },
@@ -108145,7 +307705,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -108174,7 +307743,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
         "register": "Vt4.8B"
       }
     },
@@ -108203,7 +307781,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4H"
+      },
+      "val.val[3]": {
         "register": "Vt4.4H"
       }
     },
@@ -108232,7 +307819,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2S"
+      },
+      "val.val[3]": {
         "register": "Vt4.2S"
       }
     },
@@ -108261,7 +307857,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.1D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.1D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.1D"
+      },
+      "val.val[3]": {
         "register": "Vt4.1D"
       }
     },
@@ -108290,7 +307895,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8B"
+      },
+      "val.val[3]": {
         "register": "Vt4.8B"
       }
     },
@@ -108319,7 +307933,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -108348,7 +307971,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "val.val[3]": {
         "register": "Vt4.4S"
       }
     },
@@ -108377,7 +308009,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -108409,7 +308050,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -108443,7 +308093,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "val.val[3]": {
         "register": "Vt4.4S"
       }
     },
@@ -108477,7 +308136,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -108509,7 +308177,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -108543,7 +308220,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -108575,7 +308261,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
         "register": "Vt4.16B"
       }
     },
@@ -108607,7 +308302,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -108641,7 +308345,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "val.val[3]": {
         "register": "Vt4.4S"
       }
     },
@@ -108675,7 +308388,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -108707,7 +308429,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
         "register": "Vt4.16B"
       }
     },
@@ -108739,7 +308470,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -108773,7 +308513,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "val.val[3]": {
         "register": "Vt4.4S"
       }
     },
@@ -108807,7 +308556,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -108839,7 +308597,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
         "register": "Vt4.16B"
       }
     },
@@ -108866,7 +308633,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -108895,7 +308671,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -108922,7 +308707,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
         "register": "Vt4.16B"
       }
     },
@@ -108951,7 +308745,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -108980,7 +308783,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "val.val[3]": {
         "register": "Vt4.4S"
       }
     },
@@ -109009,7 +308821,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -109036,7 +308857,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
         "register": "Vt4.16B"
       }
     },
@@ -109065,7 +308895,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.8H"
+      },
+      "val.val[1]": {
+        "register": "Vt2.8H"
+      },
+      "val.val[2]": {
+        "register": "Vt3.8H"
+      },
+      "val.val[3]": {
         "register": "Vt4.8H"
       }
     },
@@ -109094,7 +308933,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.4S"
+      },
+      "val.val[1]": {
+        "register": "Vt2.4S"
+      },
+      "val.val[2]": {
+        "register": "Vt3.4S"
+      },
+      "val.val[3]": {
         "register": "Vt4.4S"
       }
     },
@@ -109123,7 +308971,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.2D"
+      },
+      "val.val[1]": {
+        "register": "Vt2.2D"
+      },
+      "val.val[2]": {
+        "register": "Vt3.2D"
+      },
+      "val.val[3]": {
         "register": "Vt4.2D"
       }
     },
@@ -109150,7 +309007,16 @@
       "ptr": {
         "register": "Xn"
       },
-      "val": {
+      "val.val[0]": {
+        "register": "Vt.16B"
+      },
+      "val.val[1]": {
+        "register": "Vt2.16B"
+      },
+      "val.val[2]": {
+        "register": "Vt3.16B"
+      },
+      "val.val[3]": {
         "register": "Vt4.16B"
       }
     },
@@ -109165,6 +309031,262 @@
       ]
     ]
   },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vstl1_lane_f64",
+    "arguments": [
+      "float64_t * ptr",
+      "float64x1_t val",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "val": {
+        "register": "Vt.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STL1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vstl1_lane_p64",
+    "arguments": [
+      "poly64_t * ptr",
+      "poly64x1_t val",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "val": {
+        "register": "Vt.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STL1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vstl1_lane_s64",
+    "arguments": [
+      "int64_t * ptr",
+      "int64x1_t val",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "val": {
+        "register": "Vt.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STL1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vstl1_lane_u64",
+    "arguments": [
+      "uint64_t * ptr",
+      "uint64x1_t val",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 0
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "val": {
+        "register": "Vt.1D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STL1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vstl1q_lane_f64",
+    "arguments": [
+      "float64_t * ptr",
+      "float64x2_t val",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "val": {
+        "register": "Vt.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STL1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vstl1q_lane_p64",
+    "arguments": [
+      "poly64_t * ptr",
+      "poly64x2_t val",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "val": {
+        "register": "Vt.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STL1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vstl1q_lane_s64",
+    "arguments": [
+      "int64_t * ptr",
+      "int64x2_t val",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "val": {
+        "register": "Vt.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STL1"
+      ]
+    ]
+  },
+  {
+    "SIMD_ISA": "Neon",
+    "name": "vstl1q_lane_u64",
+    "arguments": [
+      "uint64_t * ptr",
+      "uint64x2_t val",
+      "const int lane"
+    ],
+    "return_type": {
+      "value": "void"
+    },
+    "Arguments_Preparation": {
+      "lane": {
+        "minimum": 0,
+        "maximum": 1
+      },
+      "ptr": {
+        "register": "Xn"
+      },
+      "val": {
+        "register": "Vt.2D"
+      }
+    },
+    "Architectures": [
+      "A64"
+    ],
+    "instructions": [
+      [
+        "STL1"
+      ]
+    ]
+  },
   {
     "SIMD_ISA": "Neon",
     "name": "vstrq_p128",
@@ -111004,6 +311126,7 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -111077,6 +311200,7 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -111096,10 +311220,12 @@
       "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "Zeros(64):a": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111123,10 +311249,12 @@
       "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "Zeros(64):a": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111150,10 +311278,12 @@
       "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "Zeros(64):a": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111177,10 +311307,12 @@
       "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "a.val[1]:a.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111204,10 +311336,12 @@
       "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "a.val[1]:a.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111231,10 +311365,12 @@
       "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "a.val[1]:a.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111258,10 +311394,15 @@
       "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "Zeros(64):a.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "a.val[1]:a.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111285,10 +311426,15 @@
       "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "Zeros(64):a.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "a.val[1]:a.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111312,10 +311458,15 @@
       "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "Zeros(64):a.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "a.val[1]:a.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111339,10 +311490,15 @@
       "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "a.val[1]:a.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "a.val[3]:a.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111366,10 +311522,15 @@
       "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "a.val[1]:a.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "a.val[3]:a.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111393,10 +311554,15 @@
       "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {
+      "a.val[1]:a.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "a.val[3]:a.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111421,11 +311587,15 @@
       "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "Zeros(64):b": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "a": {
+        "register": "Vd.8B"
+      },
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111453,11 +311623,15 @@
       "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "Zeros(64):b": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "a": {
+        "register": "Vd.8B"
+      },
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111485,11 +311659,15 @@
       "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "Zeros(64):b": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "a": {
+        "register": "Vd.8B"
+      },
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111517,11 +311695,15 @@
       "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b.val[1]:b.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111546,11 +311728,15 @@
       "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b.val[1]:b.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111575,11 +311761,15 @@
       "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b.val[1]:b.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111604,11 +311794,18 @@
       "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "Zeros(64):b.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b.val[1]:b.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111636,11 +311833,18 @@
       "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "Zeros(64):b.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b.val[1]:b.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111668,11 +311872,18 @@
       "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "Zeros(64):b.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b.val[1]:b.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "idx": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111700,11 +311911,18 @@
       "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b.val[1]:b.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "b.val[3]:b.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111729,11 +311947,18 @@
       "value": "int8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b.val[1]:b.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "b.val[3]:b.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -111758,11 +311983,18 @@
       "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
-      "a": {},
-      "b": {
+      "a": {
+        "register": "Vd.8B"
+      },
+      "b.val[1]:b.val[0]": {
         "register": "Vn.16B"
       },
-      "idx": {}
+      "b.val[3]:b.val[2]": {
+        "register": "Vn+1.16B"
+      },
+      "c": {
+        "register": "Vm.8B"
+      }
     },
     "Architectures": [
       "v7",
@@ -114684,6 +314916,7 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -114789,6 +315022,7 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -114820,6 +315054,7 @@
       }
     },
     "Architectures": [
+      "A32",
       "A64"
     ],
     "instructions": [
@@ -116771,7 +317006,9 @@
       "a": {
         "register": "Vn.2D"
       },
-      "b": {},
+      "b": {
+        "register": "Vm.2D"
+      },
       "imm6": {
         "minimum": 0,
         "maximum": 63
@@ -117311,996 +317548,138 @@
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
-      },
-      "b": {
-        "register": "Vm.16B"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP1"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip1q_u16",
-    "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
-    ],
-    "return_type": {
-      "value": "uint16x8_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
-      },
-      "b": {
-        "register": "Vm.8H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP1"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip1q_u32",
-    "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
-    ],
-    "return_type": {
-      "value": "uint32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
-      },
-      "b": {
-        "register": "Vm.4S"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP1"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip1q_u64",
-    "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
-    ],
-    "return_type": {
-      "value": "uint64x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
-      },
-      "b": {
-        "register": "Vm.2D"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP1"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip1q_u8",
-    "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
-    ],
-    "return_type": {
-      "value": "uint8x16_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
-      },
-      "b": {
-        "register": "Vm.16B"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP1"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_f16",
-    "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
-    ],
-    "return_type": {
-      "value": "float16x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
-      },
-      "b": {
-        "register": "Vm.4H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_f32",
-    "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
-    ],
-    "return_type": {
-      "value": "float32x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
-      },
-      "b": {
-        "register": "Vm.2S"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_p16",
-    "arguments": [
-      "poly16x4_t a",
-      "poly16x4_t b"
-    ],
-    "return_type": {
-      "value": "poly16x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
-      },
-      "b": {
-        "register": "Vm.4H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_p8",
-    "arguments": [
-      "poly8x8_t a",
-      "poly8x8_t b"
-    ],
-    "return_type": {
-      "value": "poly8x8_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_s16",
-    "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
-    ],
-    "return_type": {
-      "value": "int16x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
-      },
-      "b": {
-        "register": "Vm.4H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_s32",
-    "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
-    ],
-    "return_type": {
-      "value": "int32x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
-      },
-      "b": {
-        "register": "Vm.2S"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_s8",
-    "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
-    ],
-    "return_type": {
-      "value": "int8x8_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_u16",
-    "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
-    ],
-    "return_type": {
-      "value": "uint16x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
-      },
-      "b": {
-        "register": "Vm.4H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_u32",
-    "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
-    ],
-    "return_type": {
-      "value": "uint32x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
-      },
-      "b": {
-        "register": "Vm.2S"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2_u8",
-    "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
-    ],
-    "return_type": {
-      "value": "uint8x8_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8B"
-      },
-      "b": {
-        "register": "Vm.8B"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_f16",
-    "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
-    ],
-    "return_type": {
-      "value": "float16x8_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
-      },
-      "b": {
-        "register": "Vm.8H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_f32",
-    "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
-    ],
-    "return_type": {
-      "value": "float32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
-      },
-      "b": {
-        "register": "Vm.4S"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_f64",
-    "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
-    ],
-    "return_type": {
-      "value": "float64x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
-      },
-      "b": {
-        "register": "Vm.2D"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_p16",
-    "arguments": [
-      "poly16x8_t a",
-      "poly16x8_t b"
-    ],
-    "return_type": {
-      "value": "poly16x8_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
-      },
-      "b": {
-        "register": "Vm.8H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_p64",
-    "arguments": [
-      "poly64x2_t a",
-      "poly64x2_t b"
-    ],
-    "return_type": {
-      "value": "poly64x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
-      },
-      "b": {
-        "register": "Vm.2D"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_p8",
-    "arguments": [
-      "poly8x16_t a",
-      "poly8x16_t b"
-    ],
-    "return_type": {
-      "value": "poly8x16_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
-      },
-      "b": {
-        "register": "Vm.16B"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_s16",
-    "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
-    ],
-    "return_type": {
-      "value": "int16x8_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
-      },
-      "b": {
-        "register": "Vm.8H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_s32",
-    "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
-    ],
-    "return_type": {
-      "value": "int32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
-      },
-      "b": {
-        "register": "Vm.4S"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_s64",
-    "arguments": [
-      "int64x2_t a",
-      "int64x2_t b"
-    ],
-    "return_type": {
-      "value": "int64x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
-      },
-      "b": {
-        "register": "Vm.2D"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_s8",
-    "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
-    ],
-    "return_type": {
-      "value": "int8x16_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
-      },
-      "b": {
-        "register": "Vm.16B"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_u16",
-    "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
-    ],
-    "return_type": {
-      "value": "uint16x8_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.8H"
-      },
-      "b": {
-        "register": "Vm.8H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_u32",
-    "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
-    ],
-    "return_type": {
-      "value": "uint32x4_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4S"
-      },
-      "b": {
-        "register": "Vm.4S"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_u64",
-    "arguments": [
-      "uint64x2_t a",
-      "uint64x2_t b"
-    ],
-    "return_type": {
-      "value": "uint64x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2D"
-      },
-      "b": {
-        "register": "Vm.2D"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip2q_u8",
-    "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
-    ],
-    "return_type": {
-      "value": "uint8x16_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.16B"
-      },
-      "b": {
-        "register": "Vm.16B"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip_f16",
-    "arguments": [
-      "float16x4_t a",
-      "float16x4_t b"
-    ],
-    "return_type": {
-      "value": "float16x4x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
-      },
-      "b": {
-        "register": "Vm.4H"
-      }
-    },
-    "Architectures": [
-      "v7",
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP1",
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip_f32",
-    "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
-    ],
-    "return_type": {
-      "value": "float32x2x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.2S"
-      },
-      "b": {
-        "register": "Vm.2S"
-      }
-    },
-    "Architectures": [
-      "v7",
-      "A32",
-      "A64"
-    ],
-    "instructions": [
-      [
-        "ZIP1",
-        "ZIP2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vzip_p16",
-    "arguments": [
-      "poly16x4_t a",
-      "poly16x4_t b"
-    ],
-    "return_type": {
-      "value": "poly16x4x2_t"
-    },
-    "Arguments_Preparation": {
-      "a": {
-        "register": "Vn.4H"
+        "register": "Vn.16B"
       },
       "b": {
-        "register": "Vm.4H"
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
-        "ZIP2"
+        "ZIP1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzip_p8",
+    "name": "vzip1q_u16",
     "arguments": [
-      "poly8x8_t a",
-      "poly8x8_t b"
+      "uint16x8_t a",
+      "uint16x8_t b"
     ],
     "return_type": {
-      "value": "poly8x8x2_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8B"
+        "register": "Vn.8H"
       },
       "b": {
-        "register": "Vm.8B"
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
-        "ZIP2"
+        "ZIP1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzip_s16",
+    "name": "vzip1q_u32",
     "arguments": [
-      "int16x4_t a",
-      "int16x4_t b"
+      "uint32x4_t a",
+      "uint32x4_t b"
     ],
     "return_type": {
-      "value": "int16x4x2_t"
+      "value": "uint32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.4H"
+        "register": "Vn.4S"
       },
       "b": {
-        "register": "Vm.4H"
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
-        "ZIP2"
+        "ZIP1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzip_s32",
+    "name": "vzip1q_u64",
     "arguments": [
-      "int32x2_t a",
-      "int32x2_t b"
+      "uint64x2_t a",
+      "uint64x2_t b"
     ],
     "return_type": {
-      "value": "int32x2x2_t"
+      "value": "uint64x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2S"
+        "register": "Vn.2D"
       },
       "b": {
-        "register": "Vm.2S"
+        "register": "Vm.2D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
-        "ZIP2"
+        "ZIP1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzip_s8",
+    "name": "vzip1q_u8",
     "arguments": [
-      "int8x8_t a",
-      "int8x8_t b"
+      "uint8x16_t a",
+      "uint8x16_t b"
     ],
     "return_type": {
-      "value": "int8x8x2_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8B"
+        "register": "Vn.16B"
       },
       "b": {
-        "register": "Vm.8B"
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
-        "ZIP2"
+        "ZIP1"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzip_u16",
+    "name": "vzip2_f16",
     "arguments": [
-      "uint16x4_t a",
-      "uint16x4_t b"
+      "float16x4_t a",
+      "float16x4_t b"
     ],
     "return_type": {
-      "value": "uint16x4x2_t"
+      "value": "float16x4_t"
     },
     "Arguments_Preparation": {
       "a": {
@@ -118311,26 +317690,23 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzip_u32",
+    "name": "vzip2_f32",
     "arguments": [
-      "uint32x2_t a",
-      "uint32x2_t b"
+      "float32x2_t a",
+      "float32x2_t b"
     ],
     "return_type": {
-      "value": "uint32x2x2_t"
+      "value": "float32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
@@ -118341,266 +317717,239 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzip_u8",
+    "name": "vzip2_p16",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b"
+      "poly16x4_t a",
+      "poly16x4_t b"
     ],
     "return_type": {
-      "value": "uint8x8x2_t"
+      "value": "poly16x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8B"
+        "register": "Vn.4H"
       },
       "b": {
-        "register": "Vm.8B"
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_f16",
+    "name": "vzip2_p8",
     "arguments": [
-      "float16x8_t a",
-      "float16x8_t b"
+      "poly8x8_t a",
+      "poly8x8_t b"
     ],
     "return_type": {
-      "value": "float16x8x2_t"
+      "value": "poly8x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.8B"
       },
       "b": {
-        "register": "Vm.8H"
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_f32",
+    "name": "vzip2_s16",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "int16x4_t a",
+      "int16x4_t b"
     ],
     "return_type": {
-      "value": "float32x4x2_t"
+      "value": "int16x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.4S"
+        "register": "Vn.4H"
       },
       "b": {
-        "register": "Vm.4S"
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_p16",
+    "name": "vzip2_s32",
     "arguments": [
-      "poly16x8_t a",
-      "poly16x8_t b"
+      "int32x2_t a",
+      "int32x2_t b"
     ],
     "return_type": {
-      "value": "poly16x8x2_t"
+      "value": "int32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.2S"
       },
       "b": {
-        "register": "Vm.8H"
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_p8",
+    "name": "vzip2_s8",
     "arguments": [
-      "poly8x16_t a",
-      "poly8x16_t b"
+      "int8x8_t a",
+      "int8x8_t b"
     ],
     "return_type": {
-      "value": "poly8x16x2_t"
+      "value": "int8x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vn.8B"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_s16",
+    "name": "vzip2_u16",
     "arguments": [
-      "int16x8_t a",
-      "int16x8_t b"
+      "uint16x4_t a",
+      "uint16x4_t b"
     ],
     "return_type": {
-      "value": "int16x8x2_t"
+      "value": "uint16x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.4H"
       },
       "b": {
-        "register": "Vm.8H"
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_s32",
+    "name": "vzip2_u32",
     "arguments": [
-      "int32x4_t a",
-      "int32x4_t b"
+      "uint32x2_t a",
+      "uint32x2_t b"
     ],
     "return_type": {
-      "value": "int32x4x2_t"
+      "value": "uint32x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.4S"
+        "register": "Vn.2S"
       },
       "b": {
-        "register": "Vm.4S"
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_s8",
+    "name": "vzip2_u8",
     "arguments": [
-      "int8x16_t a",
-      "int8x16_t b"
+      "uint8x8_t a",
+      "uint8x8_t b"
     ],
     "return_type": {
-      "value": "int8x16x2_t"
+      "value": "uint8x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vn.8B"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_u16",
+    "name": "vzip2q_f16",
     "arguments": [
-      "uint16x8_t a",
-      "uint16x8_t b"
+      "float16x8_t a",
+      "float16x8_t b"
     ],
     "return_type": {
-      "value": "uint16x8x2_t"
+      "value": "float16x8_t"
     },
     "Arguments_Preparation": {
       "a": {
@@ -118611,26 +317960,23 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_u32",
+    "name": "vzip2q_f32",
     "arguments": [
-      "uint32x4_t a",
-      "uint32x4_t b"
+      "float32x4_t a",
+      "float32x4_t b"
     ],
     "return_type": {
-      "value": "uint32x4x2_t"
+      "value": "float32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
@@ -118641,63 +317987,57 @@
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vzipq_u8",
+    "name": "vzip2q_f64",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x16_t b"
+      "float64x2_t a",
+      "float64x2_t b"
     ],
     "return_type": {
-      "value": "uint8x16x2_t"
+      "value": "float64x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vn.2D"
       },
       "b": {
-        "register": "Vm.16B"
+        "register": "Vm.2D"
       }
     },
     "Architectures": [
-      "v7",
-      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "ZIP1",
         "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vamin_f32",
+    "name": "vzip2q_p16",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "poly16x8_t a",
+      "poly16x8_t b"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "poly16x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2S"
+        "register": "Vn.8H"
       },
       "b": {
-        "register": "Vm.2S"
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
@@ -118705,26 +318045,26 @@
     ],
     "instructions": [
       [
-        "FAMIN"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vaminq_f32",
+    "name": "vzip2q_p64",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "poly64x2_t a",
+      "poly64x2_t b"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "poly64x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.4S"
+        "register": "Vn.2D"
       },
       "b": {
-        "register": "Vm.4S"
+        "register": "Vm.2D"
       }
     },
     "Architectures": [
@@ -118732,26 +318072,26 @@
     ],
     "instructions": [
       [
-        "FAMIN"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vaminq_f64",
+    "name": "vzip2q_p8",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "poly8x16_t a",
+      "poly8x16_t b"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "poly8x16_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2D"
+        "register": "Vn.16B"
       },
       "b": {
-        "register": "Vm.2D"
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
@@ -118759,26 +318099,26 @@
     ],
     "instructions": [
       [
-        "FAMIN"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vamax_f32",
+    "name": "vzip2q_s16",
     "arguments": [
-      "float32x2_t a",
-      "float32x2_t b"
+      "int16x8_t a",
+      "int16x8_t b"
     ],
     "return_type": {
-      "value": "float32x2_t"
+      "value": "int16x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.2S"
+        "register": "Vn.8H"
       },
       "b": {
-        "register": "Vm.2S"
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
@@ -118786,19 +318126,19 @@
     ],
     "instructions": [
       [
-        "FAMAX"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vamaxq_f32",
+    "name": "vzip2q_s32",
     "arguments": [
-      "float32x4_t a",
-      "float32x4_t b"
+      "int32x4_t a",
+      "int32x4_t b"
     ],
     "return_type": {
-      "value": "float32x4_t"
+      "value": "int32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
@@ -118813,19 +318153,19 @@
     ],
     "instructions": [
       [
-        "FAMAX"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vamaxq_f64",
+    "name": "vzip2q_s64",
     "arguments": [
-      "float64x2_t a",
-      "float64x2_t b"
+      "int64x2_t a",
+      "int64x2_t b"
     ],
     "return_type": {
-      "value": "float64x2_t"
+      "value": "int64x2_t"
     },
     "Arguments_Preparation": {
       "a": {
@@ -118840,34 +318180,26 @@
     ],
     "instructions": [
       [
-        "FAMAX"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2_lane_u8",
+    "name": "vzip2q_s8",
     "arguments": [
-      "uint8x8_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "int8x16_t a",
+      "int8x16_t b"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "int8x16_t"
     },
     "Arguments_Preparation": {
       "a": {
         "register": "Vn.16B"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "r": {
-        "register": "Vd.16B"
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
@@ -118875,34 +318207,26 @@
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2q_lane_u8",
+    "name": "vzip2q_u16",
     "arguments": [
-      "uint8x16_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "uint16x8_t a",
+      "uint16x8_t b"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "uint16x8_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vn.8H"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "r": {
-        "register": "Vd.16B"
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
@@ -118910,34 +318234,26 @@
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2_lane_s8",
+    "name": "vzip2q_u32",
     "arguments": [
-      "int8x8_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "uint32x4_t a",
+      "uint32x4_t b"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "uint32x4_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vn.4S"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "r": {
-        "register": "Vd.16B"
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
@@ -118945,34 +318261,26 @@
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2q_lane_s8",
+    "name": "vzip2q_u64",
     "arguments": [
-      "int8x16_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "uint64x2_t a",
+      "uint64x2_t b"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "uint64x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vn.2D"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "r": {
-        "register": "Vd.16B"
+        "register": "Vm.2D"
       }
     },
     "Architectures": [
@@ -118980,34 +318288,26 @@
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2_lane_p8",
+    "name": "vzip2q_u8",
     "arguments": [
-      "poly8x8_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "uint8x16_t a",
+      "uint8x16_t b"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "uint8x16_t"
     },
     "Arguments_Preparation": {
       "a": {
         "register": "Vn.16B"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "r": {
-        "register": "Vd.16B"
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
@@ -119015,742 +318315,607 @@
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2q_lane_p8",
+    "name": "vzip_f16",
     "arguments": [
-      "poly8x16_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "float16x4_t a",
+      "float16x4_t b"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "float16x4x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.16B"
+        "register": "Vn.4H"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 1
-      },
-      "r": {
-        "register": "Vd.16B"
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2_lane_u16",
+    "name": "vzip_f32",
     "arguments": [
-      "uint16x4_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "float32x2_t a",
+      "float32x2_t b"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "float32x2x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.2S"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.8H"
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2q_lane_u16",
+    "name": "vzip_p16",
     "arguments": [
-      "uint16x8_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "poly16x4_t a",
+      "poly16x4_t b"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "poly16x4x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.4H"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.8H"
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2_lane_s16",
+    "name": "vzip_p8",
     "arguments": [
-      "int16x4_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "poly8x8_t a",
+      "poly8x8_t b"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "poly8x8x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.8B"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.8H"
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2q_lane_s16",
+    "name": "vzip_s16",
     "arguments": [
-      "int16x8_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "int16x4_t a",
+      "int16x4_t b"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "int16x4x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.4H"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.8H"
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2_lane_p16",
+    "name": "vzip_s32",
     "arguments": [
-      "poly16x4_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "int32x2_t a",
+      "int32x2_t b"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "int32x2x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.2S"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.8H"
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI2"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti2q_lane_p16",
+    "name": "vzip_s8",
     "arguments": [
-      "poly16x8_t a",
-      "uint8x8_t b",
-      "const int lane"
+      "int8x8_t a",
+      "int8x8_t b"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "int8x8x2_t"
     },
     "Arguments_Preparation": {
       "a": {
-        "register": "Vn.8H"
+        "register": "Vn.8B"
       },
       "b": {
-        "register": "Vm"
-      },
-      "lane": {
-        "minimum": 0,
-        "maximum": 3
-      },
-      "r": {
-        "register": "Vd.8H"
-      }
-    },
-    "Architectures": [
-      "A64"
-    ],
-    "instructions": [
-      [
-        "LUTI2"
-      ]
-    ]
-  },
-  {
-    "SIMD_ISA": "Neon",
-    "name": "vluti4q_lane_u8",
-    "arguments": [
-      "uint8x16_t vn",
-      "uint8x8_t vm",
-      "const int index"
-    ],
-    "return_type": {
-      "value": "uint8x16_t"
-    },
-    "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn.16B"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 0
-      },
-      "r": {
-        "register": "Vd.16B"
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_laneq_u8",
+    "name": "vzip_u16",
     "arguments": [
-      "uint8x16_t vn",
-      "uint8x16_t vm",
-      "const int index"
+      "uint16x4_t a",
+      "uint16x4_t b"
     ],
     "return_type": {
-      "value": "uint8x16_t"
+      "value": "uint16x4x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn.16B"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 1
+      "a": {
+        "register": "Vn.4H"
       },
-      "r": {
-        "register": "Vd.16B"
+      "b": {
+        "register": "Vm.4H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_lane_s8",
+    "name": "vzip_u32",
     "arguments": [
-      "int8x16_t vn",
-      "uint8x8_t vm",
-      "const int index"
+      "uint32x2_t a",
+      "uint32x2_t b"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "uint32x2x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn.16B"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 0
+      "a": {
+        "register": "Vn.2S"
       },
-      "r": {
-        "register": "Vd.16B"
+      "b": {
+        "register": "Vm.2S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_laneq_s8",
+    "name": "vzip_u8",
     "arguments": [
-      "int8x16_t vn",
-      "uint8x16_t vm",
-      "const int index"
+      "uint8x8_t a",
+      "uint8x8_t b"
     ],
     "return_type": {
-      "value": "int8x16_t"
+      "value": "uint8x8x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn.16B"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 1
+      "a": {
+        "register": "Vn.8B"
       },
-      "r": {
-        "register": "Vd.16B"
+      "b": {
+        "register": "Vm.8B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_lane_p8",
+    "name": "vzipq_f16",
     "arguments": [
-      "poly8x16_t vn",
-      "uint8x8_t vm",
-      "const int index"
+      "float16x8_t a",
+      "float16x8_t b"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "float16x8x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn.16B"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 0
+      "a": {
+        "register": "Vn.8H"
       },
-      "r": {
-        "register": "Vd.16B"
+      "b": {
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_laneq_p8",
+    "name": "vzipq_f32",
     "arguments": [
-      "poly8x16_t vn",
-      "uint8x16_t vm",
-      "const int index"
+      "float32x4_t a",
+      "float32x4_t b"
     ],
     "return_type": {
-      "value": "poly8x16_t"
+      "value": "float32x4x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn.16B"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 1
+      "a": {
+        "register": "Vn.4S"
       },
-      "r": {
-        "register": "Vd.16B"
+      "b": {
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_lane_u16_x2",
+    "name": "vzipq_p16",
     "arguments": [
-      "uint16x8x2_t vn",
-      "uint8x8_t vm",
-      "const int index"
+      "poly16x8_t a",
+      "poly16x8_t b"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "poly16x8x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn1.8H"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 1
+      "a": {
+        "register": "Vn.8H"
       },
-      "r": {
-        "register": "Vd.8H"
+      "b": {
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_laneq_u16_x2",
+    "name": "vzipq_p8",
     "arguments": [
-      "uint16x8x2_t vn",
-      "uint8x16_t vm",
-      "const int index"
+      "poly8x16_t a",
+      "poly8x16_t b"
     ],
     "return_type": {
-      "value": "uint16x8_t"
+      "value": "poly8x16x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn1.8H"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 3
+      "a": {
+        "register": "Vn.16B"
       },
-      "r": {
-        "register": "Vd.8H"
+      "b": {
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_lane_s16_x2",
+    "name": "vzipq_s16",
     "arguments": [
-      "int16x8x2_t vn",
-      "uint8x8_t vm",
-      "const int index"
+      "int16x8_t a",
+      "int16x8_t b"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "int16x8x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn1.8H"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 1
+      "a": {
+        "register": "Vn.8H"
       },
-      "r": {
-        "register": "Vd.8H"
+      "b": {
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_laneq_s16_x2",
+    "name": "vzipq_s32",
     "arguments": [
-      "int16x8x2_t vn",
-      "uint8x16_t vm",
-      "const int index"
+      "int32x4_t a",
+      "int32x4_t b"
     ],
     "return_type": {
-      "value": "int16x8_t"
+      "value": "int32x4x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn1.8H"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 3
+      "a": {
+        "register": "Vn.4S"
       },
-      "r": {
-        "register": "Vd.8H"
+      "b": {
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_lane_f16_x2",
+    "name": "vzipq_s8",
     "arguments": [
-      "float16x8x2_t vn",
-      "uint8x8_t vm",
-      "const int index"
+      "int8x16_t a",
+      "int8x16_t b"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "int8x16x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn1.8H"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 1
+      "a": {
+        "register": "Vn.16B"
       },
-      "r": {
-        "register": "Vd.8H"
+      "b": {
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_laneq_f16_x2",
+    "name": "vzipq_u16",
     "arguments": [
-      "float16x8x2_t vn",
-      "uint8x16_t vm",
-      "const int index"
+      "uint16x8_t a",
+      "uint16x8_t b"
     ],
     "return_type": {
-      "value": "float16x8_t"
+      "value": "uint16x8x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn1.8H"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 3
+      "a": {
+        "register": "Vn.8H"
       },
-      "r": {
-        "register": "Vd.8H"
+      "b": {
+        "register": "Vm.8H"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_lane_p16_x2",
+    "name": "vzipq_u32",
     "arguments": [
-      "poly16x8x2_t vn",
-      "uint8x8_t vm",
-      "const int index"
+      "uint32x4_t a",
+      "uint32x4_t b"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "uint32x4x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn1.8H"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 1
+      "a": {
+        "register": "Vn.4S"
       },
-      "r": {
-        "register": "Vd.8H"
+      "b": {
+        "register": "Vm.4S"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
   {
     "SIMD_ISA": "Neon",
-    "name": "vluti4q_laneq_p16_x2",
+    "name": "vzipq_u8",
     "arguments": [
-      "poly16x8x2_t vn",
-      "uint8x16_t vm",
-      "const int index"
+      "uint8x16_t a",
+      "uint8x16_t b"
     ],
     "return_type": {
-      "value": "poly16x8_t"
+      "value": "uint8x16x2_t"
     },
     "Arguments_Preparation": {
-      "vn": {
-        "register": "Vn1.8H"
-      },
-      "vm": {
-        "register": "Vm"
-      },
-      "index": {
-        "minimum": 0,
-        "maximum": 3
+      "a": {
+        "register": "Vn.16B"
       },
-      "r": {
-        "register": "Vd.8H"
+      "b": {
+        "register": "Vm.16B"
       }
     },
     "Architectures": [
+      "v7",
+      "A32",
       "A64"
     ],
     "instructions": [
       [
-        "LUTI4"
+        "ZIP1",
+        "ZIP2"
       ]
     ]
   },
diff --git a/rust-version b/rust-version
index 1ced6098ac..59e9e5a0e6 100644
--- a/rust-version
+++ b/rust-version
@@ -1 +1 @@
-32e7a4b92b109c24e9822c862a7c74436b50e564
+045b17737dab5fcc28e4cbee0cfe2ce4ed363b32
diff --git a/triagebot.toml b/triagebot.toml
index 2c281c8f7d..5b178f0cdf 100644
--- a/triagebot.toml
+++ b/triagebot.toml
@@ -1,7 +1,7 @@
 [assign]
 
 [assign.owners]
-"*" = ["@Amanieu", "@folkertdev", "@sayantn"]
+"*" = ["@Amanieu", "@folkertdev", "@sayantn", "@davidtwco", "@adamgemmell"]
 
 [ping.windows]
 message = """\
@@ -56,7 +56,3 @@ Thanks!
 # Documentation at: https://forge.rust-lang.org/triagebot/issue-links.html
 [issue-links]
 check-commits = false
-
-# Prevents mentions in commits to avoid users being spammed
-# Documentation at: https://forge.rust-lang.org/triagebot/no-mentions.html
-[no-mentions]